crossmate

A collaborative crossword app for iOS
Log | Files | Refs | LICENSE

NYTToXDConverter.swift (30026B)


      1 import Foundation
      2 
      3 /// Converts NYT puzzle JSON (from `/v6/puzzle/daily/{date}.json`) to `.xd` format.
      4 enum NYTToXDConverter {
      5     struct ConversionError: LocalizedError {
      6         let message: String
      7         var errorDescription: String? { message }
      8     }
      9 
     10     /// Single-character grid placeholders for multi-letter (rebus) fills. The
     11     /// `.xd` grid is one character per cell, so a cell whose fill is longer than
     12     /// one letter shows one of these in the grid and is expanded via the
     13     /// `Rebus:` header (the `1` in `1=LW`). These are *not* NYT data — they're
     14     /// our `.xd` serialization. The `.xd` spec allows "digits, most symbols,
     15     /// and printable unicode characters (if needed)" here; we take digits first
     16     /// (the conventional, readable encoding) then the symbols, where "most"
     17     /// means every printable ASCII symbol *except* the ones the grid/header
     18     /// parser already reserves — letters (grid fill), `#`/`_`/`.` (block/empty),
     19     /// `@`/`*` (special markers), and `=`/space (`Rebus:` `key=value` syntax).
     20     /// We stop at ASCII rather than walking into unicode and throw past the
     21     /// ceiling; real puzzles use a handful of distinct fills, nowhere near it.
     22     /// (Walking ASCII naïvely from `'1'` overflows into `=` by the 13th key,
     23     /// which produced an unparseable `==VALUE` entry and a rejected grid char.)
     24     private static let rebusPlaceholders: [Character] = {
     25         let reserved: Set<Character> = ["#", "_", ".", "@", "*", "=", " "]
     26         let symbols = (UInt8(33)...UInt8(126))
     27             .map { Character(UnicodeScalar($0)) }
     28             .filter { !$0.isLetter && !$0.isNumber && !reserved.contains($0) }
     29         return Array("123456789") + symbols
     30     }()
     31 
     32     /// Whether a cell fill must ride into the grid via a `Rebus:` placeholder
     33     /// rather than appear literally. The `.xd` grid is one character per cell and
     34     /// the grid parser only takes letters (plus block/special markers and
     35     /// declared placeholders) as direct fills, so anything longer than one
     36     /// character — or a single non-letter character such as the "2"/"3" cells in
     37     /// an "R2D2"/"C3PO" themer — has to be encoded. Routing every non-letter fill
     38     /// through a placeholder means a digit never appears literally in the grid:
     39     /// each grid digit is unambiguously a placeholder reference, so digit fills
     40     /// and digit placeholders can't collide.
     41     private static func needsRebusEncoding(_ answer: String) -> Bool {
     42         if answer.count != 1 { return true }
     43         return !(answer.first?.isLetter ?? false)
     44     }
     45 
     46     /// Converts raw JSON data from the NYT puzzle endpoint to an `.xd` source string.
     47     static func convert(jsonData: Data) throws -> String {
     48         guard let root = try JSONSerialization.jsonObject(with: jsonData) as? [String: Any] else {
     49             throw ConversionError(message: "Invalid JSON root.")
     50         }
     51 
     52         // -- Metadata --
     53 
     54         let publicationDate = root["publicationDate"] as? String ?? ""
     55         let nytTitle = (root["title"] as? String)?
     56             .trimmingCharacters(in: .whitespacesAndNewlines)
     57         let title = if let nytTitle, !nytTitle.isEmpty {
     58             nytTitle
     59         } else {
     60             title(forPublicationDate: publicationDate)
     61         }
     62         let constructors = root["constructors"] as? [String] ?? []
     63         let editor = root["editor"] as? String
     64         let copyright = root["copyright"] as? String
     65 
     66         guard let bodyArray = root["body"] as? [[String: Any]],
     67               let body = bodyArray.first else {
     68             throw ConversionError(message: "Missing body in puzzle JSON.")
     69         }
     70 
     71         guard let dimensions = body["dimensions"] as? [String: Int],
     72               let width = dimensions["width"],
     73               let height = dimensions["height"] else {
     74             throw ConversionError(message: "Missing dimensions.")
     75         }
     76 
     77         guard let cells = body["cells"] as? [Any] else {
     78             throw ConversionError(message: "Missing cells.")
     79         }
     80 
     81         guard cells.count == width * height else {
     82             throw ConversionError(message: "Cell count (\(cells.count)) does not match dimensions (\(width)x\(height)).")
     83         }
     84 
     85         guard let clues = body["clues"] as? [[String: Any]] else {
     86             throw ConversionError(message: "Missing clues.")
     87         }
     88 
     89         // -- Parse cells into answers --
     90 
     91         // Each cell is either an empty dict (block) or a dict with "answer", "type", etc.
     92         var answers: [String?] = []  // nil = block, String = answer
     93         var acceptedAnswersByCellIndex: [Int: [String]] = [:]
     94 
     95         for (index, cell) in cells.enumerated() {
     96             guard let dict = cell as? [String: Any], !dict.isEmpty else {
     97                 answers.append(nil)
     98                 continue
     99             }
    100 
    101             let rawAnswer = dict["answer"] as? String ?? ""
    102             // NYT encodes a "Schrödinger" square — one correct as either of two
    103             // letters — as a slash-joined answer like "L/W", with every
    104             // acceptable keystroke enumerated in moreAnswers.valid. The slash
    105             // is NYT notation, not a grid character (and isn't on our
    106             // keyboard), so collapse it: the letters together ("LW") become the
    107             // canonical rebus fill, and each single letter rides along as an
    108             // accepted alternate via the moreAnswers handling below.
    109             let isSchrodinger = rawAnswer.contains("/")
    110             let answer = isSchrodinger
    111                 ? rawAnswer.replacingOccurrences(of: "/", with: "")
    112                 : rawAnswer
    113             if answer.isEmpty {
    114                 // A playable cell (NYT type 1) with no answer is an intentional
    115                 // blank: the "gap" in themers like the 2006-07-06 "THE GAP"
    116                 // puzzle, where crossing words read straight through a square
    117                 // whose solution is a literal space. NYT flags these with a
    118                 // blank-marker `moreAnswers` ("B"), which we drop by `continue`ing
    119                 // past the alternates below — the square's only correct state is
    120                 // empty. Any other answerless cell is a block.
    121                 answers.append(intValue(dict["type"]) == 1 ? " " : nil)
    122                 continue
    123             }
    124             answers.append(answer)
    125 
    126             if let moreAnswers = dict["moreAnswers"] as? [String: Any],
    127                let valid = moreAnswers["valid"] as? [String] {
    128                 // For Schrödinger cells, strip the slash from each alternate too
    129                 // so only keyboard-enterable letter forms survive, then
    130                 // dedupe/sort for stable output. Other cells pass their
    131                 // alternates through unchanged so arbitrary accepted strings
    132                 // still round-trip.
    133                 let candidates = isSchrodinger
    134                     ? valid.map { $0.replacingOccurrences(of: "/", with: "") }
    135                     : valid
    136                 let cleaned = candidates.filter { !$0.isEmpty && $0 != answer }
    137                 if !cleaned.isEmpty {
    138                     acceptedAnswersByCellIndex[index] = isSchrodinger
    139                         ? Array(Set(cleaned)).sorted()
    140                         : cleaned
    141                 }
    142             }
    143         }
    144 
    145         // -- Build rebus header if needed --
    146 
    147         // Check for multi-character answers. Each distinct multi-letter fill
    148         // claims the next grid placeholder from `rebusPlaceholders`.
    149         var rebusEntries: [(key: Character, value: String)] = []
    150         var rebusLookup: [String: Character] = [:]
    151 
    152         for answer in answers {
    153             guard let answer, needsRebusEncoding(answer) else { continue }
    154             if rebusLookup[answer] == nil {
    155                 guard rebusLookup.count < rebusPlaceholders.count else {
    156                     throw ConversionError(
    157                         message: "Too many distinct rebus fills (\(rebusLookup.count + 1)); ran out of grid placeholders."
    158                     )
    159                 }
    160                 let key = rebusPlaceholders[rebusLookup.count]
    161                 rebusLookup[answer] = key
    162                 rebusEntries.append((key: key, value: answer))
    163             }
    164         }
    165 
    166         // -- Find special (shaded/circled) cells from NYT cell data --
    167 
    168         let specialCells = specialCellInfo(body: body)
    169 
    170         // -- Build grid lines --
    171 
    172         var gridLines: [String] = []
    173         for row in 0..<height {
    174             var line = ""
    175             for col in 0..<width {
    176                 let index = row * width + col
    177                 guard let answer = answers[index] else {
    178                     line += "#"
    179                     continue
    180                 }
    181                 if specialCells.circled.contains(index) {
    182                     line += "@"
    183                     continue
    184                 }
    185                 if specialCells.shaded.contains(index) {
    186                     line += "*"
    187                     continue
    188                 }
    189                 if needsRebusEncoding(answer) {
    190                     line += String(rebusLookup[answer]!)
    191                 } else {
    192                     line += answer.uppercased()
    193                 }
    194             }
    195             gridLines.append(line)
    196         }
    197 
    198         // -- Build clue lines --
    199 
    200         // Sort clues: Across first, then Down; within each group, by label number.
    201         let sortedClues = clues.sorted { a, b in
    202             let dirA = (a["direction"] as? String) ?? ""
    203             let dirB = (b["direction"] as? String) ?? ""
    204             if dirA != dirB { return dirA == "Across" }
    205             let labelA = intValue(a["label"]) ?? 0
    206             let labelB = intValue(b["label"]) ?? 0
    207             return labelA < labelB
    208         }
    209 
    210         var acrossClueLines: [String] = []
    211         var downClueLines: [String] = []
    212 
    213         for clue in sortedClues {
    214             let direction = clue["direction"] as? String ?? ""
    215             let label = intValue(clue["label"]) ?? 0
    216 
    217             // Extract clue text from the nested structure:
    218             // "text": [{"plain": "Clue text", "formatted": "<i>Clue text</i>"}]
    219             // When NYT supplies emphasis markup in `formatted` (italic themers,
    220             // etc.) convert it to .xd brace markup; otherwise fall back to the
    221             // `plain` reading. `formatted` is *not* always richer than plain —
    222             // image clues carry a bare symbol there ("¥") — so it is only
    223             // preferred when it actually carries markup.
    224             let clueText: String
    225             if let textArray = clue["text"] as? [[String: Any]],
    226                let firstText = textArray.first {
    227                 if let formatted = firstText["formatted"] as? String,
    228                    let markup = xdMarkup(fromFormatted: formatted) {
    229                     clueText = stripAriaLabelPrefix(markup)
    230                 } else if let plain = firstText["plain"] as? String {
    231                     clueText = stripAriaLabelPrefix(plain)
    232                 } else {
    233                     clueText = ""
    234                 }
    235             } else {
    236                 clueText = ""
    237             }
    238 
    239             // Build answer from cell indices
    240             let cellIndices = clue["cells"] as? [Int] ?? []
    241             let answerStr = cellIndices.compactMap { answers[$0] }.joined()
    242 
    243             let prefix = direction == "Across" ? "A" : "D"
    244             let line = "\(prefix)\(label). \(clueText) ~ \(answerStr)"
    245             let acceptLine: String?
    246             let acceptedAnswers = acceptedAnswerVariants(
    247                 cellIndices: cellIndices,
    248                 answers: answers,
    249                 acceptedAnswersByCellIndex: acceptedAnswersByCellIndex
    250             )
    251             if acceptedAnswers.isEmpty {
    252                 acceptLine = nil
    253             } else {
    254                 let escaped = acceptedAnswers.map(escapeAcceptToken).joined(separator: " ")
    255                 acceptLine = "\(prefix)\(label) ^Accept: \(escaped)"
    256             }
    257 
    258             if direction == "Across" {
    259                 acrossClueLines.append(line)
    260                 if let acceptLine { acrossClueLines.append(acceptLine) }
    261             } else {
    262                 downClueLines.append(line)
    263                 if let acceptLine { downClueLines.append(acceptLine) }
    264             }
    265         }
    266 
    267         // -- Assemble .xd source --
    268 
    269         var sections: [String] = []
    270 
    271         // Metadata section
    272         var metadata: [String] = []
    273         metadata.append("Title: \(title)")
    274         metadata.append("CmVer: \(XD.currentCmVersion)")
    275         metadata.append("Publisher: New York Times")
    276         if !publicationDate.isEmpty {
    277             metadata.append("Date: \(publicationDate)")
    278         }
    279         if !constructors.isEmpty {
    280             metadata.append("Author: \(constructors.joined(separator: ", "))")
    281         }
    282         if let editor {
    283             metadata.append("Editor: \(editor)")
    284         }
    285         if let copyright {
    286             metadata.append("Copyright: \(copyright)")
    287         }
    288 
    289         if !rebusEntries.isEmpty {
    290             let rebusStr = rebusEntries
    291                 .map { "\($0.key)=\(escapeRebusValue($0.value))" }
    292                 .joined(separator: " ")
    293             metadata.append("Rebus: \(rebusStr)")
    294         }
    295 
    296         let specialMappings = specialMappings(circled: specialCells.circled, shaded: specialCells.shaded)
    297         if !specialMappings.isEmpty {
    298             metadata.append("Specials: \(specialMappings)")
    299         }
    300 
    301         let relatives = buildRelativeGroups(clues: clues)
    302         if !relatives.isEmpty {
    303             let joined = relatives
    304                 .map { $0.joined(separator: ",") }
    305                 .joined(separator: "; ")
    306             metadata.append("Relatives: \(joined)")
    307         }
    308 
    309         sections.append(metadata.joined(separator: "\n"))
    310 
    311         // Grid section
    312         sections.append(gridLines.joined(separator: "\n"))
    313 
    314         // Clue sections (across then down, separated by blank line)
    315         let allClueLines = acrossClueLines + [""] + downClueLines
    316         sections.append(allClueLines.joined(separator: "\n"))
    317 
    318         // The .xd parser splits sections on two or more consecutive blank lines,
    319         // so we need two blank lines (three newlines) between sections.
    320         return sections.joined(separator: "\n\n\n")
    321     }
    322 
    /// Expands per-cell accepted alternates into whole-answer alternates for
    /// one clue.
    ///
    /// Each variant is the clue's canonical answer with exactly one cell
    /// replaced by one of that cell's accepted alternates. Variants equal to
    /// the canonical answer and repeats are dropped; the rest are returned in
    /// first-seen order (cell by cell, alternate by alternate).
    ///
    /// - Parameters:
    ///   - cellIndices: Grid indices of the clue's cells, in answer order.
    ///   - answers: Fill per grid cell (`nil` for a block). Indices outside
    ///     this array and blocks both contribute an empty string.
    ///   - acceptedAnswersByCellIndex: Alternates keyed by grid index.
    /// - Returns: The distinct non-canonical whole-answer variants.
    private static func acceptedAnswerVariants(
        cellIndices: [Int],
        answers: [String?],
        acceptedAnswersByCellIndex: [Int: [String]]
    ) -> [String] {
        let baseParts: [String] = cellIndices.map { index in
            guard answers.indices.contains(index), let fill = answers[index] else { return "" }
            return fill
        }
        let baseAnswer = baseParts.joined()

        var emitted: Set<String> = []
        var result: [String] = []
        for (position, cellIndex) in cellIndices.enumerated() {
            for alternate in acceptedAnswersByCellIndex[cellIndex] ?? [] {
                var candidateParts = baseParts
                candidateParts[position] = alternate
                let candidate = candidateParts.joined()
                if candidate == baseAnswer { continue }
                if emitted.insert(candidate).inserted {
                    result.append(candidate)
                }
            }
        }
        return result
    }
    346 
    /// Escapes one `Rebus:` value so it survives the header's
    /// whitespace-delimited, `=`-keyed grammar (see `XD.parseRebusHeader`).
    ///
    /// Header entries are split on whitespace, so a literal space — the fill of
    /// a "gap" cell — is written as the named escape `\space`. Backslash
    /// introduces escapes, so a literal backslash is doubled to `\\`. All other
    /// characters pass through untouched. The scheme is open-ended: more
    /// `\name` (or `\u{...}`) escapes can be added for any other character the
    /// header grammar would consume. `XD.unescapeRebusValue` is the inverse.
    ///
    /// - Parameter value: The raw rebus fill.
    /// - Returns: The escaped value, safe to place after `key=`.
    private static func escapeRebusValue(_ value: String) -> String {
        value.reduce(into: "") { escaped, character in
            if character == "\\" {
                escaped += "\\\\"
            } else if character == " " {
                escaped += "\\space"
            } else {
                escaped.append(character)
            }
        }
    }
    366 
    /// Escapes one accepted-answer token for a space-separated `^Accept:` line
    /// by prefixing every backslash and every whitespace character with a
    /// backslash. All other characters are copied unchanged.
    ///
    /// - Parameter token: The raw accepted answer.
    /// - Returns: The token with backslashes and whitespace escaped.
    private static func escapeAcceptToken(_ token: String) -> String {
        token.map { character -> String in
            let needsEscape = character == "\\" || character.isWhitespace
            return needsEscape ? "\\\(character)" : String(character)
        }.joined()
    }
    377 
    /// Builds a fallback title from the publication date: the English weekday
    /// name followed by " Crossword" (e.g. "Monday Crossword"), evaluated in
    /// the Gregorian calendar and the America/New_York time zone.
    ///
    /// - Parameter publicationDate: The puzzle's publication date string.
    /// - Returns: The weekday-based title, or "NYT Crossword" when the date
    ///   string cannot be parsed.
    private static func title(forPublicationDate publicationDate: String) -> String {
        if let publishedOn = date(fromPublicationDate: publicationDate) {
            let weekdayFormatter = DateFormatter()
            weekdayFormatter.calendar = Calendar(identifier: .gregorian)
            weekdayFormatter.locale = Locale(identifier: "en_US_POSIX")
            weekdayFormatter.timeZone = TimeZone(identifier: "America/New_York")
            weekdayFormatter.dateFormat = "EEEE"
            let weekday = weekdayFormatter.string(from: publishedOn)
            return "\(weekday) Crossword"
        }
        return "NYT Crossword"
    }
    390 
    /// Parses a `yyyy-MM-dd` publication date into the instant that day starts
    /// in the Gregorian calendar, America/New_York time zone (GMT if that zone
    /// is unavailable).
    ///
    /// Leading and trailing spaces or tabs are ignored. Strings that do not
    /// match the pattern exactly, and strings that name a day that does not
    /// exist (such as `2024-02-30` or month 13), yield `nil`: `Calendar` would
    /// otherwise roll such components over into a different, real date.
    ///
    /// - Parameter publicationDate: The date string from the puzzle JSON.
    /// - Returns: The start of that calendar day, or `nil` if the string is
    ///   not a valid date.
    private static func date(fromPublicationDate publicationDate: String) -> Date? {
        let trimmed = publicationDate.trimmingCharacters(in: .whitespaces)
        guard let match = trimmed.firstMatch(of: /^(\d{4})-(\d{2})-(\d{2})$/),
              let year = Int(match.1),
              let month = Int(match.2),
              let day = Int(match.3)
        else { return nil }

        var calendar = Calendar(identifier: .gregorian)
        calendar.timeZone = TimeZone(identifier: "America/New_York") ?? .gmt
        var comps = DateComponents()
        comps.calendar = calendar
        comps.timeZone = calendar.timeZone
        comps.year = year
        comps.month = month
        comps.day = day
        // `date(from:)` is lenient about out-of-range components, so reject
        // impossible dates explicitly instead of silently normalizing them.
        guard comps.isValidDate(in: calendar) else { return nil }
        return calendar.date(from: comps)
    }
    409 
    /// Collects the themer/revealer groups written to the `Relatives:` header:
    /// groups from the structured `relatives` field followed by groups of
    /// clues flagged with emphasis markup.
    ///
    /// These are connections the constructor did *not* spell out in clue text
    /// — typically the trick behind a theme — so they suit catalog/analysis
    /// use but should not drive in-grid highlighting that would spoil the
    /// solve. Cross-references written in clue prose ("See 11-Down") are
    /// derived at puzzle-load time in `Puzzle.init` instead.
    ///
    /// - Parameter clues: The clue dictionaries from the puzzle JSON.
    /// - Returns: The groups in discovery order, with empty groups removed and
    ///   only the first of any groups that contain the same set of members.
    private static func buildRelativeGroups(clues: [[String: Any]]) -> [[String]] {
        let candidates = buildRelatives(clues: clues) + buildFormattedClueGroups(clues: clues)

        var emittedMemberSets: Set<Set<String>> = []
        var unique: [[String]] = []
        for group in candidates {
            let members = Set(group)
            if members.isEmpty { continue }
            if emittedMemberSets.insert(members).inserted {
                unique.append(group)
            }
        }
        return unique
    }
    428 
    /// Derives cross-reference groups from each clue's v6 `relatives` array
    /// (indices into `clues`). A group is admitted by one of two rules; any
    /// other edge is ignored:
    ///
    /// 1. **Revealer** — a clue listing two or more relatives forms a group of
    ///    itself plus everything it lists. Its list is taken as canonical.
    /// 2. **Mutual pair** — two clues that each name the other as their only
    ///    relative form a group of two (the classic "See 14-Across" pattern).
    ///
    /// A one-way single-relative edge (A names B, B does not name A back) is
    /// dropped, which guards against NYT data errors where a leaf clue points
    /// at the wrong revealer.
    ///
    /// - Parameter clues: The clue dictionaries from the puzzle JSON.
    /// - Returns: Groups of `{number}{A|D}` tokens, each ordered by number with
    ///   Across before Down. Groups with fewer than two nameable members and
    ///   repeated member sets are omitted.
    private static func buildRelatives(clues: [[String: Any]]) -> [[String]] {
        // One optional token per clue; nil when the clue has no usable
        // label/direction.
        let tokens: [String?] = clues.map { clueToken($0) }
        // In-range, de-duplicated relative indices per clue.
        let relativeIndices: [[Int]] = clues.map { clue in
            let raw = clue["relatives"] as? [Int] ?? []
            return Array(Set(raw.filter { clues.indices.contains($0) }))
        }

        var groups: [[String]] = []
        var seen = Set<Set<Int>>()

        func emit(_ members: Set<Int>) {
            guard members.count >= 2, seen.insert(members).inserted else { return }
            let ordered = members.sorted { lhs, rhs in
                // Order by (number, Across-before-Down) read back from the
                // tokens; fall back to clue index when either token is missing.
                guard let left = tokens[lhs], let right = tokens[rhs] else { return lhs < rhs }
                let leftNumber = Int(left.dropLast()) ?? 0
                let rightNumber = Int(right.dropLast()) ?? 0
                if leftNumber != rightNumber { return leftNumber < rightNumber }
                return left.hasSuffix("A") && right.hasSuffix("D")
            }
            let named = ordered.compactMap { tokens[$0] }
            guard named.count >= 2 else { return }
            groups.append(named)
        }

        // Rule 1: revealers.
        for (index, refs) in relativeIndices.enumerated() {
            if refs.count >= 2 {
                emit(Set(refs).union([index]))
            }
        }

        // Rule 2: mutual pairs. Only clues with exactly one relative qualify;
        // multi-relative clues were already handled as revealers.
        for (index, refs) in relativeIndices.enumerated() {
            guard refs.count == 1, let other = refs.first, other != index else { continue }
            guard relativeIndices.indices.contains(other) else { continue }
            if relativeIndices[other] == [index] {
                emit([index, other])
            }
        }

        return groups
    }
    500 
    /// Groups the clues NYT flags as themers purely through formatted clue
    /// text (commonly `<i>...</i>`) without any `relatives`, so their answer
    /// cells can be highlighted by Crossmate's thematic mask.
    ///
    /// A revealer is folded into the same group when its prose names the set
    /// ("the answer to each italicized clue", "the five italicized clues").
    /// Such a revealer carries neither markup nor `relatives`, so the prose
    /// reference is the only thing tying it to the themers. Revealers are only
    /// looked for when at least one formatted clue exists, which keeps an
    /// incidental mention from a non-revealer clue out of the group.
    ///
    /// - Parameter clues: The clue dictionaries from the puzzle JSON.
    /// - Returns: An empty array when no clue has emphasis markup; otherwise a
    ///   single group of tokens ordered by number, Across before Down.
    private static func buildFormattedClueGroups(clues: [[String: Any]]) -> [[String]] {
        let themerTokens = clues
            .filter { clueHasFormattedText($0) }
            .compactMap { clueToken($0) }
        if themerTokens.isEmpty { return [] }

        let themerSet = Set(themerTokens)
        let revealerTokens = clues
            .filter { clueReferencesItalicizedSet($0) }
            .compactMap { clueToken($0) }
            .filter { !themerSet.contains($0) }

        return [sortedClueTokens(themerTokens + revealerTokens)]
    }
    525 
    /// Reports whether a clue's plain text refers to the italicized themers:
    /// the word "italicized" (or "italicised"), then whitespace, then "clue" or
    /// "answer" — as in "each italicized clue" or "answers to the italicized
    /// clues". Matching is case-insensitive. Italic is the only emphasis NYT
    /// pairs with a revealer; bold and underline never are.
    private static func clueReferencesItalicizedSet(_ clue: [String: Any]) -> Bool {
        let prose = cluePlainText(clue).lowercased()
        return prose.firstMatch(of: /italici[sz]ed\s+(clue|answer)/) != nil
    }
    533 
    /// Returns the `plain` string of the first entry in a clue's `text` array,
    /// or an empty string when the array, its first entry, or the `plain`
    /// string is missing.
    private static func cluePlainText(_ clue: [String: Any]) -> String {
        let parts = clue["text"] as? [[String: Any]]
        return parts?.first?["plain"] as? String ?? ""
    }
    539 
    /// Sorts `{number}{A|D}` tokens by clue number, placing Across ahead of
    /// Down when the numbers tie, so a folded-in revealer appears in sequence
    /// instead of trailing the group. A token whose leading part is not a
    /// number sorts as 0.
    private static func sortedClueTokens(_ tokens: [String]) -> [String] {
        func number(of token: String) -> Int {
            Int(token.dropLast()) ?? 0
        }
        return tokens.sorted { lhs, rhs in
            let (left, right) = (number(of: lhs), number(of: rhs))
            guard left == right else { return left < right }
            return lhs.last == "A" && rhs.last == "D"
        }
    }
    550 
    /// Reports whether any entry in the clue's `text` array has a `formatted`
    /// string that, once entities are decoded, contains emphasis markup.
    ///
    /// A non-empty `formatted` field alone is not a theme signal: image clues
    /// mirror a bare symbol ("¥") or the plain text there. Only genuine
    /// emphasis markup marks a themer.
    private static func clueHasFormattedText(_ clue: [String: Any]) -> Bool {
        guard let parts = clue["text"] as? [[String: Any]] else { return false }
        for part in parts {
            if let formatted = part["formatted"] as? String,
               containsEmphasisMarkup(decodeBasicEntities(formatted)) {
                return true
            }
        }
        return false
    }
    561 
    /// Pairs an HTML open/close tag with its `.xd` brace-markup equivalents.
    typealias TagMapping = (open: String, close: String, xdOpen: String, xdClose: String)

    /// HTML emphasis tags that flag a *theme* clue. NYT italicizes its themers
    /// (`<i>`/`<em>`), which is the convention theme grouping keys on;
    /// `<b>`/`<strong>` count as the same kind of prose emphasis. Underline
    /// lives in `underlineTags` instead, because NYT uses it for highlight
    /// gimmicks — "`<u>John</u> ___`" — rather than themers, so it must not
    /// trigger grouping.
    private static let emphasisTags: [TagMapping] = [
        (open: "<i>", close: "</i>", xdOpen: "{/", xdClose: "/}"),
        (open: "<em>", close: "</em>", xdOpen: "{/", xdClose: "/}"),
        (open: "<b>", close: "</b>", xdOpen: "{*", xdClose: "*}"),
        (open: "<strong>", close: "</strong>", xdOpen: "{*", xdClose: "*}"),
    ]

    /// Underline markup. Converted for display fidelity (it is the second most
    /// common clue markup in the NYT archive) but intentionally kept out of
    /// the theme-grouping signal.
    private static let underlineTags: [TagMapping] = [
        (open: "<u>", close: "</u>", xdOpen: "{_", xdClose: "_}"),
    ]

    /// Every tag that is converted to `.xd` markup: emphasis tags first, then
    /// underline.
    private static var markupTags: [TagMapping] {
        var all = emphasisTags
        all.append(contentsOf: underlineTags)
        return all
    }
    584 
    /// Reports whether `html` contains the opening form of any tag in
    /// `emphasisTags`, compared case-insensitively. Other markup (underline,
    /// sub/sup, layout tags) does not count.
    private static func containsEmphasisMarkup(_ html: String) -> Bool {
        let haystack = html.lowercased()
        for tag in emphasisTags where haystack.contains(tag.open) {
            return true
        }
        return false
    }
    591 
    /// Reports whether `html` contains the opening form of any tag in
    /// `markupTags` (emphasis or underline), compared case-insensitively.
    private static func containsConvertibleMarkup(_ html: String) -> Bool {
        let haystack = html.lowercased()
        for tag in markupTags where haystack.contains(tag.open) {
            return true
        }
        return false
    }
    596 
    /// Translates NYT `formatted` clue HTML into `.xd` brace markup.
    ///
    /// Entities are decoded first so prose like `Salt &amp; pepper`
    /// round-trips. Every recognized tag is then swapped
    /// (case-insensitively) for its brace form, and any remaining tags —
    /// sub/sup, `<span>`, stray layout markup — are removed while their text
    /// content is kept.
    ///
    /// - Parameter formatted: The clue's `formatted` string.
    /// - Returns: The converted text, or `nil` when the decoded string has no
    ///   recognized markup (so the caller falls back to `plain`).
    private static func xdMarkup(fromFormatted formatted: String) -> String? {
        let decoded = decodeBasicEntities(formatted)
        if !containsConvertibleMarkup(decoded) { return nil }

        let converted = markupTags.reduce(decoded) { text, tag in
            text
                .replacingOccurrences(of: tag.open, with: tag.xdOpen, options: .caseInsensitive)
                .replacingOccurrences(of: tag.close, with: tag.xdClose, options: .caseInsensitive)
        }
        return converted.replacing(/<[^>]+>/, with: "")
    }
    612 
    /// Replaces the HTML entities `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&apos;`
    /// and `&amp;` with the characters they stand for.
    ///
    /// `&amp;` is handled last so that a doubly-escaped sequence such as
    /// `&amp;lt;` decodes one level, to `&lt;`, rather than all the way to `<`.
    private static func decodeBasicEntities(_ s: String) -> String {
        let replacements: [(entity: String, character: String)] = [
            (entity: "&lt;", character: "<"),
            (entity: "&gt;", character: ">"),
            (entity: "&quot;", character: "\""),
            (entity: "&#39;", character: "'"),
            (entity: "&apos;", character: "'"),
            (entity: "&amp;", character: "&"),
        ]
        return replacements.reduce(s) { text, replacement in
            text.replacingOccurrences(of: replacement.entity, with: replacement.character)
        }
    }
    621 
    /// Builds the `{number}{A|D}` token for a clue, e.g. `17A` or `4D`.
    ///
    /// - Parameter clue: A clue dictionary from the puzzle JSON.
    /// - Returns: The token, or `nil` when the label is missing or not
    ///   positive, or the direction is neither "Across" nor "Down".
    private static func clueToken(_ clue: [String: Any]) -> String? {
        guard let label = intValue(clue["label"]), label > 0 else { return nil }
        switch clue["direction"] as? String ?? "" {
        case "Across":
            return "\(label)A"
        case "Down":
            return "\(label)D"
        default:
            return nil
        }
    }
    628 
    /// Finds the grid indices of special cells in the body's `cells` array:
    /// cells whose `type` is 2 are reported as circled and those whose `type`
    /// is 3 as shaded.
    ///
    /// - Parameter body: The puzzle body dictionary.
    /// - Returns: The circled and shaded index sets; both are empty when
    ///   `cells` is missing or not an array.
    private static func specialCellInfo(body: [String: Any]) -> (circled: Set<Int>, shaded: Set<Int>) {
        guard let cells = body["cells"] as? [Any] else { return ([], []) }

        func indices(ofType wanted: Int) -> Set<Int> {
            Set(cells.indices.filter { index in
                guard let dict = cells[index] as? [String: Any] else { return false }
                return intValue(dict["type"]) == wanted
            })
        }

        return (indices(ofType: 2), indices(ofType: 3))
    }
    647 
    /// Builds the value of the `Specials:` header: `@=circle` when any cell is
    /// circled and `*=shaded` when any cell is shaded, in that order, joined by
    /// a space. Returns an empty string when both sets are empty.
    private static func specialMappings(circled: Set<Int>, shaded: Set<Int>) -> String {
        let candidates: [(isPresent: Bool, mapping: String)] = [
            (isPresent: !circled.isEmpty, mapping: "@=circle"),
            (isPresent: !shaded.isEmpty, mapping: "*=shaded"),
        ]
        return candidates
            .filter { $0.isPresent }
            .map { $0.mapping }
            .joined(separator: " ")
    }
    658 
    /// Removes the `[aria-label]` marker that NYT image-based clues put in
    /// front of the mirrored image description. The marker is a machine token,
    /// not clue text.
    ///
    /// Leading spaces before the marker are tolerated and the marker is matched
    /// case-insensitively. When it is present, the marker and the spaces that
    /// follow it are dropped; otherwise `text` is returned exactly as given,
    /// leading spaces included.
    private static func stripAriaLabelPrefix(_ text: String) -> String {
        let marker = "[aria-label]"
        let body = text.drop(while: { $0 == " " })
        if !body.lowercased().hasPrefix(marker) {
            return text
        }
        let remainder = body.dropFirst(marker.count).drop(while: { $0 == " " })
        return String(remainder)
    }
    668 
    /// Extracts an `Int` from a JSON value that may be an `Int`, a numeric
    /// `String`, an `NSNumber`, or a `Double`.
    ///
    /// Fractional numbers are truncated toward zero. Values with no `Int`
    /// representation — `nil`, non-numeric strings, NaN, infinities, and
    /// magnitudes beyond `Int`'s range — yield `nil` instead of trapping or
    /// producing a wrapped value.
    ///
    /// - Parameter value: The decoded JSON value, if any.
    /// - Returns: The integer value, or `nil` when none can be derived.
    private static func intValue(_ value: Any?) -> Int? {
        if let n = value as? Int { return n }
        if let s = value as? String { return Int(s) }

        // Anything that reaches this point is not exactly an Int, so go through
        // Double and convert with range checking.
        let number: Double
        if let n = value as? NSNumber {
            number = n.doubleValue
        } else if let d = value as? Double {
            number = d
        } else {
            return nil
        }
        guard number.isFinite else { return nil }
        // `Int(exactly:)` returns nil rather than trapping when out of range.
        return Int(exactly: number.rounded(.towardZero))
    }
    677 }