NYTToXDConverter.swift (30026B)
import Foundation

/// Converts NYT puzzle JSON (from `/v6/puzzle/daily/{date}.json`) to `.xd` format.
enum NYTToXDConverter {
    /// Error thrown when the puzzle JSON is structurally unusable.
    struct ConversionError: LocalizedError {
        let message: String
        var errorDescription: String? { message }
    }

    /// Single-character grid placeholders for multi-letter (rebus) fills. The
    /// `.xd` grid is one character per cell, so a cell whose fill is longer than
    /// one letter shows one of these in the grid and is expanded via the
    /// `Rebus:` header (the `1` in `1=LW`). These are *not* NYT data — they're
    /// our `.xd` serialization. The `.xd` spec allows "digits, most symbols,
    /// and printable unicode characters (if needed)" here; we take digits first
    /// (the conventional, readable encoding) then the symbols, where "most"
    /// means every printable ASCII symbol *except* the ones the grid/header
    /// parser already reserves — letters (grid fill), `#`/`_`/`.` (block/empty),
    /// `@`/`*` (special markers), and `=`/space (`Rebus:` `key=value` syntax).
    /// We stop at ASCII rather than walking into unicode and throw past the
    /// ceiling; real puzzles use a handful of distinct fills, nowhere near it.
    /// (Walking ASCII naïvely from `'1'` overflows into `=` by the 13th key,
    /// which produced an unparseable `==VALUE` entry and a rejected grid char.)
    private static let rebusPlaceholders: [Character] = {
        let reserved: Set<Character> = ["#", "_", ".", "@", "*", "=", " "]
        let symbols = (UInt8(33)...UInt8(126))
            .map { Character(UnicodeScalar($0)) }
            .filter { !$0.isLetter && !$0.isNumber && !reserved.contains($0) }
        return Array("123456789") + symbols
    }()

    /// Whether a cell fill must ride into the grid via a `Rebus:` placeholder
    /// rather than appear literally. The `.xd` grid is one character per cell and
    /// the grid parser only takes letters (plus block/special markers and
    /// declared placeholders) as direct fills, so anything longer than one
    /// character — or a single non-letter character such as the "2"/"3" cells in
    /// an "R2D2"/"C3PO" themer — has to be encoded. Routing every non-letter fill
    /// through a placeholder means a digit never appears literally in the grid:
    /// each grid digit is unambiguously a placeholder reference, so digit fills
    /// and digit placeholders can't collide.
    private static func needsRebusEncoding(_ answer: String) -> Bool {
        if answer.count != 1 { return true }
        return !(answer.first?.isLetter ?? false)
    }

    /// Converts raw JSON data from the NYT puzzle endpoint to an `.xd` source string.
    ///
    /// - Parameter jsonData: The raw response body of the puzzle endpoint.
    /// - Returns: The `.xd` source: metadata, grid and clue sections separated
    ///   by two blank lines.
    /// - Throws: `ConversionError` when the root, body, dimensions, cells or
    ///   clues are missing or inconsistent, or when the puzzle needs more
    ///   distinct rebus placeholders than are available; rethrows any
    ///   `JSONSerialization` error.
    static func convert(jsonData: Data) throws -> String {
        guard let root = try JSONSerialization.jsonObject(with: jsonData) as? [String: Any] else {
            throw ConversionError(message: "Invalid JSON root.")
        }

        // -- Metadata --

        let publicationDate = root["publicationDate"] as? String ?? ""
        let nytTitle = (root["title"] as? String)?
            .trimmingCharacters(in: .whitespacesAndNewlines)
        // `Self.` keeps the call unambiguous against the local being declared.
        let title = if let nytTitle, !nytTitle.isEmpty {
            nytTitle
        } else {
            Self.title(forPublicationDate: publicationDate)
        }
        let constructors = root["constructors"] as? [String] ?? []
        let editor = root["editor"] as? String
        let copyright = root["copyright"] as? String

        guard let bodyArray = root["body"] as? [[String: Any]],
              let body = bodyArray.first else {
            throw ConversionError(message: "Missing body in puzzle JSON.")
        }

        guard let dimensions = body["dimensions"] as? [String: Int],
              let width = dimensions["width"],
              let height = dimensions["height"] else {
            throw ConversionError(message: "Missing dimensions.")
        }

        guard let cells = body["cells"] as? [Any] else {
            throw ConversionError(message: "Missing cells.")
        }

        // Negative dimensions can multiply to a matching cell count yet would
        // trap when forming `0..<height` below, and a huge product would trap
        // on overflow; reject both as malformed input instead of crashing.
        guard width >= 0, height >= 0 else {
            throw ConversionError(message: "Invalid dimensions (\(width)x\(height)).")
        }
        let (expectedCellCount, overflowed) = width.multipliedReportingOverflow(by: height)
        guard !overflowed, cells.count == expectedCellCount else {
            throw ConversionError(message: "Cell count (\(cells.count)) does not match dimensions (\(width)x\(height)).")
        }

        guard let clues = body["clues"] as? [[String: Any]] else {
            throw ConversionError(message: "Missing clues.")
        }

        // -- Parse cells into answers --

        // Each cell is either an empty dict (block) or a dict with "answer", "type", etc.
        var answers: [String?] = [] // nil = block, String = answer
        answers.reserveCapacity(cells.count)
        var acceptedAnswersByCellIndex: [Int: [String]] = [:]

        for (index, cell) in cells.enumerated() {
            guard let dict = cell as? [String: Any], !dict.isEmpty else {
                answers.append(nil)
                continue
            }

            let rawAnswer = dict["answer"] as? String ?? ""
            // NYT encodes a "Schrödinger" square — one correct as either of two
            // letters — as a slash-joined answer like "L/W", with every
            // acceptable keystroke enumerated in moreAnswers.valid. The slash
            // is NYT notation, not a grid character (and isn't on our
            // keyboard), so collapse it: the letters together ("LW") become the
            // canonical rebus fill, and each single letter rides along as an
            // accepted alternate via the moreAnswers handling below.
            let isSchrodinger = rawAnswer.contains("/")
            let answer = isSchrodinger
                ? rawAnswer.replacingOccurrences(of: "/", with: "")
                : rawAnswer
            if answer.isEmpty {
                // A playable cell (NYT type 1) with no answer is an intentional
                // blank: the "gap" in themers like the 2006-07-06 "THE GAP"
                // puzzle, where crossing words read straight through a square
                // whose solution is a literal space. NYT flags these with a
                // blank-marker `moreAnswers` ("B"), which we drop by `continue`ing
                // past the alternates below — the square's only correct state is
                // empty. Any other answerless cell is a block.
                answers.append(intValue(dict["type"]) == 1 ? " " : nil)
                continue
            }
            answers.append(answer)

            if let moreAnswers = dict["moreAnswers"] as? [String: Any],
               let valid = moreAnswers["valid"] as? [String] {
                // For Schrödinger cells, strip the slash from each alternate too
                // so only keyboard-enterable letter forms survive, then
                // dedupe/sort for stable output. Other cells pass their
                // alternates through unchanged so arbitrary accepted strings
                // still round-trip.
                let candidates = isSchrodinger
                    ? valid.map { $0.replacingOccurrences(of: "/", with: "") }
                    : valid
                let cleaned = candidates.filter { !$0.isEmpty && $0 != answer }
                if !cleaned.isEmpty {
                    acceptedAnswersByCellIndex[index] = isSchrodinger
                        ? Array(Set(cleaned)).sorted()
                        : cleaned
                }
            }
        }

        // -- Build rebus header if needed --

        // Check for multi-character answers. Each distinct multi-letter fill
        // claims the next grid placeholder from `rebusPlaceholders`.
        var rebusEntries: [(key: Character, value: String)] = []
        var rebusLookup: [String: Character] = [:]

        for answer in answers {
            guard let answer, needsRebusEncoding(answer) else { continue }
            if rebusLookup[answer] == nil {
                guard rebusLookup.count < rebusPlaceholders.count else {
                    throw ConversionError(
                        message: "Too many distinct rebus fills (\(rebusLookup.count + 1)); ran out of grid placeholders."
                    )
                }
                let key = rebusPlaceholders[rebusLookup.count]
                rebusLookup[answer] = key
                rebusEntries.append((key: key, value: answer))
            }
        }

        // -- Find special (shaded/circled) cells from NYT cell data --

        let specialCells = specialCellInfo(body: body)

        // -- Build grid lines --

        var gridLines: [String] = []
        gridLines.reserveCapacity(height)
        for row in 0..<height {
            var line = ""
            for col in 0..<width {
                let index = row * width + col
                guard let answer = answers[index] else {
                    line += "#"
                    continue
                }
                // A special marker takes the cell's grid character in place
                // of its fill; circled is checked before shaded.
                if specialCells.circled.contains(index) {
                    line += "@"
                    continue
                }
                if specialCells.shaded.contains(index) {
                    line += "*"
                    continue
                }
                if needsRebusEncoding(answer) {
                    // Every fill that needs encoding was registered in the
                    // loop above; fail loudly rather than trap if that
                    // invariant is ever broken.
                    guard let placeholder = rebusLookup[answer] else {
                        throw ConversionError(message: "Missing rebus placeholder for fill \"\(answer)\".")
                    }
                    line.append(placeholder)
                } else {
                    line += answer.uppercased()
                }
            }
            gridLines.append(line)
        }

        // -- Build clue lines --

        // Sort clues: Across first, then Down; within each group, by label number.
        let sortedClues = clues.sorted { a, b in
            let dirA = (a["direction"] as? String) ?? ""
            let dirB = (b["direction"] as? String) ?? ""
            if dirA != dirB { return dirA == "Across" }
            let labelA = intValue(a["label"]) ?? 0
            let labelB = intValue(b["label"]) ?? 0
            return labelA < labelB
        }

        var acrossClueLines: [String] = []
        var downClueLines: [String] = []

        for clue in sortedClues {
            let direction = clue["direction"] as? String ?? ""
            let label = intValue(clue["label"]) ?? 0

            // Extract clue text from the nested structure:
            //   "text": [{"plain": "Clue text", "formatted": "<i>Clue text</i>"}]
            // When NYT supplies emphasis markup in `formatted` (italic themers,
            // etc.) convert it to .xd brace markup; otherwise fall back to the
            // `plain` reading. `formatted` is *not* always richer than plain —
            // image clues carry a bare symbol there ("¥") — so it is only
            // preferred when it actually carries markup.
            let clueText: String
            if let textArray = clue["text"] as? [[String: Any]],
               let firstText = textArray.first {
                if let formatted = firstText["formatted"] as? String,
                   let markup = xdMarkup(fromFormatted: formatted) {
                    clueText = stripAriaLabelPrefix(markup)
                } else if let plain = firstText["plain"] as? String {
                    clueText = stripAriaLabelPrefix(plain)
                } else {
                    clueText = ""
                }
            } else {
                clueText = ""
            }

            // Build answer from cell indices. Indices outside the grid are
            // skipped (as blocks are) instead of trapping on malformed data,
            // matching the bounds check in `acceptedAnswerVariants`.
            let cellIndices = clue["cells"] as? [Int] ?? []
            let answerStr = cellIndices
                .compactMap { cellIndex -> String? in
                    answers.indices.contains(cellIndex) ? answers[cellIndex] : nil
                }
                .joined()

            let prefix = direction == "Across" ? "A" : "D"
            let line = "\(prefix)\(label). \(clueText) ~ \(answerStr)"
            let acceptLine: String?
            let acceptedAnswers = acceptedAnswerVariants(
                cellIndices: cellIndices,
                answers: answers,
                acceptedAnswersByCellIndex: acceptedAnswersByCellIndex
            )
            if acceptedAnswers.isEmpty {
                acceptLine = nil
            } else {
                let escaped = acceptedAnswers.map(escapeAcceptToken).joined(separator: " ")
                acceptLine = "\(prefix)\(label) ^Accept: \(escaped)"
            }

            if direction == "Across" {
                acrossClueLines.append(line)
                if let acceptLine { acrossClueLines.append(acceptLine) }
            } else {
                downClueLines.append(line)
                if let acceptLine { downClueLines.append(acceptLine) }
            }
        }

        // -- Assemble .xd source --

        var sections: [String] = []

        // Metadata section
        var metadata: [String] = []
        metadata.append("Title: \(title)")
        metadata.append("CmVer: \(XD.currentCmVersion)")
        metadata.append("Publisher: New York Times")
        if !publicationDate.isEmpty {
            metadata.append("Date: \(publicationDate)")
        }
        if !constructors.isEmpty {
            metadata.append("Author: \(constructors.joined(separator: ", "))")
        }
        if let editor {
            metadata.append("Editor: \(editor)")
        }
        if let copyright {
            metadata.append("Copyright: \(copyright)")
        }

        if !rebusEntries.isEmpty {
            let rebusStr = rebusEntries
                .map { "\($0.key)=\(escapeRebusValue($0.value))" }
                .joined(separator: " ")
            metadata.append("Rebus: \(rebusStr)")
        }

        let specialsHeader = Self.specialMappings(circled: specialCells.circled, shaded: specialCells.shaded)
        if !specialsHeader.isEmpty {
            metadata.append("Specials: \(specialsHeader)")
        }

        let relatives = buildRelativeGroups(clues: clues)
        if !relatives.isEmpty {
            let joined = relatives
                .map { $0.joined(separator: ",") }
                .joined(separator: "; ")
            metadata.append("Relatives: \(joined)")
        }

        sections.append(metadata.joined(separator: "\n"))

        // Grid section
        sections.append(gridLines.joined(separator: "\n"))

        // Clue sections (across then down, separated by blank line)
        let allClueLines = acrossClueLines + [""] + downClueLines
        sections.append(allClueLines.joined(separator: "\n"))

        // The .xd parser splits sections on two or more consecutive blank lines,
        // so we need two blank lines (three newlines) between sections.
        return sections.joined(separator: "\n\n\n")
    }

    /// Builds the alternate whole-entry answers for one clue: for every cell
    /// that has accepted alternates, the canonical entry with that single cell
    /// swapped for each alternate. Variants equal to the canonical entry and
    /// duplicates are omitted; first-seen order is preserved. Cell indices
    /// outside `answers`, and block cells, contribute an empty string.
    private static func acceptedAnswerVariants(
        cellIndices: [Int],
        answers: [String?],
        acceptedAnswersByCellIndex: [Int: [String]]
    ) -> [String] {
        var variants: [String] = []
        var seen = Set<String>()
        let canonicalParts = cellIndices.map { answers.indices.contains($0) ? answers[$0] ?? "" : "" }
        let canonicalAnswer = canonicalParts.joined()

        for (partIndex, cellIndex) in cellIndices.enumerated() {
            guard let accepted = acceptedAnswersByCellIndex[cellIndex] else { continue }
            for value in accepted {
                var parts = canonicalParts
                parts[partIndex] = value
                let variant = parts.joined()
                guard variant != canonicalAnswer, seen.insert(variant).inserted else { continue }
                variants.append(variant)
            }
        }

        return variants
    }

    /// Escapes a `Rebus:` value for the header's whitespace-delimited,
    /// `=`-keyed grammar (see `XD.parseRebusHeader`). Because the header splits
    /// entries on whitespace, a value that *is* whitespace — the space fill of a
    /// "gap" cell — can't appear literally; it rides in as the named escape
    /// `\space`. Backslash is the escape introducer, so a literal backslash
    /// doubles to `\\`. The scheme is deliberately open-ended: further `\name`
    /// (or `\u{...}`) escapes can join it to carry any character the header
    /// grammar would otherwise eat. `XD.unescapeRebusValue` is the inverse.
    private static func escapeRebusValue(_ value: String) -> String {
        var out = ""
        for ch in value {
            switch ch {
            case "\\": out += "\\\\"
            case " ": out += "\\space"
            default: out.append(ch)
            }
        }
        return out
    }

    /// Escapes one `^Accept:` token by prefixing every backslash and every
    /// whitespace character with a backslash, so tokens can be joined with
    /// spaces on a single line.
    private static func escapeAcceptToken(_ token: String) -> String {
        var escaped = ""
        for ch in token {
            if ch == "\\" || ch.isWhitespace {
                escaped.append("\\")
            }
            escaped.append(ch)
        }
        return escaped
    }

    /// Fallback title when NYT supplies none: "<Weekday> Crossword" for a
    /// parseable `yyyy-MM-dd` publication date, otherwise "NYT Crossword".
    private static func title(forPublicationDate publicationDate: String) -> String {
        guard let date = date(fromPublicationDate: publicationDate) else {
            return "NYT Crossword"
        }

        let formatter = DateFormatter()
        formatter.calendar = Calendar(identifier: .gregorian)
        formatter.locale = Locale(identifier: "en_US_POSIX")
        formatter.timeZone = TimeZone(identifier: "America/New_York")
        formatter.dateFormat = "EEEE"
        return "\(formatter.string(from: date)) Crossword"
    }

    /// Parses a `yyyy-MM-dd` string (surrounding whitespace ignored) into a
    /// Gregorian date in the America/New_York time zone (GMT if that zone is
    /// unavailable). Returns nil when the string does not match the pattern
    /// or the calendar cannot produce a date from the components.
    private static func date(fromPublicationDate publicationDate: String) -> Date? {
        let trimmed = publicationDate.trimmingCharacters(in: .whitespaces)
        guard let match = trimmed.firstMatch(of: /^(\d{4})-(\d{2})-(\d{2})$/),
              let year = Int(match.1),
              let month = Int(match.2),
              let day = Int(match.3)
        else { return nil }

        var calendar = Calendar(identifier: .gregorian)
        calendar.timeZone = TimeZone(identifier: "America/New_York") ?? .gmt
        var comps = DateComponents()
        comps.calendar = calendar
        comps.timeZone = calendar.timeZone
        comps.year = year
        comps.month = month
        comps.day = day
        return calendar.date(from: comps)
    }

    /// Themer/revealer groups: the structured `relatives` field plus
    /// italics-flagged theme answers. These are the connections the
    /// constructor did *not* surface in clue text — typically the trick
    /// underlying a theme — so they're suitable for catalog/analysis but
    /// should not drive any in-grid highlighting that would spoil the solve.
    /// Cross-references that live in clue prose ("See 11-Down") are derived
    /// at puzzle-load time in `Puzzle.init` instead.
    private static func buildRelativeGroups(clues: [[String: Any]]) -> [[String]] {
        var groups = buildRelatives(clues: clues)
        groups.append(contentsOf: buildFormattedClueGroups(clues: clues))
        // Drop empty groups and groups whose member set was already emitted.
        var seen = Set<Set<String>>()
        return groups.filter { group in
            let key = Set(group)
            guard !key.isEmpty, !seen.contains(key) else { return false }
            seen.insert(key)
            return true
        }
    }

    /// Builds groups of cross-referenced clues from the v6 per-clue
    /// `relatives` arrays. Two rules admit a group, everything else is
    /// discarded:
    ///
    /// 1. **Revealer** — a clue with ≥2 relatives defines a group consisting
    ///    of itself plus every clue it references. The revealer's list is
    ///    treated as canonical.
    /// 2. **Mutual pair** — two clues that each list the other as their sole
    ///    relative form a group of two (the classic "See 14-Across" pattern).
    ///
    /// Single-direction 1-relative edges (where A references B but B does
    /// not reference A back) are dropped. This guards against NYT data
    /// errors where a leaf clue points at the wrong revealer.
    private static func buildRelatives(clues: [[String: Any]]) -> [[String]] {
        // Extract each clue's `{number}{A|D}` token and relatives array.
        var tokens: [String?] = []
        var relativeIndices: [[Int]] = []
        tokens.reserveCapacity(clues.count)
        relativeIndices.reserveCapacity(clues.count)
        for clue in clues {
            // nil when the clue has no positive label or a direction other
            // than Across/Down.
            tokens.append(clueToken(clue))
            let raw = clue["relatives"] as? [Int] ?? []
            // Keep only in-range indices, deduplicated.
            let cleaned = Array(Set(raw.filter { $0 >= 0 && $0 < clues.count }))
            relativeIndices.append(cleaned)
        }

        var groups: [[String]] = []
        var seen = Set<Set<Int>>()

        func emit(_ members: Set<Int>) {
            guard members.count >= 2, !seen.contains(members) else { return }
            seen.insert(members)
            let sorted = members.sorted { a, b in
                // Order by (number, direction-is-across-first). Extract from
                // the stored token; fallback to index if a token is missing.
                guard let ta = tokens[a], let tb = tokens[b] else { return a < b }
                let na = Int(ta.dropLast()) ?? 0
                let nb = Int(tb.dropLast()) ?? 0
                if na != nb { return na < nb }
                return ta.last == "A" && tb.last == "D"
            }
            let toks = sorted.compactMap { tokens[$0] }
            if toks.count >= 2 { groups.append(toks) }
        }

        // Rule 1: revealers.
        for (i, refs) in relativeIndices.enumerated() where refs.count >= 2 {
            var members = Set<Int>()
            members.insert(i)
            for r in refs { members.insert(r) }
            emit(members)
        }

        // Rule 2: mutual pairs. Only consider clues with exactly one relative
        // — revealer-formed groups already cover the multi-relative cases.
        for (i, refs) in relativeIndices.enumerated() where refs.count == 1 {
            let j = refs[0]
            guard j != i, relativeIndices.indices.contains(j) else { continue }
            if relativeIndices[j] == [i] {
                emit(Set([i, j]))
            }
        }

        return groups
    }

    /// NYT marks some theme clues by supplying formatted clue text, commonly
    /// `<i>...</i>`, without adding `relatives`. Group all such clue refs so
    /// their answer cells can be highlighted by Crossmate's thematic mask.
    ///
    /// A revealer is folded into the same group when its prose names the set —
    /// "the answer to each italicized clue", "the five italicized clues". The
    /// revealer carries no markup or `relatives` of its own, so this prose
    /// reference is the only signal binding it to the themers. The link is only
    /// drawn when an italicized set actually exists, which keeps an incidental
    /// mention from a clue that isn't a revealer out of the group.
    private static func buildFormattedClueGroups(clues: [[String: Any]]) -> [[String]] {
        var tokens = clues.compactMap { clue -> String? in
            guard clueHasFormattedText(clue) else { return nil }
            return clueToken(clue)
        }
        guard !tokens.isEmpty else { return [] }

        let themers = Set(tokens)
        for clue in clues where clueReferencesItalicizedSet(clue) {
            guard let token = clueToken(clue), !themers.contains(token) else { continue }
            tokens.append(token)
        }
        return [sortedClueTokens(tokens)]
    }

    /// Whether a clue's prose points at the italicized themers — the word
    /// "italicized" immediately followed by "clue" or "answer" ("each
    /// italicized clue", "answers to the italicized clues"). Italic is the only
    /// emphasis NYT pairs with a revealer; bold and underline never are.
    private static func clueReferencesItalicizedSet(_ clue: [String: Any]) -> Bool {
        cluePlainText(clue).lowercased().contains(/italici[sz]ed\s+(clue|answer)/)
    }

    /// The `plain` string of the clue's first `text` entry, or "" when the
    /// clue has no such entry.
    private static func cluePlainText(_ clue: [String: Any]) -> String {
        guard let textArray = clue["text"] as? [[String: Any]],
              let plain = textArray.first?["plain"] as? String else { return "" }
        return plain
    }

    /// Orders `{number}{A|D}` tokens by number, Across before Down, so a folded
    /// revealer lands in sequence rather than at the end.
    private static func sortedClueTokens(_ tokens: [String]) -> [String] {
        tokens.sorted { a, b in
            let na = Int(a.dropLast()) ?? 0
            let nb = Int(b.dropLast()) ?? 0
            if na != nb { return na < nb }
            return a.last == "A" && b.last == "D"
        }
    }

    /// Whether any `text` entry of the clue carries emphasis markup in its
    /// `formatted` field (checked after entity decoding).
    private static func clueHasFormattedText(_ clue: [String: Any]) -> Bool {
        guard let textArray = clue["text"] as? [[String: Any]] else { return false }
        return textArray.contains { textPart in
            guard let formatted = textPart["formatted"] as? String else { return false }
            // A non-empty `formatted` field alone isn't a theme signal: image
            // clues mirror a bare symbol ("¥") or the plain text there. Only
            // genuine emphasis markup marks a themer.
            return containsEmphasisMarkup(decodeBasicEntities(formatted))
        }
    }

    /// An HTML open/close tag pair and the `.xd` brace markup it maps to.
    typealias TagMapping = (open: String, close: String, xdOpen: String, xdClose: String)

    /// HTML emphasis tags that mark a *theme* clue. NYT italicizes its themers
    /// (`<i>`/`<em>`), the convention this grouping keys on; `<b>`/`<strong>`
    /// are included as the same kind of prose emphasis. Underline is handled
    /// separately (see `underlineTags`) because NYT uses it for highlight
    /// gimmicks — "`<u>John</u> ___`" — not themers, so it must not group.
    private static let emphasisTags: [TagMapping] = [
        ("<i>", "</i>", "{/", "/}"),
        ("<em>", "</em>", "{/", "/}"),
        ("<b>", "</b>", "{*", "*}"),
        ("<strong>", "</strong>", "{*", "*}"),
    ]

    /// Underline markup. Mapped for display fidelity (it is the second most
    /// common clue markup in the NYT archive) but deliberately excluded from
    /// the theme-grouping signal above.
    private static let underlineTags: [TagMapping] = [
        ("<u>", "</u>", "{_", "_}"),
    ]

    /// Every tag pair `xdMarkup(fromFormatted:)` converts: emphasis plus underline.
    private static var markupTags: [TagMapping] { emphasisTags + underlineTags }

    /// Whether `html` carries emphasis NYT uses to flag a themer. Other markup
    /// (underline, sub/sup, layout tags) does not count.
    private static func containsEmphasisMarkup(_ html: String) -> Bool {
        let lower = html.lowercased()
        return emphasisTags.contains { lower.contains($0.open) }
    }

    /// Whether `html` contains the opening tag of any markup we can convert
    /// (emphasis or underline), compared case-insensitively.
    private static func containsConvertibleMarkup(_ html: String) -> Bool {
        let lower = html.lowercased()
        return markupTags.contains { lower.contains($0.open) }
    }

    /// Converts NYT `formatted` clue HTML to `.xd` brace markup, or returns nil
    /// when it carries no markup we recognize (so the caller falls back to
    /// `plain`). Entities are decoded so prose like `Salt &amp; pepper`
    /// round-trips, and any unrecognized residual tags — sub/sup, `<span>`,
    /// stray layout markup — are dropped, preserving their text content.
    private static func xdMarkup(fromFormatted formatted: String) -> String? {
        let decoded = decodeBasicEntities(formatted)
        guard containsConvertibleMarkup(decoded) else { return nil }
        var out = decoded
        for tag in markupTags {
            out = out.replacingOccurrences(of: tag.open, with: tag.xdOpen, options: .caseInsensitive)
            out = out.replacingOccurrences(of: tag.close, with: tag.xdClose, options: .caseInsensitive)
        }
        return out.replacing(/<[^>]+>/, with: "")
    }

    /// Decodes the handful of HTML character references handled here:
    /// `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&apos;` and `&amp;`.
    ///
    /// `&amp;` is decoded last so that a double-escaped sequence such as
    /// `&amp;lt;` decodes one level only (to the literal text `&lt;`) rather
    /// than collapsing all the way to `<`.
    private static func decodeBasicEntities(_ s: String) -> String {
        var out = s
        for (entity, char) in [("&lt;", "<"), ("&gt;", ">"), ("&quot;", "\""),
                               ("&#39;", "'"), ("&apos;", "'"), ("&amp;", "&")] {
            out = out.replacingOccurrences(of: entity, with: char)
        }
        return out
    }

    /// The clue's `{number}{A|D}` token, or nil when its label is not a
    /// positive integer or its direction is neither "Across" nor "Down".
    private static func clueToken(_ clue: [String: Any]) -> String? {
        let direction = clue["direction"] as? String ?? ""
        let label = intValue(clue["label"]) ?? 0
        guard label > 0, direction == "Across" || direction == "Down" else { return nil }
        return "\(label)\(direction == "Across" ? "A" : "D")"
    }

    /// Collects the indices of special cells from `body["cells"]`: cell
    /// `type` 2 is recorded as circled and `type` 3 as shaded. Returns two
    /// empty sets when the cells array is missing.
    private static func specialCellInfo(body: [String: Any]) -> (circled: Set<Int>, shaded: Set<Int>) {
        guard let cells = body["cells"] as? [Any] else { return ([], []) }
        var circled: Set<Int> = []
        var shaded: Set<Int> = []
        for (index, cell) in cells.enumerated() {
            guard let dict = cell as? [String: Any] else { continue }
            switch intValue(dict["type"]) {
            case 2:
                circled.insert(index)
            case 3:
                shaded.insert(index)
            default:
                continue
            }
        }

        return (circled, shaded)
    }

    /// The `Specials:` header value declaring which grid markers are in use:
    /// `@=circle` when any cell is circled, `*=shaded` when any is shaded,
    /// space-separated; empty when neither set has members.
    private static func specialMappings(circled: Set<Int>, shaded: Set<Int>) -> String {
        var parts: [String] = []
        if !circled.isEmpty {
            parts.append("@=circle")
        }
        if !shaded.isEmpty {
            parts.append("*=shaded")
        }
        return parts.joined(separator: " ")
    }

    /// NYT image-based clues mirror the image's aria-label in the `plain`
    /// field, prefixed with the literal token `[aria-label]`. The prefix is a
    /// machine marker, not part of the clue itself, so strip it.
    private static func stripAriaLabelPrefix(_ text: String) -> String {
        let trimmed = text.drop(while: { $0 == " " })
        // Text without the marker is returned untouched, leading spaces included.
        guard trimmed.lowercased().hasPrefix("[aria-label]") else { return text }
        let afterToken = trimmed.dropFirst("[aria-label]".count)
        return String(afterToken.drop(while: { $0 == " " }))
    }

    /// Extracts an Int from a JSON value that may be an Int, a numeric
    /// String, an NSNumber, or a Double; returns nil for anything else
    /// (including a String that does not parse as an integer).
    private static func intValue(_ value: Any?) -> Int? {
        if let n = value as? Int { return n }
        if let s = value as? String { return Int(s) }
        if let n = value as? NSNumber { return n.intValue }
        if let n = value as? Double { return Int(n) }
        return nil
    }
}