crossmate

A collaborative crossword app for iOS
Log | Files | Refs | LICENSE

main.swift (35384B)


      1 import Foundation
      2 
      3 let weekdayNames = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
      4 let weekdayLookup = Dictionary(uniqueKeysWithValues: weekdayNames.enumerated().map { ($0.element.lowercased(), $0.offset) })
      5 
      6 let veryWeakShortWords: Set<String> = [
      7     "aah", "aal", "aar", "aba", "abe", "abo", "abu", "ado", "adz", "aer",
      8     "ane", "ani", "ara", "ase", "ato", "ava", "een", "eme", "ene", "eon",
      9     "ere", "ese", "ess", "est", "eta", "ete", "eth", "ier", "iii", "ile",
     10     "ita", "ite", "lai", "lar", "mee", "nee", "oer", "ona", "onea",
     11     "ort", "ose", "oto", "ree", "rei", "ria", "ser", "tae", "tba", "tbs",
     12     "tko", "tnt", "tpe", "tsp", "ute", "wye", "xed", "xii",
     13     "ance", "elee", "orle", "eder", "ssts"
     14 ]
     15 
     16 let commonShortWords: Set<String> = [
     17     "able", "acre", "act", "acts", "age", "aged", "ages", "air", "airs",
     18     "ale", "ales", "all", "ally", "also", "and", "ant", "ante", "anti",
     19     "ants", "ape", "apes", "area", "are", "ark", "arks", "arm", "arms",
     20     "art", "arts", "ash", "ate", "aunt", "awe", "awed", "awes", "bad",
     21     "bag", "bags", "bar", "bare", "bars", "base", "bash", "bat", "bats",
     22     "bed", "beds", "bee", "beer", "bees", "bell", "belt", "bent", "best",
     23     "bet", "bets", "bite", "blue", "bus", "but", "can", "cap", "car",
     24     "care", "case", "cat", "cats", "clay", "cod", "coil", "cold", "cord",
     25     "cow", "cue", "curl", "cut", "cuts", "deer", "dog", "dogs", "dry",
     26     "ear", "ears", "eat", "eats", "eel", "eels", "era", "eras", "eye",
     27     "eyes", "far", "farm", "fast", "fat", "feet", "few", "fire", "fish",
     28     "fit", "fits", "free", "fun", "game", "gas", "gate", "get", "gets",
     29     "goat", "good", "hair", "hand", "hard", "hat", "hate", "heat", "hen",
     30     "her", "here", "hit", "hits", "ice", "idea", "ink", "inn", "iron",
     31     "its", "kid", "kids", "knee", "land", "last", "late", "law", "lead",
     32     "left", "let", "lets", "lie", "life", "line", "long", "lose", "lost",
     33     "man", "many", "map", "men", "mile", "mind", "more", "name", "near",
     34     "new", "note", "oar", "oars", "oil", "old", "one", "open", "ore",
     35     "ores", "our", "out", "over", "own", "pan", "part", "past", "pen",
     36     "pet", "pets", "pie", "pin", "pins", "play", "pot", "red", "rest",
     37     "rice", "ring", "road", "root", "rose", "run", "runs", "sad", "salt",
     38     "sat", "save", "sea", "seat", "see", "seed", "sees", "set", "sets",
     39     "shoe", "side", "sit", "site", "sky", "snow", "son", "song", "star",
     40     "step", "stop", "sun", "take", "tea", "team", "ten", "tens", "test",
     41     "the", "tie", "ties", "time", "tree", "try", "use", "used", "uses",
     42     "war", "way", "west", "wet", "win", "wine", "word", "work", "yard",
     43     "year", "yes"
     44 ]
     45 
     46 let shortExactFragments: Set<String> = [
     47     "ance", "ence", "enne", "ette", "ible", "iest", "ism", "isms", "itis"
     48 ]
     49 
     50 let crosswordeseWords: Set<String> = [
     51     "adue", "anent", "anet", "antae", "atee", "atit", "atle", "bassi",
     52     "brocatelle", "ecce", "ente", "esne", "etes", "gnar", "imset",
     53     "labile", "laic", "laram", "mtida", "neer", "onor", "relee", "rete",
     54     "terete", "togae", "ulee",
     55     "aereo", "alai", "aretes", "aril", "atri", "aussi", "eaude", "ees",
     56     "emeer", "enote", "erian", "ers", "esnes", "evoe", "ilia", "leisterer",
     57     "mesne", "olea", "ooo", "oss", "ranee", "rorem", "seral",
     58     "snee", "soras", "sri", "tse", "yeses",
     59     "abas", "adano", "agena", "agin", "agorae", "ams", "ans", "aper",
     60     "arear", "bretharte", "donees", "eloi", "endat", "etnas", "etui",
     61     "hammshams", "nin", "oas", "ogees", "olan", "ont", "ossa", "otra",
     62     "poetaster", "rainintheface", "rea", "retia", "roone", "rpi", "seta",
     63     "sisi", "soli", "tet", "tieto", "totoe", "tra", "yser"
     64 ]
     65 
     66 let easyAllowedWords: Set<String> = [
     67     "n", "s", "e", "w", "ne", "nw", "se", "sw",
     68     "nne", "nnw", "ene", "ese", "sse", "ssw", "wsw", "wnw"
     69 ]
     70 
     71 struct Options {
     72     var inputPath = "Data"
     73     var outputPath = "Generated/word_list.json"
     74     var countsOutputPath: String?
     75     var minLength = 2
     76     var maxLength: Int?
     77     var minCount = 1
     78     var lengthThresholds = false
     79     var qualityFilter = false
     80     var easyFillFilter = false
     81     var badWordsPath: String?
     82     var rejectionsOutputPath: String?
     83     var qualityOutputPath: String?
     84     var weekdays: Set<Int>?
     85     var pretty = false
     86 }
     87 
     88 struct AnswerEvidence {
     89     var count = 0
     90     var badClueCount = 0
     91     var obscureNameClueCount = 0
     92     var fillBlankCount = 0
     93     var foreignLanguageClueCount = 0
     94     var cluePartCount = 0
     95     var continuationClueCount = 0
     96     var themeClueCount = 0
     97     var latestSeen: String?
     98     var sampleClues: [String] = []
     99 }
    100 
    101 struct WordmakeError: Error, CustomStringConvertible {
    102     let description: String
    103 }
    104 
    105 let badClueRegex = try! NSRegularExpression(
    106     pattern: #"\b(abbr|var|prefix|suffix|comb\.? form|archaic|poetic|dial|slang|obs|old-style|old fashioned|scot|irish|brit|fr|french|ger|german|lat|latin|span|spanish|ital|italian|port|portuguese|hebr|hebrew|yiddish|jap|japanese|inits|initials|letters|compass point|bearing|direction|shoe width|monogram|degs|degree|of yore|old|olden|heraldry|genus|legal|in law|anatomical|serf|serfs|slave|slaves|vassal|pilaster|opera voices|persian fairy|mideast|eastern v\.?i\.?p|arab prince|arab chieftain|hindu title|indian princess|rajah|dirk|knife of old|seed covering|seed casing|seed envelope|hip bones|pelvic bones|bitter vetch|marsh birds)\b"#,
    107     options: [.caseInsensitive]
    108 )
    109 let obscureNameClueRegex = try! NSRegularExpression(
    110     pattern: #"\b(actor|actress|author|composer|singer|poet|novelist|painter|artist|playwright|violinist|pianist|conductor|golfer|pitcher|baseballer|songwriter|politician|senator|governor|pope)\b"#,
    111     options: [.caseInsensitive]
    112 )
    113 let fillBlankRegex = try! NSRegularExpression(pattern: #"_{2,}|\.{3,}"#)
    114 let foreignLanguageClueRegex = try! NSRegularExpression(
    115     pattern: #":\s*(?:Sp|Span|Fr|Ger|Germ|It|Ital|Lat|Port|Heb|Yid|Jap|Rus|Gr|Gk|Swed|Norw|Dan|Arab|Chin|Hind|Skt|Turk|Finn)\.|,\s+in\s+[A-Z][a-z]"#
    116 )
    117 let cluePartRegex = try! NSRegularExpression(
    118     pattern: #"\b(?:part|section|segment|installment)\s+\d+\b|\b\d+(?:st|nd|rd|th)\s+(?:part|section|segment|installment)\b"#,
    119     options: [.caseInsensitive]
    120 )
    121 let continuationClueRegex = try! NSRegularExpression(
    122     pattern: #"\b(?:end|start|middle|continuation)\s+of\s+(?:the\s+)?(?:quote|quip|remark|saying|joke)\b|^see\s+\d+"#,
    123     options: [.caseInsensitive]
    124 )
    125 let themeClueRegex = try! NSRegularExpression(pattern: #"^\s*\*"#)
    126 let clueNumberRegex = try! NSRegularExpression(pattern: #"^\s*\d+\.\s*"#)
    127 let datePathRegex = try! NSRegularExpression(pattern: #"(\d{4})/(\d{2})/(\d{2})\.json$"#)
    128 
    129 func printUsage() {
    130     print("""
    131     Usage: Wordmake [options]
    132 
    133     Options:
    134       --input PATH                 Directory containing crossword JSON files. Default: Data
    135       --output PATH                Path for the generated JSON word list. Default: Generated/word_list.json
    136       --counts-output PATH         Optional path for a JSON object of normalized answer frequencies.
    137       --min-length N               Minimum normalized answer length to include. Default: 2
    138       --max-length N               Maximum normalized answer length to include.
    139       --min-count N                Minimum number of appearances required to include an answer. Default: 1
    140       --length-thresholds          Use length-based appearance thresholds.
    141       --quality-filter             Use stricter count thresholds intended for generated fill.
    142       --easy-fill-filter           Use clue-aware filters for easier generated fill.
    143       --bad-words PATH             Optional JSON word list to exclude from the generated dictionary.
    144       --rejections-output PATH     Optional path for rejected word metadata from --easy-fill-filter.
    145       --quality-output PATH        Optional path for per-answer quality metadata from --easy-fill-filter.
    146       --weekdays LIST              Optional comma-separated weekday filter, e.g. Monday,Tuesday.
    147       --pretty                     Pretty-print generated JSON.
    148       -h, --help                   Show this help.
    149     """)
    150 }
    151 
    152 func parseOptions(_ arguments: [String]) throws -> Options {
    153     var options = Options()
    154     var index = 1
    155 
    156     func requireValue(_ name: String) throws -> String {
    157         guard index + 1 < arguments.count else {
    158             throw WordmakeError(description: "Missing value for \(name)")
    159         }
    160         index += 1
    161         return arguments[index]
    162     }
    163 
    164     while index < arguments.count {
    165         let argument = arguments[index]
    166         switch argument {
    167         case "--input":
    168             options.inputPath = try requireValue(argument)
    169         case "--output":
    170             options.outputPath = try requireValue(argument)
    171         case "--counts-output":
    172             options.countsOutputPath = try requireValue(argument)
    173         case "--min-length":
    174             guard let value = Int(try requireValue(argument)) else {
    175                 throw WordmakeError(description: "--min-length must be an integer")
    176             }
    177             options.minLength = value
    178         case "--max-length":
    179             guard let value = Int(try requireValue(argument)) else {
    180                 throw WordmakeError(description: "--max-length must be an integer")
    181             }
    182             options.maxLength = value
    183         case "--min-count":
    184             guard let value = Int(try requireValue(argument)) else {
    185                 throw WordmakeError(description: "--min-count must be an integer")
    186             }
    187             options.minCount = value
    188         case "--length-thresholds":
    189             options.lengthThresholds = true
    190         case "--quality-filter":
    191             options.qualityFilter = true
    192         case "--easy-fill-filter":
    193             options.easyFillFilter = true
    194         case "--bad-words":
    195             options.badWordsPath = try requireValue(argument)
    196         case "--rejections-output":
    197             options.rejectionsOutputPath = try requireValue(argument)
    198         case "--quality-output":
    199             options.qualityOutputPath = try requireValue(argument)
    200         case "--weekdays":
    201             options.weekdays = try parseWeekdays(try requireValue(argument))
    202         case "--pretty":
    203             options.pretty = true
    204         case "--help", "-h":
    205             printUsage()
    206             exit(0)
    207         default:
    208             throw WordmakeError(description: "Unknown argument: \(argument)")
    209         }
    210         index += 1
    211     }
    212 
    213     return options
    214 }
    215 
    216 func parseWeekdays(_ value: String) throws -> Set<Int> {
    217     var weekdays: Set<Int> = []
    218     for rawName in value.split(separator: ",") {
    219         let name = rawName.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
    220         guard !name.isEmpty else {
    221             continue
    222         }
    223         guard let weekday = weekdayLookup[name] else {
    224             throw WordmakeError(description: "Unknown weekday '\(rawName)'; expected one of: \(weekdayNames.joined(separator: ", "))")
    225         }
    226         weekdays.insert(weekday)
    227     }
    228     guard !weekdays.isEmpty else {
    229         throw WordmakeError(description: "--weekdays must include at least one weekday name")
    230     }
    231     return weekdays
    232 }
    233 
    234 func normalizeAnswer(_ text: String) -> String {
    235     String(text.unicodeScalars.compactMap { scalar in
    236         switch scalar.value {
    237         case 65...90:
    238             return Character(UnicodeScalar(scalar.value + 32)!)
    239         case 97...122:
    240             return Character(scalar)
    241         default:
    242             return nil
    243         }
    244     })
    245 }
    246 
    247 func stripClueNumber(_ clue: String) -> String {
    248     let range = NSRange(clue.startIndex..<clue.endIndex, in: clue)
    249     return clueNumberRegex.stringByReplacingMatches(in: clue, range: range, withTemplate: "")
    250         .trimmingCharacters(in: .whitespacesAndNewlines)
    251 }
    252 
    253 func regexMatches(_ regex: NSRegularExpression, _ text: String) -> Bool {
    254     let range = NSRange(text.startIndex..<text.endIndex, in: text)
    255     return regex.firstMatch(in: text, range: range) != nil
    256 }
    257 
    258 func jsonFiles(in directory: URL) throws -> [URL] {
    259     guard let enumerator = FileManager.default.enumerator(
    260         at: directory,
    261         includingPropertiesForKeys: [.isRegularFileKey],
    262         options: [.skipsHiddenFiles]
    263     ) else {
    264         return []
    265     }
    266     return try enumerator.compactMap { item -> URL? in
    267         guard let url = item as? URL, url.pathExtension == "json" else {
    268             return nil
    269         }
    270         let values = try url.resourceValues(forKeys: [.isRegularFileKey])
    271         return values.isRegularFile == true ? url : nil
    272     }
    273     .sorted { $0.path < $1.path }
    274 }
    275 
    276 func loadJSONObject(from url: URL) throws -> Any {
    277     let data = try Foundation.Data(contentsOf: url)
    278     return try JSONSerialization.jsonObject(with: data)
    279 }
    280 
    281 func normalizedAnswers(from puzzle: Any) -> [String] {
    282     guard let puzzle = puzzle as? [String: Any],
    283           let answers = puzzle["answers"] as? [String: Any] else {
    284         return []
    285     }
    286     return ["across", "down"].flatMap { direction -> [String] in
    287         guard let entries = answers[direction] as? [Any] else {
    288             return []
    289         }
    290         return entries.compactMap { entry in
    291             guard let answer = entry as? String else {
    292                 return nil
    293             }
    294             let normalized = normalizeAnswer(answer)
    295             return normalized.isEmpty ? nil : normalized
    296         }
    297     }
    298 }
    299 
    300 func normalizedEntries(from puzzle: Any) -> [(answer: String, clue: String)] {
    301     guard let puzzle = puzzle as? [String: Any],
    302           let answers = puzzle["answers"] as? [String: Any],
    303           let clues = puzzle["clues"] as? [String: Any] else {
    304         return []
    305     }
    306     return ["across", "down"].flatMap { direction -> [(answer: String, clue: String)] in
    307         guard let answerEntries = answers[direction] as? [Any],
    308               let clueEntries = clues[direction] as? [Any] else {
    309             return []
    310         }
    311         return zip(answerEntries, clueEntries).compactMap { answerEntry, clueEntry in
    312             guard let answer = answerEntry as? String else {
    313                 return nil
    314             }
    315             let normalized = normalizeAnswer(answer)
    316             guard !normalized.isEmpty else {
    317                 return nil
    318             }
    319             let clue = stripClueNumber(clueEntry as? String ?? "")
    320             return (normalized, clue)
    321         }
    322     }
    323 }
    324 
    325 func shouldInclude(_ word: String, minLength: Int, maxLength: Int?) -> Bool {
    326     if word.count < minLength {
    327         return false
    328     }
    329     if let maxLength, word.count > maxLength {
    330         return false
    331     }
    332     return true
    333 }
    334 
    335 func weekdayForPath(_ url: URL) -> Int? {
    336     let path = url.path.replacingOccurrences(of: "\\", with: "/")
    337     let range = NSRange(path.startIndex..<path.endIndex, in: path)
    338     guard let match = datePathRegex.firstMatch(in: path, range: range),
    339           match.numberOfRanges == 4,
    340           let yearRange = Range(match.range(at: 1), in: path),
    341           let monthRange = Range(match.range(at: 2), in: path),
    342           let dayRange = Range(match.range(at: 3), in: path),
    343           let year = Int(path[yearRange]),
    344           let month = Int(path[monthRange]),
    345           let day = Int(path[dayRange]) else {
    346         return nil
    347     }
    348 
    349     var components = DateComponents()
    350     components.calendar = Calendar(identifier: .gregorian)
    351     components.year = year
    352     components.month = month
    353     components.day = day
    354     guard let date = components.date,
    355           let calendarWeekday = components.calendar?.component(.weekday, from: date) else {
    356         return nil
    357     }
    358     return (calendarWeekday + 5) % 7
    359 }
    360 
    361 func isoDateForPath(_ url: URL) -> String? {
    362     let path = url.path.replacingOccurrences(of: "\\", with: "/")
    363     let range = NSRange(path.startIndex..<path.endIndex, in: path)
    364     guard let match = datePathRegex.firstMatch(in: path, range: range),
    365           match.numberOfRanges == 4,
    366           let yearRange = Range(match.range(at: 1), in: path),
    367           let monthRange = Range(match.range(at: 2), in: path),
    368           let dayRange = Range(match.range(at: 3), in: path) else {
    369         return nil
    370     }
    371 
    372     return "\(path[yearRange])-\(path[monthRange])-\(path[dayRange])"
    373 }
    374 
    375 func puzzleWeekday(_ puzzle: Any, path: URL) -> Int? {
    376     if let puzzle = puzzle as? [String: Any],
    377        let dow = puzzle["dow"] as? String,
    378        let weekday = weekdayLookup[dow.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()] {
    379         return weekday
    380     }
    381     return weekdayForPath(path)
    382 }
    383 
    384 func shouldIncludePuzzle(_ puzzle: Any, path: URL, weekdays: Set<Int>?) -> Bool {
    385     guard let weekdays else {
    386         return true
    387     }
    388     guard let weekday = puzzleWeekday(puzzle, path: path) else {
    389         return false
    390     }
    391     return weekdays.contains(weekday)
    392 }
    393 
    394 func requiredCount(word: String, flatMinCount: Int, lengthThresholds: Bool, qualityFilter: Bool) -> Int {
    395     if qualityFilter {
    396         if word.count < 5 {
    397             return 15
    398         }
    399         if word.count > 7 {
    400             return 1
    401         }
    402         return 3
    403     }
    404     guard lengthThresholds else {
    405         return flatMinCount
    406     }
    407     if word.count < 5 {
    408         return 10
    409     }
    410     if word.count > 7 {
    411         return 1
    412     }
    413     return 2
    414 }
    415 
    416 func easyRequiredCount(_ word: String) -> Int {
    417     if word.count <= 4 {
    418         return 15
    419     }
    420     if word.count <= 7 {
    421         return 3
    422     }
    423     return 1
    424 }
    425 
    426 func hasVowel(_ word: String) -> Bool {
    427     word.contains { "aeiouy".contains($0) }
    428 }
    429 
    430 func clueRatio(_ part: Int, _ whole: Int) -> Double {
    431     Double(part) / Double(max(whole, 1))
    432 }
    433 
    434 func hasSequentialLetterRun(_ word: String, minimumLength: Int = 4) -> Bool {
    435     let scalars = word.unicodeScalars.map(\.value)
    436     guard scalars.count >= minimumLength else {
    437         return false
    438     }
    439 
    440     var runLength = 1
    441     for index in 1..<scalars.count {
    442         if scalars[index] == scalars[index - 1] + 1 {
    443             runLength += 1
    444             if runLength >= minimumLength {
    445                 return true
    446             }
    447         } else {
    448             runLength = 1
    449         }
    450     }
    451     return false
    452 }
    453 
    454 func isInitialSurnameArtifact(_ word: String) -> Bool {
    455     let fusedNames: Set<String> = [
    456         "ed", "ek", "el", "em", "et",
    457         "jk", "jl", "jr",
    458         "kd", "kt",
    459         "mj",
    460         "oj",
    461         "tj",
    462         "wc"
    463     ]
    464 
    465     return fusedNames.contains { word.hasPrefix($0) && word.count >= 6 }
    466 }
    467 
    468 func isPhraseFragmentArtifact(_ word: String, fillBlankRatio: Double) -> Bool {
    469     guard word.count >= 5 else {
    470         return false
    471     }
    472     let prefixes = [
    473         "aan", "aand", "anold", "asof", "ated", "cani",
    474         "cular",
    475         "igota", "igot", "imeta", "imet", "ihave", "itisa", "itwas",
    476         "weare", "youre"
    477     ]
    478     if prefixes.contains(where: { word.hasPrefix($0) }) {
    479         return true
    480     }
    481     if word.count >= 10 {
    482         return fillBlankRatio >= 0.8
    483     }
    484     return fillBlankRatio >= 0.8 && word.count <= 7
    485 }
    486 
    487 func isVerySuspiciousOneOff(_ word: String, evidence: AnswerEvidence) -> Bool {
    488     guard word.count >= 8, evidence.count == 1, evidence.latestSeen != nil else {
    489         return false
    490     }
    491     if evidence.obscureNameClueCount > 0 {
    492         return true
    493     }
    494     if word.hasSuffix("c") || word.hasSuffix("v") || word.hasSuffix("x") {
    495         return true
    496     }
    497     return false
    498 }
    499 
    500 func isStaleOneOffLongFill(_ word: String, evidence: AnswerEvidence) -> Bool {
    501     guard word.count >= 8,
    502           evidence.count == 1,
    503           let latestSeen = evidence.latestSeen else {
    504         return false
    505     }
    506     return latestSeen < "2000-01-01"
    507 }
    508 
    509 func isPunnyOneOff(_ word: String, evidence: AnswerEvidence) -> Bool {
    510     guard word.count >= 8, evidence.count == 1 else {
    511         return false
    512     }
    513     return evidence.sampleClues.contains { $0.contains("?") }
    514 }
    515 
    516 func stricterShortRequiredCount(_ word: String) -> Int? {
    517     switch word.count {
    518     case ...2:
    519         return nil
    520     case 3:
    521         return 25
    522     case 4:
    523         return 12
    524     case 5:
    525         return 6
    526     default:
    527         return nil
    528     }
    529 }
    530 
    531 func easyRejectionReason(word: String, evidence: AnswerEvidence) -> String? {
    532     let length = word.count
    533     let count = evidence.count
    534 
    535     if easyAllowedWords.contains(word) {
    536         return nil
    537     }
    538 
    539     if crosswordeseWords.contains(word) {
    540         return "known crosswordese/glue fill"
    541     }
    542 
    543     let badRatio = clueRatio(evidence.badClueCount, count)
    544     let obscureNameRatio = clueRatio(evidence.obscureNameClueCount, count)
    545     let fillBlankRatio = clueRatio(evidence.fillBlankCount, count)
    546     let foreignLanguageRatio = clueRatio(evidence.foreignLanguageClueCount, count)
    547     let cluePartRatio = clueRatio(evidence.cluePartCount, count)
    548 
    549     if count < easyRequiredCount(word) {
    550         return "count \(count) below easy threshold \(easyRequiredCount(word))"
    551     }
    552 
    553     if let stricterCount = stricterShortRequiredCount(word),
    554        !commonShortWords.contains(word),
    555        count < stricterCount {
    556         return "short entry count \(count) below generated-fill threshold \(stricterCount)"
    557     }
    558 
    559     if length <= 5 && count < 80 && evidence.badClueCount >= 2 && badRatio >= 0.35 {
    560         return "short entry mostly clued as abbreviation/variant/form"
    561     }
    562 
    563     if length <= 5 &&
    564        !commonShortWords.contains(word),
    565        evidence.latestSeen.map({ $0 < "2000-01-01" }) == true {
    566         return "stale short fill"
    567     }
    568 
    569     if hasSequentialLetterRun(word) {
    570         return "alphabet-run artifact"
    571     }
    572 
    573     if length >= 5 && !hasVowel(word) {
    574         return "long entry with no vowel"
    575     }
    576 
    577     if isInitialSurnameArtifact(word) {
    578         return "initial-plus-name artifact"
    579     }
    580 
    581     if isPhraseFragmentArtifact(word, fillBlankRatio: fillBlankRatio) {
    582         return "phrase fragment artifact"
    583     }
    584 
    585     if evidence.themeClueCount > 0 && (count <= 2 || clueRatio(evidence.themeClueCount, count) >= 0.5) {
    586         return "one-off theme clue artifact"
    587     }
    588 
    589     if evidence.continuationClueCount > 0 && (count <= 3 || clueRatio(evidence.continuationClueCount, count) >= 0.5) {
    590         return "cross-reference or quote-continuation artifact"
    591     }
    592 
    593     if isStaleOneOffLongFill(word, evidence: evidence) {
    594         return "stale one-off long fill"
    595     }
    596 
    597     if isVerySuspiciousOneOff(word, evidence: evidence) {
    598         return "one-off long artifact"
    599     }
    600 
    601     if isPunnyOneOff(word, evidence: evidence) {
    602         return "one-off pun/theme artifact"
    603     }
    604 
    605     if length <= 5 && evidence.badClueCount >= 2 && badRatio >= 0.45 {
    606         return "entry mostly clued as abbreviation/variant/form"
    607     }
    608 
    609     if length <= 5 && evidence.obscureNameClueCount >= 2 && obscureNameRatio >= 0.5 {
    610         return "short entry mostly clued as obscure name/title"
    611     }
    612 
    613     if length <= 4 && commonShortWords.contains(word) {
    614         return nil
    615     }
    616 
    617     if length <= 4 && veryWeakShortWords.contains(word) {
    618         return "known weak short fill"
    619     }
    620 
    621     if length <= 4 && !hasVowel(word) {
    622         return "short entry with no vowel"
    623     }
    624 
    625     if length <= 5 && shortExactFragments.contains(word) {
    626         return "short suffix-like fragment"
    627     }
    628 
    629     if evidence.cluePartCount > 0 && (count <= 3 || cluePartRatio >= 0.5) {
    630         return "entry mostly clued as one part of a longer answer"
    631     }
    632 
    633     if length <= 5 &&
    634         evidence.foreignLanguageClueCount >= 2 &&
    635         (count <= 10 || foreignLanguageRatio >= 0.5) {
    636         return "entry mostly clued as foreign-language translation"
    637     }
    638 
    639     if length <= 4 && evidence.badClueCount >= 2 && badRatio >= 0.18 {
    640         return "short entry mostly clued as abbreviation/variant/foreign/form"
    641     }
    642 
    643     if length <= 4 && evidence.obscureNameClueCount >= 4 && obscureNameRatio >= 0.35 {
    644         return "short entry mostly clued as obscure name"
    645     }
    646 
    647     if length <= 4 && count < 40 && evidence.fillBlankCount >= 8 && fillBlankRatio >= 0.55 {
    648         return "short entry mostly dependent on fill-in-the-blank clues"
    649     }
    650 
    651     if length <= 3 && count < 60 && (badRatio + obscureNameRatio + fillBlankRatio) >= 0.45 {
    652         return "rare short entry with weak clue evidence"
    653     }
    654 
    655     return nil
    656 }
    657 
    658 func loadAnswers(inputURL: URL, options: Options) throws -> (counts: [String: Int], filesRead: Int, filesUsed: Int) {
    659     var counts: [String: Int] = [:]
    660     var filesRead = 0
    661     var filesUsed = 0
    662 
    663     for path in try jsonFiles(in: inputURL) {
    664         do {
    665             let puzzle = try loadJSONObject(from: path)
    666             filesRead += 1
    667             guard shouldIncludePuzzle(puzzle, path: path, weekdays: options.weekdays) else {
    668                 continue
    669             }
    670             filesUsed += 1
    671             for answer in normalizedAnswers(from: puzzle) where shouldInclude(answer, minLength: options.minLength, maxLength: options.maxLength) {
    672                 counts[answer, default: 0] += 1
    673             }
    674         } catch {
    675             fputs("Skipping \(path.path): \(error)\n", stderr)
    676         }
    677     }
    678 
    679     return (counts, filesRead, filesUsed)
    680 }
    681 
    /// Scans the puzzle JSON files found under `inputURL` and accumulates clue evidence per answer.
    ///
    /// A file that fails to load is reported on stderr and skipped; it is not counted as read.
    /// A puzzle rejected by `shouldIncludePuzzle` counts as read but not as used.
    ///
    /// - Parameters:
    ///   - inputURL: Location passed to `jsonFiles(in:)` to enumerate the puzzle files.
    ///   - options: Supplies the weekday filter and the answer length bounds.
    /// - Returns: Evidence keyed by answer (as produced by `normalizedEntries(from:)`), together with
    ///   the number of files successfully read and the number that passed the puzzle filter.
    /// - Throws: Whatever `jsonFiles(in:)` throws while enumerating the input.
    func loadAnswerEvidence(inputURL: URL, options: Options) throws -> (evidence: [String: AnswerEvidence], filesRead: Int, filesUsed: Int) {
        var evidenceByAnswer: [String: AnswerEvidence] = [:]
        var readCount = 0
        var usedCount = 0

        for fileURL in try jsonFiles(in: inputURL) {
            do {
                let puzzle = try loadJSONObject(from: fileURL)
                readCount += 1
                if !shouldIncludePuzzle(puzzle, path: fileURL, weekdays: options.weekdays) {
                    continue
                }
                let puzzleDate = isoDateForPath(fileURL)
                usedCount += 1

                for (answer, clue) in normalizedEntries(from: puzzle) {
                    guard shouldInclude(answer, minLength: options.minLength, maxLength: options.maxLength) else {
                        continue
                    }

                    var record = evidenceByAnswer[answer] ?? AnswerEvidence()
                    record.count += 1

                    // Keep the greatest date seen so far; the first dated sighting always wins over "none".
                    if let puzzleDate {
                        if let previous = record.latestSeen {
                            if puzzleDate > previous {
                                record.latestSeen = puzzleDate
                            }
                        } else {
                            record.latestSeen = puzzleDate
                        }
                    }

                    // Retain at most five non-empty clues as samples.
                    if !clue.isEmpty, record.sampleClues.count < 5 {
                        record.sampleClues.append(clue)
                    }

                    // One counter per clue pattern; a clue may bump several of them.
                    record.badClueCount += regexMatches(badClueRegex, clue) ? 1 : 0
                    record.obscureNameClueCount += regexMatches(obscureNameClueRegex, clue) ? 1 : 0
                    record.fillBlankCount += regexMatches(fillBlankRegex, clue) ? 1 : 0
                    record.foreignLanguageClueCount += regexMatches(foreignLanguageClueRegex, clue) ? 1 : 0
                    record.cluePartCount += regexMatches(cluePartRegex, clue) ? 1 : 0
                    record.continuationClueCount += regexMatches(continuationClueRegex, clue) ? 1 : 0
                    record.themeClueCount += regexMatches(themeClueRegex, clue) ? 1 : 0

                    evidenceByAnswer[answer] = record
                }
            } catch {
                fputs("Skipping \(fileURL.path): \(error)\n", stderr)
            }
        }

        return (evidenceByAnswer, readCount, usedCount)
    }
    737 
    /// Reads a JSON array from `path` and returns its string elements as a set of normalized words.
    ///
    /// Elements that are not strings, and strings that `normalizeAnswer` reduces to the empty
    /// string, are dropped.
    ///
    /// - Parameter path: File-system path of the JSON file.
    /// - Returns: The set of non-empty normalized words.
    /// - Throws: Errors from `loadJSONObject(from:)`, or a `WordmakeError` when the top-level JSON
    ///   value is not an array.
    func loadBadWords(path: String) throws -> Set<String> {
        let fileURL = URL(fileURLWithPath: path)
        guard let rawItems = try loadJSONObject(from: fileURL) as? [Any] else {
            throw WordmakeError(description: "expected JSON array")
        }

        var badWords = Set<String>()
        for case let text as String in rawItems {
            let cleaned = normalizeAnswer(text)
            if !cleaned.isEmpty {
                badWords.insert(cleaned)
            }
        }
        return badWords
    }
    751 
    /// Returns `dictionary` with the same keys and values.
    ///
    /// A Swift `Dictionary` has no defined key order, so rebuilding one from sorted keys (as this
    /// function used to, with a force-unwrapped lookup per key) cannot make it "sorted". Key order
    /// in the written files comes from the `.sortedKeys` option used by `writeJSON`. The function
    /// is kept so existing call sites continue to compile.
    ///
    /// - Parameter dictionary: The dictionary to pass through.
    /// - Returns: A dictionary equal to `dictionary`.
    func sortedDictionary(_ dictionary: [String: Any]) -> [String: Any] {
        dictionary
    }
    755 
    /// Builds the JSON-ready dictionary describing one answer's evidence.
    ///
    /// - Parameters:
    ///   - entry: The accumulated evidence for the answer.
    ///   - rejectionReason: Why the answer was rejected, or `nil` if it was not.
    /// - Returns: A dictionary with the counters, sample clues, `latest_seen`, and
    ///   `rejection_reason`; the last two hold `NSNull` when the corresponding value is `nil`.
    func evidenceObject(_ entry: AnswerEvidence, rejectionReason: String?) -> [String: Any] {
        // Absent optionals are written as NSNull so the keys are always present.
        let latestSeen: Any
        if let seen = entry.latestSeen {
            latestSeen = seen
        } else {
            latestSeen = NSNull()
        }

        let reason: Any
        if let rejectionReason {
            reason = rejectionReason
        } else {
            reason = NSNull()
        }

        let fields: [(String, Any)] = [
            ("count", entry.count),
            ("bad_clue_count", entry.badClueCount),
            ("obscure_name_clue_count", entry.obscureNameClueCount),
            ("fill_blank_count", entry.fillBlankCount),
            ("foreign_language_clue_count", entry.foreignLanguageClueCount),
            ("clue_part_count", entry.cluePartCount),
            ("continuation_clue_count", entry.continuationClueCount),
            ("theme_clue_count", entry.themeClueCount),
            ("latest_seen", latestSeen),
            ("sample_clues", entry.sampleClues),
            ("rejection_reason", reason)
        ]
        return Dictionary(uniqueKeysWithValues: fields)
    }
    771 
    /// Builds the JSON-ready dictionary recorded for a rejected answer.
    ///
    /// - Parameters:
    ///   - reason: Why the answer was rejected.
    ///   - entry: The accumulated evidence for the answer.
    /// - Returns: A dictionary with `reason`, the counters, the sample clues, and `latest_seen`
    ///   (`NSNull` when no date is known).
    func rejectionObject(reason: String, entry: AnswerEvidence) -> [String: Any] {
        var object: [String: Any] = [:]
        object.updateValue(reason, forKey: "reason")
        object.updateValue(entry.count, forKey: "count")
        object.updateValue(entry.badClueCount, forKey: "bad_clue_count")
        object.updateValue(entry.obscureNameClueCount, forKey: "obscure_name_clue_count")
        object.updateValue(entry.fillBlankCount, forKey: "fill_blank_count")
        object.updateValue(entry.foreignLanguageClueCount, forKey: "foreign_language_clue_count")
        object.updateValue(entry.cluePartCount, forKey: "clue_part_count")
        object.updateValue(entry.continuationClueCount, forKey: "continuation_clue_count")
        object.updateValue(entry.themeClueCount, forKey: "theme_clue_count")
        // A missing date is stored as NSNull so the key is always present.
        if let seen = entry.latestSeen {
            object.updateValue(seen, forKey: "latest_seen")
        } else {
            object.updateValue(NSNull(), forKey: "latest_seen")
        }
        object.updateValue(entry.sampleClues, forKey: "sample_clues")
        return object
    }
    787 
    /// Serializes `value` as JSON with sorted keys and writes it, followed by a newline, to `path`.
    ///
    /// The parent directory is created (with intermediates) before serialization unless its path
    /// is exactly ".".
    ///
    /// - Parameters:
    ///   - value: A JSON-compatible object accepted by `JSONSerialization`.
    ///   - path: Destination file path.
    ///   - pretty: When `true`, the output is pretty-printed.
    /// - Throws: Errors from directory creation, JSON serialization, or the file write.
    func writeJSON(_ value: Any, to path: String, pretty: Bool) throws {
        let destination = URL(fileURLWithPath: path)
        let parent = destination.deletingLastPathComponent()
        if parent.path != "." {
            try FileManager.default.createDirectory(at: parent, withIntermediateDirectories: true)
        }

        let writingOptions: JSONSerialization.WritingOptions = pretty
            ? [.sortedKeys, .prettyPrinted]
            : [.sortedKeys]

        var payload = try JSONSerialization.data(withJSONObject: value, options: writingOptions)
        payload.append(0x0A) // trailing "\n"
        try payload.write(to: destination)
    }
    803 
    /// Checks the parsed options before any work is done, throwing on the first violated rule.
    ///
    /// The checks run in this order: the input path must be an existing directory; `minLength`
    /// must be at least 1; `maxLength`, when set, must not be below `minLength`; `minCount` must be
    /// at least 1; at most one filter mode may be selected; and the rejection/quality outputs
    /// require the easy fill filter.
    ///
    /// - Parameters:
    ///   - options: The parsed command-line options.
    ///   - inputURL: File URL expected to point at the input directory.
    /// - Throws: A `WordmakeError` describing the first failed check.
    func validate(_ options: Options, inputURL: URL) throws {
        var isDirectory: ObjCBool = false
        let exists = FileManager.default.fileExists(atPath: inputURL.path, isDirectory: &isDirectory)
        if !(exists && isDirectory.boolValue) {
            throw WordmakeError(description: "Input directory not found: \(options.inputPath)")
        }

        if options.minLength < 1 {
            throw WordmakeError(description: "--min-length must be at least 1")
        }
        if let maxLength = options.maxLength {
            guard maxLength >= options.minLength else {
                throw WordmakeError(description: "--max-length must be greater than or equal to --min-length")
            }
        }
        if options.minCount < 1 {
            throw WordmakeError(description: "--min-count must be at least 1")
        }

        // The three filter modes are mutually exclusive.
        let filterFlags = [options.lengthThresholds, options.qualityFilter, options.easyFillFilter]
        var selectedFilters = 0
        for flag in filterFlags where flag {
            selectedFilters += 1
        }
        if selectedFilters > 1 {
            throw WordmakeError(description: "Choose only one of --length-thresholds, --quality-filter, or --easy-fill-filter")
        }

        let wantsEasyFillOutputs = options.rejectionsOutputPath != nil || options.qualityOutputPath != nil
        if wantsEasyFillOutputs && !options.easyFillFilter {
            throw WordmakeError(description: "--rejections-output and --quality-output require --easy-fill-filter")
        }
    }
    826 
    /// Runs the tool end to end: parses and validates the command line, loads the optional
    /// bad-word list, collects answers from the puzzle files, writes the requested JSON outputs,
    /// and prints a summary to stdout.
    ///
    /// With the easy fill filter, every answer gets a quality record; answers on the bad-word list
    /// are dropped without a rejection record, and the remaining answers are either accepted or
    /// recorded as rejected with the reason from `easyRejectionReason`. Otherwise an answer is
    /// accepted when it is not a bad word and its count reaches `requiredCount`.
    ///
    /// - Throws: Errors from option parsing, validation, loading, or writing. A failure to read
    ///   the `--bad-words` file is rethrown as a `WordmakeError` that names the path.
    func run() throws {
        let options = try parseOptions(CommandLine.arguments)
        let inputURL = URL(fileURLWithPath: options.inputPath)
        try validate(options, inputURL: inputURL)

        let badWords: Set<String>
        if let badWordsPath = options.badWordsPath {
            do {
                badWords = try loadBadWords(path: badWordsPath)
            } catch {
                throw WordmakeError(description: "Could not read --bad-words \(badWordsPath): \(error)")
            }
        } else {
            badWords = []
        }

        // Each of these is assigned exactly once, in whichever branch below runs.
        let counts: [String: Int]
        let filesRead: Int
        let filesUsed: Int
        let words: [String]
        var easyRejections: [String: Any] = [:]
        var wordQuality: [String: Any] = [:]

        if options.easyFillFilter {
            let result = try loadAnswerEvidence(inputURL: inputURL, options: options)
            filesRead = result.filesRead
            filesUsed = result.filesUsed
            counts = result.evidence.mapValues { $0.count }

            var accepted: [String] = []
            // Visit answers in key order so `accepted` ends up sorted.
            for (word, entry) in result.evidence.sorted(by: { $0.key < $1.key }) {
                let reason = easyRejectionReason(word: word, evidence: entry)
                let isBadWord = badWords.contains(word)
                let finalReason = isBadWord ? "bad word" : reason
                wordQuality[word] = evidenceObject(entry, rejectionReason: finalReason)
                if isBadWord {
                    // Bad words appear in the quality output only, never in the rejections output.
                    continue
                }
                if let reason {
                    easyRejections[word] = rejectionObject(reason: reason, entry: entry)
                } else {
                    accepted.append(word)
                }
            }
            words = accepted
        } else {
            let result = try loadAnswers(inputURL: inputURL, options: options)
            counts = result.counts
            filesRead = result.filesRead
            filesUsed = result.filesUsed
            words = result.counts
                .filter { word, count in
                    !badWords.contains(word) &&
                        count >= requiredCount(
                            word: word,
                            flatMinCount: options.minCount,
                            lengthThresholds: options.lengthThresholds,
                            qualityFilter: options.qualityFilter
                        )
                }
                .keys
                .sorted()
        }

        try writeJSON(words, to: options.outputPath, pretty: options.pretty)
        if let countsOutputPath = options.countsOutputPath {
            // `writeJSON` serializes with sorted keys, so the dictionary is passed as-is.
            try writeJSON(counts, to: countsOutputPath, pretty: options.pretty)
        }
        if let rejectionsOutputPath = options.rejectionsOutputPath {
            try writeJSON(sortedDictionary(easyRejections), to: rejectionsOutputPath, pretty: options.pretty)
        }
        if let qualityOutputPath = options.qualityOutputPath {
            try writeJSON(sortedDictionary(wordQuality), to: qualityOutputPath, pretty: options.pretty)
        }

        print("Read \(filesRead) puzzle files")
        if let weekdays = options.weekdays {
            let selected = weekdays.sorted().map { weekdayNames[$0] }.joined(separator: ", ")
            print("Used \(filesUsed) \(selected) puzzle files")
        }
        print("Wrote \(words.count) unique answers to \(options.outputPath)")
        if options.lengthThresholds {
            print("Included answers using length thresholds: <5 letters >=10, 5-7 letters >=2, >7 letters >=1")
        } else if options.qualityFilter {
            print("Included answers using quality filter: <5 letters >=15, 5-7 letters >=3, >7 letters >=1")
        } else if options.easyFillFilter {
            print("Included answers using clue-aware easy fill filter")
            print("Rejected \(easyRejections.count) answers with easy fill filter")
        } else if options.minCount > 1 {
            print("Included answers appearing at least \(options.minCount) times")
        }
        if let badWordsPath = options.badWordsPath {
            print("Excluded \(badWords.count) bad words from \(badWordsPath)")
        }
        if let countsOutputPath = options.countsOutputPath {
            print("Wrote answer frequencies to \(countsOutputPath)")
        }
        if let rejectionsOutputPath = options.rejectionsOutputPath {
            print("Wrote easy-filter rejections to \(rejectionsOutputPath)")
        }
        if let qualityOutputPath = options.qualityOutputPath {
            print("Wrote word quality metadata to \(qualityOutputPath)")
        }
    }
    931 
    // Program entry: run the tool; on any failure, report it on stderr and exit with status 1.
    do {
        try run()
    } catch let failure {
        fputs("Wordmake: \(failure)\n", stderr)
        exit(1)
    }