crossmate

A collaborative crossword app for iOS
Log | Files | Refs | LICENSE

main.swift (28531B)


      1 import Foundation
      2 
      3 let weekdayNames = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
      4 let weekdayLookup = Dictionary(uniqueKeysWithValues: weekdayNames.enumerated().map { ($0.element.lowercased(), $0.offset) })
      5 
      6 let veryWeakShortWords: Set<String> = [
      7     "aah", "aal", "aar", "aba", "abe", "abo", "abu", "ado", "adz", "aer",
      8     "ane", "ani", "ara", "ase", "ato", "ava", "een", "eme", "ene", "eon",
      9     "ere", "ese", "ess", "est", "eta", "ete", "eth", "ier", "iii", "ile",
     10     "ita", "ite", "lai", "lar", "mee", "nee", "oer", "ona", "onea",
     11     "ort", "ose", "oto", "ree", "rei", "ria", "ser", "tae", "tba", "tbs",
     12     "tko", "tnt", "tpe", "tsp", "ute", "wye", "xed", "xii",
     13     "ance", "elee", "orle", "eder", "ssts"
     14 ]
     15 
     16 let commonShortWords: Set<String> = [
     17     "able", "acre", "act", "acts", "age", "aged", "ages", "air", "airs",
     18     "ale", "ales", "all", "ally", "also", "and", "ant", "ante", "anti",
     19     "ants", "ape", "apes", "area", "are", "ark", "arks", "arm", "arms",
     20     "art", "arts", "ash", "ate", "aunt", "awe", "awed", "awes", "bad",
     21     "bag", "bags", "bar", "bare", "bars", "base", "bash", "bat", "bats",
     22     "bed", "beds", "bee", "beer", "bees", "bell", "belt", "bent", "best",
     23     "bet", "bets", "bite", "blue", "bus", "but", "can", "cap", "car",
     24     "care", "case", "cat", "cats", "clay", "cod", "coil", "cold", "cord",
     25     "cow", "cue", "curl", "cut", "cuts", "deer", "dog", "dogs", "dry",
     26     "ear", "ears", "eat", "eats", "eel", "eels", "era", "eras", "eye",
     27     "eyes", "far", "farm", "fast", "fat", "feet", "few", "fire", "fish",
     28     "fit", "fits", "free", "fun", "game", "gas", "gate", "get", "gets",
     29     "goat", "good", "hair", "hand", "hard", "hat", "hate", "heat", "hen",
     30     "her", "here", "hit", "hits", "ice", "idea", "ink", "inn", "iron",
     31     "its", "kid", "kids", "knee", "land", "last", "late", "law", "lead",
     32     "left", "let", "lets", "lie", "life", "line", "long", "lose", "lost",
     33     "man", "many", "map", "men", "mile", "mind", "more", "name", "near",
     34     "new", "note", "oar", "oars", "oil", "old", "one", "open", "ore",
     35     "ores", "our", "out", "over", "own", "pan", "part", "past", "pen",
     36     "pet", "pets", "pie", "pin", "pins", "play", "pot", "red", "rest",
     37     "rice", "ring", "road", "root", "rose", "run", "runs", "sad", "salt",
     38     "sat", "save", "sea", "seat", "see", "seed", "sees", "set", "sets",
     39     "shoe", "side", "sit", "site", "sky", "snow", "son", "song", "star",
     40     "step", "stop", "sun", "take", "tea", "team", "ten", "tens", "test",
     41     "the", "tie", "ties", "time", "tree", "try", "use", "used", "uses",
     42     "war", "way", "west", "wet", "win", "wine", "word", "work", "yard",
     43     "year", "yes"
     44 ]
     45 
     46 let shortExactFragments: Set<String> = [
     47     "ance", "ence", "enne", "ette", "ible", "iest", "ism", "isms", "itis"
     48 ]
     49 
     50 let crosswordeseWords: Set<String> = [
     51     "adue", "anent", "anet", "antae", "atee", "atit", "atle", "bassi",
     52     "brocatelle", "ecce", "ente", "esne", "etes", "gnar", "imset",
     53     "labile", "laic", "laram", "mtida", "neer", "onor", "relee", "rete",
     54     "terete", "togae", "ulee",
     55     "aereo", "alai", "aretes", "aril", "atri", "aussi", "eaude", "ees",
     56     "emeer", "enote", "erian", "ers", "esnes", "evoe", "ilia", "leisterer",
     57     "mesne", "olea", "ooo", "oss", "ranee", "rorem", "seral",
     58     "snee", "soras", "sri", "tse", "yeses",
     59     "abas", "adano", "agena", "agin", "agorae", "ams", "ans", "aper",
     60     "arear", "bretharte", "donees", "eloi", "endat", "etnas", "etui",
     61     "hammshams", "nin", "oas", "ogees", "olan", "ont", "ossa", "otra",
     62     "poetaster", "rainintheface", "rea", "retia", "roone", "rpi", "seta",
     63     "sisi", "soli", "tet", "tieto", "totoe", "tra", "yser"
     64 ]
     65 
     66 let easyAllowedWords: Set<String> = [
     67     "n", "s", "e", "w", "ne", "nw", "se", "sw",
     68     "nne", "nnw", "ene", "ese", "sse", "ssw", "wsw", "wnw"
     69 ]
     70 
     71 struct Options {
     72     var inputPath = "Data"
     73     var outputPath = "Generated/word_list.json"
     74     var countsOutputPath: String?
     75     var minLength = 2
     76     var maxLength: Int?
     77     var minCount = 1
     78     var lengthThresholds = false
     79     var qualityFilter = false
     80     var easyFillFilter = false
     81     var badWordsPath: String?
     82     var rejectionsOutputPath: String?
     83     var qualityOutputPath: String?
     84     var weekdays: Set<Int>?
     85     var pretty = false
     86 }
     87 
     88 struct AnswerEvidence {
     89     var count = 0
     90     var badClueCount = 0
     91     var obscureNameClueCount = 0
     92     var fillBlankCount = 0
     93     var foreignLanguageClueCount = 0
     94     var sampleClues: [String] = []
     95 }
     96 
     97 struct WordmakeError: Error, CustomStringConvertible {
     98     let description: String
     99 }
    100 
    101 let badClueRegex = try! NSRegularExpression(
    102     pattern: #"\b(abbr|var|prefix|suffix|archaic|poetic|dial|slang|obs|old-style|old fashioned|scot|irish|brit|fr|french|ger|german|lat|latin|span|spanish|ital|italian|port|portuguese|hebr|hebrew|yiddish|jap|japanese|inits|initials|letters|compass point|bearing|direction|shoe width|monogram|degs|degree|of yore|old|olden|heraldry|genus|legal|in law|anatomical|serf|serfs|slave|slaves|vassal|pilaster|opera voices|persian fairy|mideast|eastern v\.?i\.?p|arab prince|arab chieftain|hindu title|indian princess|rajah|dirk|knife of old|seed covering|seed casing|seed envelope|hip bones|pelvic bones|bitter vetch|marsh birds)\b"#,
    103     options: [.caseInsensitive]
    104 )
    105 let obscureNameClueRegex = try! NSRegularExpression(
    106     pattern: #"\b(actor|actress|author|composer|singer|poet|novelist|painter|artist|playwright|violinist|pianist|conductor|golfer|pitcher|baseballer|songwriter|politician|senator|governor)\b"#,
    107     options: [.caseInsensitive]
    108 )
    109 let fillBlankRegex = try! NSRegularExpression(pattern: #"_{2,}|\.{3,}"#)
    110 let foreignLanguageClueRegex = try! NSRegularExpression(
    111     pattern: #":\s*(?:Sp|Span|Fr|Ger|Germ|It|Ital|Lat|Port|Heb|Yid|Jap|Rus|Gr|Gk|Swed|Norw|Dan|Arab|Chin|Hind|Skt|Turk|Finn)\.|,\s+in\s+[A-Z][a-z]"#
    112 )
    113 let clueNumberRegex = try! NSRegularExpression(pattern: #"^\s*\d+\.\s*"#)
    114 let datePathRegex = try! NSRegularExpression(pattern: #"(\d{4})/(\d{2})/(\d{2})\.json$"#)
    115 
    116 func printUsage() {
    117     print("""
    118     Usage: Wordmake [options]
    119 
    120     Options:
    121       --input PATH                 Directory containing crossword JSON files. Default: Data
    122       --output PATH                Path for the generated JSON word list. Default: Generated/word_list.json
    123       --counts-output PATH         Optional path for a JSON object of normalized answer frequencies.
    124       --min-length N               Minimum normalized answer length to include. Default: 2
    125       --max-length N               Maximum normalized answer length to include.
    126       --min-count N                Minimum number of appearances required to include an answer. Default: 1
    127       --length-thresholds          Use length-based appearance thresholds.
    128       --quality-filter             Use stricter count thresholds intended for generated fill.
    129       --easy-fill-filter           Use clue-aware filters for easier generated fill.
    130       --bad-words PATH             Optional JSON word list to exclude from the generated dictionary.
    131       --rejections-output PATH     Optional path for rejected word metadata from --easy-fill-filter.
    132       --quality-output PATH        Optional path for per-answer quality metadata from --easy-fill-filter.
    133       --weekdays LIST              Optional comma-separated weekday filter, e.g. Monday,Tuesday.
    134       --pretty                     Pretty-print generated JSON.
    135       -h, --help                   Show this help.
    136     """)
    137 }
    138 
    139 func parseOptions(_ arguments: [String]) throws -> Options {
    140     var options = Options()
    141     var index = 1
    142 
    143     func requireValue(_ name: String) throws -> String {
    144         guard index + 1 < arguments.count else {
    145             throw WordmakeError(description: "Missing value for \(name)")
    146         }
    147         index += 1
    148         return arguments[index]
    149     }
    150 
    151     while index < arguments.count {
    152         let argument = arguments[index]
    153         switch argument {
    154         case "--input":
    155             options.inputPath = try requireValue(argument)
    156         case "--output":
    157             options.outputPath = try requireValue(argument)
    158         case "--counts-output":
    159             options.countsOutputPath = try requireValue(argument)
    160         case "--min-length":
    161             guard let value = Int(try requireValue(argument)) else {
    162                 throw WordmakeError(description: "--min-length must be an integer")
    163             }
    164             options.minLength = value
    165         case "--max-length":
    166             guard let value = Int(try requireValue(argument)) else {
    167                 throw WordmakeError(description: "--max-length must be an integer")
    168             }
    169             options.maxLength = value
    170         case "--min-count":
    171             guard let value = Int(try requireValue(argument)) else {
    172                 throw WordmakeError(description: "--min-count must be an integer")
    173             }
    174             options.minCount = value
    175         case "--length-thresholds":
    176             options.lengthThresholds = true
    177         case "--quality-filter":
    178             options.qualityFilter = true
    179         case "--easy-fill-filter":
    180             options.easyFillFilter = true
    181         case "--bad-words":
    182             options.badWordsPath = try requireValue(argument)
    183         case "--rejections-output":
    184             options.rejectionsOutputPath = try requireValue(argument)
    185         case "--quality-output":
    186             options.qualityOutputPath = try requireValue(argument)
    187         case "--weekdays":
    188             options.weekdays = try parseWeekdays(try requireValue(argument))
    189         case "--pretty":
    190             options.pretty = true
    191         case "--help", "-h":
    192             printUsage()
    193             exit(0)
    194         default:
    195             throw WordmakeError(description: "Unknown argument: \(argument)")
    196         }
    197         index += 1
    198     }
    199 
    200     return options
    201 }
    202 
    203 func parseWeekdays(_ value: String) throws -> Set<Int> {
    204     var weekdays: Set<Int> = []
    205     for rawName in value.split(separator: ",") {
    206         let name = rawName.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
    207         guard !name.isEmpty else {
    208             continue
    209         }
    210         guard let weekday = weekdayLookup[name] else {
    211             throw WordmakeError(description: "Unknown weekday '\(rawName)'; expected one of: \(weekdayNames.joined(separator: ", "))")
    212         }
    213         weekdays.insert(weekday)
    214     }
    215     guard !weekdays.isEmpty else {
    216         throw WordmakeError(description: "--weekdays must include at least one weekday name")
    217     }
    218     return weekdays
    219 }
    220 
    221 func normalizeAnswer(_ text: String) -> String {
    222     String(text.unicodeScalars.compactMap { scalar in
    223         switch scalar.value {
    224         case 65...90:
    225             return Character(UnicodeScalar(scalar.value + 32)!)
    226         case 97...122:
    227             return Character(scalar)
    228         default:
    229             return nil
    230         }
    231     })
    232 }
    233 
    234 func stripClueNumber(_ clue: String) -> String {
    235     let range = NSRange(clue.startIndex..<clue.endIndex, in: clue)
    236     return clueNumberRegex.stringByReplacingMatches(in: clue, range: range, withTemplate: "")
    237         .trimmingCharacters(in: .whitespacesAndNewlines)
    238 }
    239 
    240 func regexMatches(_ regex: NSRegularExpression, _ text: String) -> Bool {
    241     let range = NSRange(text.startIndex..<text.endIndex, in: text)
    242     return regex.firstMatch(in: text, range: range) != nil
    243 }
    244 
    245 func jsonFiles(in directory: URL) throws -> [URL] {
    246     guard let enumerator = FileManager.default.enumerator(
    247         at: directory,
    248         includingPropertiesForKeys: [.isRegularFileKey],
    249         options: [.skipsHiddenFiles]
    250     ) else {
    251         return []
    252     }
    253     return try enumerator.compactMap { item -> URL? in
    254         guard let url = item as? URL, url.pathExtension == "json" else {
    255             return nil
    256         }
    257         let values = try url.resourceValues(forKeys: [.isRegularFileKey])
    258         return values.isRegularFile == true ? url : nil
    259     }
    260     .sorted { $0.path < $1.path }
    261 }
    262 
    263 func loadJSONObject(from url: URL) throws -> Any {
    264     let data = try Foundation.Data(contentsOf: url)
    265     return try JSONSerialization.jsonObject(with: data)
    266 }
    267 
    268 func normalizedAnswers(from puzzle: Any) -> [String] {
    269     guard let puzzle = puzzle as? [String: Any],
    270           let answers = puzzle["answers"] as? [String: Any] else {
    271         return []
    272     }
    273     return ["across", "down"].flatMap { direction -> [String] in
    274         guard let entries = answers[direction] as? [Any] else {
    275             return []
    276         }
    277         return entries.compactMap { entry in
    278             guard let answer = entry as? String else {
    279                 return nil
    280             }
    281             let normalized = normalizeAnswer(answer)
    282             return normalized.isEmpty ? nil : normalized
    283         }
    284     }
    285 }
    286 
    287 func normalizedEntries(from puzzle: Any) -> [(answer: String, clue: String)] {
    288     guard let puzzle = puzzle as? [String: Any],
    289           let answers = puzzle["answers"] as? [String: Any],
    290           let clues = puzzle["clues"] as? [String: Any] else {
    291         return []
    292     }
    293     return ["across", "down"].flatMap { direction -> [(answer: String, clue: String)] in
    294         guard let answerEntries = answers[direction] as? [Any],
    295               let clueEntries = clues[direction] as? [Any] else {
    296             return []
    297         }
    298         return zip(answerEntries, clueEntries).compactMap { answerEntry, clueEntry in
    299             guard let answer = answerEntry as? String else {
    300                 return nil
    301             }
    302             let normalized = normalizeAnswer(answer)
    303             guard !normalized.isEmpty else {
    304                 return nil
    305             }
    306             let clue = stripClueNumber(clueEntry as? String ?? "")
    307             return (normalized, clue)
    308         }
    309     }
    310 }
    311 
    312 func shouldInclude(_ word: String, minLength: Int, maxLength: Int?) -> Bool {
    313     if word.count < minLength {
    314         return false
    315     }
    316     if let maxLength, word.count > maxLength {
    317         return false
    318     }
    319     return true
    320 }
    321 
    322 func weekdayForPath(_ url: URL) -> Int? {
    323     let path = url.path.replacingOccurrences(of: "\\", with: "/")
    324     let range = NSRange(path.startIndex..<path.endIndex, in: path)
    325     guard let match = datePathRegex.firstMatch(in: path, range: range),
    326           match.numberOfRanges == 4,
    327           let yearRange = Range(match.range(at: 1), in: path),
    328           let monthRange = Range(match.range(at: 2), in: path),
    329           let dayRange = Range(match.range(at: 3), in: path),
    330           let year = Int(path[yearRange]),
    331           let month = Int(path[monthRange]),
    332           let day = Int(path[dayRange]) else {
    333         return nil
    334     }
    335 
    336     var components = DateComponents()
    337     components.calendar = Calendar(identifier: .gregorian)
    338     components.year = year
    339     components.month = month
    340     components.day = day
    341     guard let date = components.date,
    342           let calendarWeekday = components.calendar?.component(.weekday, from: date) else {
    343         return nil
    344     }
    345     return (calendarWeekday + 5) % 7
    346 }
    347 
    348 func puzzleWeekday(_ puzzle: Any, path: URL) -> Int? {
    349     if let puzzle = puzzle as? [String: Any],
    350        let dow = puzzle["dow"] as? String,
    351        let weekday = weekdayLookup[dow.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()] {
    352         return weekday
    353     }
    354     return weekdayForPath(path)
    355 }
    356 
    357 func shouldIncludePuzzle(_ puzzle: Any, path: URL, weekdays: Set<Int>?) -> Bool {
    358     guard let weekdays else {
    359         return true
    360     }
    361     guard let weekday = puzzleWeekday(puzzle, path: path) else {
    362         return false
    363     }
    364     return weekdays.contains(weekday)
    365 }
    366 
    367 func requiredCount(word: String, flatMinCount: Int, lengthThresholds: Bool, qualityFilter: Bool) -> Int {
    368     if qualityFilter {
    369         if word.count < 5 {
    370             return 15
    371         }
    372         if word.count > 7 {
    373             return 1
    374         }
    375         return 3
    376     }
    377     guard lengthThresholds else {
    378         return flatMinCount
    379     }
    380     if word.count < 5 {
    381         return 10
    382     }
    383     if word.count > 7 {
    384         return 1
    385     }
    386     return 2
    387 }
    388 
    389 func easyRequiredCount(_ word: String) -> Int {
    390     if word.count <= 4 {
    391         return 15
    392     }
    393     if word.count <= 7 {
    394         return 3
    395     }
    396     return 1
    397 }
    398 
    399 func hasVowel(_ word: String) -> Bool {
    400     word.contains { "aeiouy".contains($0) }
    401 }
    402 
    403 func clueRatio(_ part: Int, _ whole: Int) -> Double {
    404     Double(part) / Double(max(whole, 1))
    405 }
    406 
    407 func easyRejectionReason(word: String, evidence: AnswerEvidence) -> String? {
    408     let length = word.count
    409     let count = evidence.count
    410 
    411     if easyAllowedWords.contains(word) {
    412         return nil
    413     }
    414 
    415     if count < easyRequiredCount(word) {
    416         return "count \(count) below easy threshold \(easyRequiredCount(word))"
    417     }
    418 
    419     if crosswordeseWords.contains(word) {
    420         return "known crosswordese/glue fill"
    421     }
    422 
    423     if length <= 4 && commonShortWords.contains(word) {
    424         return nil
    425     }
    426 
    427     if length <= 4 && veryWeakShortWords.contains(word) {
    428         return "known weak short fill"
    429     }
    430 
    431     if length <= 4 && !hasVowel(word) {
    432         return "short entry with no vowel"
    433     }
    434 
    435     if length <= 5 && shortExactFragments.contains(word) {
    436         return "short suffix-like fragment"
    437     }
    438 
    439     let badRatio = clueRatio(evidence.badClueCount, count)
    440     let obscureNameRatio = clueRatio(evidence.obscureNameClueCount, count)
    441     let fillBlankRatio = clueRatio(evidence.fillBlankCount, count)
    442     let foreignLanguageRatio = clueRatio(evidence.foreignLanguageClueCount, count)
    443 
    444     if length <= 5 &&
    445         evidence.foreignLanguageClueCount >= 2 &&
    446         (count <= 10 || foreignLanguageRatio >= 0.5) {
    447         return "entry mostly clued as foreign-language translation"
    448     }
    449 
    450     if length <= 4 && evidence.badClueCount >= 2 && badRatio >= 0.18 {
    451         return "short entry mostly clued as abbreviation/variant/foreign/form"
    452     }
    453 
    454     if length <= 4 && evidence.obscureNameClueCount >= 4 && obscureNameRatio >= 0.35 {
    455         return "short entry mostly clued as obscure name"
    456     }
    457 
    458     if length <= 4 && count < 40 && evidence.fillBlankCount >= 8 && fillBlankRatio >= 0.55 {
    459         return "short entry mostly dependent on fill-in-the-blank clues"
    460     }
    461 
    462     if length <= 3 && count < 60 && (badRatio + obscureNameRatio + fillBlankRatio) >= 0.45 {
    463         return "rare short entry with weak clue evidence"
    464     }
    465 
    466     return nil
    467 }
    468 
    469 func loadAnswers(inputURL: URL, options: Options) throws -> (counts: [String: Int], filesRead: Int, filesUsed: Int) {
    470     var counts: [String: Int] = [:]
    471     var filesRead = 0
    472     var filesUsed = 0
    473 
    474     for path in try jsonFiles(in: inputURL) {
    475         do {
    476             let puzzle = try loadJSONObject(from: path)
    477             filesRead += 1
    478             guard shouldIncludePuzzle(puzzle, path: path, weekdays: options.weekdays) else {
    479                 continue
    480             }
    481             filesUsed += 1
    482             for answer in normalizedAnswers(from: puzzle) where shouldInclude(answer, minLength: options.minLength, maxLength: options.maxLength) {
    483                 counts[answer, default: 0] += 1
    484             }
    485         } catch {
    486             fputs("Skipping \(path.path): \(error)\n", stderr)
    487         }
    488     }
    489 
    490     return (counts, filesRead, filesUsed)
    491 }
    492 
    493 func loadAnswerEvidence(inputURL: URL, options: Options) throws -> (evidence: [String: AnswerEvidence], filesRead: Int, filesUsed: Int) {
    494     var evidence: [String: AnswerEvidence] = [:]
    495     var filesRead = 0
    496     var filesUsed = 0
    497 
    498     for path in try jsonFiles(in: inputURL) {
    499         do {
    500             let puzzle = try loadJSONObject(from: path)
    501             filesRead += 1
    502             guard shouldIncludePuzzle(puzzle, path: path, weekdays: options.weekdays) else {
    503                 continue
    504             }
    505             filesUsed += 1
    506             for (answer, clue) in normalizedEntries(from: puzzle) where shouldInclude(answer, minLength: options.minLength, maxLength: options.maxLength) {
    507                 var entry = evidence[answer] ?? AnswerEvidence()
    508                 entry.count += 1
    509                 if entry.sampleClues.count < 5 && !clue.isEmpty {
    510                     entry.sampleClues.append(clue)
    511                 }
    512                 if regexMatches(badClueRegex, clue) {
    513                     entry.badClueCount += 1
    514                 }
    515                 if regexMatches(obscureNameClueRegex, clue) {
    516                     entry.obscureNameClueCount += 1
    517                 }
    518                 if regexMatches(fillBlankRegex, clue) {
    519                     entry.fillBlankCount += 1
    520                 }
    521                 if regexMatches(foreignLanguageClueRegex, clue) {
    522                     entry.foreignLanguageClueCount += 1
    523                 }
    524                 evidence[answer] = entry
    525             }
    526         } catch {
    527             fputs("Skipping \(path.path): \(error)\n", stderr)
    528         }
    529     }
    530 
    531     return (evidence, filesRead, filesUsed)
    532 }
    533 
    534 func loadBadWords(path: String) throws -> Set<String> {
    535     let value = try loadJSONObject(from: URL(fileURLWithPath: path))
    536     guard let words = value as? [Any] else {
    537         throw WordmakeError(description: "expected JSON array")
    538     }
    539     return Set(words.compactMap { word in
    540         guard let word = word as? String else {
    541             return nil
    542         }
    543         let normalized = normalizeAnswer(word)
    544         return normalized.isEmpty ? nil : normalized
    545     })
    546 }
    547 
    548 func sortedDictionary(_ dictionary: [String: Any]) -> [String: Any] {
    549     Dictionary(uniqueKeysWithValues: dictionary.keys.sorted().map { ($0, dictionary[$0]!) })
    550 }
    551 
    552 func evidenceObject(_ entry: AnswerEvidence, rejectionReason: String?) -> [String: Any] {
    553     [
    554         "count": entry.count,
    555         "bad_clue_count": entry.badClueCount,
    556         "obscure_name_clue_count": entry.obscureNameClueCount,
    557         "fill_blank_count": entry.fillBlankCount,
    558         "foreign_language_clue_count": entry.foreignLanguageClueCount,
    559         "sample_clues": entry.sampleClues,
    560         "rejection_reason": rejectionReason as Any? ?? NSNull()
    561     ]
    562 }
    563 
    564 func rejectionObject(reason: String, entry: AnswerEvidence) -> [String: Any] {
    565     [
    566         "reason": reason,
    567         "count": entry.count,
    568         "bad_clue_count": entry.badClueCount,
    569         "obscure_name_clue_count": entry.obscureNameClueCount,
    570         "fill_blank_count": entry.fillBlankCount,
    571         "foreign_language_clue_count": entry.foreignLanguageClueCount,
    572         "sample_clues": entry.sampleClues
    573     ]
    574 }
    575 
    576 func writeJSON(_ value: Any, to path: String, pretty: Bool) throws {
    577     let url = URL(fileURLWithPath: path)
    578     let directory = url.deletingLastPathComponent()
    579     if directory.path != "." {
    580         try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
    581     }
    582     var options: JSONSerialization.WritingOptions = [.sortedKeys]
    583     if pretty {
    584         options.insert(.prettyPrinted)
    585     }
    586     let data = try JSONSerialization.data(withJSONObject: value, options: options)
    587     var output = data
    588     output.append(0x0A)
    589     try output.write(to: url)
    590 }
    591 
    592 func validate(_ options: Options, inputURL: URL) throws {
    593     var isDirectory: ObjCBool = false
    594     guard FileManager.default.fileExists(atPath: inputURL.path, isDirectory: &isDirectory), isDirectory.boolValue else {
    595         throw WordmakeError(description: "Input directory not found: \(options.inputPath)")
    596     }
    597     guard options.minLength >= 1 else {
    598         throw WordmakeError(description: "--min-length must be at least 1")
    599     }
    600     if let maxLength = options.maxLength, maxLength < options.minLength {
    601         throw WordmakeError(description: "--max-length must be greater than or equal to --min-length")
    602     }
    603     guard options.minCount >= 1 else {
    604         throw WordmakeError(description: "--min-count must be at least 1")
    605     }
    606     let filters = [options.lengthThresholds, options.qualityFilter, options.easyFillFilter].filter { $0 }.count
    607     guard filters <= 1 else {
    608         throw WordmakeError(description: "Choose only one of --length-thresholds, --quality-filter, or --easy-fill-filter")
    609     }
    610     if (options.rejectionsOutputPath != nil || options.qualityOutputPath != nil) && !options.easyFillFilter {
    611         throw WordmakeError(description: "--rejections-output and --quality-output require --easy-fill-filter")
    612     }
    613 }
    614 
    615 func run() throws {
    616     let options = try parseOptions(CommandLine.arguments)
    617     let inputURL = URL(fileURLWithPath: options.inputPath)
    618     try validate(options, inputURL: inputURL)
    619 
    620     let badWords: Set<String>
    621     if let badWordsPath = options.badWordsPath {
    622         do {
    623             badWords = try loadBadWords(path: badWordsPath)
    624         } catch {
    625             throw WordmakeError(description: "Could not read --bad-words \(badWordsPath): \(error)")
    626         }
    627     } else {
    628         badWords = []
    629     }
    630 
    631     var counts: [String: Int]
    632     let filesRead: Int
    633     let filesUsed: Int
    634     var words: [String]
    635     var easyRejections: [String: Any] = [:]
    636     var wordQuality: [String: Any] = [:]
    637 
    638     if options.easyFillFilter {
    639         let result = try loadAnswerEvidence(inputURL: inputURL, options: options)
    640         filesRead = result.filesRead
    641         filesUsed = result.filesUsed
    642         counts = Dictionary(uniqueKeysWithValues: result.evidence.map { ($0.key, $0.value.count) })
    643         words = []
    644 
    645         for word in result.evidence.keys.sorted() {
    646             guard let entry = result.evidence[word] else {
    647                 continue
    648             }
    649             let reason = easyRejectionReason(word: word, evidence: entry)
    650             let finalReason = badWords.contains(word) ? "bad word" : reason
    651             wordQuality[word] = evidenceObject(entry, rejectionReason: finalReason)
    652             if badWords.contains(word) {
    653                 continue
    654             }
    655             if let reason {
    656                 easyRejections[word] = rejectionObject(reason: reason, entry: entry)
    657             } else {
    658                 words.append(word)
    659             }
    660         }
    661     } else {
    662         let result = try loadAnswers(inputURL: inputURL, options: options)
    663         counts = result.counts
    664         filesRead = result.filesRead
    665         filesUsed = result.filesUsed
    666         words = counts.keys
    667             .filter { word in
    668                 !badWords.contains(word) &&
    669                     (counts[word] ?? 0) >= requiredCount(
    670                         word: word,
    671                         flatMinCount: options.minCount,
    672                         lengthThresholds: options.lengthThresholds,
    673                         qualityFilter: options.qualityFilter
    674                     )
    675             }
    676             .sorted()
    677     }
    678 
    679     try writeJSON(words, to: options.outputPath, pretty: options.pretty)
    680     if let countsOutputPath = options.countsOutputPath {
    681         try writeJSON(Dictionary(uniqueKeysWithValues: counts.keys.sorted().map { ($0, counts[$0]!) }), to: countsOutputPath, pretty: options.pretty)
    682     }
    683     if let rejectionsOutputPath = options.rejectionsOutputPath {
    684         try writeJSON(sortedDictionary(easyRejections), to: rejectionsOutputPath, pretty: options.pretty)
    685     }
    686     if let qualityOutputPath = options.qualityOutputPath {
    687         try writeJSON(sortedDictionary(wordQuality), to: qualityOutputPath, pretty: options.pretty)
    688     }
    689 
    690     print("Read \(filesRead) puzzle files")
    691     if let weekdays = options.weekdays {
    692         let selected = weekdays.sorted().map { weekdayNames[$0] }.joined(separator: ", ")
    693         print("Used \(filesUsed) \(selected) puzzle files")
    694     }
    695     print("Wrote \(words.count) unique answers to \(options.outputPath)")
    696     if options.lengthThresholds {
    697         print("Included answers using length thresholds: <5 letters >=10, 5-7 letters >=2, >7 letters >=1")
    698     } else if options.qualityFilter {
    699         print("Included answers using quality filter: <5 letters >=15, 5-7 letters >=3, >7 letters >=1")
    700     } else if options.easyFillFilter {
    701         print("Included answers using clue-aware easy fill filter")
    702         print("Rejected \(easyRejections.count) answers with easy fill filter")
    703     } else if options.minCount > 1 {
    704         print("Included answers appearing at least \(options.minCount) times")
    705     }
    706     if let badWordsPath = options.badWordsPath {
    707         print("Excluded \(badWords.count) bad words from \(badWordsPath)")
    708     }
    709     if let countsOutputPath = options.countsOutputPath {
    710         print("Wrote answer frequencies to \(countsOutputPath)")
    711     }
    712     if let rejectionsOutputPath = options.rejectionsOutputPath {
    713         print("Wrote easy-filter rejections to \(rejectionsOutputPath)")
    714     }
    715     if let qualityOutputPath = options.qualityOutputPath {
    716         print("Wrote word quality metadata to \(qualityOutputPath)")
    717     }
    718 }
    719 
    720 do {
    721     try run()
    722 } catch {
    723     fputs("Wordmake: \(error)\n", stderr)
    724     exit(1)
    725 }