main.swift (28531B)
import Foundation

// MARK: - Word lists

/// Weekday names in index order: Monday is 0 and Sunday is 6.
let weekdayNames = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
/// Maps a lowercased weekday name to its index in `weekdayNames`.
let weekdayLookup = Dictionary(uniqueKeysWithValues: weekdayNames.enumerated().map { ($0.element.lowercased(), $0.offset) })

/// Entries of at most four letters that the easy-fill filter rejects as "known weak short fill".
let veryWeakShortWords: Set<String> = [
    "aah", "aal", "aar", "aba", "abe", "abo", "abu", "ado", "adz", "aer",
    "ane", "ani", "ara", "ase", "ato", "ava", "een", "eme", "ene", "eon",
    "ere", "ese", "ess", "est", "eta", "ete", "eth", "ier", "iii", "ile",
    "ita", "ite", "lai", "lar", "mee", "nee", "oer", "ona", "onea",
    "ort", "ose", "oto", "ree", "rei", "ria", "ser", "tae", "tba", "tbs",
    "tko", "tnt", "tpe", "tsp", "ute", "wye", "xed", "xii",
    "ance", "elee", "orle", "eder", "ssts"
]

/// Entries of at most four letters that the easy-fill filter accepts without
/// consulting clue evidence (once the count threshold and crosswordese check pass).
let commonShortWords: Set<String> = [
    "able", "acre", "act", "acts", "age", "aged", "ages", "air", "airs",
    "ale", "ales", "all", "ally", "also", "and", "ant", "ante", "anti",
    "ants", "ape", "apes", "area", "are", "ark", "arks", "arm", "arms",
    "art", "arts", "ash", "ate", "aunt", "awe", "awed", "awes", "bad",
    "bag", "bags", "bar", "bare", "bars", "base", "bash", "bat", "bats",
    "bed", "beds", "bee", "beer", "bees", "bell", "belt", "bent", "best",
    "bet", "bets", "bite", "blue", "bus", "but", "can", "cap", "car",
    "care", "case", "cat", "cats", "clay", "cod", "coil", "cold", "cord",
    "cow", "cue", "curl", "cut", "cuts", "deer", "dog", "dogs", "dry",
    "ear", "ears", "eat", "eats", "eel", "eels", "era", "eras", "eye",
    "eyes", "far", "farm", "fast", "fat", "feet", "few", "fire", "fish",
    "fit", "fits", "free", "fun", "game", "gas", "gate", "get", "gets",
    "goat", "good", "hair", "hand", "hard", "hat", "hate", "heat", "hen",
    "her", "here", "hit", "hits", "ice", "idea", "ink", "inn", "iron",
    "its", "kid", "kids", "knee", "land", "last", "late", "law", "lead",
    "left", "let", "lets", "lie", "life", "line", "long", "lose", "lost",
    "man", "many", "map", "men", "mile", "mind", "more", "name", "near",
    "new", "note", "oar", "oars", "oil", "old", "one", "open", "ore",
    "ores", "our", "out", "over", "own", "pan", "part", "past", "pen",
    "pet", "pets", "pie", "pin", "pins", "play", "pot", "red", "rest",
    "rice", "ring", "road", "root", "rose", "run", "runs", "sad", "salt",
    "sat", "save", "sea", "seat", "see", "seed", "sees", "set", "sets",
    "shoe", "side", "sit", "site", "sky", "snow", "son", "song", "star",
    "step", "stop", "sun", "take", "tea", "team", "ten", "tens", "test",
    "the", "tie", "ties", "time", "tree", "try", "use", "used", "uses",
    "war", "way", "west", "wet", "win", "wine", "word", "work", "yard",
    "year", "yes"
]

/// Entries of at most five letters that the easy-fill filter rejects as "short suffix-like fragment".
let shortExactFragments: Set<String> = [
    "ance", "ence", "enne", "ette", "ible", "iest", "ism", "isms", "itis"
]

/// Entries the easy-fill filter rejects as "known crosswordese/glue fill", regardless of length.
let crosswordeseWords: Set<String> = [
    "adue", "anent", "anet", "antae", "atee", "atit", "atle", "bassi",
    "brocatelle", "ecce", "ente", "esne", "etes", "gnar", "imset",
    "labile", "laic", "laram", "mtida", "neer", "onor", "relee", "rete",
    "terete", "togae", "ulee",
    "aereo", "alai", "aretes", "aril", "atri", "aussi", "eaude", "ees",
    "emeer", "enote", "erian", "ers", "esnes", "evoe", "ilia", "leisterer",
    "mesne", "olea", "ooo", "oss", "ranee", "rorem", "seral",
    "snee", "soras", "sri", "tse", "yeses",
    "abas", "adano", "agena", "agin", "agorae", "ams", "ans", "aper",
    "arear", "bretharte", "donees", "eloi", "endat", "etnas", "etui",
    "hammshams", "nin", "oas", "ogees", "olan", "ont", "ossa", "otra",
    "poetaster", "rainintheface", "rea", "retia", "roone", "rpi", "seta",
    "sisi", "soli", "tet", "tieto", "totoe", "tra", "yser"
]

/// Entries the easy-fill filter always accepts, before any other check runs.
let easyAllowedWords: Set<String> = [
    "n", "s", "e", "w", "ne", "nw", "se", "sw",
    "nne", "nnw", "ene", "ese", "sse", "ssw", "wsw", "wnw"
]

// MARK: - Models

/// Command-line configuration; the defaults match the text printed by `printUsage()`.
struct Options {
    var inputPath = "Data"
    var outputPath = "Generated/word_list.json"
    var countsOutputPath: String?
    var minLength = 2
    var maxLength: Int?
    var minCount = 1
    var lengthThresholds = false
    var qualityFilter = false
    var easyFillFilter = false
    var badWordsPath: String?
    var rejectionsOutputPath: String?
    var qualityOutputPath: String?
    /// Selected weekday indices (Monday = 0); `nil` means every puzzle is used.
    var weekdays: Set<Int>?
    var pretty = false
}

/// Per-answer tallies gathered by `loadAnswerEvidence(inputURL:options:)`.
struct AnswerEvidence {
    /// Number of times the answer appeared.
    var count = 0
    /// Appearances whose clue matched `badClueRegex`.
    var badClueCount = 0
    /// Appearances whose clue matched `obscureNameClueRegex`.
    var obscureNameClueCount = 0
    /// Appearances whose clue matched `fillBlankRegex`.
    var fillBlankCount = 0
    /// Appearances whose clue matched `foreignLanguageClueRegex`.
    var foreignLanguageClueCount = 0
    /// Up to five non-empty clues, in the order they were encountered.
    var sampleClues: [String] = []
}

/// Error whose `description` is the user-facing message printed by the entry point.
struct WordmakeError: Error, CustomStringConvertible {
    let description: String
}

// MARK: - Clue patterns

// The patterns below are compile-time constants, so a failure to compile one is a
// programmer error; `try!` is intentional.

/// Case-insensitive match for clue wording counted as "bad" evidence
/// (abbreviation, variant, foreign-language, archaic and similar markers).
let badClueRegex = try! NSRegularExpression(
    pattern: #"\b(abbr|var|prefix|suffix|archaic|poetic|dial|slang|obs|old-style|old fashioned|scot|irish|brit|fr|french|ger|german|lat|latin|span|spanish|ital|italian|port|portuguese|hebr|hebrew|yiddish|jap|japanese|inits|initials|letters|compass point|bearing|direction|shoe width|monogram|degs|degree|of yore|old|olden|heraldry|genus|legal|in law|anatomical|serf|serfs|slave|slaves|vassal|pilaster|opera voices|persian fairy|mideast|eastern v\.?i\.?p|arab prince|arab chieftain|hindu title|indian princess|rajah|dirk|knife of old|seed covering|seed casing|seed envelope|hip bones|pelvic bones|bitter vetch|marsh birds)\b"#,
    options: [.caseInsensitive]
)
/// Case-insensitive match for occupation words counted as "obscure name" evidence.
let obscureNameClueRegex = try! NSRegularExpression(
    pattern: #"\b(actor|actress|author|composer|singer|poet|novelist|painter|artist|playwright|violinist|pianist|conductor|golfer|pitcher|baseballer|songwriter|politician|senator|governor)\b"#,
    options: [.caseInsensitive]
)
/// Matches two or more underscores, or three or more periods.
let fillBlankRegex = try! NSRegularExpression(pattern: #"_{2,}|\.{3,}"#)
/// Matches a ": <language abbreviation>." tag or a ", in <Capitalized word>" phrase (case-sensitive).
let foreignLanguageClueRegex = try! NSRegularExpression(
    pattern: #":\s*(?:Sp|Span|Fr|Ger|Germ|It|Ital|Lat|Port|Heb|Yid|Jap|Rus|Gr|Gk|Swed|Norw|Dan|Arab|Chin|Hind|Skt|Turk|Finn)\.|,\s+in\s+[A-Z][a-z]"#
)
/// Matches a leading "<digits>." clue number with surrounding whitespace.
let clueNumberRegex = try! NSRegularExpression(pattern: #"^\s*\d+\.\s*"#)
/// Matches a path ending in `YYYY/MM/DD.json`, capturing year, month and day.
let datePathRegex = try! NSRegularExpression(pattern: #"(\d{4})/(\d{2})/(\d{2})\.json$"#)

// MARK: - Command line

/// Prints the command-line help text to standard output.
func printUsage() {
    print("""
    Usage: Wordmake [options]

    Options:
      --input PATH              Directory containing crossword JSON files. Default: Data
      --output PATH             Path for the generated JSON word list. Default: Generated/word_list.json
      --counts-output PATH      Optional path for a JSON object of normalized answer frequencies.
      --min-length N            Minimum normalized answer length to include. Default: 2
      --max-length N            Maximum normalized answer length to include.
      --min-count N             Minimum number of appearances required to include an answer. Default: 1
      --length-thresholds       Use length-based appearance thresholds.
      --quality-filter          Use stricter count thresholds intended for generated fill.
      --easy-fill-filter        Use clue-aware filters for easier generated fill.
      --bad-words PATH          Optional JSON word list to exclude from the generated dictionary.
      --rejections-output PATH  Optional path for rejected word metadata from --easy-fill-filter.
      --quality-output PATH     Optional path for per-answer quality metadata from --easy-fill-filter.
      --weekdays LIST           Optional comma-separated weekday filter, e.g. Monday,Tuesday.
      --pretty                  Pretty-print generated JSON.
      -h, --help                Show this help.
    """)
}

/// Parses command-line arguments into an `Options` value.
///
/// - Parameter arguments: The full argument vector; element 0 (the program name) is skipped.
/// - Returns: The parsed options, with defaults for anything not supplied.
/// - Throws: `WordmakeError` for an unknown argument, a missing value, a non-integer
///   numeric value, or an invalid weekday list.
/// - Note: `--help` / `-h` prints the usage text and terminates the process with status 0.
func parseOptions(_ arguments: [String]) throws -> Options {
    var options = Options()
    var index = 1

    // Consumes and returns the argument following the flag at `index`.
    func requireValue(_ name: String) throws -> String {
        guard index + 1 < arguments.count else {
            throw WordmakeError(description: "Missing value for \(name)")
        }
        index += 1
        return arguments[index]
    }

    // Consumes the next argument and parses it as an integer.
    func requireInt(_ name: String) throws -> Int {
        guard let value = Int(try requireValue(name)) else {
            throw WordmakeError(description: "\(name) must be an integer")
        }
        return value
    }

    while index < arguments.count {
        let argument = arguments[index]
        switch argument {
        case "--input":
            options.inputPath = try requireValue(argument)
        case "--output":
            options.outputPath = try requireValue(argument)
        case "--counts-output":
            options.countsOutputPath = try requireValue(argument)
        case "--min-length":
            options.minLength = try requireInt(argument)
        case "--max-length":
            options.maxLength = try requireInt(argument)
        case "--min-count":
            options.minCount = try requireInt(argument)
        case "--length-thresholds":
            options.lengthThresholds = true
        case "--quality-filter":
            options.qualityFilter = true
        case "--easy-fill-filter":
            options.easyFillFilter = true
        case "--bad-words":
            options.badWordsPath = try requireValue(argument)
        case "--rejections-output":
            options.rejectionsOutputPath = try requireValue(argument)
        case "--quality-output":
            options.qualityOutputPath = try requireValue(argument)
        case "--weekdays":
            options.weekdays = try parseWeekdays(try requireValue(argument))
        case "--pretty":
            options.pretty = true
        case "--help", "-h":
            printUsage()
            exit(0)
        default:
            throw WordmakeError(description: "Unknown argument: \(argument)")
        }
        index += 1
    }

    return options
}

/// Parses a comma-separated list of weekday names into weekday indices (Monday = 0).
///
/// Names are trimmed and matched case-insensitively; empty items are ignored.
///
/// - Throws: `WordmakeError` when a name is not recognized or the list yields no weekday.
func parseWeekdays(_ value: String) throws -> Set<Int> {
    var weekdays: Set<Int> = []
    for rawName in value.split(separator: ",") {
        let name = rawName.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
        guard !name.isEmpty else {
            continue
        }
        guard let weekday = weekdayLookup[name] else {
            throw WordmakeError(description: "Unknown weekday '\(rawName)'; expected one of: \(weekdayNames.joined(separator: ", "))")
        }
        weekdays.insert(weekday)
    }
    guard !weekdays.isEmpty else {
        throw WordmakeError(description: "--weekdays must include at least one weekday name")
    }
    return weekdays
}

// MARK: - Text helpers

/// Returns `text` reduced to its ASCII letters, lowercased; every other scalar is dropped.
func normalizeAnswer(_ text: String) -> String {
    var letters = String.UnicodeScalarView()
    for scalar in text.unicodeScalars {
        switch scalar.value {
        case 65...90:
            // 'A'...'Z' -> 'a'...'z'. The value is known to fit in a byte, so the
            // non-failable `Unicode.Scalar(UInt8)` initializer avoids a force-unwrap.
            letters.append(Unicode.Scalar(UInt8(scalar.value + 32)))
        case 97...122:
            letters.append(scalar)
        default:
            continue
        }
    }
    return String(letters)
}

/// Removes a leading "<digits>." clue number and trims surrounding whitespace.
func stripClueNumber(_ clue: String) -> String {
    let range = NSRange(clue.startIndex..<clue.endIndex, in: clue)
    return clueNumberRegex.stringByReplacingMatches(in: clue, range: range, withTemplate: "")
        .trimmingCharacters(in: .whitespacesAndNewlines)
}

/// Returns `true` when `regex` matches anywhere in `text`.
func regexMatches(_ regex: NSRegularExpression, _ text: String) -> Bool {
    let range = NSRange(text.startIndex..<text.endIndex, in: text)
    return regex.firstMatch(in: text, range: range) != nil
}

// MARK: - Puzzle loading

/// Recursively lists the regular, non-hidden files with a `json` extension under
/// `directory`, sorted by path. Returns an empty list if no enumerator can be created.
///
/// - Throws: Any error raised while reading a file's resource values.
func jsonFiles(in directory: URL) throws -> [URL] {
    guard let enumerator = FileManager.default.enumerator(
        at: directory,
        includingPropertiesForKeys: [.isRegularFileKey],
        options: [.skipsHiddenFiles]
    ) else {
        return []
    }
    return try enumerator.compactMap { item -> URL? in
        guard let url = item as? URL, url.pathExtension == "json" else {
            return nil
        }
        let values = try url.resourceValues(forKeys: [.isRegularFileKey])
        return values.isRegularFile == true ? url : nil
    }
    .sorted { $0.path < $1.path }
}

/// Reads the file at `url` and decodes it with `JSONSerialization`.
func loadJSONObject(from url: URL) throws -> Any {
    let data = try Foundation.Data(contentsOf: url)
    return try JSONSerialization.jsonObject(with: data)
}

/// Extracts the normalized across and down answers from a decoded puzzle.
///
/// Reads `puzzle["answers"]["across"]` and `["down"]`; non-string entries and answers
/// that normalize to an empty string are skipped. Returns `[]` for any other shape.
func normalizedAnswers(from puzzle: Any) -> [String] {
    guard let puzzle = puzzle as? [String: Any],
          let answers = puzzle["answers"] as? [String: Any] else {
        return []
    }
    return ["across", "down"].flatMap { direction -> [String] in
        guard let entries = answers[direction] as? [Any] else {
            return []
        }
        return entries.compactMap { entry in
            guard let answer = entry as? String else {
                return nil
            }
            let normalized = normalizeAnswer(answer)
            return normalized.isEmpty ? nil : normalized
        }
    }
}

/// Extracts (normalized answer, clue) pairs from a decoded puzzle.
///
/// Answers and clues are paired by position within each direction, so if the two
/// lists differ in length the surplus entries of the longer one are dropped. A puzzle
/// without both `answers` and `clues` objects yields no entries. A non-string clue is
/// treated as an empty clue; clue numbers are stripped.
func normalizedEntries(from puzzle: Any) -> [(answer: String, clue: String)] {
    guard let puzzle = puzzle as? [String: Any],
          let answers = puzzle["answers"] as? [String: Any],
          let clues = puzzle["clues"] as? [String: Any] else {
        return []
    }
    return ["across", "down"].flatMap { direction -> [(answer: String, clue: String)] in
        guard let answerEntries = answers[direction] as? [Any],
              let clueEntries = clues[direction] as? [Any] else {
            return []
        }
        return zip(answerEntries, clueEntries).compactMap { answerEntry, clueEntry in
            guard let answer = answerEntry as? String else {
                return nil
            }
            let normalized = normalizeAnswer(answer)
            guard !normalized.isEmpty else {
                return nil
            }
            let clue = stripClueNumber(clueEntry as? String ?? "")
            return (normalized, clue)
        }
    }
}

/// Returns `true` when `word`'s length lies within `minLength...maxLength`
/// (no upper bound when `maxLength` is `nil`).
func shouldInclude(_ word: String, minLength: Int, maxLength: Int?) -> Bool {
    if word.count < minLength {
        return false
    }
    if let maxLength, word.count > maxLength {
        return false
    }
    return true
}

// MARK: - Weekday filtering

/// Derives a weekday index (Monday = 0) from a path ending in `YYYY/MM/DD.json`.
///
/// - Returns: `nil` when the path does not match or the captured values do not form
///   a date that exists in the Gregorian calendar.
func weekdayForPath(_ url: URL) -> Int? {
    let path = url.path.replacingOccurrences(of: "\\", with: "/")
    let range = NSRange(path.startIndex..<path.endIndex, in: path)
    guard let match = datePathRegex.firstMatch(in: path, range: range),
          match.numberOfRanges == 4,
          let yearRange = Range(match.range(at: 1), in: path),
          let monthRange = Range(match.range(at: 2), in: path),
          let dayRange = Range(match.range(at: 3), in: path),
          let year = Int(path[yearRange]),
          let month = Int(path[monthRange]),
          let day = Int(path[dayRange]) else {
        return nil
    }

    let calendar = Calendar(identifier: .gregorian)
    let components = DateComponents(calendar: calendar, year: year, month: month, day: day)
    // Date construction rolls out-of-range fields over (e.g. 02/30 becomes March 1),
    // which would yield a weekday for a different day; reject such dates instead.
    guard components.isValidDate, let date = calendar.date(from: components) else {
        return nil
    }
    // Calendar weekdays run 1 (Sunday) through 7 (Saturday); shift so Monday is 0.
    let calendarWeekday = calendar.component(.weekday, from: date)
    return (calendarWeekday + 5) % 7
}

/// Returns the puzzle's weekday index (Monday = 0), preferring a recognized `dow`
/// string in the puzzle and falling back to the date encoded in `path`.
func puzzleWeekday(_ puzzle: Any, path: URL) -> Int? {
    if let puzzle = puzzle as? [String: Any],
       let dow = puzzle["dow"] as? String,
       let weekday = weekdayLookup[dow.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()] {
        return weekday
    }
    return weekdayForPath(path)
}

/// Returns `true` when no weekday filter is set, or when the puzzle's weekday is
/// known and is one of `weekdays`.
func shouldIncludePuzzle(_ puzzle: Any, path: URL, weekdays: Set<Int>?) -> Bool {
    guard let weekdays else {
        return true
    }
    guard let weekday = puzzleWeekday(puzzle, path: path) else {
        return false
    }
    return weekdays.contains(weekday)
}

// MARK: - Thresholds and quality rules

/// Returns the minimum appearance count `word` needs outside easy-fill mode.
///
/// `qualityFilter` takes precedence over `lengthThresholds`; when neither is set the
/// flat `flatMinCount` applies.
func requiredCount(word: String, flatMinCount: Int, lengthThresholds: Bool, qualityFilter: Bool) -> Int {
    if qualityFilter {
        if word.count < 5 {
            return 15
        }
        if word.count > 7 {
            return 1
        }
        return 3
    }
    guard lengthThresholds else {
        return flatMinCount
    }
    if word.count < 5 {
        return 10
    }
    if word.count > 7 {
        return 1
    }
    return 2
}

/// Returns the minimum appearance count for easy-fill mode:
/// 15 for up to four letters, 3 for five to seven letters, otherwise 1.
func easyRequiredCount(_ word: String) -> Int {
    if word.count <= 4 {
        return 15
    }
    if word.count <= 7 {
        return 3
    }
    return 1
}

/// Returns `true` when `word` contains any of the letters `a e i o u y`.
func hasVowel(_ word: String) -> Bool {
    word.contains { "aeiouy".contains($0) }
}

/// Returns `part / whole`, using a denominator of at least 1 to avoid dividing by zero.
func clueRatio(_ part: Int, _ whole: Int) -> Double {
    Double(part) / Double(max(whole, 1))
}

/// Decides whether the easy-fill filter rejects `word`.
///
/// Checks run in a fixed order and the first one that applies wins: the always-allowed
/// list, the count threshold, the crosswordese list, the common short-word allow list,
/// the weak short-word list, the no-vowel rule, the suffix-fragment list, and finally
/// the clue-evidence ratio rules.
///
/// - Returns: A human-readable rejection reason, or `nil` when the word is accepted.
func easyRejectionReason(word: String, evidence: AnswerEvidence) -> String? {
    let length = word.count
    let count = evidence.count

    if easyAllowedWords.contains(word) {
        return nil
    }

    let threshold = easyRequiredCount(word)
    if count < threshold {
        return "count \(count) below easy threshold \(threshold)"
    }

    if crosswordeseWords.contains(word) {
        return "known crosswordese/glue fill"
    }

    if length <= 4 && commonShortWords.contains(word) {
        return nil
    }

    if length <= 4 && veryWeakShortWords.contains(word) {
        return "known weak short fill"
    }

    if length <= 4 && !hasVowel(word) {
        return "short entry with no vowel"
    }

    if length <= 5 && shortExactFragments.contains(word) {
        return "short suffix-like fragment"
    }

    let badRatio = clueRatio(evidence.badClueCount, count)
    let obscureNameRatio = clueRatio(evidence.obscureNameClueCount, count)
    let fillBlankRatio = clueRatio(evidence.fillBlankCount, count)
    let foreignLanguageRatio = clueRatio(evidence.foreignLanguageClueCount, count)

    if length <= 5 &&
        evidence.foreignLanguageClueCount >= 2 &&
        (count <= 10 || foreignLanguageRatio >= 0.5) {
        return "entry mostly clued as foreign-language translation"
    }

    if length <= 4 && evidence.badClueCount >= 2 && badRatio >= 0.18 {
        return "short entry mostly clued as abbreviation/variant/foreign/form"
    }

    if length <= 4 && evidence.obscureNameClueCount >= 4 && obscureNameRatio >= 0.35 {
        return "short entry mostly clued as obscure name"
    }

    if length <= 4 && count < 40 && evidence.fillBlankCount >= 8 && fillBlankRatio >= 0.55 {
        return "short entry mostly dependent on fill-in-the-blank clues"
    }

    if length <= 3 && count < 60 && (badRatio + obscureNameRatio + fillBlankRatio) >= 0.45 {
        return "rare short entry with weak clue evidence"
    }

    return nil
}

// MARK: - Corpus scanning

/// Walks every JSON file under `inputURL` and calls `visit` with each decoded puzzle
/// that passes the weekday filter.
///
/// A file that cannot be read or decoded is reported on stderr and skipped.
///
/// - Returns: How many files were decoded (`filesRead`) and how many of those were
///   passed to `visit` (`filesUsed`).
private func scanPuzzles(
    inputURL: URL,
    options: Options,
    visit: (Any) -> Void
) throws -> (filesRead: Int, filesUsed: Int) {
    var filesRead = 0
    var filesUsed = 0

    for path in try jsonFiles(in: inputURL) {
        let puzzle: Any
        do {
            puzzle = try loadJSONObject(from: path)
        } catch {
            fputs("Skipping \(path.path): \(error)\n", stderr)
            continue
        }
        filesRead += 1
        guard shouldIncludePuzzle(puzzle, path: path, weekdays: options.weekdays) else {
            continue
        }
        filesUsed += 1
        visit(puzzle)
    }

    return (filesRead, filesUsed)
}

/// Counts how often each normalized answer within the configured length bounds
/// appears in the selected puzzles.
///
/// - Throws: Errors raised while enumerating the input directory.
func loadAnswers(inputURL: URL, options: Options) throws -> (counts: [String: Int], filesRead: Int, filesUsed: Int) {
    var counts: [String: Int] = [:]

    let totals = try scanPuzzles(inputURL: inputURL, options: options) { puzzle in
        for answer in normalizedAnswers(from: puzzle)
        where shouldInclude(answer, minLength: options.minLength, maxLength: options.maxLength) {
            counts[answer, default: 0] += 1
        }
    }

    return (counts, totals.filesRead, totals.filesUsed)
}

/// Collects per-answer clue evidence for every normalized answer within the
/// configured length bounds in the selected puzzles.
///
/// - Throws: Errors raised while enumerating the input directory.
func loadAnswerEvidence(inputURL: URL, options: Options) throws -> (evidence: [String: AnswerEvidence], filesRead: Int, filesUsed: Int) {
    var evidence: [String: AnswerEvidence] = [:]

    let totals = try scanPuzzles(inputURL: inputURL, options: options) { puzzle in
        for (answer, clue) in normalizedEntries(from: puzzle)
        where shouldInclude(answer, minLength: options.minLength, maxLength: options.maxLength) {
            var entry = evidence[answer] ?? AnswerEvidence()
            entry.count += 1
            if entry.sampleClues.count < 5 && !clue.isEmpty {
                entry.sampleClues.append(clue)
            }
            if regexMatches(badClueRegex, clue) {
                entry.badClueCount += 1
            }
            if regexMatches(obscureNameClueRegex, clue) {
                entry.obscureNameClueCount += 1
            }
            if regexMatches(fillBlankRegex, clue) {
                entry.fillBlankCount += 1
            }
            if regexMatches(foreignLanguageClueRegex, clue) {
                entry.foreignLanguageClueCount += 1
            }
            evidence[answer] = entry
        }
    }

    return (evidence, totals.filesRead, totals.filesUsed)
}

/// Loads a JSON array of words and returns the set of their normalized forms.
///
/// Non-string elements and words that normalize to an empty string are ignored.
///
/// - Throws: File or JSON errors, or `WordmakeError` when the top level is not an array.
func loadBadWords(path: String) throws -> Set<String> {
    let value = try loadJSONObject(from: URL(fileURLWithPath: path))
    guard let words = value as? [Any] else {
        throw WordmakeError(description: "expected JSON array")
    }
    return Set(words.compactMap { word in
        guard let word = word as? String else {
            return nil
        }
        let normalized = normalizeAnswer(word)
        return normalized.isEmpty ? nil : normalized
    })
}

// MARK: - JSON output

/// Returns `dictionary` unchanged.
///
/// A Swift `Dictionary` has no defined order, so rebuilding it from sorted keys cannot
/// affect the output; key order in written files comes from the `.sortedKeys` option
/// used by `writeJSON(_:to:pretty:)`. The function is kept so existing call sites
/// continue to compile.
func sortedDictionary(_ dictionary: [String: Any]) -> [String: Any] {
    dictionary
}

/// Builds the JSON object written per answer to the quality output;
/// `rejection_reason` is JSON `null` when the answer was accepted.
func evidenceObject(_ entry: AnswerEvidence, rejectionReason: String?) -> [String: Any] {
    [
        "count": entry.count,
        "bad_clue_count": entry.badClueCount,
        "obscure_name_clue_count": entry.obscureNameClueCount,
        "fill_blank_count": entry.fillBlankCount,
        "foreign_language_clue_count": entry.foreignLanguageClueCount,
        "sample_clues": entry.sampleClues,
        "rejection_reason": rejectionReason as Any? ?? NSNull()
    ]
}

/// Builds the JSON object written per rejected answer to the rejections output.
func rejectionObject(reason: String, entry: AnswerEvidence) -> [String: Any] {
    [
        "reason": reason,
        "count": entry.count,
        "bad_clue_count": entry.badClueCount,
        "obscure_name_clue_count": entry.obscureNameClueCount,
        "fill_blank_count": entry.fillBlankCount,
        "foreign_language_clue_count": entry.foreignLanguageClueCount,
        "sample_clues": entry.sampleClues
    ]
}

/// Serializes `value` as JSON with sorted keys and writes it to `path`, followed by
/// a trailing newline. Missing parent directories are created first.
///
/// - Parameter pretty: When `true`, the JSON is pretty-printed.
/// - Throws: Directory-creation, serialization or file-write errors.
func writeJSON(_ value: Any, to path: String, pretty: Bool) throws {
    let url = URL(fileURLWithPath: path)
    let directory = url.deletingLastPathComponent()
    if directory.path != "." {
        try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
    }
    var options: JSONSerialization.WritingOptions = [.sortedKeys]
    if pretty {
        options.insert(.prettyPrinted)
    }
    var output = try JSONSerialization.data(withJSONObject: value, options: options)
    output.append(0x0A)
    try output.write(to: url)
}

// MARK: - Validation and entry point

/// Checks that the parsed options are mutually consistent and that the input
/// directory exists.
///
/// - Throws: `WordmakeError` describing the first violated constraint.
func validate(_ options: Options, inputURL: URL) throws {
    var isDirectory: ObjCBool = false
    guard FileManager.default.fileExists(atPath: inputURL.path, isDirectory: &isDirectory), isDirectory.boolValue else {
        throw WordmakeError(description: "Input directory not found: \(options.inputPath)")
    }
    guard options.minLength >= 1 else {
        throw WordmakeError(description: "--min-length must be at least 1")
    }
    if let maxLength = options.maxLength, maxLength < options.minLength {
        throw WordmakeError(description: "--max-length must be greater than or equal to --min-length")
    }
    guard options.minCount >= 1 else {
        throw WordmakeError(description: "--min-count must be at least 1")
    }
    let filters = [options.lengthThresholds, options.qualityFilter, options.easyFillFilter].filter { $0 }.count
    guard filters <= 1 else {
        throw WordmakeError(description: "Choose only one of --length-thresholds, --quality-filter, or --easy-fill-filter")
    }
    if (options.rejectionsOutputPath != nil || options.qualityOutputPath != nil) && !options.easyFillFilter {
        throw WordmakeError(description: "--rejections-output and --quality-output require --easy-fill-filter")
    }
}

/// Runs the tool: parses and validates options, scans the puzzle corpus, filters the
/// answers, writes the requested JSON files and prints a summary.
///
/// - Throws: `WordmakeError` or any underlying file/JSON error.
func run() throws {
    let options = try parseOptions(CommandLine.arguments)
    let inputURL = URL(fileURLWithPath: options.inputPath)
    try validate(options, inputURL: inputURL)

    let badWords: Set<String>
    if let badWordsPath = options.badWordsPath {
        do {
            badWords = try loadBadWords(path: badWordsPath)
        } catch {
            throw WordmakeError(description: "Could not read --bad-words \(badWordsPath): \(error)")
        }
    } else {
        badWords = []
    }

    let counts: [String: Int]
    let filesRead: Int
    let filesUsed: Int
    let words: [String]
    var easyRejections: [String: Any] = [:]
    var wordQuality: [String: Any] = [:]

    if options.easyFillFilter {
        let result = try loadAnswerEvidence(inputURL: inputURL, options: options)
        filesRead = result.filesRead
        filesUsed = result.filesUsed
        counts = result.evidence.mapValues(\.count)

        var accepted: [String] = []
        // Iterating in key order keeps the accepted list sorted.
        for (word, entry) in result.evidence.sorted(by: { $0.key < $1.key }) {
            let isBadWord = badWords.contains(word)
            let reason = easyRejectionReason(word: word, evidence: entry)
            wordQuality[word] = evidenceObject(entry, rejectionReason: isBadWord ? "bad word" : reason)
            // Bad words appear in the quality metadata but not in the rejections output.
            if isBadWord {
                continue
            }
            if let reason {
                easyRejections[word] = rejectionObject(reason: reason, entry: entry)
            } else {
                accepted.append(word)
            }
        }
        words = accepted
    } else {
        let result = try loadAnswers(inputURL: inputURL, options: options)
        counts = result.counts
        filesRead = result.filesRead
        filesUsed = result.filesUsed
        words = result.counts
            .filter { word, count in
                !badWords.contains(word) &&
                    count >= requiredCount(
                        word: word,
                        flatMinCount: options.minCount,
                        lengthThresholds: options.lengthThresholds,
                        qualityFilter: options.qualityFilter
                    )
            }
            .keys
            .sorted()
    }

    // Key order in every output file comes from `.sortedKeys` inside `writeJSON`,
    // so the dictionaries are passed through as they are.
    try writeJSON(words, to: options.outputPath, pretty: options.pretty)
    if let countsOutputPath = options.countsOutputPath {
        try writeJSON(counts, to: countsOutputPath, pretty: options.pretty)
    }
    if let rejectionsOutputPath = options.rejectionsOutputPath {
        try writeJSON(sortedDictionary(easyRejections), to: rejectionsOutputPath, pretty: options.pretty)
    }
    if let qualityOutputPath = options.qualityOutputPath {
        try writeJSON(sortedDictionary(wordQuality), to: qualityOutputPath, pretty: options.pretty)
    }

    print("Read \(filesRead) puzzle files")
    if let weekdays = options.weekdays {
        let selected = weekdays.sorted().map { weekdayNames[$0] }.joined(separator: ", ")
        print("Used \(filesUsed) \(selected) puzzle files")
    }
    print("Wrote \(words.count) unique answers to \(options.outputPath)")
    if options.lengthThresholds {
        print("Included answers using length thresholds: <5 letters >=10, 5-7 letters >=2, >7 letters >=1")
    } else if options.qualityFilter {
        print("Included answers using quality filter: <5 letters >=15, 5-7 letters >=3, >7 letters >=1")
    } else if options.easyFillFilter {
        print("Included answers using clue-aware easy fill filter")
        print("Rejected \(easyRejections.count) answers with easy fill filter")
    } else if options.minCount > 1 {
        print("Included answers appearing at least \(options.minCount) times")
    }
    if let badWordsPath = options.badWordsPath {
        print("Excluded \(badWords.count) bad words from \(badWordsPath)")
    }
    if let countsOutputPath = options.countsOutputPath {
        print("Wrote answer frequencies to \(countsOutputPath)")
    }
    if let rejectionsOutputPath = options.rejectionsOutputPath {
        print("Wrote easy-filter rejections to \(rejectionsOutputPath)")
    }
    if let qualityOutputPath = options.qualityOutputPath {
        print("Wrote word quality metadata to \(qualityOutputPath)")
    }
}

// Entry point: report any failure on stderr and exit with a non-zero status.
do {
    try run()
} catch {
    fputs("Wordmake: \(error)\n", stderr)
    exit(1)
}