crossmate

A collaborative crossword app for iOS
Log | Files | Refs | LICENSE

commit 4dd693c5392f7c8f48b5ef10a67fc678931338cb
parent c1c1bdca5cf80da53f9099e7294235d85eaf3578
Author: Michael Camilleri <[email protected]>
Date:   Tue, 19 May 2026 05:49:36 +0900

Rebuild puzzle generation around Fillmake, Gridmake and Pickmake

The single `Crossmake` executable that filled grids is renamed to `Fillmake`
(carrying its grid_list.json/LICENSE resources), and the SPM package now also
builds two new tools. `Gridmake` ranks grid_list.json entries by slot shape —
rewarding long slots, penalising short-slot clusters and excess slot counts —
and prints the ranking as TSV or JSON. `Pickmake` chooses a final batch from
already-filled puzzles. generate_puzzles.sh was previously git-ignored and is
now tracked under Scripts/ alongside a new select_puzzles.sh.

Fillmake's fill scoring is extended to favour cleaner, more current fill: a
per-length usage adjustment rewards common answers and punishes rare ones, a
freshness bonus keys off each answer's latest-seen year, and new concentration
and per-entry penalties push back on obscure short fill, awkward medium fill,
theme/quote fragments, novelty one-offs and phrase fragments — most applied
only past per-grid thresholds, so a handful of acceptable long or uncommon
entries still survive. The exported XD now carries Crossmate metadata:
Publisher Crossmate, CmVer 3, and Grid Index / Seed / Fill Score lines in
place of the old Source Grid date. Fillmake also gains --fill-report,
--min-fill-score and --max-answer-usages so a run can reject a weak or
answer-repeating fill on its own.

generate_puzzles.sh drives this: it builds the release binaries, selects grids
randomly by default (or from Gridmake's top-ranked set via
CROSSMAKE_GRID_SELECTION=ranked), generates EXAMPLES_PER_GRID candidates per
grid under a per-run timeout, and keeps the best-scoring fill per grid group
until the requested count is accepted.

Pickmake ranks discovered .xd candidates by their stored Fill Score (falling
back to a computed quality score), rejects any puzzle reusing an answer more
than --max-answer-usages times, then takes the requested --count while keeping
low-scoring "unusual" answers from repeating across the chosen puzzles;
--disallow-all-repeats tightens that to forbid any shared answer at all.

Wordmake records the extra evidence this relies on — clue-part, continuation
and theme clue counts plus each answer's latest-seen date — and adds curation
rejections for crosswordese, stale short/long fill, alphabet-run and
initial-plus-name artifacts, phrase fragments, and theme/continuation one-offs.

Co-Authored-By: Codex GPT 5.5 <[email protected]>

Diffstat:
M Crossmake/.gitignore | 1 -
M Crossmake/Package.swift | 8 ++++++--
A Crossmake/Scripts/generate_puzzles.sh | 362 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A Crossmake/Scripts/select_puzzles.sh | 40 ++++++++++++++++++++++++++++++++++++++++
D Crossmake/Sources/Crossmake/main.swift | 726 -------------------------------------------------------------------------------
R Crossmake/Sources/Crossmake/Resources/LICENSE -> Crossmake/Sources/Fillmake/Resources/LICENSE | 0
R Crossmake/Sources/Crossmake/Resources/grid_list.json -> Crossmake/Sources/Fillmake/Resources/grid_list.json | 0
A Crossmake/Sources/Fillmake/main.swift | 1291 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A Crossmake/Sources/Gridmake/main.swift | 279 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A Crossmake/Sources/Pickmake/main.swift | 346 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M Crossmake/Sources/Wordmake/main.swift | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
11 files changed, 2544 insertions(+), 737 deletions(-)

diff --git a/Crossmake/.gitignore b/Crossmake/.gitignore @@ -3,4 +3,3 @@ DerivedData/ Data/ Generated/ -Scripts/generate_puzzles.sh diff --git a/Crossmake/Package.swift b/Crossmake/Package.swift @@ -8,16 +8,20 @@ let package = Package( .macOS(.v13) ], products: [ - .executable(name: "Crossmake", targets: ["Crossmake"]), + .executable(name: "Fillmake", targets: ["Fillmake"]), + .executable(name: "Gridmake", targets: ["Gridmake"]), + .executable(name: "Pickmake", targets: ["Pickmake"]), .executable(name: "Wordmake", targets: ["Wordmake"]) ], targets: [ .executableTarget( - name: "Crossmake", + name: "Fillmake", resources: [ .process("Resources") ] ), + .executableTarget(name: "Gridmake"), + .executableTarget(name: "Pickmake"), .executableTarget(name: "Wordmake") ] ) diff --git a/Crossmake/Scripts/generate_puzzles.sh b/Crossmake/Scripts/generate_puzzles.sh @@ -0,0 +1,362 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +CROSSMAKE_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +cd "$CROSSMAKE_DIR" + +DATA_DIR="Data" +GENERATED_DIR="Generated" +GRID_LIST="Sources/Fillmake/Resources/grid_list.json" +WORD_LIST="${GENERATED_DIR}/word_list.json" +COUNTS_FILE="${GENERATED_DIR}/answer_counts.json" +QUALITY_FILE="${GENERATED_DIR}/word_quality.json" +BAD_WORDS_FILE="${DATA_DIR}/bad_words.json" +TIMEOUT_SECONDS="${CROSSMAKE_TIMEOUT_SECONDS:-10}" +EXAMPLES_PER_GRID="${CROSSMAKE_EXAMPLES_PER_GRID:-4}" +OUTPUT_PREFIX="Crossmate" +MIN_FILL_SCORE="${CROSSMAKE_MIN_FILL_SCORE:-7500}" +BREADTH="${CROSSMAKE_BREADTH:-80}" +MAX_ANSWER_USAGES="${CROSSMAKE_MAX_ANSWER_USAGES:-1}" +MAX_PARALLEL_JOBS="${CROSSMAKE_JOBS:-}" +# Use CROSSMAKE_GRID_SELECTION=ranked to sample from Gridmake's top-ranked grids. 
+GRID_SELECTION="${CROSSMAKE_GRID_SELECTION:-random}" +GRID_RANK_LIMIT="${CROSSMAKE_GRID_RANK_LIMIT:-250}" +FIXED_GRID_INDEX="${CROSSMAKE_GRID_INDEX:-}" +FIXED_SEED="${CROSSMAKE_SEED:-}" +TARGET_PUZZLES="${1:-10}" +FILLMAKE_EXECUTABLE=".build/release/Fillmake" +GRIDMAKE_EXECUTABLE=".build/release/Gridmake" + +if [[ ! -f "$WORD_LIST" ]]; then + echo "Word list not found: $WORD_LIST" >&2 + exit 1 +fi + +if [[ ! -f "$GRID_LIST" ]]; then + echo "Grid list not found: $GRID_LIST" >&2 + exit 1 +fi + +if [[ ! -f "$COUNTS_FILE" ]]; then + echo "Answer counts not found: $COUNTS_FILE" >&2 + exit 1 +fi + +if [[ ! -f "$QUALITY_FILE" ]]; then + echo "Word quality metadata not found: $QUALITY_FILE" >&2 + exit 1 +fi + +if [[ ! -f "$BAD_WORDS_FILE" ]]; then + echo "Bad-word list not found: $BAD_WORDS_FILE" >&2 + exit 1 +fi + +if ! command -v jq >/dev/null 2>&1; then + echo "jq is required to read $GRID_LIST" >&2 + exit 1 +fi + +if [[ -z "$MAX_PARALLEL_JOBS" ]]; then + MAX_PARALLEL_JOBS="$(sysctl -n hw.ncpu 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)" + if ((MAX_PARALLEL_JOBS > 4)); then + MAX_PARALLEL_JOBS=4 + fi +fi + +if ! [[ "$MAX_PARALLEL_JOBS" =~ ^[0-9]+$ ]] || ((10#$MAX_PARALLEL_JOBS < 1)); then + echo "CROSSMAKE_JOBS must be a positive integer" >&2 + exit 1 +fi + +if ! [[ "$TIMEOUT_SECONDS" =~ ^[0-9]+([.][0-9]+)?$ ]]; then + echo "CROSSMAKE_TIMEOUT_SECONDS must be a positive number" >&2 + exit 1 +fi + +if ! [[ "$EXAMPLES_PER_GRID" =~ ^[0-9]+$ ]] || ((10#$EXAMPLES_PER_GRID < 1)); then + echo "CROSSMAKE_EXAMPLES_PER_GRID must be a positive integer" >&2 + exit 1 +fi + +if ! [[ "$MIN_FILL_SCORE" =~ ^-?[0-9]+$ ]]; then + echo "CROSSMAKE_MIN_FILL_SCORE must be an integer" >&2 + exit 1 +fi + +if ! [[ "$BREADTH" =~ ^[0-9]+$ ]] || ((10#$BREADTH < 1)); then + echo "CROSSMAKE_BREADTH must be a positive integer" >&2 + exit 1 +fi + +if ! 
[[ "$MAX_ANSWER_USAGES" =~ ^[0-9]+$ ]] || ((10#$MAX_ANSWER_USAGES < 1)); then + echo "CROSSMAKE_MAX_ANSWER_USAGES must be a positive integer" >&2 + exit 1 +fi + +if [[ "$GRID_SELECTION" != "ranked" && "$GRID_SELECTION" != "random" ]]; then + echo "CROSSMAKE_GRID_SELECTION must be ranked or random" >&2 + exit 1 +fi + +if [[ -n "$FIXED_GRID_INDEX" ]] && ! [[ "$FIXED_GRID_INDEX" =~ ^[0-9]+$ ]]; then + echo "CROSSMAKE_GRID_INDEX must be a non-negative integer" >&2 + exit 1 +fi + +if [[ -n "$FIXED_SEED" ]] && ! [[ "$FIXED_SEED" =~ ^[0-9]+$ ]]; then + echo "CROSSMAKE_SEED must be a non-negative integer" >&2 + exit 1 +fi + +if ! [[ "$GRID_RANK_LIMIT" =~ ^[0-9]+$ ]] || ((10#$GRID_RANK_LIMIT < 1)); then + echo "CROSSMAKE_GRID_RANK_LIMIT must be a positive integer" >&2 + exit 1 +fi + +if ! [[ "$TARGET_PUZZLES" =~ ^[0-9]+$ ]]; then + echo "Usage: $0 [positive-puzzle-count]" >&2 + exit 1 +fi + +target_puzzle_count=$((10#$TARGET_PUZZLES)) +if ((target_puzzle_count < 1)); then + echo "Usage: $0 [positive-puzzle-count]" >&2 + exit 1 +fi + +next_output_number() { + local highest=0 + local path filename number number_value + + for path in "${GENERATED_DIR}/${OUTPUT_PREFIX}-"*.xd; do + [[ -e "$path" ]] || continue + filename="${path##*/}" + [[ "$filename" =~ ^${OUTPUT_PREFIX}-([0-9]+)\.xd$ ]] || continue + number="${BASH_REMATCH[1]}" + number_value=$((10#$number)) + if ((number_value > highest)); then + highest="$number_value" + fi + done + + echo "$((highest + 1))" +} + +output_number="$(next_output_number)" +mkdir -p "$GENERATED_DIR" +tmp_dir="$(mktemp -d)" +trap 'rm -rf "$tmp_dir"' EXIT + +echo "Building Fillmake in release mode" +swift build -c release + +if [[ ! -x "$FILLMAKE_EXECUTABLE" ]]; then + echo "Fillmake executable not found after build: $FILLMAKE_EXECUTABLE" >&2 + exit 1 +fi + +if [[ ! 
-x "$GRIDMAKE_EXECUTABLE" ]]; then + echo "Gridmake executable not found after build: $GRIDMAKE_EXECUTABLE" >&2 + exit 1 +fi + +available_grid_count="$(jq 'length' "$GRID_LIST")" +grid_indices="$tmp_dir/grid-indices.txt" + +if [[ -n "$FIXED_GRID_INDEX" ]]; then + if ((10#$FIXED_GRID_INDEX >= available_grid_count)); then + echo "CROSSMAKE_GRID_INDEX ${FIXED_GRID_INDEX} is out of range; found ${available_grid_count} grids" >&2 + exit 1 + fi + echo "$FIXED_GRID_INDEX" >"$grid_indices" +elif [[ "$GRID_SELECTION" == "ranked" ]]; then + "$GRIDMAKE_EXECUTABLE" --grids "$GRID_LIST" --limit "$GRID_RANK_LIMIT" \ + | awk 'NR > 1 { print $2 }' >"$grid_indices" +else + for ((grid_index = 0; grid_index < available_grid_count; grid_index++)); do + echo "$grid_index" + done >"$grid_indices" +fi + +selection_grid_count="$(wc -l <"$grid_indices" | tr -d '[:space:]')" +if ((10#$selection_grid_count < 1)); then + echo "No grids available for selection" >&2 + exit 1 +fi + +selected_indices=() +while IFS= read -r selected_index; do + selected_indices+=("$selected_index") +done <"$grid_indices" + +random_grid_index() { + echo "${selected_indices[$((RANDOM % selection_grid_count))]}" +} + +if [[ -n "$FIXED_GRID_INDEX" ]]; then + echo "Generating ${target_puzzle_count} puzzle(s) from fixed grid ${FIXED_GRID_INDEX} with ${EXAMPLES_PER_GRID} fill attempt(s) per puzzle, ${MAX_PARALLEL_JOBS} parallel job(s), timeout ${TIMEOUT_SECONDS}s per run, breadth ${BREADTH}, min score ${MIN_FILL_SCORE}, max answer usage ${MAX_ANSWER_USAGES}" +elif [[ "$GRID_SELECTION" == "ranked" ]]; then + echo "Generating ${target_puzzle_count} puzzle(s) from the top ${selection_grid_count} ranked grid(s) with ${EXAMPLES_PER_GRID} fill attempt(s) per puzzle, ${MAX_PARALLEL_JOBS} parallel job(s), timeout ${TIMEOUT_SECONDS}s per run, breadth ${BREADTH}, min score ${MIN_FILL_SCORE}, max answer usage ${MAX_ANSWER_USAGES}" +else + echo "Generating ${target_puzzle_count} puzzle(s) from ${selection_grid_count} random grid(s) 
with ${EXAMPLES_PER_GRID} fill attempt(s) per puzzle, ${MAX_PARALLEL_JOBS} parallel job(s), timeout ${TIMEOUT_SECONDS}s per run, breadth ${BREADTH}, min score ${MIN_FILL_SCORE}, max answer usage ${MAX_ANSWER_USAGES}" +fi + +running_job_count() { + jobs -rp | wc -l | tr -d '[:space:]' +} + +wait_for_job_slot() { + while ((10#$(running_job_count) >= 10#$MAX_PARALLEL_JOBS)); do + sleep 1 + done +} + +candidate_index=0 + +start_candidate() { + local group_id="$1" + local grid_index="$2" + local example="$3" + local candidate_id="$4" + local seed="$5" + local output_path="${tmp_dir}/candidate-${candidate_id}.xd" + local log_path="${tmp_dir}/candidate-${candidate_id}.log" + local ok_path="${tmp_dir}/candidate-${candidate_id}.ok" + local status_path="${tmp_dir}/candidate-${candidate_id}.status" + local group_path="${tmp_dir}/candidate-${candidate_id}.group" + + candidate_outputs+=("$output_path") + echo "$group_id" >"$group_path" + echo "Generating candidate ${candidate_id} from grid index ${grid_index} (${example}/${EXAMPLES_PER_GRID}) with seed ${seed}" + + ( + set +e + "$FILLMAKE_EXECUTABLE" \ + --words "$WORD_LIST" \ + --counts "$COUNTS_FILE" \ + --quality "$QUALITY_FILE" \ + --bad-words "$BAD_WORDS_FILE" \ + --grid-index "$grid_index" \ + --seed "$seed" \ + --timeout "$TIMEOUT_SECONDS" \ + --breadth "$BREADTH" \ + --optimize-fill \ + --min-fill-score "$MIN_FILL_SCORE" \ + --max-answer-usages "$MAX_ANSWER_USAGES" \ + --output "$output_path" >"$log_path" 2>&1 + status="$?" 
+ echo "$status" >"$status_path" + if [[ "$status" == "0" ]]; then + touch "$ok_path" + fi + ) & +} + +accepted_count=0 +rejected_count=0 +discarded_count=0 +attempted_group_count=0 + +fill_score_for_log() { + local log_path="$1" + local score + + score="$( + awk ' + /kept best fill score/ { + value = $(NF) + gsub(/\./, "", value) + print value + } + /Selected fill score:/ { + print $NF + } + ' "$log_path" | tail -1 + )" + + if [[ -n "$score" ]]; then + echo "$score" + else + echo "-999999999" + fi +} + +run_grid_group() { + local group_id="$1" + local grid_index="$2" + best_candidate_id="" + best_output_path="" + best_score="-999999999" + candidate_outputs=() + + echo "Starting grid group ${group_id} from grid index ${grid_index}" + + for ((example = 1; example <= EXAMPLES_PER_GRID; example++)); do + wait_for_job_slot + candidate_index=$((candidate_index + 1)) + if [[ -n "$FIXED_SEED" ]]; then + seed="$FIXED_SEED" + else + seed="$(($(date +%s) * 1000000 + candidate_index * 1000 + RANDOM))" + fi + start_candidate "$group_id" "$grid_index" "$example" "$candidate_index" "$seed" + done + + wait + + for output_path in "${candidate_outputs[@]}"; do + candidate_id="${output_path##*/candidate-}" + candidate_id="${candidate_id%.xd}" + group_path="${tmp_dir}/candidate-${candidate_id}.group" + ok_path="${tmp_dir}/candidate-${candidate_id}.ok" + log_path="${tmp_dir}/candidate-${candidate_id}.log" + + [[ -f "$group_path" ]] || continue + [[ "$(cat "$group_path")" == "$group_id" ]] || continue + [[ -f "$ok_path" ]] || continue + + score="$(fill_score_for_log "$log_path")" + if ((score > best_score)); then + best_score="$score" + best_candidate_id="$candidate_id" + best_output_path="$output_path" + fi + done + + if [[ -n "$best_candidate_id" ]]; then + printf -v final_output_path "%s/%s-%04d.xd" "$GENERATED_DIR" "$OUTPUT_PREFIX" "$output_number" + mv "$best_output_path" "$final_output_path" + echo "Wrote ${final_output_path} from candidate ${best_candidate_id}" + grep -E 
"Searched .* seconds|Selected fill score:" "${tmp_dir}/candidate-${best_candidate_id}.log" || true + output_number=$((output_number + 1)) + accepted_count=$((accepted_count + 1)) + else + echo "No accepted fill for grid group ${group_id}." >&2 + fi + + for output_path in "${candidate_outputs[@]}"; do + candidate_id="${output_path##*/candidate-}" + candidate_id="${candidate_id%.xd}" + log_path="${tmp_dir}/candidate-${candidate_id}.log" + ok_path="${tmp_dir}/candidate-${candidate_id}.ok" + + if [[ -f "$ok_path" ]]; then + if [[ -e "$output_path" ]]; then + discarded_count=$((discarded_count + 1)) + fi + else + rejected_count=$((rejected_count + 1)) + echo "Rejected candidate ${candidate_id}:" >&2 + tail -3 "$log_path" >&2 || true + fi + done +} + +while ((accepted_count < target_puzzle_count)); do + attempted_group_count=$((attempted_group_count + 1)) + run_grid_group "$attempted_group_count" "$(random_grid_index)" +done + +echo "Accepted ${accepted_count} puzzle(s) after ${attempted_group_count} grid group(s); discarded ${discarded_count} lower-scoring accepted fill(s); rejected ${rejected_count}." diff --git a/Crossmake/Scripts/select_puzzles.sh b/Crossmake/Scripts/select_puzzles.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +set -euo pipefail + +COUNT="${1:-10}" +CANDIDATE_DIR="${2:-Generated}" +PICKMAKE_EXECUTABLE=".build/release/Pickmake" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +CROSSMAKE_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +CALLER_DIR="$(pwd)" + +if ! 
[[ "$COUNT" =~ ^[0-9]+$ ]] || ((10#$COUNT < 1)); then + echo "Usage: $0 [positive-puzzle-count] [candidate-dir-or-xd-file ...]" >&2 + exit 1 +fi + +shift || true +inputs=() +if (($# == 0)); then + inputs+=("${CROSSMAKE_DIR}/${CANDIDATE_DIR}") +else + for input in "$@"; do + if [[ "$input" = /* ]]; then + inputs+=("$input") + else + inputs+=("${CALLER_DIR}/${input}") + fi + done +fi + +cd "$CROSSMAKE_DIR" + +echo "Building Pickmake in release mode" >&2 +swift build -c release --product Pickmake + +if [[ ! -x "$PICKMAKE_EXECUTABLE" ]]; then + echo "Pickmake executable not found after build: $PICKMAKE_EXECUTABLE" >&2 + exit 1 +fi + +"$PICKMAKE_EXECUTABLE" --count "$COUNT" --verbose "${inputs[@]}" diff --git a/Crossmake/Sources/Crossmake/main.swift b/Crossmake/Sources/Crossmake/main.swift @@ -1,726 +0,0 @@ -import Foundation - -let gridSize = 15 -let letters = Array("ABCDEFGHIJKLMNOPQRSTUVWXYZ".utf8) -let allLettersMask: UInt32 = (1 << 26) - 1 -let letterWeights: [UInt8: Int] = [ - 65: 9, 66: 2, 67: 2, 68: 4, 69: 12, 70: 2, 71: 3, 72: 2, 73: 9, - 74: 1, 75: 1, 76: 4, 77: 2, 78: 6, 79: 8, 80: 2, 81: 1, 82: 6, - 83: 4, 84: 6, 85: 4, 86: 2, 87: 2, 88: 1, 89: 2, 90: 1 -] - -struct Options { - var wordsPath = "Generated/word_list.json" - var gridsPath: String? - var countsPath = "Generated/answer_counts.json" - var qualityPath = "Generated/word_quality.json" - var badWordsPath = "Data/bad_words.json" - var outputPath = "Generated/crossmake.xd" - var gridIndex = 1 - var gridDate: String? 
- var seed: UInt64 = UInt64(Date().timeIntervalSince1970) - var timeout: TimeInterval = 30 - var breadth = 15 - var title = "Crossmake Puzzle" - var author = "Crossmake" - var optimizeFill = false -} - -struct GridEntry: Decodable { - let grid: [String] - let date: String -} - -struct Slot { - enum Direction { - case across - case down - } - - let id: Int - let cells: [Int] - let direction: Direction -} - -struct Word { - let text: String - let bytes: [UInt8] - let score: Int -} - -struct WordQuality: Decodable { - let count: Int? - let badClueCount: Int? - let obscureNameClueCount: Int? - let fillBlankCount: Int? - let rejectionReason: String? - - enum CodingKeys: String, CodingKey { - case count - case badClueCount = "bad_clue_count" - case obscureNameClueCount = "obscure_name_clue_count" - case fillBlankCount = "fill_blank_count" - case rejectionReason = "rejection_reason" - } -} - -struct Dictionary { - let wordsByLength: [Int: [Word]] - let scoresByText: [String: Int] -} - -struct PuzzleState { - var board: [UInt8?] - var options: [UInt32] -} - -struct GeneratorError: Error, CustomStringConvertible { - let description: String -} - -struct RandomNumberGenerator64: RandomNumberGenerator { - private var state: UInt64 - - init(seed: UInt64) { - self.state = seed == 0 ? 
0x9E3779B97F4A7C15 : seed - } - - mutating func next() -> UInt64 { - state &+= 0x9E3779B97F4A7C15 - var z = state - z = (z ^ (z >> 30)) &* 0xBF58476D1CE4E5B9 - z = (z ^ (z >> 27)) &* 0x94D049BB133111EB - return z ^ (z >> 31) - } -} - -func parseOptions(_ arguments: [String]) throws -> Options { - var options = Options() - var index = 1 - - func requireValue(_ name: String) throws -> String { - guard index + 1 < arguments.count else { - throw GeneratorError(description: "Missing value for \(name)") - } - index += 1 - return arguments[index] - } - - while index < arguments.count { - let arg = arguments[index] - switch arg { - case "--words": - options.wordsPath = try requireValue(arg) - case "--grids": - options.gridsPath = try requireValue(arg) - case "--counts": - options.countsPath = try requireValue(arg) - case "--quality": - options.qualityPath = try requireValue(arg) - case "--bad-words": - options.badWordsPath = try requireValue(arg) - case "--output", "-o": - options.outputPath = try requireValue(arg) - case "--grid-index": - let value = try requireValue(arg) - guard let parsed = Int(value), parsed >= 0 else { - throw GeneratorError(description: "--grid-index must be a non-negative integer") - } - options.gridIndex = parsed - case "--grid-date": - options.gridDate = try requireValue(arg) - case "--seed": - let value = try requireValue(arg) - guard let parsed = UInt64(value) else { - throw GeneratorError(description: "--seed must be an unsigned integer") - } - options.seed = parsed - case "--timeout": - let value = try requireValue(arg) - guard let parsed = Double(value), parsed > 0 else { - throw GeneratorError(description: "--timeout must be a positive number of seconds") - } - options.timeout = parsed - case "--breadth": - let value = try requireValue(arg) - guard let parsed = Int(value), parsed > 0 else { - throw GeneratorError(description: "--breadth must be a positive integer") - } - options.breadth = parsed - case "--title": - options.title = try 
requireValue(arg) - case "--author": - options.author = try requireValue(arg) - case "--optimize-fill": - options.optimizeFill = true - case "--help", "-h": - printUsage() - exit(0) - default: - throw GeneratorError(description: "Unknown argument: \(arg)") - } - index += 1 - } - - return options -} - -func printUsage() { - print(""" - Usage: Crossmake [options] - - Options: - -o, --output PATH Output XD file path. Default: Generated/crossmake.xd - --words PATH Word list JSON path. Default: Generated/word_list.json - --grids PATH Grid list JSON path. Default: bundled grid_list.json - --counts PATH Optional answer frequency JSON path. Default: Generated/answer_counts.json - --quality PATH Optional word quality metadata JSON path. Default: Generated/word_quality.json - --bad-words PATH Optional JSON list of blocked words. Default: Data/bad_words.json - --grid-index N Zero-based grid index. Default: 1 - --grid-date YYYY-MM-DD Select the first grid with this date. - --seed N Deterministic random seed. - --timeout SECONDS Stop after this many seconds. Default: 30 - --breadth N Randomly try from the top N scored candidates. Default: 15 - --title TEXT XD title metadata. - --author TEXT XD author metadata. - --optimize-fill Keep searching until timeout and return the best-scored fill found. - -h, --help Show this help. - """) -} - -func loadJSON<T: Decodable>(_ type: T.Type, from path: String) throws -> T { - let url = URL(fileURLWithPath: path) - let data = try Data(contentsOf: url) - return try JSONDecoder().decode(T.self, from: data) -} - -func loadOptionalJSON<T: Decodable>(_ type: T.Type, from path: String) throws -> T? 
{ - guard FileManager.default.fileExists(atPath: path) else { - return nil - } - return try loadJSON(type, from: path) -} - -func loadBundledJSON<T: Decodable>(_ type: T.Type, resource: String, extension resourceExtension: String) throws -> T { - guard let url = Bundle.module.url(forResource: resource, withExtension: resourceExtension) else { - throw GeneratorError(description: "Bundled resource not found: \(resource).\(resourceExtension)") - } - let data = try Data(contentsOf: url) - return try JSONDecoder().decode(T.self, from: data) -} - -func normalizedGridDate(_ value: String) -> String { - let parts = value.split(separator: "/") - if parts.count == 3, - let month = Int(parts[0]), - let day = Int(parts[1]), - let year = Int(parts[2]) { - return String(format: "%04d-%02d-%02d", year, month, day) - } - return value -} - -func vowelCount(_ bytes: [UInt8]) -> Int { - bytes.filter { $0 == 65 || $0 == 69 || $0 == 73 || $0 == 79 || $0 == 85 }.count -} - -func hasRunOfThree(_ bytes: [UInt8]) -> Bool { - guard bytes.count >= 3 else { - return false - } - for index in 2..<bytes.count where bytes[index] == bytes[index - 1] && bytes[index] == bytes[index - 2] { - return true - } - return false -} - -func qualityScore(text: String, bytes: [UInt8], count: Int?, quality: WordQuality?) -> Int { - let length = bytes.count - let frequency = count ?? 0 - var score = length * 12 - - if frequency > 0 { - score += min(80, frequency * 4) - } else { - score -= length >= 8 ? 30 : 80 - } - - if length >= 8 { - score += min(80, (length - 7) * 14) - } - if (5...7).contains(length) && frequency <= 2 { - score -= 70 - } - if length == 4 && frequency < 10 { - score -= 60 - } - if length <= 3 && frequency < 25 { - score -= 80 - } - - let vowels = vowelCount(bytes) - if vowels == 0 { - score -= 90 - } else if length >= 5 && Double(vowels) / Double(length) < 0.22 { - score -= 35 - } - if hasRunOfThree(bytes) { - score -= 110 - } - - if let quality { - let evidenceCount = max(quality.count ?? 
frequency, 1) - let badRatio = Double(quality.badClueCount ?? 0) / Double(evidenceCount) - let obscureRatio = Double(quality.obscureNameClueCount ?? 0) / Double(evidenceCount) - let blankRatio = Double(quality.fillBlankCount ?? 0) / Double(evidenceCount) - - if quality.rejectionReason != nil { - score -= length >= 8 ? 80 : 140 - } - if length <= 4 { - score -= Int(badRatio * 100) - score -= Int(obscureRatio * 70) - score -= Int(blankRatio * 45) - } else if length <= 7 { - score -= Int(badRatio * 45) - score -= Int(obscureRatio * 35) - score -= Int(blankRatio * 20) - } else { - score -= Int(badRatio * 20) - score -= Int(obscureRatio * 15) - } - } - - let letterScore = bytes.reduce(0) { $0 + (letterWeights[$1] ?? 0) } - return score + letterScore -} - -func loadDictionary(path: String, countsPath: String, qualityPath: String, badWordsPath: String) throws -> Dictionary { - let rawWords = try loadJSON([String].self, from: path) - let counts = try loadOptionalJSON([String: Int].self, from: countsPath) ?? [:] - let quality = try loadOptionalJSON([String: WordQuality].self, from: qualityPath) ?? [:] - let badWords = Set((try loadOptionalJSON([String].self, from: badWordsPath) ?? 
[]).map { $0.uppercased() }) - var seen = Set<String>() - var grouped: [Int: [Word]] = [:] - var scoresByText: [String: Int] = [:] - - for rawWord in rawWords { - let upper = rawWord.uppercased() - let bytes = Array(upper.utf8) - guard (2...gridSize).contains(bytes.count), bytes.allSatisfy({ $0 >= 65 && $0 <= 90 }) else { - continue - } - guard !badWords.contains(upper) else { - continue - } - guard seen.insert(upper).inserted else { - continue - } - let key = upper.lowercased() - let score = qualityScore(text: upper, bytes: bytes, count: counts[key], quality: quality[key]) - grouped[bytes.count, default: []].append(Word(text: upper, bytes: bytes, score: score)) - scoresByText[upper] = score - } - - for length in grouped.keys { - grouped[length]?.sort { - if $0.score != $1.score { - return $0.score > $1.score - } - return $0.text < $1.text - } - } - - return Dictionary(wordsByLength: grouped, scoresByText: scoresByText) -} - -func loadGrid(options: Options) throws -> ([Bool], String) { - let grids: [GridEntry] - if let gridsPath = options.gridsPath { - grids = try loadJSON([GridEntry].self, from: gridsPath) - } else { - grids = try loadBundledJSON([GridEntry].self, resource: "grid_list", extension: "json") - } - - let entry: GridEntry - if let date = options.gridDate { - let normalizedDate = normalizedGridDate(date) - guard let match = grids.first(where: { $0.date == normalizedDate }) else { - throw GeneratorError(description: "No grid found for date \(date)") - } - entry = match - } else { - guard options.gridIndex < grids.count else { - throw GeneratorError(description: "--grid-index \(options.gridIndex) is out of range; found \(grids.count) grids") - } - entry = grids[options.gridIndex] - } - - guard entry.grid.count == gridSize * gridSize else { - throw GeneratorError(description: "Grid \(entry.date) has \(entry.grid.count) cells, expected \(gridSize * gridSize)") - } - - return (entry.grid.map { $0 == "." 
}, entry.date) -} - -func buildSlots(black: [Bool]) -> [Slot] { - var slots: [Slot] = [] - - for row in 0..<gridSize { - var column = 0 - while column < gridSize { - let cell = row * gridSize + column - if black[cell] { - column += 1 - continue - } - var cells: [Int] = [] - while column < gridSize && !black[row * gridSize + column] { - cells.append(row * gridSize + column) - column += 1 - } - if cells.count > 1 { - slots.append(Slot(id: slots.count, cells: cells, direction: .across)) - } - } - } - - for column in 0..<gridSize { - var row = 0 - while row < gridSize { - let cell = row * gridSize + column - if black[cell] { - row += 1 - continue - } - var cells: [Int] = [] - while row < gridSize && !black[row * gridSize + column] { - cells.append(row * gridSize + column) - row += 1 - } - if cells.count > 1 { - slots.append(Slot(id: slots.count, cells: cells, direction: .down)) - } - } - } - - return slots -} - -func bit(for letter: UInt8) -> UInt32 { - 1 << UInt32(letter - 65) -} - -func popcount(_ mask: UInt32) -> Int { - Int(mask.nonzeroBitCount) -} - -func allowedLetters(from mask: UInt32) -> [UInt8] { - letters.filter { mask & bit(for: $0) != 0 } -} - -func matches(_ word: Word, slot: Slot, state: PuzzleState) -> Bool { - for (offset, cell) in slot.cells.enumerated() { - let char = word.bytes[offset] - if let existing = state.board[cell], existing != char { - return false - } - if state.options[cell] & bit(for: char) == 0 { - return false - } - } - return true -} - -func candidates(for slot: Slot, state: PuzzleState, dictionary: Dictionary) -> [Word] { - (dictionary.wordsByLength[slot.cells.count] ?? []).filter { matches($0, slot: slot, state: state) } -} - -func propagate(_ input: PuzzleState, slots: [Slot], dictionary: Dictionary) -> PuzzleState? 
{ - var state = input - var changed = true - - while changed { - changed = false - for slot in slots { - let words = candidates(for: slot, state: state, dictionary: dictionary) - if words.isEmpty { - return nil - } - - var allowed = Array(repeating: UInt32(0), count: slot.cells.count) - for word in words { - for (offset, char) in word.bytes.enumerated() { - allowed[offset] |= bit(for: char) - } - } - - for (offset, cell) in slot.cells.enumerated() { - let next = state.options[cell] & allowed[offset] - if next == 0 { - return nil - } - if next != state.options[cell] { - state.options[cell] = next - changed = true - } - if popcount(next) == 1, state.board[cell] == nil { - state.board[cell] = allowedLetters(from: next)[0] - changed = true - } - } - } - } - - return state -} - -func isSolved(_ state: PuzzleState, black: [Bool]) -> Bool { - for cell in 0..<black.count where !black[cell] && state.board[cell] == nil { - return false - } - return true -} - -func bestSlot(in state: PuzzleState, slots: [Slot], dictionary: Dictionary) -> (Slot, [Word])? { - var best: (Slot, [Word])? - - for slot in slots where slot.cells.contains(where: { state.board[$0] == nil }) { - let words = candidates(for: slot, state: state, dictionary: dictionary) - if words.isEmpty { - return (slot, []) - } - if let current = best { - if words.count < current.1.count || (words.count == current.1.count && slot.cells.count > current.0.cells.count) { - best = (slot, words) - } - } else { - best = (slot, words) - } - } - - return best -} - -func candidateOrder(_ words: [Word], breadth: Int, rng: inout RandomNumberGenerator64) -> [Word] { - let prefix = Array(words.prefix(max(1, min(breadth, words.count)))).shuffled(using: &rng) - if words.count <= prefix.count { - return prefix - } - return prefix + words.dropFirst(prefix.count) -} - -func placing(_ word: Word, in slot: Slot, state: PuzzleState) -> PuzzleState? 
{ - var next = state - for (offset, cell) in slot.cells.enumerated() { - let char = word.bytes[offset] - if let existing = next.board[cell], existing != char { - return nil - } - next.board[cell] = char - next.options[cell] = bit(for: char) - } - return next -} - -func solve( - black: [Bool], - slots: [Slot], - dictionary: Dictionary, - timeout: TimeInterval, - breadth: Int, - optimizeFill: Bool, - rng: inout RandomNumberGenerator64 -) -> PuzzleState? { - var initial = PuzzleState( - board: Array(repeating: nil, count: gridSize * gridSize), - options: black.map { $0 ? 0 : allLettersMask } - ) - for cell in 0..<black.count where black[cell] { - initial.board[cell] = nil - } - - let deadline = Date().addingTimeInterval(timeout) - var attempts = 0 - var bestSolved: PuzzleState? - var bestSolvedScore = Int.min - - func search(_ state: PuzzleState) -> PuzzleState? { - if Date() >= deadline { - return nil - } - attempts += 1 - - guard let propagated = propagate(state, slots: slots, dictionary: dictionary) else { - return nil - } - if isSolved(propagated, black: black) { - if optimizeFill { - let score = fillScore(state: propagated, slots: slots, dictionary: dictionary) - if score > bestSolvedScore { - bestSolved = propagated - bestSolvedScore = score - } - return nil - } - return propagated - } - guard let (slot, slotCandidates) = bestSlot(in: propagated, slots: slots, dictionary: dictionary), !slotCandidates.isEmpty else { - return nil - } - - for word in candidateOrder(slotCandidates, breadth: breadth, rng: &rng) { - guard let next = placing(word, in: slot, state: propagated) else { - continue - } - if let solved = search(next) { - return solved - } - } - - return nil - } - - let result = search(initial) - if let result { - return result - } - if let bestSolved { - fputs("Searched \(attempts) nodes and kept best fill score \(bestSolvedScore).\n", stderr) - return bestSolved - } - if result == nil { - fputs("Stopped after \(attempts) search nodes.\n", stderr) - } - 
return nil -} - -func clueNumberGrid(black: [Bool]) -> [Int?] { - var numbers = Array<Int?>(repeating: nil, count: gridSize * gridSize) - var nextNumber = 1 - - for row in 0..<gridSize { - for column in 0..<gridSize { - let cell = row * gridSize + column - if black[cell] { - continue - } - let startsAcross = (column == 0 || black[cell - 1]) && column + 1 < gridSize && !black[cell + 1] - let startsDown = (row == 0 || black[cell - gridSize]) && row + 1 < gridSize && !black[cell + gridSize] - if startsAcross || startsDown { - numbers[cell] = nextNumber - nextNumber += 1 - } - } - } - - return numbers -} - -func wordText(for slot: Slot, state: PuzzleState) -> String { - String(bytes: slot.cells.map { state.board[$0] ?? 63 }, encoding: .utf8) ?? "" -} - -func fillScore(state: PuzzleState, slots: [Slot], dictionary: Dictionary) -> Int { - var score = 0 - var seen = Set<String>() - - for slot in slots { - let word = wordText(for: slot, state: state) - score += dictionary.scoresByText[word] ?? 0 - if !seen.insert(word).inserted { - score -= 500 - } - } - - return score -} - -func exportXD(state: PuzzleState, black: [Bool], slots: [Slot], options: Options, gridDate: String) -> String { - let numbers = clueNumberGrid(black: black) - var lines: [String] = [ - "Title: \(options.title)", - "CmVer: 1", - "Author: \(options.author)", - "Publisher: Crossmake", - "Date: \(ISO8601DateFormatter().string(from: Date()).prefix(10))", - "Copyright: Generated", - "Source Grid: \(gridDate)", - "" - ] - - for row in 0..<gridSize { - var rowBytes: [UInt8] = [] - for column in 0..<gridSize { - let cell = row * gridSize + column - rowBytes.append(black[cell] ? 35 : (state.board[cell] ?? 63)) - } - lines.append(String(bytes: rowBytes, encoding: .utf8) ?? "") - } - - lines.append("") - lines.append("") - - let across = slots - .filter { $0.direction == .across } - .sorted { (numbers[$0.cells[0]] ?? 0) < (numbers[$1.cells[0]] ?? 
0) } - let down = slots - .filter { $0.direction == .down } - .sorted { (numbers[$0.cells[0]] ?? 0) < (numbers[$1.cells[0]] ?? 0) } - - for slot in across { - let number = numbers[slot.cells[0]] ?? 0 - lines.append("A\(number). [No clue] ~ \(wordText(for: slot, state: state))") - } - - lines.append("") - - for slot in down { - let number = numbers[slot.cells[0]] ?? 0 - lines.append("D\(number). [No clue] ~ \(wordText(for: slot, state: state))") - } - - lines.append("") - return lines.joined(separator: "\n") -} - -func run() throws { - let options = try parseOptions(CommandLine.arguments) - let dictionary = try loadDictionary( - path: options.wordsPath, - countsPath: options.countsPath, - qualityPath: options.qualityPath, - badWordsPath: options.badWordsPath - ) - let (black, gridDate) = try loadGrid(options: options) - let slots = buildSlots(black: black) - var rng = RandomNumberGenerator64(seed: options.seed) - - guard let state = solve( - black: black, - slots: slots, - dictionary: dictionary, - timeout: options.timeout, - breadth: options.breadth, - optimizeFill: options.optimizeFill, - rng: &rng - ) else { - throw GeneratorError(description: "Could not fill the grid within \(options.timeout) seconds. Try a different --seed, --grid-index, or --breadth.") - } - - let xd = exportXD(state: state, black: black, slots: slots, options: options, gridDate: gridDate) - let outputURL = URL(fileURLWithPath: options.outputPath) - let outputDirectory = outputURL.deletingLastPathComponent() - if outputDirectory.path != "." 
{ - try FileManager.default.createDirectory(at: outputDirectory, withIntermediateDirectories: true) - } - try xd.write(to: outputURL, atomically: true, encoding: .utf8) - print("Wrote \(options.outputPath)") -} - -do { - try run() -} catch { - fputs("Crossmake: \(error)\n", stderr) - exit(1) -} diff --git a/Crossmake/Sources/Crossmake/Resources/LICENSE b/Crossmake/Sources/Fillmake/Resources/LICENSE diff --git a/Crossmake/Sources/Crossmake/Resources/grid_list.json b/Crossmake/Sources/Fillmake/Resources/grid_list.json diff --git a/Crossmake/Sources/Fillmake/main.swift b/Crossmake/Sources/Fillmake/main.swift @@ -0,0 +1,1291 @@ +import Foundation + +let gridSize = 15 +let letters = Array("ABCDEFGHIJKLMNOPQRSTUVWXYZ".utf8) +let allLettersMask: UInt32 = (1 << 26) - 1 +let letterWeights: [UInt8: Int] = [ + 65: 9, 66: 2, 67: 2, 68: 4, 69: 12, 70: 2, 71: 3, 72: 2, 73: 9, + 74: 1, 75: 1, 76: 4, 77: 2, 78: 6, 79: 8, 80: 2, 81: 1, 82: 6, + 83: 4, 84: 6, 85: 4, 86: 2, 87: 2, 88: 1, 89: 2, 90: 1 +] + +struct Options { + var wordsPath = "Generated/word_list.json" + var gridsPath: String? + var countsPath = "Generated/answer_counts.json" + var qualityPath = "Generated/word_quality.json" + var badWordsPath = "Data/bad_words.json" + var outputPath = "Generated/fillmake.xd" + var gridIndex = 1 + var gridDate: String? + var seed: UInt64 = UInt64(Date().timeIntervalSince1970) + var timeout: TimeInterval = 30 + var breadth = 15 + var title = "Fillmake Puzzle" + var author = "Fillmake" + var optimizeFill = false + var fillReport = false + var minFillScore: Int? 
+ var maxAnswerUsages = 2 +} + +struct GridEntry: Decodable { + let grid: [String] + let date: String +} + +struct Slot { + enum Direction { + case across + case down + } + + let id: Int + let cells: [Int] + let direction: Direction +} + +struct Word { + let text: String + let bytes: [UInt8] + let score: Int +} + +struct WordBucket { + let words: [Word] + let indexesByPositionAndLetter: [[UInt8: [Int]]] +} + +struct WordQuality: Decodable { + let count: Int? + let badClueCount: Int? + let obscureNameClueCount: Int? + let fillBlankCount: Int? + let foreignLanguageClueCount: Int? + let cluePartCount: Int? + let continuationClueCount: Int? + let themeClueCount: Int? + let latestSeen: String? + let sampleClues: [String]? + let rejectionReason: String? + + enum CodingKeys: String, CodingKey { + case count + case badClueCount = "bad_clue_count" + case obscureNameClueCount = "obscure_name_clue_count" + case fillBlankCount = "fill_blank_count" + case foreignLanguageClueCount = "foreign_language_clue_count" + case cluePartCount = "clue_part_count" + case continuationClueCount = "continuation_clue_count" + case themeClueCount = "theme_clue_count" + case latestSeen = "latest_seen" + case sampleClues = "sample_clues" + case rejectionReason = "rejection_reason" + } +} + +struct WordProfile { + let text: String + let length: Int + let score: Int + let count: Int + let badClueCount: Int + let obscureNameClueCount: Int + let fillBlankCount: Int + let foreignLanguageClueCount: Int + let cluePartCount: Int + let continuationClueCount: Int + let themeClueCount: Int + let latestSeen: String? + let sampleClues: [String] + let freshnessScore: Int + let rejectionReason: String? +} + +struct Dictionary { + let bucketsByLength: [Int: WordBucket] + let scoresByText: [String: Int] + let profilesByText: [String: WordProfile] +} + +struct PuzzleState { + var board: [UInt8?] 
/// Deterministic 64-bit generator, so a given seed always reproduces the same sequence.
///
/// Each call advances the state by a fixed increment and scrambles it with two
/// xor-shift/multiply rounds and a final xor-shift (the SplitMix64 constants).
struct RandomNumberGenerator64: RandomNumberGenerator {
    /// Added to the state on every call; also stands in for a zero seed.
    private static let increment: UInt64 = 0x9E3779B97F4A7C15

    private var state: UInt64

    /// - Parameter seed: Starting state. A seed of zero is replaced by the increment constant.
    init(seed: UInt64) {
        if seed == 0 {
            state = Self.increment
        } else {
            state = seed
        }
    }

    /// Advances the state and returns the next scrambled value.
    mutating func next() -> UInt64 {
        state = state &+ Self.increment
        let firstRound = (state ^ (state >> 30)) &* 0xBF58476D1CE4E5B9
        let secondRound = (firstRound ^ (firstRound >> 27)) &* 0x94D049BB133111EB
        return secondRound ^ (secondRound >> 31)
    }
}
"--breadth": + let value = try requireValue(arg) + guard let parsed = Int(value), parsed > 0 else { + throw GeneratorError(description: "--breadth must be a positive integer") + } + options.breadth = parsed + case "--title": + options.title = try requireValue(arg) + case "--author": + options.author = try requireValue(arg) + case "--optimize-fill": + options.optimizeFill = true + case "--fill-report": + options.fillReport = true + case "--min-fill-score": + let value = try requireValue(arg) + guard let parsed = Int(value) else { + throw GeneratorError(description: "--min-fill-score must be an integer") + } + options.minFillScore = parsed + case "--max-answer-usages": + let value = try requireValue(arg) + guard let parsed = Int(value), parsed > 0 else { + throw GeneratorError(description: "--max-answer-usages must be a positive integer") + } + options.maxAnswerUsages = parsed + case "--help", "-h": + printUsage() + exit(0) + default: + throw GeneratorError(description: "Unknown argument: \(arg)") + } + index += 1 + } + + return options +} + +func printUsage() { + print(""" + Usage: Fillmake [options] + + Options: + -o, --output PATH Output XD file path. Default: Generated/fillmake.xd + --words PATH Word list JSON path. Default: Generated/word_list.json + --grids PATH Grid list JSON path. Default: bundled grid_list.json + --counts PATH Optional answer frequency JSON path. Default: Generated/answer_counts.json + --quality PATH Optional word quality metadata JSON path. Default: Generated/word_quality.json + --bad-words PATH Optional JSON list of blocked words. Default: Data/bad_words.json + --grid-index N Zero-based grid index. Default: 1 + --grid-date YYYY-MM-DD Select the first grid with this date. + --seed N Deterministic random seed. + --timeout SECONDS Stop after this many seconds. Default: 30 + --breadth N Randomly try from the top N scored candidates. Default: 15 + --title TEXT XD title metadata. + --author TEXT XD author metadata. 
/// Reports whether any byte value occurs three or more times in a row.
func hasRunOfThree(_ bytes: [UInt8]) -> Bool {
    var previous: UInt8?
    var runLength = 0

    for byte in bytes {
        // Extend the current run on a repeat, otherwise start a new run at this byte.
        runLength = (byte == previous) ? runLength + 1 : 1
        if runLength == 3 {
            return true
        }
        previous = byte
    }

    return false
}
/// Score adjustment for how often an answer of the given length has been used.
///
/// Frequent answers earn a bonus and rare ones a penalty, with tiers that depend
/// on the length. A non-positive frequency yields no adjustment.
func usageAdjustment(length: Int, frequency: Int) -> Int {
    if frequency <= 0 {
        return 0
    }

    // Three letters or fewer: anything below 50 uses is penalised.
    if length <= 3 {
        switch frequency {
        case 100...: return 70
        case 50...: return 35
        default: return -70
        }
    }

    if length == 4 {
        switch frequency {
        case 100...: return 60
        case 50...: return 30
        case ..<20: return -55
        default: return 0
        }
    }

    if length == 5 {
        switch frequency {
        case 50...: return 45
        case 20...: return 20
        case ..<10: return -35
        default: return 0
        }
    }

    // Only lengths 6 and 7 reach this test.
    if length <= 7 {
        switch frequency {
        case 30...: return 25
        case ..<5: return -30
        default: return 0
        }
    }

    // Eight letters or more.
    switch frequency {
    case 10...: return 15
    case 1: return -15
    default: return 0
    }
}
/// Flags an answer of four letters or fewer as obscure.
///
/// A short answer is obscure when it is rarely used, scores below 130, or when a
/// large share of its clues are bad, obscure-name, fill-in-the-blank or
/// foreign-language clues.
func isObscureShortFill(_ profile: WordProfile) -> Bool {
    if profile.length > 4 {
        return false
    }

    let isRare = isLowUsageFill(profile) || (profile.length <= 3 && profile.count < 80)
    if isRare || profile.score < 130 {
        return true
    }

    // Each flagged-clue count is compared, as a share of all uses, against its own limit.
    let clueShareLimits: [(flagged: Int, limit: Double)] = [
        (profile.badClueCount, 0.18),
        (profile.obscureNameClueCount, 0.18),
        (profile.fillBlankCount, 0.25),
        (profile.foreignLanguageClueCount, 0.18)
    ]
    return clueShareLimits.contains { ratio($0.flagged, profile.count) >= $0.limit }
}
/// Flags a long answer (eight letters or more) seen exactly once whose clues mark it as a novelty.
///
/// Such an answer counts as a novelty when it is a theme/quote fragment or when
/// one of its sample clues contains a record-style or superlative phrase.
func isNoveltyOneOff(_ profile: WordProfile) -> Bool {
    let isLongSingleton = profile.length >= 8 && profile.count == 1
    if !isLongSingleton {
        return false
    }

    let noveltyPatterns = [
        "guinness",
        "world record",
        "record at",
        "#",
        "dumbest",
        "largest",
        "longest",
        "smallest"
    ]
    return isThemeOrQuoteFragment(profile)
        || sampleCluesContain(profile, patterns: noveltyPatterns)
}
80 : 140 + } + if length <= 4 { + score -= Int(badRatio * 100) + score -= Int(obscureRatio * 70) + score -= Int(blankRatio * 45) + } else if length <= 7 { + score -= Int(badRatio * 45) + score -= Int(obscureRatio * 35) + score -= Int(blankRatio * 20) + } else { + score -= Int(badRatio * 20) + score -= Int(obscureRatio * 15) + } + } + + let letterScore = bytes.reduce(0) { $0 + (letterWeights[$1] ?? 0) } + return score + letterScore + freshnessScore(latestSeen: quality?.latestSeen) +} + +func loadDictionary(path: String, countsPath: String, qualityPath: String, badWordsPath: String) throws -> Dictionary { + let rawWords = try loadJSON([String].self, from: path) + let counts = try loadOptionalJSON([String: Int].self, from: countsPath) ?? [:] + let quality = try loadOptionalJSON([String: WordQuality].self, from: qualityPath) ?? [:] + let badWords = Set((try loadOptionalJSON([String].self, from: badWordsPath) ?? []).map { $0.uppercased() }) + var seen = Set<String>() + var grouped: [Int: [Word]] = [:] + var scoresByText: [String: Int] = [:] + var profilesByText: [String: WordProfile] = [:] + + for rawWord in rawWords { + let upper = rawWord.uppercased() + let bytes = Array(upper.utf8) + guard (2...gridSize).contains(bytes.count), bytes.allSatisfy({ $0 >= 65 && $0 <= 90 }) else { + continue + } + guard !badWords.contains(upper) else { + continue + } + guard seen.insert(upper).inserted else { + continue + } + let key = upper.lowercased() + let wordQuality = quality[key] + let frequency = counts[key] ?? wordQuality?.count ?? 0 + let score = qualityScore(text: upper, bytes: bytes, count: counts[key], quality: wordQuality) + grouped[bytes.count, default: []].append(Word(text: upper, bytes: bytes, score: score)) + scoresByText[upper] = score + profilesByText[upper] = WordProfile( + text: upper, + length: bytes.count, + score: score, + count: frequency, + badClueCount: wordQuality?.badClueCount ?? 0, + obscureNameClueCount: wordQuality?.obscureNameClueCount ?? 
/// Loads the grid list and selects one grid, returning its black-square mask and index.
///
/// The list is read from `options.gridsPath` when set, otherwise from the bundled
/// `grid_list.json`. When `options.gridDate` is set it takes precedence over
/// `options.gridIndex`.
///
/// - Returns: One flag per cell, `true` where the cell string is ".", paired with
///   the zero-based position of the chosen entry in the grid list.
/// - Throws: `GeneratorError` when no entry has the requested date, the index is
///   out of range, or the grid does not have `gridSize * gridSize` cells. Errors
///   from the JSON loaders are passed through.
func loadGrid(options: Options) throws -> ([Bool], Int) {
    let grids: [GridEntry]
    if let gridsPath = options.gridsPath {
        grids = try loadJSON([GridEntry].self, from: gridsPath)
    } else {
        grids = try loadBundledJSON([GridEntry].self, resource: "grid_list", extension: "json")
    }

    let gridIndex: Int
    if let date = options.gridDate {
        // Normalise both sides so an M/D/YYYY date matches whichever form the
        // request or the stored entry happens to use.
        let normalizedDate = normalizedGridDate(date)
        guard let matchIndex = grids.firstIndex(where: { normalizedGridDate($0.date) == normalizedDate }) else {
            throw GeneratorError(description: "No grid found for date \(date)")
        }
        gridIndex = matchIndex
    } else {
        // `indices.contains` also rejects a negative index, which would otherwise trap on subscript.
        guard grids.indices.contains(options.gridIndex) else {
            throw GeneratorError(description: "--grid-index \(options.gridIndex) is out of range; found \(grids.count) grids")
        }
        gridIndex = options.gridIndex
    }

    let entry = grids[gridIndex]
    guard entry.grid.count == gridSize * gridSize else {
        throw GeneratorError(description: "Grid \(entry.date) has \(entry.grid.count) cells, expected \(gridSize * gridSize)")
    }

    return (entry.grid.map { $0 == "." }, gridIndex)
}
/// Returns the dictionary words that can still be placed in `slot`.
///
/// Letters already on the board narrow the search through the bucket's
/// per-position letter index; the survivors are then checked with `matches`.
func candidates(for slot: Slot, state: PuzzleState, dictionary: Dictionary) -> [Word] {
    guard let bucket = dictionary.bucketsByLength[slot.cells.count] else {
        return []
    }

    // Word indexes consistent with every placed letter seen so far; nil until
    // the first placed letter is met.
    var narrowed: [Int]?
    for (position, cell) in zip(slot.cells.indices, slot.cells) {
        guard let letter = state.board[cell] else {
            continue
        }
        guard let withLetter = bucket.indexesByPositionAndLetter[position][letter] else {
            return []
        }
        guard let previous = narrowed else {
            narrowed = withLetter
            continue
        }
        let shared = intersectSorted(previous, withLetter)
        if shared.isEmpty {
            return []
        }
        narrowed = shared
    }

    let pool = narrowed.map { indexes in indexes.map { bucket.words[$0] } } ?? bucket.words
    return pool.filter { matches($0, slot: slot, state: state) }
}
/// Picks the unfilled slot to branch on next, together with its candidate words.
///
/// The slot with the fewest candidates wins; on a tie the longer slot wins, and
/// on a full tie the earlier slot is kept. A slot with no candidates is returned
/// immediately with an empty list. Returns `nil` when every slot is fully lettered.
func bestSlot(in state: PuzzleState, slots: [Slot], dictionary: Dictionary) -> (Slot, [Word])? {
    var choice: (slot: Slot, words: [Word])?

    for slot in slots {
        let hasOpenCell = slot.cells.contains { state.board[$0] == nil }
        guard hasOpenCell else {
            continue
        }

        let fits = candidates(for: slot, state: state, dictionary: dictionary)
        guard !fits.isEmpty else {
            return (slot, [])
        }

        guard let incumbent = choice else {
            choice = (slot: slot, words: fits)
            continue
        }

        let isTighter = fits.count < incumbent.words.count
        let isLongerTie = fits.count == incumbent.words.count
            && slot.cells.count > incumbent.slot.cells.count
        if isTighter || isLongerTie {
            choice = (slot: slot, words: fits)
        }
    }

    return choice.map { ($0.slot, $0.words) }
}
/// Fills the grid by depth-first search with constraint propagation.
///
/// Every node first runs `propagate`. A node whose white cells are all lettered is
/// accepted only if no answer is used more than `maxAnswerUsages` times. Otherwise
/// the slot chosen by `bestSlot` is branched on, trying words in the order given by
/// `candidateOrder`.
///
/// - Parameters:
///   - black: Per-cell flags marking black squares.
///   - slots: Entry slots to fill.
///   - dictionary: Candidate words and their scores.
///   - timeout: Seconds after which no further nodes are expanded.
///   - breadth: Passed to `candidateOrder` to size the shuffled leading group.
///   - optimizeFill: When `true`, the search continues past each accepted fill and
///     remembers the one with the highest `fillScore`.
///   - maxAnswerUsages: Maximum number of slots that may share one answer.
///   - rng: Generator used when ordering candidates.
/// - Returns: The first accepted fill, or with `optimizeFill` the best-scoring fill
///   seen; `nil` when none was found. A summary line is written to stderr whenever
///   the search ends without returning a fill directly.
func solve(
    black: [Bool],
    slots: [Slot],
    dictionary: Dictionary,
    timeout: TimeInterval,
    breadth: Int,
    optimizeFill: Bool,
    maxAnswerUsages: Int,
    rng: inout RandomNumberGenerator64
) -> PuzzleState? {
    // Every cell starts unlettered; black cells get an empty option mask.
    let initial = PuzzleState(
        board: Array(repeating: nil, count: gridSize * gridSize),
        options: black.map { $0 ? 0 : allLettersMask }
    )

    let startedAt = Date()
    let deadline = startedAt.addingTimeInterval(timeout)
    var attempts = 0
    var bestSolved: PuzzleState?
    var bestSolvedScore = Int.min

    func search(_ state: PuzzleState) -> PuzzleState? {
        if Date() >= deadline {
            return nil
        }
        attempts += 1

        guard let propagated = propagate(state, slots: slots, dictionary: dictionary) else {
            return nil
        }
        if isSolved(propagated, black: black) {
            guard repeatedAnswers(in: propagated, slots: slots, maxUsage: maxAnswerUsages).isEmpty else {
                return nil
            }
            if optimizeFill {
                let score = fillScore(state: propagated, slots: slots, dictionary: dictionary)
                if score > bestSolvedScore {
                    bestSolved = propagated
                    bestSolvedScore = score
                }
                // Returning nil makes the caller keep exploring for a better fill.
                return nil
            }
            return propagated
        }
        guard let (slot, slotCandidates) = bestSlot(in: propagated, slots: slots, dictionary: dictionary), !slotCandidates.isEmpty else {
            return nil
        }

        for word in candidateOrder(slotCandidates, breadth: breadth, rng: &rng) {
            guard let next = placing(word, in: slot, state: propagated) else {
                continue
            }
            if let solved = search(next) {
                return solved
            }
        }

        return nil
    }

    if let solved = search(initial) {
        return solved
    }

    let elapsed = Date().timeIntervalSince(startedAt)
    if let bestSolved {
        fputs("Searched \(attempts) nodes in \(String(format: "%.2f", elapsed)) seconds and kept best fill score \(bestSolvedScore).\n", stderr)
        return bestSolved
    }
    fputs("Stopped after \(attempts) search nodes in \(String(format: "%.2f", elapsed)) seconds.\n", stderr)
    return nil
}
"" +} + +func ratio(_ part: Int, _ whole: Int) -> Double { + Double(part) / Double(max(whole, 1)) +} + +func isPartialish(_ profile: WordProfile) -> Bool { + let text = profile.text + if text.hasPrefix("INA") && profile.length >= 5 { + return true + } + if text.hasPrefix("IHAVE") || text.hasPrefix("WEARE") || text.hasPrefix("ITIS") || text.hasPrefix("ITSA") { + return true + } + if text.hasPrefix("RATED") || text.hasSuffix("IN") && profile.length >= 5 { + return true + } + return false +} + +func weaknessReasons(for profile: WordProfile) -> [String] { + var reasons: [String] = [] + let blankRatio = ratio(profile.fillBlankCount, profile.count) + let foreignRatio = ratio(profile.foreignLanguageClueCount, profile.count) + + if profile.score < 90 { + reasons.append("low score \(profile.score)") + } + if profile.freshnessScore < 0 { + reasons.append("last seen \(profile.latestSeen ?? "unknown")") + } + if profile.length <= 4 && profile.score < 115 { + reasons.append("weak short fill") + } + if (5...7).contains(profile.length) && profile.count <= 8 && profile.score < 145 { + reasons.append("rare medium fill") + } + if profile.length >= 6 && profile.count <= 3 && profile.score < 180 { + reasons.append("low frequency \(profile.count)") + } + if isLowUsageFill(profile) { + reasons.append("low usage count \(profile.count)") + } + if isObscureShortFill(profile) { + reasons.append("obscure short fill") + } + if isAwkwardMediumFill(profile) { + reasons.append("awkward medium fill") + } + if profile.fillBlankCount >= 2 && blankRatio >= 0.5 { + reasons.append("fill-in clue dependent") + } + if profile.foreignLanguageClueCount >= 2 && foreignRatio >= 0.5 { + reasons.append("mostly foreign-language clues") + } + if profile.cluePartCount > 0 { + reasons.append("part-of-longer-answer clues") + } + if isThemeOrQuoteFragment(profile) { + reasons.append("theme/quote fragment") + } + if isNoveltyOneOff(profile) { + reasons.append("novelty one-off") + } + if isPartialish(profile) { + 
reasons.append("phrase fragment") + } + if let rejectionReason = profile.rejectionReason { + reasons.append(rejectionReason) + } + return reasons +} + +func concentrationPenalty(for profiles: [WordProfile]) -> Int { + var weakShort = 0 + var lowFrequencyLong = 0 + var blankDependent = 0 + var heavyBlankDependent = 0 + var partialish = 0 + var rejected = 0 + var severeLowFrequency = 0 + var rareMedium = 0 + var veryWeakShort = 0 + var lowUsageFill = 0 + var obscureShort = 0 + var awkwardMedium = 0 + var themeOrQuoteFragment = 0 + var noveltyOneOff = 0 + + for profile in profiles { + if isThemeOrQuoteFragment(profile) { + themeOrQuoteFragment += 1 + } + if isNoveltyOneOff(profile) { + noveltyOneOff += 1 + } + if isLowUsageFill(profile) { + lowUsageFill += 1 + } + if isObscureShortFill(profile) { + obscureShort += 1 + } + if isAwkwardMediumFill(profile) { + awkwardMedium += 1 + } + if profile.length <= 4 && profile.score < 115 { + weakShort += 1 + } + if profile.length <= 3 && profile.score < 65 { + veryWeakShort += 1 + } + if (5...7).contains(profile.length) && profile.count <= 8 && profile.score < 145 { + rareMedium += 1 + } + if profile.length >= 6 && profile.count <= 3 && profile.score < 180 { + lowFrequencyLong += 1 + } + if profile.length >= 6 && profile.count <= 1 && profile.score < 180 { + severeLowFrequency += 1 + } + if profile.fillBlankCount >= 2 && ratio(profile.fillBlankCount, profile.count) >= 0.5 { + blankDependent += 1 + } + if profile.fillBlankCount >= 3 && ratio(profile.fillBlankCount, profile.count) >= 0.65 { + heavyBlankDependent += 1 + } + if isPartialish(profile) { + partialish += 1 + } + if profile.rejectionReason != nil { + rejected += 1 + } + } + + return max(0, weakShort - 6) * 85 + + max(0, veryWeakShort - 2) * 90 + + max(0, rareMedium - 2) * 95 + + max(0, lowUsageFill - 4) * 90 + + max(0, obscureShort - 1) * 170 + + max(0, obscureShort - 3) * 130 + + max(0, obscureShort - 6) * 120 + + max(0, awkwardMedium - 2) * 120 + + max(0, awkwardMedium 
- 5) * 90 + + max(0, lowFrequencyLong - 3) * 80 + + severeLowFrequency * 40 + + themeOrQuoteFragment * 150 + + noveltyOneOff * 75 + + max(0, blankDependent - 1) * 110 + + heavyBlankDependent * 80 + + max(0, partialish - 2) * 95 + + rejected * 150 +} + +func entryPenalty(for profile: WordProfile) -> Int { + var penalty = 0 + + if (5...7).contains(profile.length) && profile.score < 90 { + penalty += 220 + } else if (5...7).contains(profile.length) && profile.score < 115 && profile.count <= 8 { + penalty += 120 + } + if profile.length <= 3 && profile.score < 65 { + penalty += 80 + } + if isLowUsageFill(profile) { + if profile.length <= 4 { + penalty += 90 + } else if profile.length <= 5 { + penalty += 55 + } else { + penalty += 30 + } + } + if isThemeOrQuoteFragment(profile) { + penalty += 170 + } + if isAwkwardMediumFill(profile) { + penalty += 60 + } + if isNoveltyOneOff(profile) { + penalty += 60 + } + if profile.fillBlankCount >= 3 && ratio(profile.fillBlankCount, profile.count) >= 0.65 { + penalty += 120 + } + if profile.rejectionReason != nil { + penalty += 200 + } + + return penalty +} + +func fillScore(state: PuzzleState, slots: [Slot], dictionary: Dictionary) -> Int { + var score = 0 + var seen = Set<String>() + var profiles: [WordProfile] = [] + + for slot in slots { + let word = wordText(for: slot, state: state) + score += dictionary.scoresByText[word] ?? 
0 + if let profile = dictionary.profilesByText[word] { + profiles.append(profile) + score -= entryPenalty(for: profile) + } + if !seen.insert(word).inserted { + score -= 500 + } + } + + return score - concentrationPenalty(for: profiles) +} + +func printFillReport(state: PuzzleState, slots: [Slot], dictionary: Dictionary) { + let profiles = slots.compactMap { dictionary.profilesByText[wordText(for: $0, state: state)] } + let weakEntries = profiles + .map { profile in (profile, weaknessReasons(for: profile)) } + .filter { !$0.1.isEmpty } + .sorted { + if $0.0.score != $1.0.score { + return $0.0.score < $1.0.score + } + return $0.0.text < $1.0.text + } + .prefix(12) + + guard !weakEntries.isEmpty else { + fputs("Fill report: no weak entries flagged.\n", stderr) + return + } + + fputs("Fill report: weakest entries\n", stderr) + for (profile, reasons) in weakEntries { + let latestSeen = profile.latestSeen ?? "unknown" + fputs(" \(profile.text): score \(profile.score), count \(profile.count), latest \(latestSeen), \(reasons.joined(separator: "; "))\n", stderr) + } +} + +func repeatedAnswers(in state: PuzzleState, slots: [Slot], maxUsage: Int) -> [String: Int] { + var counts: [String: Int] = [:] + + for slot in slots { + counts[wordText(for: slot, state: state), default: 0] += 1 + } + + return counts.filter { $0.value > maxUsage } +} + +func exportXD(state: PuzzleState, black: [Bool], slots: [Slot], options: Options, gridIndex: Int, fillScore: Int) -> String { + let numbers = clueNumberGrid(black: black) + var lines: [String] = [ + "Title: \(options.title)", + "CmVer: 3", + "Author: \(options.author)", + "Publisher: Crossmate", + "Date: \(ISO8601DateFormatter().string(from: Date()).prefix(10))", + "Copyright: Generated", + "Grid Index: \(gridIndex)", + "Seed: \(options.seed)", + "Fill Score: \(fillScore)", + "" + ] + + for row in 0..<gridSize { + var rowBytes: [UInt8] = [] + for column in 0..<gridSize { + let cell = row * gridSize + column + rowBytes.append(black[cell] ? 
35 : (state.board[cell] ?? 63)) + } + lines.append(String(bytes: rowBytes, encoding: .utf8) ?? "") + } + + lines.append("") + lines.append("") + + let across = slots + .filter { $0.direction == .across } + .sorted { (numbers[$0.cells[0]] ?? 0) < (numbers[$1.cells[0]] ?? 0) } + let down = slots + .filter { $0.direction == .down } + .sorted { (numbers[$0.cells[0]] ?? 0) < (numbers[$1.cells[0]] ?? 0) } + + for slot in across { + let number = numbers[slot.cells[0]] ?? 0 + lines.append("A\(number). [No clue] ~ \(wordText(for: slot, state: state))") + } + + lines.append("") + + for slot in down { + let number = numbers[slot.cells[0]] ?? 0 + lines.append("D\(number). [No clue] ~ \(wordText(for: slot, state: state))") + } + + lines.append("") + return lines.joined(separator: "\n") +} + +func run() throws { + let options = try parseOptions(CommandLine.arguments) + let dictionary = try loadDictionary( + path: options.wordsPath, + countsPath: options.countsPath, + qualityPath: options.qualityPath, + badWordsPath: options.badWordsPath + ) + let (black, gridIndex) = try loadGrid(options: options) + let slots = buildSlots(black: black) + var rng = RandomNumberGenerator64(seed: options.seed) + + guard let state = solve( + black: black, + slots: slots, + dictionary: dictionary, + timeout: options.timeout, + breadth: options.breadth, + optimizeFill: options.optimizeFill, + maxAnswerUsages: options.maxAnswerUsages, + rng: &rng + ) else { + throw GeneratorError(description: "Could not fill the grid within \(options.timeout) seconds. Try a different --seed, --grid-index, or --breadth.") + } + let selectedFillScore = fillScore(state: state, slots: slots, dictionary: dictionary) + if let minFillScore = options.minFillScore, selectedFillScore < minFillScore { + throw GeneratorError(description: "Best fill score \(selectedFillScore) was below --min-fill-score \(minFillScore). 
Try a different --seed, --grid-index, or more timeout.") + } + + if options.fillReport { + fputs("Selected fill score: \(selectedFillScore)\n", stderr) + printFillReport(state: state, slots: slots, dictionary: dictionary) + } + + let xd = exportXD(state: state, black: black, slots: slots, options: options, gridIndex: gridIndex, fillScore: selectedFillScore) + let outputURL = URL(fileURLWithPath: options.outputPath) + let outputDirectory = outputURL.deletingLastPathComponent() + if outputDirectory.path != "." { + try FileManager.default.createDirectory(at: outputDirectory, withIntermediateDirectories: true) + } + try xd.write(to: outputURL, atomically: true, encoding: .utf8) + print("Wrote \(options.outputPath)") +} + +do { + try run() +} catch { + fputs("Fillmake: \(error)\n", stderr) + exit(1) +} diff --git a/Crossmake/Sources/Gridmake/main.swift b/Crossmake/Sources/Gridmake/main.swift @@ -0,0 +1,279 @@ +import Foundation + +let gridSize = 15 + +struct Options { + var gridsPath = "Sources/Fillmake/Resources/grid_list.json" + var limit: Int? + var format = "tsv" +} + +struct GridEntry: Decodable { + let grid: [String] + let date: String +} + +struct Slot { + enum Direction: String { + case across + case down + } + + let id: Int + let cells: [Int] + let direction: Direction +} + +struct GridStats { + let index: Int + let date: String + let score: Int + let slotCount: Int + let openCellCount: Int + let threeLetterSlots: Int + let fourLetterSlots: Int + let fiveSixLetterSlots: Int + let sevenPlusLetterSlots: Int + let maxShortCluster: Int + let averageSlotLength: Double +} + +struct GridmakeError: Error, CustomStringConvertible { + let description: String +} + +func printUsage() { + print(""" + Usage: Gridmake [options] + + Options: + --grids PATH Grid list JSON path. Default: Sources/Fillmake/Resources/grid_list.json + --limit N Print only the top N ranked grids. + --format tsv|json Output format. Default: tsv. + -h, --help Show this help. 
+ """) +} + +func requireValue(_ arguments: [String], _ index: inout Int, _ name: String) throws -> String { + index += 1 + guard index < arguments.count else { + throw GridmakeError(description: "Missing value for \(name)") + } + return arguments[index] +} + +func parseOptions() throws -> Options { + var options = Options() + let arguments = Array(CommandLine.arguments.dropFirst()) + var index = 0 + + while index < arguments.count { + let argument = arguments[index] + switch argument { + case "--grids": + options.gridsPath = try requireValue(arguments, &index, argument) + case "--limit": + let value = try requireValue(arguments, &index, argument) + guard let limit = Int(value), limit > 0 else { + throw GridmakeError(description: "--limit must be a positive integer") + } + options.limit = limit + case "--format": + let format = try requireValue(arguments, &index, argument) + guard ["tsv", "json"].contains(format) else { + throw GridmakeError(description: "--format must be tsv or json") + } + options.format = format + case "-h", "--help": + printUsage() + exit(0) + default: + throw GridmakeError(description: "Unknown argument: \(argument)") + } + index += 1 + } + + return options +} + +func loadJSON<T: Decodable>(_ type: T.Type, from path: String) throws -> T { + let url = URL(fileURLWithPath: path) + let data = try Data(contentsOf: url) + return try JSONDecoder().decode(T.self, from: data) +} + +func buildSlots(black: [Bool]) -> [Slot] { + var slots: [Slot] = [] + + for row in 0..<gridSize { + var column = 0 + while column < gridSize { + let cell = row * gridSize + column + if black[cell] { + column += 1 + continue + } + var cells: [Int] = [] + while column < gridSize && !black[row * gridSize + column] { + cells.append(row * gridSize + column) + column += 1 + } + if cells.count > 1 { + slots.append(Slot(id: slots.count, cells: cells, direction: .across)) + } + } + } + + for column in 0..<gridSize { + var row = 0 + while row < gridSize { + let cell = row * gridSize 
+ column + if black[cell] { + row += 1 + continue + } + var cells: [Int] = [] + while row < gridSize && !black[row * gridSize + column] { + cells.append(row * gridSize + column) + row += 1 + } + if cells.count > 1 { + slots.append(Slot(id: slots.count, cells: cells, direction: .down)) + } + } + } + + return slots +} + +func maxShortCluster(in slots: [Slot]) -> Int { + let shortSlotIDs = Set(slots.filter { $0.cells.count <= 4 }.map(\.id)) + guard !shortSlotIDs.isEmpty else { + return 0 + } + + var cellToShortSlots: [Int: [Int]] = [:] + for slot in slots where shortSlotIDs.contains(slot.id) { + for cell in slot.cells { + cellToShortSlots[cell, default: []].append(slot.id) + } + } + + var adjacency: [Int: Set<Int>] = [:] + for connectedSlots in cellToShortSlots.values where connectedSlots.count > 1 { + for left in connectedSlots { + for right in connectedSlots where right != left { + adjacency[left, default: []].insert(right) + } + } + } + + var visited = Set<Int>() + var largest = 1 + + for slotID in shortSlotIDs where !visited.contains(slotID) { + var stack = [slotID] + visited.insert(slotID) + var size = 0 + + while let current = stack.popLast() { + size += 1 + for next in adjacency[current, default: []] where !visited.contains(next) { + visited.insert(next) + stack.append(next) + } + } + + largest = max(largest, size) + } + + return largest +} + +func stats(for entry: GridEntry, index: Int) throws -> GridStats { + guard entry.grid.count == gridSize * gridSize else { + throw GridmakeError(description: "Grid \(entry.date) has \(entry.grid.count) cells, expected \(gridSize * gridSize)") + } + + let black = entry.grid.map { $0 == "." 
} + let slots = buildSlots(black: black) + let lengths = slots.map { $0.cells.count } + let slotCount = slots.count + let openCellCount = black.filter { !$0 }.count + let threeLetterSlots = lengths.filter { $0 == 3 }.count + let fourLetterSlots = lengths.filter { $0 == 4 }.count + let fiveSixLetterSlots = lengths.filter { (5...6).contains($0) }.count + let sevenPlusLetterSlots = lengths.filter { $0 >= 7 }.count + let shortCluster = maxShortCluster(in: slots) + let averageSlotLength = lengths.isEmpty ? 0 : Double(lengths.reduce(0, +)) / Double(lengths.count) + + let score = sevenPlusLetterSlots * 20 + + fiveSixLetterSlots * 8 + - threeLetterSlots * 35 + - fourLetterSlots * 15 + - shortCluster * 50 + + Int(averageSlotLength * 6) + - max(0, slotCount - 78) * 6 + + return GridStats( + index: index, + date: entry.date, + score: score, + slotCount: slotCount, + openCellCount: openCellCount, + threeLetterSlots: threeLetterSlots, + fourLetterSlots: fourLetterSlots, + fiveSixLetterSlots: fiveSixLetterSlots, + sevenPlusLetterSlots: sevenPlusLetterSlots, + maxShortCluster: shortCluster, + averageSlotLength: averageSlotLength + ) +} + +func printTSV(_ rankedStats: [GridStats]) { + print("rank\tindex\tdate\tscore\tslots\topen_cells\tlen3\tlen4\tlen5_6\tlen7_plus\tmax_short_cluster\tavg_slot_length") + for (rank, stats) in rankedStats.enumerated() { + let average = String(format: "%.2f", stats.averageSlotLength) + print("\(rank + 1)\t\(stats.index)\t\(stats.date)\t\(stats.score)\t\(stats.slotCount)\t\(stats.openCellCount)\t\(stats.threeLetterSlots)\t\(stats.fourLetterSlots)\t\(stats.fiveSixLetterSlots)\t\(stats.sevenPlusLetterSlots)\t\(stats.maxShortCluster)\t\(average)") + } +} + +func jsonString(_ value: String) -> String { + let data = try! JSONEncoder().encode(value) + return String(data: data, encoding: .utf8) ?? 
"\"\"" +} + +func printJSON(_ rankedStats: [GridStats]) { + print("[") + for (offset, stats) in rankedStats.enumerated() { + let comma = offset == rankedStats.count - 1 ? "" : "," + let average = String(format: "%.4f", stats.averageSlotLength) + print(""" + {"rank":\(offset + 1),"index":\(stats.index),"date":\(jsonString(stats.date)),"score":\(stats.score),"slots":\(stats.slotCount),"open_cells":\(stats.openCellCount),"len3":\(stats.threeLetterSlots),"len4":\(stats.fourLetterSlots),"len5_6":\(stats.fiveSixLetterSlots),"len7_plus":\(stats.sevenPlusLetterSlots),"max_short_cluster":\(stats.maxShortCluster),"avg_slot_length":\(average)}\(comma) + """) + } + print("]") +} + +do { + let options = try parseOptions() + let grids = try loadJSON([GridEntry].self, from: options.gridsPath) + let rankedStats = try grids.enumerated() + .map { try stats(for: $0.element, index: $0.offset) } + .sorted { + if $0.score != $1.score { + return $0.score > $1.score + } + return $0.index < $1.index + } + .prefix(options.limit ?? grids.count) + + if options.format == "json" { + printJSON(Array(rankedStats)) + } else { + printTSV(Array(rankedStats)) + } +} catch { + fputs("Gridmake: \(error)\n", stderr) + exit(1) +} diff --git a/Crossmake/Sources/Pickmake/main.swift b/Crossmake/Sources/Pickmake/main.swift @@ -0,0 +1,346 @@ +import Foundation + +let gridSize = 15 +let letterWeights: [UInt8: Int] = [ + 65: 9, 66: 2, 67: 2, 68: 4, 69: 12, 70: 2, 71: 3, 72: 2, 73: 9, + 74: 1, 75: 1, 76: 4, 77: 2, 78: 6, 79: 8, 80: 2, 81: 1, 82: 6, + 83: 4, 84: 6, 85: 4, 86: 2, 87: 2, 88: 1, 89: 2, 90: 1 +] + +struct Options { + var countsPath = "Generated/answer_counts.json" + var qualityPath = "Generated/word_quality.json" + var count = 10 + var maxAnswerUsages = 2 + var disallowRepeatedSelectedAnswers = false + var unusualScoreThreshold = 110 + var inputs: [String] = ["Generated"] + var verbose = false +} + +struct WordQuality: Decodable { + let count: Int? + let badClueCount: Int? 
+ let obscureNameClueCount: Int? + let fillBlankCount: Int? + let rejectionReason: String? + + enum CodingKeys: String, CodingKey { + case count + case badClueCount = "bad_clue_count" + case obscureNameClueCount = "obscure_name_clue_count" + case fillBlankCount = "fill_blank_count" + case rejectionReason = "rejection_reason" + } +} + +struct Candidate { + let path: String + let answers: [String] + let counts: [String: Int] + let unusualAnswers: Set<String> + let score: Int +} + +struct PickmakeError: Error, CustomStringConvertible { + let description: String +} + +func parseOptions(_ arguments: [String]) throws -> Options { + var options = Options() + var index = 1 + var inputs: [String] = [] + + func requireValue(_ name: String) throws -> String { + guard index + 1 < arguments.count else { + throw PickmakeError(description: "Missing value for \(name)") + } + index += 1 + return arguments[index] + } + + while index < arguments.count { + let arg = arguments[index] + switch arg { + case "--counts": + options.countsPath = try requireValue(arg) + case "--quality": + options.qualityPath = try requireValue(arg) + case "--count": + let value = try requireValue(arg) + guard let parsed = Int(value), parsed > 0 else { + throw PickmakeError(description: "--count must be a positive integer") + } + options.count = parsed + case "--max-answer-usages": + let value = try requireValue(arg) + guard let parsed = Int(value), parsed > 0 else { + throw PickmakeError(description: "--max-answer-usages must be a positive integer") + } + options.maxAnswerUsages = parsed + case "--disallow-all-repeats": + options.disallowRepeatedSelectedAnswers = true + case "--unusual-score-threshold": + let value = try requireValue(arg) + guard let parsed = Int(value) else { + throw PickmakeError(description: "--unusual-score-threshold must be an integer") + } + options.unusualScoreThreshold = parsed + case "--verbose": + options.verbose = true + case "--help", "-h": + printUsage() + exit(0) + default: + 
inputs.append(arg) + } + index += 1 + } + + if !inputs.isEmpty { + options.inputs = inputs + } + return options +} + +func printUsage() { + print(""" + Usage: Pickmake [options] [XD files or directories...] + + Options: + --count N Number of puzzles to select. Default: 10 + --counts PATH Answer frequency JSON path. Default: Generated/answer_counts.json + --quality PATH Word quality metadata JSON path. Default: Generated/word_quality.json + --max-answer-usages N Reject puzzles using one answer more than N times. Default: 2 + --disallow-all-repeats + Disallow any repeated answers across selected puzzles. Default: only unusual answers cannot repeat. + --unusual-score-threshold N Answers scoring below N cannot repeat across selected puzzles. Default: 110 + --verbose Print rejected puzzle details to stderr. + -h, --help Show this help. + """) +} + +func loadJSON<T: Decodable>(_ type: T.Type, from path: String) throws -> T { + let data = try Data(contentsOf: URL(fileURLWithPath: path)) + return try JSONDecoder().decode(T.self, from: data) +} + +func vowelCount(_ bytes: [UInt8]) -> Int { + bytes.filter { $0 == 65 || $0 == 69 || $0 == 73 || $0 == 79 || $0 == 85 }.count +} + +func hasRunOfThree(_ bytes: [UInt8]) -> Bool { + guard bytes.count >= 3 else { + return false + } + for index in 2..<bytes.count where bytes[index] == bytes[index - 1] && bytes[index] == bytes[index - 2] { + return true + } + return false +} + +func qualityScore(text: String, count: Int?, quality: WordQuality?) -> Int { + let bytes = Array(text.utf8) + let length = bytes.count + let frequency = count ?? 0 + var score = length * 12 + + if frequency > 0 { + score += min(80, frequency * 4) + } else { + score -= length >= 8 ? 
30 : 80 + } + + if length >= 8 { + score += min(80, (length - 7) * 14) + } + if (5...7).contains(length) && frequency <= 2 { + score -= 70 + } + if length == 4 && frequency < 10 { + score -= 60 + } + if length <= 3 && frequency < 25 { + score -= 80 + } + + let vowels = vowelCount(bytes) + if vowels == 0 { + score -= 90 + } else if length >= 5 && Double(vowels) / Double(length) < 0.22 { + score -= 35 + } + if hasRunOfThree(bytes) { + score -= 110 + } + + if let quality { + let evidenceCount = max(quality.count ?? frequency, 1) + let badRatio = Double(quality.badClueCount ?? 0) / Double(evidenceCount) + let obscureRatio = Double(quality.obscureNameClueCount ?? 0) / Double(evidenceCount) + let blankRatio = Double(quality.fillBlankCount ?? 0) / Double(evidenceCount) + + if quality.rejectionReason != nil { + score -= length >= 8 ? 80 : 140 + } + if length <= 4 { + score -= Int(badRatio * 100) + score -= Int(obscureRatio * 70) + score -= Int(blankRatio * 45) + } else if length <= 7 { + score -= Int(badRatio * 45) + score -= Int(obscureRatio * 35) + score -= Int(blankRatio * 20) + } else { + score -= Int(badRatio * 20) + score -= Int(obscureRatio * 15) + } + } + + return score + bytes.reduce(0) { $0 + (letterWeights[$1] ?? 
0) } +} + +func discoverXDPaths(inputs: [String]) throws -> [String] { + let fileManager = FileManager.default + var paths: [String] = [] + + for input in inputs { + var isDirectory: ObjCBool = false + guard fileManager.fileExists(atPath: input, isDirectory: &isDirectory) else { + throw PickmakeError(description: "Input not found: \(input)") + } + + if isDirectory.boolValue { + let url = URL(fileURLWithPath: input) + guard let enumerator = fileManager.enumerator(at: url, includingPropertiesForKeys: [.isRegularFileKey]) else { + continue + } + for case let fileURL as URL in enumerator where fileURL.pathExtension.lowercased() == "xd" { + paths.append(fileURL.path) + } + } else if input.lowercased().hasSuffix(".xd") { + paths.append(URL(fileURLWithPath: input).path) + } + } + + return Array(Set(paths)).sorted() +} + +func answers(in path: String) throws -> [String] { + let contents = try String(contentsOfFile: path, encoding: .utf8) + return contents.split(separator: "\n").compactMap { line in + guard let separator = line.range(of: " ~ ") else { + return nil + } + let answer = line[separator.upperBound...].trimmingCharacters(in: .whitespaces) + guard !answer.isEmpty, answer.allSatisfy({ $0 >= "A" && $0 <= "Z" }) else { + return nil + } + return answer + } +} + +func storedFillScore(in path: String) throws -> Int? 
{ + let contents = try String(contentsOfFile: path, encoding: .utf8) + for line in contents.split(separator: "\n") { + guard line.hasPrefix("Fill Score:") else { + continue + } + let value = line.dropFirst("Fill Score:".count).trimmingCharacters(in: .whitespaces) + return Int(value) + } + return nil +} + +func makeCandidate(path: String, counts: [String: Int], quality: [String: WordQuality], unusualScoreThreshold: Int) throws -> Candidate { + let words = try answers(in: path) + let answerCounts = Dictionary(grouping: words, by: { $0 }).mapValues(\.count) + var unusual = Set<String>() + var score = 0 + + for word in words { + let key = word.lowercased() + let wordScore = qualityScore(text: word, count: counts[key], quality: quality[key]) + score += wordScore + if wordScore < unusualScoreThreshold { + unusual.insert(word) + } + } + + return Candidate(path: path, answers: words, counts: answerCounts, unusualAnswers: unusual, score: try storedFillScore(in: path) ?? score) +} + +func run() throws { + let options = try parseOptions(CommandLine.arguments) + let counts = try loadJSON([String: Int].self, from: options.countsPath) + let quality = try loadJSON([String: WordQuality].self, from: options.qualityPath) + let paths = try discoverXDPaths(inputs: options.inputs) + let candidates = try paths.map { + try makeCandidate(path: $0, counts: counts, quality: quality, unusualScoreThreshold: options.unusualScoreThreshold) + }.sorted { + if $0.score != $1.score { + return $0.score > $1.score + } + return $0.path < $1.path + } + + var selected: [Candidate] = [] + var selectedUnusual = Set<String>() + var selectedAnswers = Set<String>() + var rejectedForInternalRepeats = 0 + var rejectedForAnswerRepeats = 0 + var rejectedForUnusualRepeats = 0 + + for candidate in candidates { + let overused = candidate.counts + .filter { $0.value > options.maxAnswerUsages } + .sorted { $0.key < $1.key } + if !overused.isEmpty { + rejectedForInternalRepeats += 1 + if options.verbose { + let 
details = overused.map { "\($0.key)=\($0.value)" }.joined(separator: ", ") + fputs("Rejected \(candidate.path): answer usage > \(options.maxAnswerUsages) (\(details))\n", stderr) + } + continue + } + let repeatedAnswers = Set(candidate.answers).intersection(selectedAnswers).sorted() + if options.disallowRepeatedSelectedAnswers && !repeatedAnswers.isEmpty { + rejectedForAnswerRepeats += 1 + if options.verbose { + fputs("Rejected \(candidate.path): repeated selected answers (\(repeatedAnswers.joined(separator: ", ")))\n", stderr) + } + continue + } + if !options.disallowRepeatedSelectedAnswers { + let repeatedUnusual = candidate.unusualAnswers.intersection(selectedUnusual).sorted() + if !repeatedUnusual.isEmpty { + rejectedForUnusualRepeats += 1 + if options.verbose { + fputs("Rejected \(candidate.path): repeated unusual answers (\(repeatedUnusual.joined(separator: ", ")))\n", stderr) + } + continue + } + } + + selected.append(candidate) + selectedAnswers.formUnion(candidate.answers) + selectedUnusual.formUnion(candidate.unusualAnswers) + if selected.count == options.count { + break + } + } + + for candidate in selected { + print(candidate.path) + } + + fputs("Selected \(selected.count) of \(options.count) requested puzzles from \(candidates.count) candidates.\n", stderr) + fputs("Rejected \(rejectedForInternalRepeats) for answer usage > \(options.maxAnswerUsages); \(rejectedForAnswerRepeats) for repeated selected answers; \(rejectedForUnusualRepeats) for repeated unusual answers below score \(options.unusualScoreThreshold).\n", stderr) +} + +do { + try run() +} catch { + fputs("Pickmake: \(error)\n", stderr) + exit(1) +} diff --git a/Crossmake/Sources/Wordmake/main.swift b/Crossmake/Sources/Wordmake/main.swift @@ -91,6 +91,10 @@ struct AnswerEvidence { var obscureNameClueCount = 0 var fillBlankCount = 0 var foreignLanguageClueCount = 0 + var cluePartCount = 0 + var continuationClueCount = 0 + var themeClueCount = 0 + var latestSeen: String? 
// Clue markers for abbreviations, variants, affixes, dialect/foreign tags and
// a list of specific dated or obscure clue phrasings.
let badClueRegex = try! NSRegularExpression(
    pattern: #"\b(abbr|var|prefix|suffix|comb\.? form|archaic|poetic|dial|slang|obs|old-style|old fashioned|scot|irish|brit|fr|french|ger|german|lat|latin|span|spanish|ital|italian|port|portuguese|hebr|hebrew|yiddish|jap|japanese|inits|initials|letters|compass point|bearing|direction|shoe width|monogram|degs|degree|of yore|old|olden|heraldry|genus|legal|in law|anatomical|serf|serfs|slave|slaves|vassal|pilaster|opera voices|persian fairy|mideast|eastern v\.?i\.?p|arab prince|arab chieftain|hindu title|indian princess|rajah|dirk|knife of old|seed covering|seed casing|seed envelope|hip bones|pelvic bones|bitter vetch|marsh birds)\b"#,
    options: [.caseInsensitive]
)

// Occupation/title words counted as "obscure name" clues.
let obscureNameClueRegex = try! NSRegularExpression(
    pattern: #"\b(actor|actress|author|composer|singer|poet|novelist|painter|artist|playwright|violinist|pianist|conductor|golfer|pitcher|baseballer|songwriter|politician|senator|governor|pope)\b"#,
    options: [.caseInsensitive]
)

// Two or more underscores, or three or more dots.
let fillBlankRegex = try! NSRegularExpression(pattern: #"_{2,}|\.{3,}"#)

// A ": <language abbreviation>." tag, or ", in <Capitalised word>".
let foreignLanguageClueRegex = try! NSRegularExpression(
    pattern: #":\s*(?:Sp|Span|Fr|Ger|Germ|It|Ital|Lat|Port|Heb|Yid|Jap|Rus|Gr|Gk|Swed|Norw|Dan|Arab|Chin|Hind|Skt|Turk|Finn)\.|,\s+in\s+[A-Z][a-z]"#
)

// "part 2" / "2nd part" style clues (also section, segment, installment).
let cluePartRegex = try! NSRegularExpression(
    pattern: #"\b(?:part|section|segment|installment)\s+\d+\b|\b\d+(?:st|nd|rd|th)\s+(?:part|section|segment|installment)\b"#,
    options: [.caseInsensitive]
)

// "end of the quote" style clues, or clues that begin "see <number>".
let continuationClueRegex = try! NSRegularExpression(
    pattern: #"\b(?:end|start|middle|continuation)\s+of\s+(?:the\s+)?(?:quote|quip|remark|saying|joke)\b|^see\s+\d+"#,
    options: [.caseInsensitive]
)

// Clues whose first non-whitespace character is an asterisk.
let themeClueRegex = try! NSRegularExpression(pattern: #"^\s*\*"#)

// A leading "<digits>." clue number with surrounding whitespace.
let clueNumberRegex = try! NSRegularExpression(pattern: #"^\s*\d+\.\s*"#)

// A path ending in "YYYY/MM/DD.json"; captures year, month and day.
let datePathRegex = try! NSRegularExpression(pattern: #"(\d{4})/(\d{2})/(\d{2})\.json$"#)

/// Derives a `YYYY-MM-DD` string from a path that ends in `YYYY/MM/DD.json`.
///
/// Backslashes in the path are treated as forward slashes before matching.
///
/// - Returns: The date string, or `nil` when the path does not match
///   `datePathRegex`.
func isoDateForPath(_ url: URL) -> String? {
    let normalized = url.path.replacingOccurrences(of: "\\", with: "/")
    let wholePath = NSRange(normalized.startIndex..<normalized.endIndex, in: normalized)

    guard let match = datePathRegex.firstMatch(in: normalized, range: wholePath),
          match.numberOfRanges == 4 else {
        return nil
    }

    // Capture groups 1...3 are year, month and day, in that order.
    var components: [Substring] = []
    for group in 1...3 {
        guard let range = Range(match.range(at: group), in: normalized) else {
            return nil
        }
        components.append(normalized[range])
    }

    return components.joined(separator: "-")
}
/// Returns `part / whole` as a `Double`, using 1 as the divisor whenever
/// `whole` is below 1 so the division is always defined.
func clueRatio(_ part: Int, _ whole: Int) -> Double {
    Double(part) / Double(max(whole, 1))
}

/// Reports whether `word` contains at least `minimumLength` consecutive
/// characters whose Unicode scalar values each increase by exactly one
/// (for example "abcd" or "mnop").
///
/// An empty word never contains a run. A non-empty word trivially contains a
/// run of length one, so any `minimumLength` of 1 or less is satisfied by it.
func hasSequentialLetterRun(_ word: String, minimumLength: Int = 4) -> Bool {
    let scalars = word.unicodeScalars.map(\.value)
    guard !scalars.isEmpty, scalars.count >= minimumLength else {
        return false
    }

    var runLength = 1
    if runLength >= minimumLength {
        return true
    }

    // Walking adjacent pairs avoids forming an index range, which would trap
    // for an empty scalar list.
    for (previous, current) in zip(scalars, scalars.dropFirst()) {
        if current == previous + 1 {
            runLength += 1
            if runLength >= minimumLength {
                return true
            }
        } else {
            runLength = 1
        }
    }
    return false
}

/// Reports whether `word` has at least six characters and starts with one of
/// a fixed list of two-letter pairs.
func isInitialSurnameArtifact(_ word: String) -> Bool {
    guard word.count >= 6 else {
        return false
    }

    let fusedNames: Set<String> = [
        "ed", "ek", "el", "em", "et",
        "jk", "jl", "jr",
        "kd", "kt",
        "mj",
        "oj",
        "tj",
        "wc"
    ]

    return fusedNames.contains { word.hasPrefix($0) }
}

/// Reports whether `word` (five or more characters) looks like a run-together
/// phrase fragment: it either starts with one of a fixed list of prefixes, or
/// it has a fill-in-the-blank ratio of at least 0.8 and a length of at most 7
/// or at least 10.
func isPhraseFragmentArtifact(_ word: String, fillBlankRatio: Double) -> Bool {
    let length = word.count
    guard length >= 5 else {
        return false
    }

    let prefixes = [
        "aan", "aand", "anold", "asof", "ated", "cani",
        "cular",
        "igota", "igot", "imeta", "imet", "ihave", "itisa", "itwas",
        "weare", "youre"
    ]
    if prefixes.contains(where: { word.hasPrefix($0) }) {
        return true
    }

    // NOTE(review): lengths 8 and 9 are never flagged by the ratio rule;
    // confirm that gap is intentional.
    return fillBlankRatio >= 0.8 && (length >= 10 || length <= 7)
}

/// Reports whether `word` is a dated, single-occurrence entry of eight or more
/// characters that either has an obscure-name clue or ends in "c", "v" or "x".
func isVerySuspiciousOneOff(_ word: String, evidence: AnswerEvidence) -> Bool {
    guard word.count >= 8, evidence.count == 1, evidence.latestSeen != nil else {
        return false
    }
    if evidence.obscureNameClueCount > 0 {
        return true
    }
    return ["c", "v", "x"].contains { word.hasSuffix($0) }
}

/// Reports whether `word` is a single-occurrence entry of eight or more
/// characters whose `latestSeen` value sorts before "2000-01-01".
func isStaleOneOffLongFill(_ word: String, evidence: AnswerEvidence) -> Bool {
    guard word.count >= 8,
          evidence.count == 1,
          let latestSeen = evidence.latestSeen else {
        return false
    }
    // Plain string comparison.
    // NOTE(review): assumes latestSeen is always zero-padded YYYY-MM-DD, so
    // string order equals date order; confirm against the code that sets it.
    return latestSeen < "2000-01-01"
}

/// Reports whether `word` is a single-occurrence entry of eight or more
/// characters with a sample clue containing a question mark.
func isPunnyOneOff(_ word: String, evidence: AnswerEvidence) -> Bool {
    guard word.count >= 8, evidence.count == 1 else {
        return false
    }
    return evidence.sampleClues.contains { $0.contains("?") }
}

/// Returns the stricter minimum count for three-, four- and five-letter
/// entries (25, 12 and 6 respectively), or `nil` for every other length.
func stricterShortRequiredCount(_ word: String) -> Int? {
    switch word.count {
    case 3:
        return 25
    case 4:
        return 12
    case 5:
        return 6
    default:
        return nil
    }
}
fragment artifact" + } + + if evidence.themeClueCount > 0 && (count <= 2 || clueRatio(evidence.themeClueCount, count) >= 0.5) { + return "one-off theme clue artifact" + } + + if evidence.continuationClueCount > 0 && (count <= 3 || clueRatio(evidence.continuationClueCount, count) >= 0.5) { + return "cross-reference or quote-continuation artifact" + } + + if isStaleOneOffLongFill(word, evidence: evidence) { + return "stale one-off long fill" + } + + if isVerySuspiciousOneOff(word, evidence: evidence) { + return "one-off long artifact" + } + + if isPunnyOneOff(word, evidence: evidence) { + return "one-off pun/theme artifact" + } + + if length <= 5 && evidence.badClueCount >= 2 && badRatio >= 0.45 { + return "entry mostly clued as abbreviation/variant/form" + } + + if length <= 5 && evidence.obscureNameClueCount >= 2 && obscureNameRatio >= 0.5 { + return "short entry mostly clued as obscure name/title" } if length <= 4 && commonShortWords.contains(word) { @@ -436,10 +626,9 @@ func easyRejectionReason(word: String, evidence: AnswerEvidence) -> String? 
{ return "short suffix-like fragment" } - let badRatio = clueRatio(evidence.badClueCount, count) - let obscureNameRatio = clueRatio(evidence.obscureNameClueCount, count) - let fillBlankRatio = clueRatio(evidence.fillBlankCount, count) - let foreignLanguageRatio = clueRatio(evidence.foreignLanguageClueCount, count) + if evidence.cluePartCount > 0 && (count <= 3 || cluePartRatio >= 0.5) { + return "entry mostly clued as one part of a longer answer" + } if length <= 5 && evidence.foreignLanguageClueCount >= 2 && @@ -502,10 +691,16 @@ func loadAnswerEvidence(inputURL: URL, options: Options) throws -> (evidence: [S guard shouldIncludePuzzle(puzzle, path: path, weekdays: options.weekdays) else { continue } + let latestSeen = isoDateForPath(path) filesUsed += 1 for (answer, clue) in normalizedEntries(from: puzzle) where shouldInclude(answer, minLength: options.minLength, maxLength: options.maxLength) { var entry = evidence[answer] ?? AnswerEvidence() entry.count += 1 + if let latestSeen { + if entry.latestSeen.map({ latestSeen > $0 }) ?? true { + entry.latestSeen = latestSeen + } + } if entry.sampleClues.count < 5 && !clue.isEmpty { entry.sampleClues.append(clue) } @@ -521,6 +716,15 @@ func loadAnswerEvidence(inputURL: URL, options: Options) throws -> (evidence: [S if regexMatches(foreignLanguageClueRegex, clue) { entry.foreignLanguageClueCount += 1 } + if regexMatches(cluePartRegex, clue) { + entry.cluePartCount += 1 + } + if regexMatches(continuationClueRegex, clue) { + entry.continuationClueCount += 1 + } + if regexMatches(themeClueRegex, clue) { + entry.themeClueCount += 1 + } evidence[answer] = entry } } catch { @@ -556,6 +760,10 @@ func evidenceObject(_ entry: AnswerEvidence, rejectionReason: String?) 
-> [Strin "obscure_name_clue_count": entry.obscureNameClueCount, "fill_blank_count": entry.fillBlankCount, "foreign_language_clue_count": entry.foreignLanguageClueCount, + "clue_part_count": entry.cluePartCount, + "continuation_clue_count": entry.continuationClueCount, + "theme_clue_count": entry.themeClueCount, + "latest_seen": entry.latestSeen as Any? ?? NSNull(), "sample_clues": entry.sampleClues, "rejection_reason": rejectionReason as Any? ?? NSNull() ] @@ -569,6 +777,10 @@ func rejectionObject(reason: String, entry: AnswerEvidence) -> [String: Any] { "obscure_name_clue_count": entry.obscureNameClueCount, "fill_blank_count": entry.fillBlankCount, "foreign_language_clue_count": entry.foreignLanguageClueCount, + "clue_part_count": entry.cluePartCount, + "continuation_clue_count": entry.continuationClueCount, + "theme_clue_count": entry.themeClueCount, + "latest_seen": entry.latestSeen as Any? ?? NSNull(), "sample_clues": entry.sampleClues ] }