generate_puzzles.sh (11227B)
#!/usr/bin/env bash
#
# Generate filled crossword puzzles as ${GENERATED_DIR}/Crossmate-NNNN.xd.
#
# The script builds the Swift package in release mode, then repeatedly picks a
# grid, launches several Fillmake fill attempts for it in parallel (a "grid
# group"), and keeps the best-scoring successful fill of each group until the
# requested number of puzzles has been written.
#
# Usage: generate_puzzles.sh [positive-puzzle-count]        (default: 10)
#
# Environment (all optional):
#   CROSSMAKE_TIMEOUT_SECONDS    per-run Fillmake timeout, positive number (10)
#   CROSSMAKE_EXAMPLES_PER_GRID  fill attempts per grid group (4)
#   CROSSMAKE_MIN_FILL_SCORE     integer passed as --min-fill-score (7500)
#   CROSSMAKE_BREADTH            integer passed as --breadth (80)
#   CROSSMAKE_MAX_ANSWER_USAGES  integer passed as --max-answer-usages (1)
#   CROSSMAKE_JOBS               parallel fill jobs (CPU count, capped at 4)
#   CROSSMAKE_GRID_SELECTION     "random" (default) or "ranked"
#   CROSSMAKE_GRID_RANK_LIMIT    --limit passed to Gridmake in ranked mode (250)
#   CROSSMAKE_GRID_INDEX         always use this grid index
#   CROSSMAKE_SEED               always use this seed
#   CROSSMAKE_MAX_GROUPS         give up after this many grid groups
#                                (unset = keep trying until the target is met)
#
# Compatibility note: the script sticks to constructs available in older bash
# releases (no mapfile, no wait -n), so job throttling is done by polling.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CROSSMAKE_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
cd "$CROSSMAKE_DIR"

DATA_DIR="Data"
GENERATED_DIR="Generated"
GRID_LIST="Sources/Fillmake/Resources/grid_list.json"
WORD_LIST="${GENERATED_DIR}/word_list.json"
COUNTS_FILE="${GENERATED_DIR}/answer_counts.json"
QUALITY_FILE="${GENERATED_DIR}/word_quality.json"
BAD_WORDS_FILE="${DATA_DIR}/bad_words.json"
TIMEOUT_SECONDS="${CROSSMAKE_TIMEOUT_SECONDS:-10}"
EXAMPLES_PER_GRID="${CROSSMAKE_EXAMPLES_PER_GRID:-4}"
OUTPUT_PREFIX="Crossmate"
MIN_FILL_SCORE="${CROSSMAKE_MIN_FILL_SCORE:-7500}"
BREADTH="${CROSSMAKE_BREADTH:-80}"
MAX_ANSWER_USAGES="${CROSSMAKE_MAX_ANSWER_USAGES:-1}"
MAX_PARALLEL_JOBS="${CROSSMAKE_JOBS:-}"
# Use CROSSMAKE_GRID_SELECTION=ranked to sample from Gridmake's top-ranked grids.
GRID_SELECTION="${CROSSMAKE_GRID_SELECTION:-random}"
GRID_RANK_LIMIT="${CROSSMAKE_GRID_RANK_LIMIT:-250}"
FIXED_GRID_INDEX="${CROSSMAKE_GRID_INDEX:-}"
FIXED_SEED="${CROSSMAKE_SEED:-}"
# Optional safety valve for the main loop; empty means "no limit".
MAX_GROUPS="${CROSSMAKE_MAX_GROUPS:-}"
TARGET_PUZZLES="${1:-10}"
FILLMAKE_EXECUTABLE=".build/release/Fillmake"
GRIDMAKE_EXECUTABLE=".build/release/Gridmake"

#######################################
# Print a message to stderr and exit with status 1.
# Arguments: $* - message text
#######################################
die() {
  echo "$*" >&2
  exit 1
}

#######################################
# Abort unless a regular file exists.
# Arguments: $1 - human-readable label used in the error message
#            $2 - path to check
#######################################
require_file() {
  [[ -f "$2" ]] || die "$1 not found: $2"
}

#######################################
# Abort unless a value is a decimal integer >= 1.
# Arguments: $1 - setting name used in the error message
#            $2 - value to check
#######################################
require_positive_int() {
  local name="$1"
  local value="$2"

  # 10# forces base 10 so that values with leading zeros are not read as octal.
  if ! [[ "$value" =~ ^[0-9]+$ ]] || ((10#$value < 1)); then
    die "${name} must be a positive integer"
  fi
}

# --- Input validation --------------------------------------------------------

require_file "Word list" "$WORD_LIST"
require_file "Grid list" "$GRID_LIST"
require_file "Answer counts" "$COUNTS_FILE"
require_file "Word quality metadata" "$QUALITY_FILE"
require_file "Bad-word list" "$BAD_WORDS_FILE"

if ! command -v jq >/dev/null 2>&1; then
  die "jq is required to read $GRID_LIST"
fi

if [[ -z "$MAX_PARALLEL_JOBS" ]]; then
  # Try the macOS sysctl first, then getconf, then fall back to 4; the
  # auto-detected value is capped at 4 (an explicit CROSSMAKE_JOBS is not).
  MAX_PARALLEL_JOBS="$(sysctl -n hw.ncpu 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)"
  if ((MAX_PARALLEL_JOBS > 4)); then
    MAX_PARALLEL_JOBS=4
  fi
fi

require_positive_int "CROSSMAKE_JOBS" "$MAX_PARALLEL_JOBS"
MAX_PARALLEL_JOBS=$((10#$MAX_PARALLEL_JOBS))

# The second match rejects 0 / 0.0 / 000: a "positive number" needs at least
# one non-zero digit.
if ! [[ "$TIMEOUT_SECONDS" =~ ^[0-9]+([.][0-9]+)?$ && "$TIMEOUT_SECONDS" =~ [1-9] ]]; then
  die "CROSSMAKE_TIMEOUT_SECONDS must be a positive number"
fi

require_positive_int "CROSSMAKE_EXAMPLES_PER_GRID" "$EXAMPLES_PER_GRID"
# Normalised because the value is later used bare in an arithmetic loop bound,
# where a leading zero (e.g. 08) would otherwise be parsed as invalid octal.
EXAMPLES_PER_GRID=$((10#$EXAMPLES_PER_GRID))

if ! [[ "$MIN_FILL_SCORE" =~ ^-?[0-9]+$ ]]; then
  die "CROSSMAKE_MIN_FILL_SCORE must be an integer"
fi

require_positive_int "CROSSMAKE_BREADTH" "$BREADTH"
require_positive_int "CROSSMAKE_MAX_ANSWER_USAGES" "$MAX_ANSWER_USAGES"

if [[ "$GRID_SELECTION" != "ranked" && "$GRID_SELECTION" != "random" ]]; then
  die "CROSSMAKE_GRID_SELECTION must be ranked or random"
fi

if [[ -n "$FIXED_GRID_INDEX" ]] && ! [[ "$FIXED_GRID_INDEX" =~ ^[0-9]+$ ]]; then
  die "CROSSMAKE_GRID_INDEX must be a non-negative integer"
fi

if [[ -n "$FIXED_SEED" ]] && ! [[ "$FIXED_SEED" =~ ^[0-9]+$ ]]; then
  die "CROSSMAKE_SEED must be a non-negative integer"
fi

require_positive_int "CROSSMAKE_GRID_RANK_LIMIT" "$GRID_RANK_LIMIT"

if [[ -n "$MAX_GROUPS" ]]; then
  require_positive_int "CROSSMAKE_MAX_GROUPS" "$MAX_GROUPS"
  MAX_GROUPS=$((10#$MAX_GROUPS))
fi

if ! [[ "$TARGET_PUZZLES" =~ ^[0-9]+$ ]]; then
  die "Usage: $0 [positive-puzzle-count]"
fi

target_puzzle_count=$((10#$TARGET_PUZZLES))
if ((target_puzzle_count < 1)); then
  die "Usage: $0 [positive-puzzle-count]"
fi

#######################################
# Compute the next free output number.
# Scans ${GENERATED_DIR}/${OUTPUT_PREFIX}-<digits>.xd and prints the highest
# number found plus one (1 when no such file exists).
# Globals:  GENERATED_DIR, OUTPUT_PREFIX (read)
# Outputs:  the next number on stdout
#######################################
next_output_number() {
  local highest=0
  local path filename number number_value

  for path in "${GENERATED_DIR}/${OUTPUT_PREFIX}-"*.xd; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$path" ]] || continue
    filename="${path##*/}"
    [[ "$filename" =~ ^${OUTPUT_PREFIX}-([0-9]+)\.xd$ ]] || continue
    number="${BASH_REMATCH[1]}"
    # Zero-padded names such as 0008 must be read as decimal, not octal.
    number_value=$((10#$number))
    if ((number_value > highest)); then
      highest="$number_value"
    fi
  done

  echo "$((highest + 1))"
}

output_number="$(next_output_number)"
mkdir -p "$GENERATED_DIR"
tmp_dir="$(mktemp -d)"
trap 'rm -rf -- "$tmp_dir"' EXIT

# --- Build -------------------------------------------------------------------

echo "Building Fillmake in release mode"
swift build -c release

if [[ ! -x "$FILLMAKE_EXECUTABLE" ]]; then
  die "Fillmake executable not found after build: $FILLMAKE_EXECUTABLE"
fi

if [[ ! -x "$GRIDMAKE_EXECUTABLE" ]]; then
  die "Gridmake executable not found after build: $GRIDMAKE_EXECUTABLE"
fi

# --- Grid selection pool -----------------------------------------------------

available_grid_count="$(jq 'length' "$GRID_LIST")"
grid_indices="$tmp_dir/grid-indices.txt"

if [[ -n "$FIXED_GRID_INDEX" ]]; then
  if ((10#$FIXED_GRID_INDEX >= available_grid_count)); then
    die "CROSSMAKE_GRID_INDEX ${FIXED_GRID_INDEX} is out of range; found ${available_grid_count} grids"
  fi
  echo "$FIXED_GRID_INDEX" >"$grid_indices"
elif [[ "$GRID_SELECTION" == "ranked" ]]; then
  # Skips the first output line and takes the second column of the rest.
  # NOTE(review): presumably line 1 is a header and column 2 is the grid
  # index; confirm against Gridmake's output format.
  "$GRIDMAKE_EXECUTABLE" --grids "$GRID_LIST" --limit "$GRID_RANK_LIMIT" \
    | awk 'NR > 1 { print $2 }' >"$grid_indices"
else
  for ((grid_index = 0; grid_index < available_grid_count; grid_index++)); do
    echo "$grid_index"
  done >"$grid_indices"
fi

# Load the pool into an array. Blank lines are dropped so they can never be
# handed to Fillmake as an empty --grid-index, and the pool size is taken from
# the array itself so the modulus in random_grid_index always matches it.
selected_indices=()
while IFS= read -r selected_index || [[ -n "$selected_index" ]]; do
  [[ -n "$selected_index" ]] || continue
  selected_indices+=("$selected_index")
done <"$grid_indices"

selection_grid_count="${#selected_indices[@]}"
if ((selection_grid_count < 1)); then
  die "No grids available for selection"
fi

#######################################
# Print one grid index drawn uniformly-ish from the selection pool.
# Globals:  selected_indices, selection_grid_count (read)
# Outputs:  a grid index on stdout
#######################################
random_grid_index() {
  # $RANDOM alone is limited to 0..32767, which would make pool entries beyond
  # position 32767 unreachable; combine two draws into a 30-bit value.
  local draw=$(((RANDOM << 15) | RANDOM))

  echo "${selected_indices[$((draw % selection_grid_count))]}"
}

if [[ -n "$FIXED_GRID_INDEX" ]]; then
  grid_source="fixed grid ${FIXED_GRID_INDEX}"
elif [[ "$GRID_SELECTION" == "ranked" ]]; then
  grid_source="the top ${selection_grid_count} ranked grid(s)"
else
  grid_source="${selection_grid_count} random grid(s)"
fi
echo "Generating ${target_puzzle_count} puzzle(s) from ${grid_source} with ${EXAMPLES_PER_GRID} fill attempt(s) per puzzle, ${MAX_PARALLEL_JOBS} parallel job(s), timeout ${TIMEOUT_SECONDS}s per run, breadth ${BREADTH}, min score ${MIN_FILL_SCORE}, max answer usage ${MAX_ANSWER_USAGES}"

# --- Job control -------------------------------------------------------------

#######################################
# Print the number of background jobs the shell currently reports as running.
#######################################
running_job_count() {
  jobs -rp | wc -l | tr -d '[:space:]'
}

#######################################
# Block (polling once per second) until fewer than MAX_PARALLEL_JOBS
# background jobs are running.
# Globals:  MAX_PARALLEL_JOBS (read)
#######################################
wait_for_job_slot() {
  while ((10#$(running_job_count) >= 10#$MAX_PARALLEL_JOBS)); do
    sleep 1
  done
}

# Monotonic id shared by all candidates across all grid groups.
candidate_index=0

#######################################
# Launch one Fillmake run in the background.
# Per-candidate files are created under tmp_dir:
#   candidate-<id>.xd      Fillmake --output target
#   candidate-<id>.log     Fillmake stdout + stderr
#   candidate-<id>.ok      created only when Fillmake exits 0
#   candidate-<id>.status  Fillmake exit status (not read elsewhere in this
#                          script)
#   candidate-<id>.group   id of the grid group that owns the candidate
# Globals:   candidate_outputs (appended), tmp_dir and Fillmake settings (read)
# Arguments: $1 - group id
#            $2 - grid index
#            $3 - attempt number within the group (for the progress message)
#            $4 - candidate id
#            $5 - seed
#######################################
start_candidate() {
  local group_id="$1"
  local grid_index="$2"
  local example="$3"
  local candidate_id="$4"
  local seed="$5"
  local output_path="${tmp_dir}/candidate-${candidate_id}.xd"
  local log_path="${tmp_dir}/candidate-${candidate_id}.log"
  local ok_path="${tmp_dir}/candidate-${candidate_id}.ok"
  local status_path="${tmp_dir}/candidate-${candidate_id}.status"
  local group_path="${tmp_dir}/candidate-${candidate_id}.group"

  candidate_outputs+=("$output_path")
  echo "$group_id" >"$group_path"
  echo "Generating candidate ${candidate_id} from grid index ${grid_index} (${example}/${EXAMPLES_PER_GRID}) with seed ${seed}"

  (
    # errexit is disabled so a failing Fillmake run still records its status
    # instead of killing the subshell before the bookkeeping below.
    set +e
    "$FILLMAKE_EXECUTABLE" \
      --words "$WORD_LIST" \
      --counts "$COUNTS_FILE" \
      --quality "$QUALITY_FILE" \
      --bad-words "$BAD_WORDS_FILE" \
      --grid-index "$grid_index" \
      --seed "$seed" \
      --timeout "$TIMEOUT_SECONDS" \
      --breadth "$BREADTH" \
      --optimize-fill \
      --min-fill-score "$MIN_FILL_SCORE" \
      --max-answer-usages "$MAX_ANSWER_USAGES" \
      --output "$output_path" >"$log_path" 2>&1
    status="$?"
    echo "$status" >"$status_path"
    if [[ "$status" == "0" ]]; then
      touch "$ok_path"
    fi
  ) &
}

accepted_count=0
rejected_count=0
discarded_count=0
attempted_group_count=0

#######################################
# Extract the fill score from a Fillmake log.
# Looks at lines containing "kept best fill score" (last field, with any dots
# removed) and "Selected fill score:" (last field), and uses the last such
# value in the file.
# Arguments: $1 - log file path
# Outputs:   the score on stdout, or -999999999 when no integer score is found
#######################################
fill_score_for_log() {
  local log_path="$1"
  local score

  score="$(
    awk '
      /kept best fill score/ {
        value = $(NF)
        gsub(/\./, "", value)
        print value
      }
      /Selected fill score:/ {
        print $NF
      }
    ' "$log_path" | tail -1
  )"

  # The caller compares the result inside (( )), where any non-integer token
  # would be evaluated as an arithmetic expression (syntax error, or an
  # unbound-variable abort under set -u). Only plain integers are passed on.
  if [[ "$score" =~ ^-?[0-9]+$ ]]; then
    echo "$score"
  else
    echo "-999999999"
  fi
}

#######################################
# Run one grid group: start EXAMPLES_PER_GRID candidates for a grid, wait for
# all of them, move the best-scoring successful candidate to the next
# ${GENERATED_DIR}/${OUTPUT_PREFIX}-NNNN.xd, and update the counters.
# Globals:   candidate_index, output_number, accepted_count, rejected_count,
#            discarded_count (written); candidate_outputs (reset, then filled
#            by start_candidate)
# Arguments: $1 - group id
#            $2 - grid index
#######################################
run_grid_group() {
  local group_id="$1"
  local grid_index="$2"
  local best_candidate_id=""
  local best_output_path=""
  local best_score="-999999999"
  local example seed score final_output_path
  local output_path candidate_id group_path ok_path log_path

  # Deliberately global: start_candidate appends to it.
  candidate_outputs=()

  echo "Starting grid group ${group_id} from grid index ${grid_index}"

  for ((example = 1; example <= EXAMPLES_PER_GRID; example++)); do
    wait_for_job_slot
    candidate_index=$((candidate_index + 1))
    if [[ -n "$FIXED_SEED" ]]; then
      seed="$FIXED_SEED"
    else
      # Epoch seconds, candidate id and $RANDOM mixed into one integer so that
      # candidates started within the same second get different seeds.
      seed="$(($(date +%s) * 1000000 + candidate_index * 1000 + RANDOM))"
    fi
    start_candidate "$group_id" "$grid_index" "$example" "$candidate_index" "$seed"
  done

  # Barrier: every candidate of this group must finish before scoring.
  wait

  for output_path in "${candidate_outputs[@]}"; do
    candidate_id="${output_path##*/candidate-}"
    candidate_id="${candidate_id%.xd}"
    group_path="${tmp_dir}/candidate-${candidate_id}.group"
    ok_path="${tmp_dir}/candidate-${candidate_id}.ok"
    log_path="${tmp_dir}/candidate-${candidate_id}.log"

    [[ -f "$group_path" ]] || continue
    [[ "$(cat "$group_path")" == "$group_id" ]] || continue
    [[ -f "$ok_path" ]] || continue

    score="$(fill_score_for_log "$log_path")"
    # NOTE(review): a successful candidate whose log has no parseable score
    # gets the -999999999 sentinel, which is not greater than the initial
    # best_score, so it can never be selected (it is still counted as
    # "discarded" below). Confirm this is intended.
    if ((score > best_score)); then
      best_score="$score"
      best_candidate_id="$candidate_id"
      best_output_path="$output_path"
    fi
  done

  if [[ -n "$best_candidate_id" ]]; then
    printf -v final_output_path "%s/%s-%04d.xd" "$GENERATED_DIR" "$OUTPUT_PREFIX" "$output_number"
    mv "$best_output_path" "$final_output_path"
    echo "Wrote ${final_output_path} from candidate ${best_candidate_id}"
    # Best-effort summary lines; a log without them is not an error.
    grep -E "Searched .* seconds|Selected fill score:" "${tmp_dir}/candidate-${best_candidate_id}.log" || true
    output_number=$((output_number + 1))
    accepted_count=$((accepted_count + 1))
  else
    echo "No accepted fill for grid group ${group_id}." >&2
  fi

  for output_path in "${candidate_outputs[@]}"; do
    candidate_id="${output_path##*/candidate-}"
    candidate_id="${candidate_id%.xd}"
    log_path="${tmp_dir}/candidate-${candidate_id}.log"
    ok_path="${tmp_dir}/candidate-${candidate_id}.ok"

    if [[ -f "$ok_path" ]]; then
      # The winner's output was moved away above, so any successful candidate
      # whose output file still exists was not chosen.
      if [[ -e "$output_path" ]]; then
        discarded_count=$((discarded_count + 1))
      fi
    else
      rejected_count=$((rejected_count + 1))
      echo "Rejected candidate ${candidate_id}:" >&2
      tail -3 "$log_path" >&2 || true
    fi
  done
}

# --- Main loop ---------------------------------------------------------------

while ((accepted_count < target_puzzle_count)); do
  # Without a cap this loop never ends when no fill can be accepted (for
  # example a fixed grid and fixed seed that keep failing).
  if [[ -n "$MAX_GROUPS" ]] && ((attempted_group_count >= MAX_GROUPS)); then
    echo "Giving up after ${attempted_group_count} grid group(s) (CROSSMAKE_MAX_GROUPS=${MAX_GROUPS})" >&2
    break
  fi
  attempted_group_count=$((attempted_group_count + 1))
  run_grid_group "$attempted_group_count" "$(random_grid_index)"
done

echo "Accepted ${accepted_count} puzzle(s) after ${attempted_group_count} grid group(s); discarded ${discarded_count} lower-scoring accepted fill(s); rejected ${rejected_count}."

# Only reachable with a shortfall when CROSSMAKE_MAX_GROUPS stopped the loop.
if ((accepted_count < target_puzzle_count)); then
  exit 1
fi