commit b34d2e41ba6e9192a8c9d505cc10889fe1a1b47f
parent a29c0bf6e330c106992664e4265766459c322ac2
Author: Michael Camilleri <[email protected]>
Date: Wed, 10 Jun 2026 13:05:33 +0900
Trace inbound-apply save failures to the diagnostics log
A Core Data failure while applying fetched records is the worst kind of
sync error: CKSyncEngine advances its change token when the delegate
returns, so the records leave the engine's to-deliver set and whatever a
failed save dropped is never redelivered. The fetch-side handlers
reported these failures with print(), which reaches the Xcode console and
nothing else. In Production the on-device diagnostics log is the only
observability, so the one error class that silently loses data was
invisible in exactly the logs collected from testers' devices.
This commit routes those failures through the tracer, using the shape
the send path already established: messages accumulate inside the
performAndWait batch context (which cannot await) and are traced once it
unwinds.
- BatchEffects gains a traces accumulator. The record-zone fetch
handler and the direct-push apply path append their ctx.save
failures to it and trace the batch's messages afterwards.
- replayCellCache returns its fetch-failure messages instead of
printing them; both callers fold the result into the batch's traces.
The print-only logSyncError helper becomes the syncErrorMessage
formatter.
- handleFetchedDatabaseChanges and applyZoneOrphaning return failure
messages from their performAndWait tuples and trace them after,
covering the placeholder-row / revocation-flag saves and the
orphaned-zone teardown the same way.
The engine-state persistence paths (saveEngineState, resetSyncState)
keep their console fallback: they run in synchronous contexts where the
tracer is not reachable, and they do not drop fetched records.
Co-Authored-By: Claude Fable 5 <[email protected]>
Diffstat:
2 files changed, 61 insertions(+), 32 deletions(-)
diff --git a/Crossmate/Sync/RecordApplier.swift b/Crossmate/Sync/RecordApplier.swift
@@ -38,6 +38,14 @@ struct BatchEffects {
/// address (see `RecordSerializer.deriveGameAddress`); the version gates
/// adoption so a stale inbound copy can't undo a rotation.
var accountPushSecrets: [(secret: String, version: Int64)] = []
+ /// Diagnostics emitted while applying the batch inside `performAndWait` —
+ /// chiefly Core Data fetch/save failures, which silently drop records (the
+ /// engine's change token advances when the delegate returns, so they are
+ /// never redelivered). The batch context can't `await`, so messages
+ /// accumulate here and the caller traces them afterwards — the same shape
+ /// as the send path's failure messages. A bare `print` is invisible in
+ /// Production, where the on-device diagnostics log is the only observability.
+ var traces: [String] = []
}
extension SyncEngine {
@@ -105,15 +113,15 @@ extension SyncEngine {
}
}
for gameID in effects.movesUpdated {
- self.replayCellCache(for: gameID, in: ctx)
+ effects.traces += self.replayCellCache(for: gameID, in: ctx)
}
if ctx.hasChanges {
do {
try ctx.save()
} catch {
let nsError = error as NSError
- print(
- "SyncEngine: direct-push ctx.save failed " +
+ effects.traces.append(
+ "direct-push ctx.save FAILED " +
"— domain=\(nsError.domain) code=\(nsError.code) " +
"\(nsError.localizedDescription)"
)
@@ -122,6 +130,9 @@ extension SyncEngine {
return effects
}
+ for message in effects.traces {
+ await trace(message)
+ }
if let onRemoteMovesUpdated, !effects.movesUpdated.isEmpty {
await onRemoteMovesUpdated(effects.movesUpdated)
}
@@ -299,11 +310,14 @@ extension SyncEngine {
/// Merges every device's `MovesEntity` row for `gameID` and reconciles the
/// `CellEntity` cache against the resulting grid. Must be called inside a
- /// `performAndWait` block on the same context.
+ /// `performAndWait` block on the same context. Returns diagnostic messages
+ /// for any fetch failure (normally empty) — the caller folds them into
+ /// `BatchEffects.traces` so they reach the diagnostics log once the batch
+ /// context unwinds.
nonisolated func replayCellCache(
for gameID: UUID,
in ctx: NSManagedObjectContext
- ) {
+ ) -> [String] {
let gameReq = NSFetchRequest<GameEntity>(entityName: "GameEntity")
gameReq.predicate = NSPredicate(format: "id == %@", gameID as CVarArg)
gameReq.fetchLimit = 1
@@ -313,12 +327,11 @@ extension SyncEngine {
} catch {
// CKSyncEngine commits the batch when the delegate returns
// (see fetchedRecordZoneChanges save-failure note), so re-throwing
- // won't redeliver — surface the failure to console instead of
- // silently leaving the cell cache stale for this game.
- Self.logSyncError("replayCellCache game fetch", gameID: gameID, error: error)
- return
+ // won't redeliver — surface the failure instead of silently
+ // leaving the cell cache stale for this game.
+ return [Self.syncErrorMessage("replayCellCache game fetch", gameID: gameID, error: error)]
}
- guard let game else { return }
+ guard let game else { return [] }
let movesReq = NSFetchRequest<MovesEntity>(entityName: "MovesEntity")
movesReq.predicate = NSPredicate(format: "game == %@", game)
@@ -326,8 +339,7 @@ extension SyncEngine {
do {
movesEntities = try ctx.fetch(movesReq)
} catch {
- Self.logSyncError("replayCellCache moves fetch", gameID: gameID, error: error)
- return
+ return [Self.syncErrorMessage("replayCellCache moves fetch", gameID: gameID, error: error)]
}
let values: [MovesValue] = movesEntities.compactMap { Self.movesValue(from: $0) }
let gridState = GridStateMerger.merge(values)
@@ -358,6 +370,7 @@ extension SyncEngine {
cell.markCode = 0
cell.letterAuthorID = nil
}
+ return []
}
/// Hydrates a `MovesValue` from a `MovesEntity`. Returns `nil` if the row
@@ -379,17 +392,16 @@ extension SyncEngine {
)
}
- /// Sync-context error surfacing — mirrors the `print(...)` format used by
- /// the surrounding save-failure handlers. The engine's change token has
- /// already advanced by the time these helpers run inside performAndWait,
- /// so the only available remediation is making the drop visible.
- nonisolated static func logSyncError(_ label: String, gameID: UUID, error: Error) {
+ /// Formats a sync-context fetch/save failure for the diagnostics log.
+ /// CKSyncEngine advances its change token when the delegate returns,
+ /// persisted or not, so the only remediation available here is making the
+ /// drop visible — and visible means traced (the on-device diagnostics
+ /// log), not printed: console output never reaches a collected log.
+ nonisolated static func syncErrorMessage(_ label: String, gameID: UUID, error: Error) -> String {
let nsError = error as NSError
- print(
- "SyncEngine: \(label) failed for \(gameID.uuidString) " +
+ return "\(label) FAILED for \(gameID.uuidString) " +
"— domain=\(nsError.domain) code=\(nsError.code) " +
"\(nsError.localizedDescription)"
- )
}
/// Applies an inbound `Archive` record. Inert while a live (non-revoked)
diff --git a/Crossmate/Sync/SyncEngine.swift b/Crossmate/Sync/SyncEngine.swift
@@ -1235,10 +1235,12 @@ actor SyncEngine {
// create placeholder GameEntities for newly-joined shares.
let ctx = persistence.container.newBackgroundContext()
ctx.mergePolicy = NSMergePolicy.mergeByPropertyObjectTrump
- let (removedIDs, revokedIDs, rejoinedIDs): ([UUID], [UUID], [UUID]) = ctx.performAndWait {
+ let (removedIDs, revokedIDs, rejoinedIDs, failureMessages):
+ ([UUID], [UUID], [UUID], [String]) = ctx.performAndWait {
var removed: [UUID] = []
var revoked: [UUID] = []
var rejoined: [UUID] = []
+ var messages: [String] = []
if !isPrivate {
for mod in event.modifications {
let zoneID = mod.zoneID
@@ -1290,16 +1292,22 @@ actor SyncEngine {
do {
try ctx.save()
} catch {
+ // A dropped save here loses placeholder rows / revocation
+ // flags with no redelivery (the change token advances on
+ // return) — trace it so the diagnostics log shows the drop.
let nsError = error as NSError
- print(
- "SyncEngine: db-changes ctx.save failed — domain=\(nsError.domain) " +
+ messages.append(
+ "db-changes ctx.save FAILED — domain=\(nsError.domain) " +
"code=\(nsError.code) \(nsError.localizedDescription)"
)
}
}
- return (removed, revoked, rejoined)
+ return (removed, revoked, rejoined, messages)
}
+ for message in failureMessages {
+ await trace(message)
+ }
for id in removedIDs {
if let cb = onGameRemoved { await cb(id) }
}
@@ -1433,20 +1441,22 @@ actor SyncEngine {
}
}
for gameID in effects.movesUpdated {
- self.replayCellCache(for: gameID, in: ctx)
+ effects.traces += self.replayCellCache(for: gameID, in: ctx)
}
// CKSyncEngine advances its change token whenever the delegate
// returns from fetchedRecordZoneChanges, regardless of whether we
// persisted anything. A silent failure here means the records are
// gone from the engine's "to deliver" set — they won't come back
- // without a `resetSyncState`. Surface failures so we can act.
+ // without a `resetSyncState`. Surface failures so we can act —
+ // through the tracer, which is the only channel that reaches the
+ // on-device diagnostics log this project debugs Production from.
if ctx.hasChanges {
do {
try ctx.save()
} catch {
let nsError = error as NSError
- print(
- "SyncEngine: fetchedRecordZoneChanges ctx.save failed " +
+ effects.traces.append(
+ "fetchedRecordZoneChanges ctx.save FAILED " +
"— domain=\(nsError.domain) code=\(nsError.code) " +
"\(nsError.localizedDescription)"
)
@@ -1455,6 +1465,9 @@ actor SyncEngine {
return effects
}
+ for message in effects.traces {
+ await trace(message)
+ }
if let onRemoteMovesUpdated, !effects.movesUpdated.isEmpty {
await onRemoteMovesUpdated(effects.movesUpdated)
}
@@ -1826,9 +1839,10 @@ actor SyncEngine {
let ctx = persistence.container.newBackgroundContext()
ctx.mergePolicy = NSMergePolicy.mergeByPropertyObjectTrump
- let (removedIDs, revokedIDs): ([UUID], [UUID]) = ctx.performAndWait {
+ let (removedIDs, revokedIDs, failureMessages): ([UUID], [UUID], [String]) = ctx.performAndWait {
var removed: [UUID] = []
var revoked: [UUID] = []
+ var messages: [String] = []
for zone in zones {
let zoneName = zone.zoneName
guard zoneName.hasPrefix("game-") else { continue }
@@ -1851,15 +1865,18 @@ actor SyncEngine {
try ctx.save()
} catch {
let nsError = error as NSError
- print(
- "SyncEngine: orphan-zone ctx.save failed — domain=\(nsError.domain) " +
+ messages.append(
+ "orphan-zone ctx.save FAILED — domain=\(nsError.domain) " +
"code=\(nsError.code) \(nsError.localizedDescription)"
)
}
}
- return (removed, revoked)
+ return (removed, revoked, messages)
}
+ for message in failureMessages {
+ await trace(message)
+ }
await trace(
"\(isPrivate ? "private" : "shared") orphaned \(zones.count) zone(s) on send: " +
zones.map(\.zoneName).sorted().joined(separator: ", ")