commit 7e2135a5d3e2604a0e49bd6ea80b40e4e4a2d8e4
parent d7967b2dabaae6be1fe23c5e7690df5671cbebf6
Author: Michael Camilleri <[email protected]>
Date: Mon, 25 May 2026 18:24:55 +0900
Defer engagement .live promotion until the channel opens
The offerer was transitioning to .live the moment host.acceptReply returned —
which is just RTCPeerConnection.setRemoteDescription(answer) and tells us
nothing about whether the data channel ever came up. If ICE failed to produce a
connectable candidate pair, the state machine sat in .live forever while
peer.channel.readyState stayed at 'connecting', and any send call then threw
'Engagement channel is not open' from the JS shim.
The post-SDP, awaiting-channel state already existed for the answerer side as
.replied; this commit renames it to .handshaking (so it reads correctly from
either end), routes acceptReply into it, and keeps channelOpened as the only
path to .live. sweepStaleHandshakes already covered the old .replied case, so a
stuck .handshaking demotes after handshakeTimeout (30s) and the next presence
tick retries from .idle. The coordinator log line changes from 'engagement:
live …' to 'engagement: reply accepted …, awaiting channel', which leaves a
visible gap until AppServices logs 'engagement: channel open …' — the absence
of that second line is now a diagnostic signal for ICE failures rather than
being masked by an optimistic state.
Co-Authored-By: Claude Opus 4.7 <[email protected]>
Diffstat:
2 files changed, 120 insertions(+), 16 deletions(-)
diff --git a/Crossmate/Sync/EngagementCoordinator.swift b/Crossmate/Sync/EngagementCoordinator.swift
@@ -148,7 +148,12 @@ actor EngagementCoordinator {
private enum State: Equatable {
case idle
case offered(peerAuthorID: String, engagementID: UUID, at: Date)
- case replied(peerAuthorID: String, engagementID: UUID, at: Date)
+ /// SDP exchange complete from this device's POV — the answerer has sent
+ /// its reply, or the offerer has accepted the reply — but the WebRTC
+ /// data channel hasn't fired `onChannelOpen` yet. Treated as
+ /// not-yet-live: sends are rejected, and the handshake-sweep can
+ /// still demote it back to idle if the channel never comes up.
+ case handshaking(peerAuthorID: String, engagementID: UUID, at: Date)
case live(peerAuthorID: String, engagementID: UUID)
var engagementID: UUID? {
@@ -156,7 +161,7 @@ actor EngagementCoordinator {
case .idle:
nil
case .offered(_, let engagementID, _),
- .replied(_, let engagementID, _),
+ .handshaking(_, let engagementID, _),
.live(_, let engagementID):
engagementID
}
@@ -220,13 +225,16 @@ actor EngagementCoordinator {
await createOffer(gameID: gameID, peerAuthorID: localAuthorID)
}
- /// Demotes any `.offered`/`.replied` state that has been waiting longer
- /// than `handshakeTimeout` back to `.idle`, tearing down the host's
- /// peer connection so the next presence tick can retry cleanly. The
- /// peer-side discard window (`hailMaxAge`, 120 s) is much longer than
- /// the local handshake budget, so by the time we demote, the peer has
- /// either already replied (and we'd be `.live` or `.replied`) or is
- /// unreachable for reasons that a retry might fix.
+ /// Demotes any `.offered`/`.handshaking` state that has been waiting
+ /// longer than `handshakeTimeout` back to `.idle`, tearing down the
+ /// host's peer connection so the next presence tick can retry cleanly.
+ /// The peer-side discard window (`hailMaxAge`, 120 s) is much longer
+ /// than the local handshake budget, so by the time we demote, the peer
+ /// has either already replied (and we'd be `.live` or `.handshaking`)
+ /// or is unreachable for reasons that a retry might fix. `.handshaking`
+ /// also catches the case where SDP exchanged successfully but ICE
+ /// never produced a connectable candidate pair, so `onChannelOpen`
+ /// never fired.
private func sweepStaleHandshakes() async {
let cutoff = now()
var demoted: [(gameID: UUID, engagementID: UUID, kind: String, age: TimeInterval)] = []
@@ -238,10 +246,10 @@ actor EngagementCoordinator {
demoted.append((gameID, engagementID, "offer", age))
states[gameID] = .idle
}
- case .replied(_, let engagementID, let at):
+ case .handshaking(_, let engagementID, let at):
let age = cutoff.timeIntervalSince(at)
if age > handshakeTimeout {
- demoted.append((gameID, engagementID, "reply", age))
+ demoted.append((gameID, engagementID, "handshake", age))
states[gameID] = .idle
}
case .idle, .live:
@@ -308,7 +316,7 @@ actor EngagementCoordinator {
case .idle:
return nil
case .offered(let peerAuthorID, _, _),
- .replied(let peerAuthorID, _, _),
+ .handshaking(let peerAuthorID, _, _),
.live(let peerAuthorID, _):
states[gameID] = .live(peerAuthorID: peerAuthorID, engagementID: engagementID)
return gameID
@@ -397,7 +405,7 @@ actor EngagementCoordinator {
"with offer \(payload.engagementID.uuidString)"
)
}
- states[ping.gameID] = .replied(peerAuthorID: ping.authorID, engagementID: payload.engagementID, at: now())
+ states[ping.gameID] = .handshaking(peerAuthorID: ping.authorID, engagementID: payload.engagementID, at: now())
do {
let reply = try await host.acceptOfferAndReply(
engagementID: payload.engagementID,
@@ -433,9 +441,13 @@ actor EngagementCoordinator {
engagementID: payload.engagementID,
signal: payload.signal
)
- states[ping.gameID] = .live(peerAuthorID: ping.authorID, engagementID: payload.engagementID)
+ states[ping.gameID] = .handshaking(
+ peerAuthorID: ping.authorID,
+ engagementID: payload.engagementID,
+ at: now()
+ )
await deletePing(ping.recordName, ping.gameID)
- await log("engagement: live \(payload.engagementID.uuidString)")
+ await log("engagement: reply accepted \(payload.engagementID.uuidString), awaiting channel")
} catch {
states[ping.gameID] = .idle
await log("engagement: accept reply failed for \(ping.gameID.uuidString): \(error.localizedDescription)")
diff --git a/Tests/Unit/Sync/EngagementCoordinatorTests.swift b/Tests/Unit/Sync/EngagementCoordinatorTests.swift
@@ -526,6 +526,7 @@ struct EngagementCoordinatorTests {
payload: try reply.encoded(),
addressee: "alice:deviceA"
))
+ #expect(await coordinator.channelOpened(engagementID: offer.engagementID) == gameID)
await coordinator.sendDebugMessage(gameID: gameID, text: "debug hello")
@@ -567,6 +568,7 @@ struct EngagementCoordinatorTests {
payload: try reply.encoded(),
addressee: "alice:deviceA"
))
+ #expect(await coordinator.channelOpened(engagementID: offer.engagementID) == gameID)
let selection = EngagementSelectionUpdate(
gameID: gameID,
authorID: "alice",
@@ -582,6 +584,96 @@ struct EngagementCoordinatorTests {
#expect(EngagementMessage.decode(try #require(host.sentMessages.first?.message))?.selection == selection)
}
+ @Test("reply alone does not enable sends until the channel opens")
+ @MainActor
+ func sendIsSkippedUntilChannelOpens() async throws {
+ let gameID = UUID(uuidString: "A2A2A2A2-A2A2-A2A2-A2A2-A2A2A2A2A2A2")!
+ let host = MockEngagementHost()
+ let sink = EngagementCoordinatorTestSink()
+ let coordinator = EngagementCoordinator(
+ host: host,
+ localAuthorID: { "alice" },
+ localDeviceID: "deviceA",
+ presentPeers: { _ in [gameID: ["bob"]] },
+ sendHail: { gameID, payload, addressee in
+ await sink.send(gameID: gameID, payload: payload, addressee: addressee)
+ },
+ deletePing: { recordName, gameID in
+ await sink.delete(recordName: recordName, gameID: gameID)
+ }
+ )
+ await coordinator.peerPresenceMayHaveChanged(gameIDs: [gameID])
+ let offer = try #require(EngagementHailPayload.decode(await sink.sentHails().first?.payload))
+ let reply = EngagementHailPayload(
+ role: .reply,
+ engagementID: offer.engagementID,
+ signal: EngagementSignal(sdp: "reply-sdp", candidates: [])
+ )
+ await coordinator.handle(ping(
+ recordName: "reply-record",
+ gameID: gameID,
+ authorID: "bob",
+ deviceID: "deviceB",
+ payload: try reply.encoded(),
+ addressee: "alice:deviceA"
+ ))
+
+ await coordinator.sendDebugMessage(gameID: gameID, text: "too early")
+ #expect(host.sentMessages.isEmpty)
+
+ #expect(await coordinator.channelOpened(engagementID: offer.engagementID) == gameID)
+ await coordinator.sendDebugMessage(gameID: gameID, text: "now ok")
+ #expect(host.sentMessages.count == 1)
+ }
+
+ @Test("handshaking state demotes to idle if the channel never opens")
+ @MainActor
+ func staleHandshakingDemotesOnPresenceTick() async throws {
+ let gameID = UUID(uuidString: "C0FFEE00-0000-0000-0000-000000000005")!
+ let host = MockEngagementHost()
+ let sink = EngagementCoordinatorTestSink()
+ let clock = TestClock(time: Date(timeIntervalSince1970: 10_000))
+ let coordinator = EngagementCoordinator(
+ host: host,
+ localAuthorID: { "alice" },
+ localDeviceID: "deviceA",
+ presentPeers: { _ in [gameID: ["bob"]] },
+ sendHail: { gameID, payload, addressee in
+ await sink.send(gameID: gameID, payload: payload, addressee: addressee)
+ },
+ deletePing: { recordName, gameID in
+ await sink.delete(recordName: recordName, gameID: gameID)
+ },
+ now: { clock.now },
+ handshakeTimeout: 30
+ )
+ await coordinator.peerPresenceMayHaveChanged(gameIDs: [gameID])
+ let firstOffer = try #require(EngagementHailPayload.decode(await sink.sentHails().first?.payload))
+ let reply = EngagementHailPayload(
+ role: .reply,
+ engagementID: firstOffer.engagementID,
+ signal: EngagementSignal(sdp: "reply-sdp", candidates: [])
+ )
+ await coordinator.handle(ping(
+ recordName: "reply-record",
+ gameID: gameID,
+ authorID: "bob",
+ deviceID: "deviceB",
+ payload: try reply.encoded(),
+ addressee: "alice:deviceA"
+ ))
+
+ clock.advance(by: 31)
+ await coordinator.peerPresenceMayHaveChanged(gameIDs: [gameID])
+
+ #expect(host.tornDown == [firstOffer.engagementID])
+ let sent = await sink.sentHails()
+ #expect(sent.count == 2)
+ let secondOffer = try #require(EngagementHailPayload.decode(sent.last?.payload))
+ #expect(secondOffer.role == .offer)
+ #expect(secondOffer.engagementID != firstOffer.engagementID)
+ }
+
@Test("closed engagement can re-offer when peer remains present")
@MainActor
func channelCloseAllowsReconnectOffer() async throws {
@@ -705,7 +797,7 @@ struct EngagementCoordinatorTests {
let clock = TestClock(time: Date(timeIntervalSince1970: 10_000))
let coordinator = EngagementCoordinator(
host: host,
- // Local author is bob so an incoming offer from alice lands us in `.replied`
+ // Local author is bob so an incoming offer from alice lands us in `.handshaking`
// (peerPresenceMayHaveChanged would not initiate because alice < bob).
localAuthorID: { "bob" },
localDeviceID: "deviceB",