diff --git a/lib/src/cef_web_controller.dart b/lib/src/cef_web_controller.dart index d0c763c..47f5882 100644 --- a/lib/src/cef_web_controller.dart +++ b/lib/src/cef_web_controller.dart @@ -108,6 +108,12 @@ class CefWebController { /// `"crashed"` for a generic process death. void Function(String reason)? onProcessGone; + /// C1: the browser was created but never painted its first frame, even after the + /// native host re-kicked a repaint. The texture is (still) blank with no other + /// signal — the consumer can use this to recover (e.g. recreate the view) rather + /// than leaving a permanently blank tile. + VoidCallback? onPaintStalled; + /// The caret rect (view-local logical px) of the active IME composition. /// Wired by [CefWebView] to position the OS candidate window under the text; /// you generally don't set this yourself. @@ -234,10 +240,17 @@ class CefWebController { )); break; case 'processGone': - // The native host dropped this session (crash or cache-lock loss). The - // texture is dead; let the consumer react (show a reload affordance). + // The native host dropped this session (crash, cache-lock loss, or a + // create that failed — reason 'createFailed'). The texture is dead; let the + // consumer react (show a reload affordance / recreate). onProcessGone?.call(a['reason'] as String? ?? 'crashed'); break; + case 'paintStalled': + // C1: the browser came up but never delivered its first frame even after a + // re-kick — the texture is (still) blank with no other signal. Surface it so + // the consumer can recover (e.g. recreate the view) instead of a silent blank. 
+ onPaintStalled?.call(); + break; } } diff --git a/packages/flutter_cef_macos/macos/Classes/CdpRelay.swift b/packages/flutter_cef_macos/macos/Classes/CdpRelay.swift index fa325e9..181f123 100644 --- a/packages/flutter_cef_macos/macos/Classes/CdpRelay.swift +++ b/packages/flutter_cef_macos/macos/Classes/CdpRelay.swift @@ -95,6 +95,22 @@ final class CdpRelay { private var pipeIdToClientId: [Int: Int] = [:] private var nextLocalId = 0 private let multiplexLock = NSLock() + // H2: this relay's OWN Target.attachToTarget pipe id (used to learn our page's CDP + // session order-independently, instead of passively witnessing a fire-once + // browser-wide attachedToTarget event we may register too late to see), plus the + // client setAutoAttach ids to ack once we've attached. multiplexLock. + // + // The relay OUTLIVES a client connection (it is browser-keyed, freed only on + // agent-control toggle / browser disposal). So this state is reset on client detach + // and the in-flight attach is stamped with the client GENERATION that issued it — a + // late self-attach response from a departed client must NOT be written to whatever + // client connected next (stale ack / spurious event). pendingAutoAttachClientIds is a + // LIST so a second browser-level setAutoAttach issued before the first resolves is + // queued (one attachToTarget in flight) and acked too, instead of orphaning the first. + private var selfAttachPipeId: Int? + private var pendingAutoAttachClientIds: [Int] = [] + private var attachClientGeneration = 0 // the clientGeneration that issued the in-flight attach + private var clientGeneration = 0 // bumped on each client connect init(sendToPipe: @escaping (String) -> Void, scopeTargetId: String? = nil, relayId: Int = 0) { self.sendToPipe = sendToPipe @@ -263,6 +279,10 @@ final class CdpRelay { } clientFd = fd clientLock.unlock() + // H2: stamp a fresh client identity.
A self-attach response still in flight from a + // PRIOR client connection captured the old generation, so handleSelfAttachResponse + // will refuse to deliver its synthesized event / ack to THIS connection. + multiplexLock.lock(); clientGeneration &+= 1; multiplexLock.unlock() let accept = Data((key + CdpRelay.wsGUID).utf8) let digest = Insecure.SHA1.hash(data: accept) @@ -284,6 +304,16 @@ final class CdpRelay { if owned { clientFd = -1 } clientLock.unlock() if owned { close(fd) } + // H2: the relay persists past this client — drop the in-flight attach so a late + // self-attach response isn't delivered to the next client (a stale ack / spurious + // attachedToTarget). The generation is bumped at the NEXT connect, so even a + // response that races this reset is gated by attachClientGeneration. ourSessionId/ + // allowedSessions stay (the page is unchanged; the next client re-issues + // setAutoAttach and we synthesize from the known session). + multiplexLock.lock() + selfAttachPipeId = nil + pendingAutoAttachClientIds.removeAll() + multiplexLock.unlock() dlog("[cef][relay] client detached") } @@ -459,6 +489,10 @@ final class CdpRelay { func demuxPipeToClient(_ json: String) -> String? { if scopeTargetId != nil, let m = parseJson(json), m["method"] == nil, let pipeId = m["id"] as? Int { + // H2: our OWN Target.attachToTarget response — learn the page session + hand the + // client the synthesized attachedToTarget; never forward the raw response. + multiplexLock.lock(); let isSelfAttach = (pipeId == selfAttachPipeId); multiplexLock.unlock() + if isSelfAttach { handleSelfAttachResponse(m); return nil } multiplexLock.lock(); let clientId = pipeIdToClientId.removeValue(forKey: pipeId); multiplexLock.unlock() guard let clientId = clientId else { return nil } // sibling relay's response — drop var restored = m; restored["id"] = clientId @@ -549,10 +583,14 @@ final class CdpRelay { case "Target.attachedToTarget": let attachedTid = (params?["targetInfo"] as? 
[String: Any])?["targetId"] as? String let childSession = params?["sessionId"] as? String - if sid == nil { // browser-level auto-attach of a top-level target (a tile) + if sid == nil { // browser-level attach of a top-level target (a tile) guard attachedTid == tid else { return nil } // sibling tile — hide + // H2: learn our page session, but DON'T forward the raw browser-level event — + // we hand the client exactly one SYNTHESIZED attachedToTarget (beginPageAttach / + // handleSelfAttachResponse), so a real one our active attach may have triggered + // isn't delivered as a duplicate. if let cs = childSession { allowedSessions.insert(cs); ourSessionId = cs } - return json + return nil } guard let s = sid, allowedSessions.contains(s) else { return nil } // sub-target of ours if let cs = childSession { allowedSessions.insert(cs) } @@ -629,7 +667,17 @@ final class CdpRelay { guard (params?["flatten"] as? Bool) == true else { sendClientError(id, "non-flatten setAutoAttach is not permitted"); return nil } - return json + // H2: a BROWSER-LEVEL setAutoAttach (no sessionId — reached here because the + // sessionId branch above didn't claim it) is browser-context-wide. Forwarding + // it (a) lets us change a SIBLING tile's auto-attach params (cross-tile control + // leak) and (b) relies on a fire-once attachedToTarget storm we'll miss if we + // registered after a sibling already triggered it ("No page found"). Instead we + // don't forward it: we actively attach to OUR target and synthesize the page's + // attachedToTarget to the client ourselves (order-independent + scoped). A + // SESSION-scoped setAutoAttach for our page's sub-frames (sid != nil, our + // session) is forwarded by the session-routed branch above. + beginPageAttach(clientAutoAttachId: id) + return nil case "Target.attachToTarget": guard qTid == scopeTargetId else { sendClientError(id, "No target with given id found"); return nil } guard (params?["flatten"] as? 
Bool) == true else { @@ -687,6 +735,72 @@ final class CdpRelay { sendClientJson(["id": id, "result": ["targetInfos": [info]]]) } + /// H2: ensure this relay knows its page's CDP session, then hand the client the page's + /// Target.attachedToTarget directly — independent of the browser-wide auto-attach + /// storm (fire-once, and which we must not forward: it would change sibling tiles' + /// auto-attach). If we already learned our session, synthesize now; otherwise issue + /// our OWN scoped Target.attachToTarget and finish on its response. attachToTarget on + /// an already-attached target idempotently returns the existing sessionId, so this + /// resolves regardless of relay creation order (the "No page found" fix). + private func beginPageAttach(clientAutoAttachId: Int?) { + filterLock.lock(); let known = ourSessionId; filterLock.unlock() + if let s = known { + synthesizeAttachedToTarget(sessionId: s) + synthesizeOk(clientAutoAttachId) + return + } + guard let tid = scopeTargetId else { synthesizeOk(clientAutoAttachId); return } + multiplexLock.lock() + if let ack = clientAutoAttachId { pendingAutoAttachClientIds.append(ack) } + // Only ONE self-attach in flight: a second setAutoAttach arriving before the first + // resolves just queues its ack above (the single attachToTarget resolves the session + // for both); issuing a second would leak its pipeId mapping and lose an ack. 
+ guard selfAttachPipeId == nil else { multiplexLock.unlock(); return } + let pipeId = (relayId << 21) | (nextLocalId & 0x1FFFFF) + nextLocalId &+= 1 + selfAttachPipeId = pipeId + attachClientGeneration = clientGeneration + multiplexLock.unlock() + let cmd: [String: Any] = ["id": pipeId, "method": "Target.attachToTarget", + "params": ["targetId": tid, "flatten": true]] + if let s = jsonString(cmd) { sendToPipe(s) } + } + + /// H2: our scoped attachToTarget came back — record the page session, and (if the + /// client that issued it is still attached) hand it the synthesized attachedToTarget + + /// ack every queued setAutoAttach. If that client has since detached + /// (clientGeneration moved on), learn the session but write NOTHING — otherwise we'd + /// deliver a stale ack / spurious event to whatever client connected next. + private func handleSelfAttachResponse(_ m: [String: Any]) { + let sessionId = (m["result"] as? [String: Any])?["sessionId"] as? String + multiplexLock.lock() + selfAttachPipeId = nil + let acks = pendingAutoAttachClientIds + pendingAutoAttachClientIds.removeAll() + let sameClient = (attachClientGeneration == clientGeneration) + multiplexLock.unlock() + if let s = sessionId { + filterLock.lock(); allowedSessions.insert(s); ourSessionId = s; filterLock.unlock() + guard sameClient else { return } // issuing client gone — don't write to its successor + synthesizeAttachedToTarget(sessionId: s) + } else if !sameClient { + return + } + for ack in acks { synthesizeOk(ack) } + } + + /// H2: fabricate the page's Target.attachedToTarget for the client (flatten mode) so + /// Playwright/connectOverCDP discovers our page without us forwarding the browser-wide + /// auto-attach — mirrors synthesizeGetTargets' single-tile view. 
+ private func synthesizeAttachedToTarget(sessionId: String) { + guard let tid = scopeTargetId else { return } + let info: [String: Any] = ["targetId": tid, "type": "page", "title": "", "url": "", + "attached": true, "canAccessOpener": false, "browserContextId": ""] + sendClientJson(["method": "Target.attachedToTarget", + "params": ["sessionId": sessionId, "targetInfo": info, + "waitingForDebugger": false]]) + } + /// Send a CDP error reply to the client. Built via JSONSerialization so the message /// can't break the frame. No-op without an id (a notification has nothing to error). private func sendClientError(_ id: Int?, _ message: String) { diff --git a/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift b/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift index 1ee304e..74332a1 100644 --- a/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift +++ b/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift @@ -26,6 +26,11 @@ final class CefProfileHost { static let opResize: UInt8 = 0x11 static let opTargetId: UInt8 = 0x1b // cef_host -> us: a browser's CDP targetId (CEF-2b) static let opResolveTargetId: UInt8 = 0x36 // us -> cef_host: resolve this browser's CDP targetId + static let opPresent: UInt8 = 0x01 // cef_host -> us: a browser painted a frame (C1 watchdog peek) + static let opCreated: UInt8 = 0x1c // cef_host -> us: OnAfterCreated — advance the create pacer (H3) + static let opCreateFailed: UInt8 = 0x1d // cef_host -> us: create dispatch failed — drop the session (H7) + static let opInvalidate: UInt8 = 0x37 // us -> cef_host: force a repaint to re-kick a stalled first frame (C1) + static let opSetVisible: UInt8 = 0x35 // us -> cef_host: WasHidden(!visible); peeked to make the C1 watchdog visibility-aware // Profile identity / config. 
let profileId: String @@ -122,6 +127,31 @@ final class CefProfileHost { private var adhocHost = false // host reported a mock-keychain (ad-hoc) build private var createEnqueued: Set<UInt32> = [] // browserIds whose create has been sent + // Per-host create pacing (guarded by writeLock). A BURST of opCreateBrowser frames + // would otherwise make cef_host run a pile of browser creates concurrently, contending + // the one shared GPU/Viz accelerated-surface handshake so later browsers get NO surface + // and never paint. So we send creates ONE AT A TIME and advance only when cef_host acks + // the create (H3: opCreated, off OnAfterCreated) — serialized by COMPLETION, not a + // wall-clock guess. `createAckTimeout` is a backstop so a create that never acks (a + // wedged renderer) can't stall the queue forever. `createInFlight` is the browserId we + // are currently awaiting the ack for. + private var createSendQueue: [(id: UInt32, session: CefWebSession, url: String)] = [] + private var createPacerRunning = false + private var createInFlight: UInt32? + private let createAckTimeout: TimeInterval = 8 + + // C1 first-present watchdog (guarded by presentLock). browserIds awaiting their FIRST + // opPresent: if none arrives within the deadline we re-kick via opInvalidate, then (if + // still blank) surface paintStalled to Dart — converting a silent never-painted tile + // into self-healing-or-signalled. + private let presentLock = NSLock() + private var firstPresentPending: Set<UInt32> = [] + // C1: browsers the host has hidden (WasHidden(true) via opSetVisible). A hidden CEF + // browser stops producing frames entirely, so it legitimately never sends opPresent — + // the watchdog must NOT treat that as a stall (work_canvas creates tiles already + // off-screen as a normal lazy-spawn pattern). Guarded by presentLock. + private var hiddenBrowsers: Set<UInt32> = [] + // Invoked (off the reader thread) when an ad-hoc host refuses to load a named // profile (no creds were written — see F.5).
The plugin tears this host down // and respawns an ephemeral one for the same session. @@ -136,6 +166,14 @@ final class CefProfileHost { var onHostDied: ((Int32) -> Void)? private var diedFired = false // guarded by writeLock; one onHostDied per host + // H7: a SINGLE browser's create failed (the host is otherwise fine) — the plugin drops + // that one session + emits processGone for it. C1: a browser never painted its first + // frame despite a re-kick — the plugin surfaces paintStalled so the consumer can + // recover (e.g. recreate the view) instead of staring at a silent blank tile. Both + // carry the wire browserId; invoked off the reader / a timer thread. + var onBrowserFailed: ((UInt32) -> Void)? + var onPaintStalled: ((UInt32) -> Void)? + init(profileId: String, profileDir: String, isEphemeral: Bool) { self.profileId = profileId self.profileDir = profileDir @@ -401,7 +439,14 @@ final class CefProfileHost { // CEF-2b relayId<->target binding (CdpRelay's pipeId = relayId<<21 | localSeq) // relies on this for global uniqueness across N concurrent relays, so guard it — // the slot we're about to hand out must be FREE (never previously registered). - assert(browsers[id] == nil, "browserId must be monotonic / never reused") + // H8: a UInt32 wrap (or any bug) reusing an id would SILENTLY overwrite a live + // sibling's slot in a release build (the old guard was a debug-only `assert`, + // compiled out) → the reader misroutes that wire id's frames (paint/cookies/CDP) + // to the wrong tile, and CdpRelay's `relayId<<21` pipeId collides → cross-tile + // agent-control leak. Make it a hard runtime invariant (a free, non-reserved slot). + // Unreachable in practice (2^32 creates per host), so fail-fast >> silent corruption. 
+ precondition(id != 0 && browsers[id] == nil, + "cef browserId space exhausted/occupied — refusing to corrupt cross-tile routing") nextBrowserId += 1 browsers[id] = session browsersLock.unlock() @@ -417,11 +462,11 @@ final class CefProfileHost { // size, so capturing them now would ship a since-freed id and a stale size. pendingCreates.append { [weak self, weak session] in guard let self = self, let session = session else { return } - self.sendCreate(id, session, url) + self.enqueueCreate(id, session, url) } } writeLock.unlock() - if isReady { sendCreate(id, session, url) } + if isReady { enqueueCreate(id, session, url) } return id } @@ -439,11 +484,17 @@ final class CefProfileHost { // Any resize after this lands after the create, so cef_host has a slot and // self-heals the surface via DoResize. (writeLock→bufferLock here is safe: no // path holds bufferLock then takes writeLock.) + // H4: read (w, h, dpr, surfaceId) as ONE atomic snapshot rather than four separate + // bufferLock acquisitions — otherwise a resize interleaving between the reads could + // ship e.g. old width + new surfaceId, blitting the first paint into a mis-sized + // surface. (create-pacing widened this window: a browser can sit queued for N× + // spacing, giving layout resizes more time to interleave.) + let g = session.createSnapshot() var payload = [UInt8]() - appendU32(&payload, UInt32(session.w)) - appendU32(&payload, UInt32(session.h)) - appendF64(&payload, Double(session.scale)) - appendU32(&payload, session.surfaceId) + appendU32(&payload, UInt32(g.w)) + appendU32(&payload, UInt32(g.h)) + appendF64(&payload, Double(g.dpr)) + appendU32(&payload, g.sid) payload.append(contentsOf: Array(url.utf8)) createEnqueued.insert(id) let frame = frameBytes(id, Self.opCreateBrowser, payload) @@ -458,12 +509,157 @@ final class CefProfileHost { if !ok { handleHostDeath() } } + /// Enqueue a create for PACED sending instead of writing its opCreateBrowser + /// frame immediately. 
See `createSendQueue`: many tiles on one shared host + /// created in a burst would otherwise hand cef_host's single UI thread a pile of + /// blocking CreateBrowserSync calls at once. Idempotent pump kicks the pacer. + private func enqueueCreate(_ id: UInt32, _ session: CefWebSession, _ url: String) { + writeLock.lock() + createSendQueue.append((id, session, url)) + writeLock.unlock() + pumpCreateQueue() + } + + /// Send the NEXT queued create and wait for cef_host to ack it (opCreated) before + /// sending the following one (H3) — so browsers create one-at-a-time and each one's + /// render + accelerated-surface handshake completes before the next contends the shared + /// GPU/Viz process. `createAckTimeout` backstops a create that never acks (wedged + /// renderer). A create whose browser was disposed while queued is skipped. + private func pumpCreateQueue() { + writeLock.lock() + // H6: never pump on a dead/dying host — the queue was abandoned in + // shutdown()/handleHostDeath(); pumping would sendCreate into a closed pipe and a + // stuck `createPacerRunning` could wedge a reused host. + if !running || crashed || createPacerRunning || createSendQueue.isEmpty { + writeLock.unlock() + return + } + createPacerRunning = true + let next = createSendQueue.removeFirst() + createInFlight = next.id + writeLock.unlock() + + browsersLock.lock() + let stillLive = browsers[next.id] != nil + browsersLock.unlock() + guard stillLive else { + // Disposed while queued — drop it and advance. M1: trampoline rather than + // recurse synchronously (a "close all tiles" mid-burst could skip many disposed + // creates and blow the stack). 
+ writeLock.lock(); createPacerRunning = false; createInFlight = nil; writeLock.unlock() + DispatchQueue.global().async { [weak self] in self?.pumpCreateQueue() } + return + } + + armFirstPresentWatchdog(next.id) // C1: arm BEFORE the send so an early opPresent/opCreateFailed can't race the arm + sendCreate(next.id, next.session, next.url) + // H3: advance on the create ack (opCreated, via advanceCreatePacer in the reader); + // this timer is only the backstop if that ack never comes. + DispatchQueue.global().asyncAfter(deadline: .now() + createAckTimeout) { [weak self] in + self?.advanceCreatePacer(after: next.id, timedOut: true) + } + } + + /// H3: the in-flight create for `browserId` completed (opCreated), failed + /// (opCreateFailed), or timed out — release the pacer and send the next queued create. + /// Idempotent: only the FIRST of {ack, timeout} for the current in-flight id advances. + private func advanceCreatePacer(after browserId: UInt32, timedOut: Bool) { + writeLock.lock() + guard createInFlight == browserId else { writeLock.unlock(); return } + createInFlight = nil + createPacerRunning = false + writeLock.unlock() + if timedOut { + NSLog("[cef] profile '\(profileId)': create-ack timeout for browser \(browserId) — advancing pacer") + } + // Dispatch the next create OFF the reader thread (advanceCreatePacer is called from + // it on opCreated): pumpCreateQueue -> sendCreate writes to the same pipe the reader + // reads, and the reader must never block on a write. + DispatchQueue.global().async { [weak self] in self?.pumpCreateQueue() } + } + + /// H7: cef_host couldn't create this browser — drop the session (the plugin emits + /// processGone) and advance the pacer so the rest of the burst still proceeds.
+ private func handleCreateFailed(_ browserId: UInt32) { + firstPresentArrived(browserId) // cancel the C1 watchdog for a browser that won't paint + onBrowserFailed?(browserId) + advanceCreatePacer(after: browserId, timedOut: false) + } + + // MARK: C1 first-present watchdog + + /// Arm the first-present watchdog for a freshly-sent create. If no opPresent arrives + /// within ~3s we re-kick a repaint (opInvalidate); if still blank ~4s later we surface + /// paintStalled so the consumer can recover (recreate) instead of a silent blank tile. + private func armFirstPresentWatchdog(_ browserId: UInt32) { + presentLock.lock(); firstPresentPending.insert(browserId); presentLock.unlock() + DispatchQueue.global().asyncAfter(deadline: .now() + 3) { [weak self] in + self?.checkFirstPresent(browserId, phase: 1) + } + } + + /// Reader: a browser painted its first frame — cancel its watchdog. + private func firstPresentArrived(_ browserId: UInt32) { + presentLock.lock(); firstPresentPending.remove(browserId); presentLock.unlock() + } + + /// C1: track WasHidden state (peeked from opSetVisible). A hidden browser produces no + /// frames, so the watchdog suspends rather than flagging it stalled. On UNHIDE, re-arm + /// the watchdog for a browser that's still blank, so a genuinely-stuck now-visible tile + /// is still caught. 
+ private func noteVisibility(_ browserId: UInt32, visible: Bool) { + presentLock.lock() + if !visible { + hiddenBrowsers.insert(browserId) + presentLock.unlock() + return + } + hiddenBrowsers.remove(browserId) + let reArm = firstPresentPending.contains(browserId) + presentLock.unlock() + guard reArm else { return } + DispatchQueue.global().asyncAfter(deadline: .now() + 3) { [weak self] in + self?.checkFirstPresent(browserId, phase: 1) + } + } + + private func checkFirstPresent(_ browserId: UInt32, phase: Int) { + presentLock.lock() + let stillBlank = firstPresentPending.contains(browserId) + let hidden = hiddenBrowsers.contains(browserId) + // Don't retire the watch while hidden — a hidden browser is suspended, not stalled; + // noteVisibility re-arms it on unhide if still blank. + if stillBlank && !hidden && phase >= 2 { firstPresentPending.remove(browserId) } + presentLock.unlock() + guard stillBlank else { return } // it painted — nothing to do + guard !hidden else { return } // hidden by design — suspended; re-armed on unhide + // Only act while the browser is still live + the host healthy. + browsersLock.lock(); let live = browsers[browserId] != nil; browsersLock.unlock() + writeLock.lock(); let healthy = running && !crashed; writeLock.unlock() + guard live, healthy else { firstPresentArrived(browserId); return } + if phase == 1 { + NSLog("[cef] profile '\(profileId)': browser \(browserId) hasn't painted — re-kicking (opInvalidate)") + send(browserId, Self.opInvalidate, []) + DispatchQueue.global().asyncAfter(deadline: .now() + 4) { [weak self] in + self?.checkFirstPresent(browserId, phase: 2) + } + } else { + NSLog("[cef] profile '\(profileId)': browser \(browserId) never painted — surfacing paintStalled") + onPaintStalled?(browserId) + } + } + /// Frame `[u32 bodyLen=4+1+payload.count][u32 browserId][op][payload]` and /// write it, or queue it if the pipe isn't up yet. 
A pre-connect opResize whose /// browserId hasn't had its create enqueued is DROPPED — that create carries /// the current geometry, so replaying the resize could reference a since-freed /// IOSurface id. func send(_ browserId: UInt32, _ op: UInt8, _ payload: [UInt8]) { + // C1: peek visibility so the first-present watchdog doesn't flag an intentionally + // hidden (WasHidden) browser as stalled — it produces no frames by design. + if op == Self.opSetVisible, let v = payload.first { + noteVisibility(browserId, visible: v != 0) + } let frame = frameBytes(browserId, op, payload) writeLock.lock() if connFd < 0 { @@ -514,6 +710,12 @@ final class CefProfileHost { writeLock.lock() createEnqueued.remove(browserId) writeLock.unlock() + // C1: drop any watchdog/visibility bookkeeping for the gone browser so the sets + // don't grow across a long session of tile churn. + presentLock.lock() + firstPresentPending.remove(browserId) + hiddenBrowsers.remove(browserId) + presentLock.unlock() return remaining } @@ -531,8 +733,13 @@ final class CefProfileHost { // guards on `running`, so flipping it false here keeps onHostDied from firing // on the shutdown path (C1). writeLock.lock() - let wasRunning = running running = false + // H6: abandon any paced creates so a stuck pacer can't wedge a reused host and + // queued-never-sent sessions don't linger. The browsers map still holds them, so + // disposeSession/onHostDied path cleans them up. + createSendQueue.removeAll() + createPacerRunning = false + createInFlight = nil writeLock.unlock() // CEF-2a/b: drop ALL relays (each a listener + any client) before tearing down // the pipe, so none keeps bridging into a closing fd. Snapshot under the lock, @@ -552,10 +759,19 @@ final class CefProfileHost { // which Swift would otherwise resolve these unqualified calls to. 
if c >= 0 { Darwin.shutdown(c, SHUT_RDWR) } if l >= 0 { Darwin.shutdown(l, SHUT_RDWR) } - if readerStarted && wasRunning { _ = readerDone.wait(timeout: .now() + 2) } + // H1: gate the join on `readerStarted` ALONE (not the old `wasRunning`). The + // semaphore is level-triggered — if the reader already exited (e.g. it drove the + // crash path and signalled readerDone before this runs), wait() returns at once. + // Gating on `wasRunning` could SKIP the join while the reader is still blocked in + // read()/accept() on these fds and then close them under it (use-after-free). And + // on a join TIMEOUT the reader is, by definition, still inside read()/accept() on + // these fds — so do NOT close them; leak the fd rather than risk an fd-reuse UAF + // (the same discipline CdpRelay.stop() uses). The fds were already Darwin.shutdown + // -ed above to wake the reader, so a timeout here is genuinely pathological. + let readerJoined = !readerStarted || readerDone.wait(timeout: .now() + 2) == .success writeLock.lock() - if connFd >= 0 { close(connFd); connFd = -1 } - if listenFd >= 0 { close(listenFd); listenFd = -1 } + if connFd >= 0 { if readerJoined { close(connFd) }; connFd = -1 } + if listenFd >= 0 { if readerJoined { close(listenFd) }; listenFd = -1 } writeLock.unlock() if !socketPath.isEmpty { unlink(socketPath); socketPath = "" } if isEphemeral && !profileDir.isEmpty { @@ -574,16 +790,24 @@ final class CefProfileHost { // which closes its CDP write end (fd 4) and yields EOF on cdpReadFd so the // CDP reader loop returns. Done before the CDP reader join for that reason. terminateProcess() - if cdpReaderStarted && wasRunning { _ = cdpReaderDone.wait(timeout: .now() + 2) } - if cdpReadFd >= 0 { close(cdpReadFd); cdpReadFd = -1 } + // H1: same discipline for the CDP reader — gate on cdpReaderStarted alone, and + // never close the read fd on a join timeout (the reader is still in read() on it). 
+ let cdpJoined = !cdpReaderStarted || cdpReaderDone.wait(timeout: .now() + 2) == .success + if cdpReadFd >= 0 { if cdpJoined { close(cdpReadFd) }; cdpReadFd = -1 } } /// SIGTERM (then SIGKILL escalation) the cef_host process. Handles BOTH launch /// paths: `process` (Foundation.Process, default) and `spawnedPid` (posix_spawn, /// agent-control). Idempotent — clears whichever handle it used. private func terminateProcess() { - if let p = process { - process = nil + // H5: take BOTH handles atomically under writeLock so this is the sole owner of + // its terminate/waitpid — handleHostDeath's reaper can't be reaping the same pid + // concurrently (it took ownership the same way, or handed it back to us). + writeLock.lock() + let p = process; process = nil + let pid = spawnedPid; spawnedPid = 0 + writeLock.unlock() + if let p = p { p.terminate() // SIGTERM // Escalate to SIGKILL if the host is wedged and ignores SIGTERM. DispatchQueue.global().asyncAfter(deadline: .now() + 1.5) { @@ -594,9 +818,7 @@ final class CefProfileHost { // posix_spawn path: we own a raw pid, not a Process. SIGTERM then SIGKILL. // Reap with a non-blocking waitpid so the child doesn't linger as a zombie // (Foundation.Process reaps for us; for a bare pid we must do it ourselves). - let pid = spawnedPid guard pid > 0 else { return } - spawnedPid = 0 kill(pid, SIGTERM) DispatchQueue.global().asyncAfter(deadline: .now() + 1.5) { var status: Int32 = 0 @@ -706,7 +928,14 @@ final class CefProfileHost { if !readAll(fd, &hdr, 4) { break } let bodyLen = (Int(hdr[0]) << 24) | (Int(hdr[1]) << 16) | (Int(hdr[2]) << 8) | Int(hdr[3]) // Minimum valid body is 5 bytes (4 browserId + 1 op + 0 payload). 
- if bodyLen <= 4 || bodyLen > (64 << 20) { break } + // H9: a malformed/oversized length means a wire desync and tears down EVERY + // browser on this host — log the rejected length first so it isn't a silent, + // breadcrumb-less all-tiles crash (the IPC peer is trusted, so this only fires + // on a genuine framing bug). + if bodyLen <= 4 || bodyLen > (64 << 20) { + NSLog("[cef] profile '\(profileId)': rejecting malformed IPC frame, bodyLen=\(bodyLen) — tearing down host") + break + } var body = [UInt8](repeating: 0, count: bodyLen) if !readAll(fd, &body, bodyLen) { break } let bid = beU32(body, 0) @@ -718,10 +947,20 @@ final class CefProfileHost { // CEF-2b: a targetId resolution result — route to the pending completion, // not the session. handleTargetId(bid, String(bytes: payload, encoding: .utf8)) + } else if op == Self.opCreated { + advanceCreatePacer(after: bid, timedOut: false) // H3: create acked → send the next + } else if op == Self.opCreateFailed { + handleCreateFailed(bid) // H7 } else { browsersLock.lock() let session = browsers[bid] + // C1: detect the FIRST present under the browsersLock we already hold, via a + // per-session flag, so the watchdog-cancel (presentLock) fires once per browser + // instead of acquiring a second lock on every (up to 60fps) present frame. + let firstPaint = op == Self.opPresent && session != nil && !session!.firstPresentSeen + if firstPaint { session!.firstPresentSeen = true } browsersLock.unlock() + if firstPaint { firstPresentArrived(bid) } // cancel the watchdog (once) session?.handleFrame(op, payload) } } @@ -747,8 +986,19 @@ final class CefProfileHost { guard running, !diedFired else { writeLock.unlock(); return } diedFired = true crashed = true // forces hasLiveBrowser false so the named profile reopens + // H6: abandon paced creates — the host is gone. Sessions stay in `browsers`, so + // the onHostDied → plugin path still emits processGone for each queued one. 
+ createSendQueue.removeAll() + createPacerRunning = false + createInFlight = nil let p = process + // H5: TAKE the posix_spawn pid (zero it) so this reaper is the SOLE owner of its + // waitpid — a later terminateProcess()/shutdown() then sees 0 and won't + // double-reap a pid this thread is about to harvest (which could kill an + // OS-recycled pid). If it's wedged and we can't reap within the grace window + // below, we hand it back (restoreSpawnedPid) so terminateProcess can SIGKILL it. let pid = spawnedPid + spawnedPid = 0 let died = onHostDied writeLock.unlock() // Resolve the exit status + invoke onHostDied off the caller's thread: this @@ -771,33 +1021,36 @@ final class CefProfileHost { usleep(50_000) } } else if pid > 0 { + // We already TOOK ownership of `pid` (zeroed spawnedPid under writeLock), so + // we are the only thread that may waitpid it here. + var reaped = false for _ in 0 ..< 20 { // up to ~1s var raw: Int32 = 0 let r = waitpid(pid, &raw, WNOHANG) if r == pid { // Reaped. Mirror terminationStatus: exit code, or -1 if signaled. status = (raw & 0o177) == 0 ? ((raw >> 8) & 0xff) : -1 - // We reaped the pid, freeing it for OS reuse — clear the shared handle so a - // subsequent terminateProcess() (via onHostDied -> shutdown) doesn't - // kill()/waitpid() a now-reaped, possibly-recycled pid. - self?.clearReapedPid(pid) - break + reaped = true; break } else if r < 0 { - break // already reaped by terminateProcess() (or no child) — give up. + reaped = true; break // ECHILD / already gone — nothing to hand back. } usleep(50_000) } + // H5: still alive after the grace window (wedged child that didn't exit on + // EOF) — hand the pid back so terminateProcess()/shutdown() can SIGTERM/SIGKILL + // + reap it. Without this, a taken-but-unreaped pid would never be killed. 
+ if !reaped { self?.restoreSpawnedPid(pid) } } DispatchQueue.main.async { died?(status) } } } - /// Clear `spawnedPid` once we've reaped it, so no later kill()/waitpid() targets a - /// reaped (possibly OS-recycled) pid. Guarded by writeLock and matched against `pid` - /// so a racing relaunch's new pid is never cleared. - private func clearReapedPid(_ pid: pid_t) { + /// H5: hand a TAKEN-but-unreaped pid back to `spawnedPid` so terminateProcess() can + /// finish it (SIGTERM/SIGKILL + reap). No-op if a relaunch already installed a new + /// pid (so a racing relaunch is never clobbered). + private func restoreSpawnedPid(_ pid: pid_t) { writeLock.lock() - if spawnedPid == pid { spawnedPid = 0 } + if spawnedPid == 0 { spawnedPid = pid } writeLock.unlock() } @@ -983,11 +1236,38 @@ final class CefProfileHost { targetIdLock.unlock() guard first else { return } // a resolve is already in flight for this browser send(browserId, Self.opResolveTargetId, []) + // The page target may not have COMMITTED when the first probe fires — common + // for a tile force-spawned in a burst, where GPU/page init is async after + // create(). cef_host then finds no targetInfo and never sends opTargetId, so the + // old fire-once probe silently timed out to nil (empty `webview snapshot`). + // Re-probe within the deadline so a late-committing page still resolves. Each + // opResolveTargetId uses a fresh per-browser DevTools message id (see the + // 33858fb fix), so extra probes are harmless; handleTargetId removes the entry + // on the first reply, stopping the retries. 
+ scheduleTargetIdRetry(browserId, epoch, attemptsLeft: 9) // ~9 × 0.5s ≈ 4.5s DispatchQueue.global().asyncAfter(deadline: .now() + 5) { [weak self] in self?.timeoutTargetId(browserId, epoch) // fulfill with nil only if still this resolve } } + /// Re-send opResolveTargetId every 0.5s while this exact resolve is still pending + /// (not yet answered by handleTargetId, not superseded by a newer epoch), up to + /// `attemptsLeft` times — so a page that commits a second or two after create() + /// still resolves its targetId instead of the fire-once probe missing it. + private func scheduleTargetIdRetry(_ browserId: UInt32, _ epoch: Int, attemptsLeft: Int) { + guard attemptsLeft > 0 else { return } + DispatchQueue.global().asyncAfter(deadline: .now() + 0.5) { [weak self] in + guard let self = self else { return } + self.targetIdLock.lock() + let stillPending = + self.targetIdEpoch[browserId] == epoch && self.pendingTargetId[browserId] != nil + self.targetIdLock.unlock() + guard stillPending else { return } // resolved or superseded — stop + self.send(browserId, Self.opResolveTargetId, []) + self.scheduleTargetIdRetry(browserId, epoch, attemptsLeft: attemptsLeft - 1) + } + } + /// Fulfill all pending targetId waiters for a browser with a real result (reader /// thread). The matching resolve's timer is left to no-op via the epoch guard. private func handleTargetId(_ browserId: UInt32, _ tid: String?) { @@ -1083,9 +1363,12 @@ final class CefProfileHost { if msg.contains("\"id\":1") { logged = true NSLog("[cef][cdp-pipe:\(self.profileId)] Browser.getVersion round-trip OK: \(msg)") - // Restore the prior handler now that the gate has passed, so the probe - // wiring doesn't linger on the hot path. - self.cdpHandlerLock.lock(); self.onCdpMessage = prior; self.cdpHandlerLock.unlock() + // Do NOT restore onCdpMessage here. 
enableAgentControl may have chained the relay + // fan-out ON TOP of this probe handler (it captures the then-current handler as + // its own `prior`); overwriting back to OUR captured `prior` (the pre-probe + // handler, usually nil) would silently DROP that fan-out so relays receive no + // pipe messages. The handler is harmless once `logged`: it forwards to `prior` + // and short-circuits the id:1 check. } } cdpHandlerLock.unlock() diff --git a/packages/flutter_cef_macos/macos/Classes/CefWebSession.swift b/packages/flutter_cef_macos/macos/Classes/CefWebSession.swift index 6e3ca68..3d9e12e 100644 --- a/packages/flutter_cef_macos/macos/Classes/CefWebSession.swift +++ b/packages/flutter_cef_macos/macos/Classes/CefWebSession.swift @@ -98,6 +98,10 @@ final class CefWebSession: NSObject, FlutterTexture { // CefProfileHost.createBrowser() allocates the id. private weak var host: CefProfileHost? private(set) var browserId: UInt32 = 0 + // C1: set once when this browser delivers its first present frame. Owned/guarded by + // CefProfileHost under its browsersLock (the reader flips it there) — a cheap per-frame + // first-paint check that avoids a second lock on the hot paint path. + var firstPresentSeen = false private weak var registry: FlutterTextureRegistry? 
private var width: Int @@ -129,7 +133,9 @@ final class CefWebSession: NSObject, FlutterTexture { self.dpr = dpr self.registry = registry super.init() - _ = allocateBuffers(self.width, self.height) + if let (surf, buffer) = makeBuffers(self.width, self.height) { + publishBuffers(surf, buffer, self.width, self.height) + } self.textureId = registry.register(self) } @@ -158,15 +164,12 @@ final class CefWebSession: NSObject, FlutterTexture { let unchanged = (w == width && h == height) bufferLock.unlock() if unchanged { return } - guard allocateBuffers(w, h) else { return } // takes bufferLock internally - // Publish the new geometry and read the fresh surface id under bufferLock so a - // concurrent host read (sendCreate on the reader thread) sees a consistent - // (w, h, surfaceId). Released before sendFrame — no bufferLock→writeLock nest. - bufferLock.lock() - width = w - height = h - let sid = ioSurface.map { IOSurfaceGetID($0) } ?? 0 - bufferLock.unlock() + // H4: create the new surface OUTSIDE the lock (expensive), then publish surface + + // new dims ATOMICALLY in one bufferLock section — so a concurrent host read + // (sendCreate's createSnapshot on the reader thread) can never see the new surface + // with the old dims. Released before sendFrame — no bufferLock→writeLock nest. + guard let (surf, buffer) = makeBuffers(w, h) else { return } + let sid = publishBuffers(surf, buffer, w, h) guard sid != 0 else { return } var payload = [UInt8]() appendU32(&payload, UInt32(w)) @@ -311,7 +314,10 @@ final class CefWebSession: NSObject, FlutterTexture { // MARK: Buffers - private func allocateBuffers(_ w: Int, _ h: Int) -> Bool { + /// H4: CREATE an IOSurface + CVPixelBuffer for (w,h) but do NOT publish them — the + /// caller publishes surface + geometry atomically via publishBuffers so a concurrent + /// createSnapshot()/copyPixelBuffer never sees a surface and dims out of sync. + private func makeBuffers(_ w: Int, _ h: Int) -> (IOSurfaceRef, CVPixelBuffer)? 
{ // Allocate at PHYSICAL (Retina) resolution = logical * dpr, so the texture // is crisp on HiDPI displays; cef_host renders the OSR buffer at the same // scale (via GetScreenInfo.device_scale_factor). 64-byte-aligned stride keeps @@ -330,7 +336,7 @@ final class CefWebSession: NSObject, FlutterTexture { ] guard let surf = IOSurfaceCreate(props as CFDictionary) else { NSLog("[cef] IOSurfaceCreate failed \(w)x\(h)") - return false + return nil } var pbOut: Unmanaged? let attrs: [CFString: Any] = [ @@ -341,14 +347,33 @@ final class CefWebSession: NSObject, FlutterTexture { kCFAllocatorDefault, surf, attrs as CFDictionary, &pbOut) guard rc == kCVReturnSuccess, let buffer = pbOut?.takeRetainedValue() else { NSLog("[cef] CVPixelBufferCreateWithIOSurface failed rc=\(rc)") - return false + return nil } - bufferLock.lock() + NSLog("[cef] allocated IOSurface id=\(IOSurfaceGetID(surf)) \(pw)x\(ph) (logical \(w)x\(h) @\(dpr)x) stride=\(bytesPerRow)") + return (surf, buffer) + } + + /// H4: publish a new (surface, buffer, width, height) as ONE atomic update, so a + /// concurrent createSnapshot()/copyPixelBuffer never observes the new surface with + /// the old dims (or vice-versa). Returns the new surface id. The old IOSurface/ + /// CVPixelBuffer are released by the overwrite. + @discardableResult + private func publishBuffers(_ surf: IOSurfaceRef, _ buffer: CVPixelBuffer, + _ w: Int, _ h: Int) -> UInt32 { + bufferLock.lock(); defer { bufferLock.unlock() } ioSurface = surf pixelBuffer = buffer - bufferLock.unlock() - NSLog("[cef] allocated IOSurface id=\(IOSurfaceGetID(surf)) \(pw)x\(ph) (logical \(w)x\(h) @\(dpr)x) stride=\(bytesPerRow)") - return true + width = w + height = h + return IOSurfaceGetID(surf) + } + + /// H4: read (w, h, dpr, surfaceId) as ONE consistent tuple under a single bufferLock + /// acquisition — the host builds opCreateBrowser from this so its payload can't + /// capture a torn mix of stale dims + a freshly-reallocated surface id. 
+ func createSnapshot() -> (w: Int, h: Int, dpr: CGFloat, sid: UInt32) { + bufferLock.lock(); defer { bufferLock.unlock() } + return (width, height, dpr, ioSurface.map { IOSurfaceGetID($0) } ?? 0) } // MARK: Inbound frames diff --git a/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift b/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift index f86a54f..d2f6906 100644 --- a/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift +++ b/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift @@ -18,6 +18,14 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { private var sessions: [String: CefWebSession] = [:] // sessionId -> session (verb routing) private var sessionHost: [String: CefProfileHost] = [:] // sessionId -> its host private var sessionKey: [String: String] = [:] // sessionId -> profiles[] key, for teardown + // C2: per-session create args, so when a shared host turns out to be ad-hoc and + // refuses its named profile we can re-home EVERY session on it onto ephemeral hosts + // (not just the last one whose closure was installed), preserving each session's + // url + schemes + agent-control transport. + private var sessionCreateArgs: [String: (url: String, allowedSchemes: String, agentControl: Bool)] = [:] + // C2: named profiles a running ad-hoc host already refused — future creates for them + // go straight to ephemeral instead of racing onto a doomed shared host. + private var adhocBlockedProfiles: Set = [] public static func register(with registrar: FlutterPluginRegistrar) { let instance = FlutterCefPlugin() @@ -293,8 +301,11 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { // per-target CDP relay (one relay per browserId, demuxed over the shared pipe // by the per-relay CDP-id rewrite — see CdpRelay's multiplex note). - let (profileDir, isEphemeral) = resolveProfileDir(namedProfile ? profile : nil) - let key = namedProfile ? profile! 
: "~ephemeral~" + sessionId + // C2: if a running ad-hoc host already refused this named profile, don't race onto + // a doomed shared host — go ephemeral directly. + let effectiveNamed = namedProfile && !adhocBlockedProfiles.contains(profile ?? "") + let (profileDir, isEphemeral) = resolveProfileDir(effectiveNamed ? profile : nil) + let key = effectiveNamed ? profile! : "~ephemeral~" + sessionId guard let host = resolveOrSpawnHost( key: key, profileDir: profileDir, isEphemeral: isEphemeral, @@ -311,11 +322,14 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { // When that fires, tear the host down and respawn an EPHEMERAL host for this // same session, then re-issue createBrowser. Wired only for named profiles; // an already-ephemeral host never refuses. - if namedProfile { - host.onInsecureProfileRefused = { [weak self] in + if effectiveNamed { + // C2: re-home the WHOLE shared host's sessions onto ephemeral hosts on refusal — + // not just this one. The closure captures the host, not a single sessionId, so a + // burst of tiles that all attached before opReady are all rescued. 
+ host.onInsecureProfileRefused = { [weak self, weak host] in DispatchQueue.main.async { - self?.respawnEphemeral(sessionId: sessionId, url: url, - allowedSchemes: allowedSchemes) + guard let self = self, let host = host, let prof = profile else { return } + self.respawnHostEphemeral(host, refusedProfile: prof) } } } @@ -396,6 +410,7 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { sessions[sessionId] = session sessionHost[sessionId] = host sessionKey[sessionId] = key + sessionCreateArgs[sessionId] = (url, allowedSchemes, agentControl) // C2 re-home result([ "textureId": session.textureId, "width": width, "height": height, "cdpPort": host.cdpPort, @@ -451,6 +466,7 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { self.sessions[sid] = nil self.sessionHost[sid] = nil self.sessionKey[sid] = nil + self.sessionCreateArgs[sid] = nil } // Drop the host from the profile registry so a re-create spawns a fresh // one. Snapshot the matching keys first — never mutate a Dictionary while @@ -460,42 +476,87 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { // Reap: idempotent SIGTERM(+SIGKILL escalation), a no-op if already exited. host.shutdown() } + // H7: a SINGLE browser's create failed (host otherwise healthy) — drop just that + // session + emit processGone for it, so Dart stops waiting on a browser that will + // never paint (the host's create-pacer already advanced). 
+ host.onBrowserFailed = { [weak self, weak host] browserId in + DispatchQueue.main.async { + guard let self = self, let host = host, + let sid = self.sessionId(forBrowserId: browserId, on: host) else { return } + self.emit("processGone", ["sessionId": sid, "reason": "createFailed"]) + let session = self.sessions[sid] + self.sessions[sid] = nil + self.sessionHost[sid] = nil + self.sessionKey[sid] = nil + self.sessionCreateArgs[sid] = nil + _ = host.removeBrowser(browserId) + session?.dispose() + } + } + // C1: a browser never painted its first frame despite a re-kick — surface + // paintStalled so Dart/the consumer can recover (e.g. recreate the view) instead of + // a silent, unrecoverable blank tile. The browser stays alive (it may yet paint). + host.onPaintStalled = { [weak self, weak host] browserId in + DispatchQueue.main.async { + guard let self = self, let host = host, + let sid = self.sessionId(forBrowserId: browserId, on: host) else { return } + self.emit("paintStalled", ["sessionId": sid]) + } + } + } + + /// Find the sessionId of the session bound to `browserId` on `host` (main-thread maps). + private func sessionId(forBrowserId browserId: UInt32, on host: CefProfileHost) -> String? { + for (sid, s) in sessions where s.browserId == browserId && sessionHost[sid] === host { + return sid + } + return nil } - /// F.5: the running cef_host turned out to be an ad-hoc (mock-keychain) build - /// and refused the named profile. Tear that host down and recreate this session - /// on a fresh EPHEMERAL host (recomputing dir/key), then re-issue createBrowser - /// with the original url/allowedSchemes. Because nothing was written to the - /// persistent dir, no creds leak. - private func respawnEphemeral(sessionId: String, url: String, - allowedSchemes: String) { - // The unlocked session/profile dictionaries are confined to the main thread - // (H3); this is reached from onInsecureProfileRefused via DispatchQueue.main. 
+ /// C2/F.5: a running cef_host turned out to be an ad-hoc (mock-keychain) build and + /// refused its named profile (at opReady, BEFORE any browser was created — so nothing + /// rendered or leaked). Re-home EVERY session that was on that shared host onto its + /// own ephemeral host, preserving each session's url/schemes/agent-control, and + /// remember the profile so later creates skip the doomed shared host. This replaces + /// the old per-session respawn that shut the whole shared host down — which stranded + /// every sibling tile blank-and-dead with no error. + private func respawnHostEphemeral(_ oldHost: CefProfileHost, refusedProfile: String) { + // The unlocked session/profile dictionaries are confined to the main thread (H3); + // reached from onInsecureProfileRefused via DispatchQueue.main. dispatchPrecondition(condition: .onQueue(.main)) - guard let session = sessions[sessionId], - let cefHost = resolveCefHostPath() else { return } - // Tear down the refused (named) host and forget it. - if let oldKey = sessionKey[sessionId], let oldHost = profiles[oldKey] { - oldHost.shutdown() - profiles[oldKey] = nil - } - // The slimmed session keeps its texture/buffers; just re-bind it to a fresh - // ephemeral host. CDP is never enabled here (a named profile rejects CDP, so - // a refused-then-downgraded session had none). - let (profileDir, isEphemeral) = resolveProfileDir(nil) - let key = "~ephemeral~" + sessionId - let host = CefProfileHost( - profileId: key, profileDir: profileDir, isEphemeral: isEphemeral) - guard host.spawn(cefHostPath: cefHost, enableCdp: false, - allowedSchemes: allowedSchemes) else { - NSLog("[cef] respawn ephemeral host failed for \(sessionId)") - return + guard let cefHost = resolveCefHostPath() else { return } + adhocBlockedProfiles.insert(refusedProfile) + let victims = sessionHost.compactMap { $0.value === oldHost ? $0.key : nil } + // Forget + tear down the refused host (every session on it is about to move off). 
+ let goneKeys = profiles.compactMap { $0.value === oldHost ? $0.key : nil } + for k in goneKeys { profiles[k] = nil } + oldHost.shutdown() + for sid in victims { + guard let session = sessions[sid], let args = sessionCreateArgs[sid] else { continue } + let (profileDir, isEphemeral) = resolveProfileDir(nil) + let key = "~ephemeral~" + sid + let host = CefProfileHost(profileId: key, profileDir: profileDir, isEphemeral: isEphemeral) + guard host.spawn(cefHostPath: cefHost, enableCdp: false, + allowedSchemes: args.allowedSchemes, + agentControl: args.agentControl) else { + // The old host is already shut down, so a bare `continue` would strand this + // session bound to a dead host: blank tile, no signal, leaked session+texture. + // Fail it explicitly instead — processGone lets the consumer recreate. + NSLog("[cef] C2 respawn ephemeral host failed for \(sid)") + emit("processGone", ["sessionId": sid, "reason": "respawnFailed"]) + sessions[sid] = nil + sessionHost[sid] = nil + sessionKey[sid] = nil + sessionCreateArgs[sid] = nil + session.dispose() + continue + } + wireHostDied(host) + profiles[key] = host + _ = host.createBrowser(session, url: args.url, allowedSchemes: args.allowedSchemes) + sessionHost[sid] = host + sessionKey[sid] = key } - wireHostDied(host) - profiles[key] = host - _ = host.createBrowser(session, url: url, allowedSchemes: allowedSchemes) - sessionHost[sessionId] = host - sessionKey[sessionId] = key } private func navigate(_ a: [String: Any], _ result: @escaping FlutterResult) { @@ -544,6 +605,7 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { sessions[id] = nil sessionHost[id] = nil sessionKey[id] = nil + sessionCreateArgs[id] = nil guard let host = host else { // No host on record (shouldn't happen) — just release the session. 
session.dispose() diff --git a/packages/flutter_cef_macos/native/cef_host/main.mm b/packages/flutter_cef_macos/native/cef_host/main.mm index bcb84b9..d2b4305 100644 --- a/packages/flutter_cef_macos/native/cef_host/main.mm +++ b/packages/flutter_cef_macos/native/cef_host/main.mm @@ -116,6 +116,8 @@ constexpr uint8_t kOpImeBounds = 0x19; // {u32 x}{u32 y}{u32 w}{u32 h} caret rect (DIP) constexpr uint8_t kOpCookies = 0x1a; // {u32 id}{utf8 json-array} visitAllCookies result constexpr uint8_t kOpTargetId = 0x1b; // {utf8 targetId} -> plugin: this browser's CDP targetId (CEF-2b) +constexpr uint8_t kOpCreated = 0x1c; // {} H3: OnAfterCreated — browser is up; host's pacer sends the next create +constexpr uint8_t kOpCreateFailed = 0x1d; // {} H7: async CreateBrowser dispatch failed; host drops the session constexpr uint8_t kOpPointer = 0x10; constexpr uint8_t kOpResize = 0x11; constexpr uint8_t kOpKey = 0x12; @@ -145,6 +147,7 @@ constexpr uint8_t kOpLoadTrusted = 0x34; // {utf8 url} host content-load, exempt from allowlist constexpr uint8_t kOpSetVisible = 0x35; // {u8 visible} -> CefBrowserHost::WasHidden(!visible) constexpr uint8_t kOpResolveTargetId = 0x36; // {} resolve this browser's CDP targetId (CEF-2b) -> kOpTargetId +constexpr uint8_t kOpInvalidate = 0x37; // {} C1: force a repaint (Invalidate PET_VIEW) to re-kick a stalled first frame // ---- Shared runtime state ---- // Atomic: the reader thread reads it (ReadAll), SendFrame on any thread reads it, @@ -165,6 +168,13 @@ struct Slot { uint32_t browser_id = 0; // Swift-assigned wire id (>=1); NOT GetIdentifier(). CefRefPtr browser; + // H3 async-create dispose-loss guard: a dispose arriving while the async + // CreateBrowser is still in flight (browser == null) can't CloseBrowser yet, so it + // records intent here and OnAfterCreated honors it the instant the browser binds — + // otherwise that browser is a live orphan (renderer + IOSurface) nothing reclaims + // until whole-host shutdown. 
UI-thread-confined (DoDisposeBrowser + OnAfterCreated + // both run on the CEF UI thread), so no lock. + bool close_requested = false; // Guards surface / width / height / dpr / popup_* for THIS browser. Per-slot // (not a single global) so paints on independent browsers don't contend. @@ -321,8 +331,15 @@ bool ReadAll(int fd, void* buf, size_t len) { // payloadLen, counting every byte after the length prefix. void SendFrame(uint32_t browser_id, uint8_t opcode, const void* payload, uint32_t payload_len) { - if (g_ipc_fd < 0) return; + if (g_ipc_fd < 0) return; // racy early-out; the authoritative check is under the lock std::lock_guard lock(g_ipc_write_mutex); + // C3: SNAPSHOT the fd under the write lock and write to the snapshot, never re-loading + // g_ipc_fd at write time. Teardown sets g_ipc_fd=-1 (exchange) and close()s the old fd + // under this same lock, so once we hold it the fd is either still valid (write) or + // already -1 (skip) — a paint thread can no longer pass the early-out and then write + // into a closed/recycled fd. + int fd = g_ipc_fd.load(); + if (fd < 0) return; uint32_t body_len = 4 + 1 + payload_len; // Assemble the whole frame and write it in one WriteAll so a partial write // never leaves the peer with a length prefix it can't satisfy (stream desync). @@ -337,7 +354,7 @@ void SendFrame(uint32_t browser_id, uint8_t opcode, const void* payload, frame[7] = static_cast(browser_id & 0xff); frame[8] = opcode; if (payload_len) memcpy(frame.data() + 9, payload, payload_len); - WriteAll(g_ipc_fd, frame.data(), frame.size()); + WriteAll(fd, frame.data(), frame.size()); } void SendLog(uint32_t browser_id, const std::string& msg) { @@ -776,10 +793,15 @@ void OnLoadStart(CefRefPtr, CefRefPtr frame, // (Re)install JS-channel shims for this freshly-loaded frame. 
for (const auto& name : g_channels) InjectChannelShim(frame, name); } - void OnLoadEnd(CefRefPtr, CefRefPtr frame, + void OnLoadEnd(CefRefPtr browser, CefRefPtr frame, int /*httpStatusCode*/) override { - if (frame && frame->IsMain()) + if (frame && frame->IsMain()) { SendUtf8(slot_->browser_id, kOpPageFinish, frame->GetURL().ToString()); + // C1: force a repaint when the main frame finishes — a first paint dropped + // during load (e.g. a GPU surface not yet ready) self-heals here instead of + // leaving a permanently blank texture with no signal. + if (browser && browser->GetHost()) browser->GetHost()->Invalidate(PET_VIEW); + } } void OnLoadError(CefRefPtr, CefRefPtr, ErrorCode code, const CefString& text, const CefString& url) override { @@ -815,6 +837,20 @@ void OnLoadingProgressChange(CefRefPtr, double progress) override { SendFrame(slot_->browser_id, kOpProgress, p, 4); } + // H3: async create completes here on the CEF UI thread. Bind the browser to its slot + // (DoCreateBrowser no longer does — it dropped the blocking CreateBrowserSync) and ack + // the host so its create-pacer sends the NEXT create: creates serialize by COMPLETION + // (each browser's render + GPU/Viz accelerated-surface handshake done before the next + // contends the shared GPU process), not a wall-clock guess. + void OnAfterCreated(CefRefPtr browser) override { + slot_->browser = browser; + SendFrame(slot_->browser_id, kOpCreated, nullptr, 0); + // H3: a dispose arrived during the async-create window and recorded intent — honor + // it now (OnBeforeClose then does the normal map-erase + surface release + retain- + // cycle break) so we don't leak a live orphan browser the Swift side already forgot. + if (slot_->close_requested) browser->GetHost()->CloseBrowser(true); + } + // CefLifeSpanHandler: route popups (window.open / target=_blank) to the host // instead of opening a native window. 
Returning true cancels the native popup; // the host decides what to do (commonly load the URL in the same view). This @@ -1079,15 +1115,22 @@ void DoCreateBrowser(uint32_t wire_id, int w, int h, double dpr, uint32_t sid, CefBrowserSettings settings; settings.windowless_frame_rate = 60; CefRefPtr client = new HostClient(slot); - CefRefPtr browser = CefBrowserHost::CreateBrowserSync( + // H3: ASYNC create. CreateBrowserSync BLOCKS this (the single CEF UI) thread until + // the renderer + GPU/Viz accelerated-surface handshake completes — so a burst of + // creates serialized here, contended the one shared GPU process (later browsers got + // no surface, never painted), and one hung create wedged input/resize/dispose for + // every sibling. CreateBrowser returns immediately; the browser is bound to its slot + // in HostClient::OnAfterCreated, which acks kOpCreated so the host's pacer sends the + // NEXT create — serialized by COMPLETION, not a wall-clock guess. + bool dispatched = CefBrowserHost::CreateBrowser( window_info, client, url, settings, nullptr, nullptr); - slot->browser = browser; - if (!browser) { - // CreateBrowserSync failed: OnBeforeClose (the only path that erases the - // wire-id entry and releases slot->surface) can never fire without a - // browser, so reclaim here or the slot + the looked-up IOSurface (+1 ref) - // leak forever and the wire id is stranded. - SendLog(wire_id, "createBrowser: CreateBrowserSync returned null"); + if (!dispatched) { + // H7: the create couldn't even be dispatched — OnAfterCreated/OnBeforeClose will + // never fire, so reclaim the slot + the looked-up IOSurface (+1 ref) here (else + // they leak and the wire id is stranded) and tell the host so it drops the session + // (processGone) and its create-pacer advances instead of stalling on the ack. 
+ SendLog(wire_id, "createBrowser: CreateBrowser dispatch failed"); + SendFrame(wire_id, kOpCreateFailed, nullptr, 0); { std::lock_guard lock(g_slots_mutex); g_slots_by_wire_id.erase(wire_id); @@ -1099,8 +1142,8 @@ void DoCreateBrowser(uint32_t wire_id, int w, int h, double dpr, uint32_t sid, } } if (std::getenv("FLUTTER_CEF_DEBUG")) - fprintf(stderr, "[cef_host] createBrowser wire=%u browser=%p\n", wire_id, - (void*)browser.get()); + fprintf(stderr, "[cef_host] createBrowser wire=%u dispatched=%d\n", wire_id, + dispatched); } // Close one browser (kOpDisposeBrowser). Runs on the CEF UI thread. The actual @@ -1108,7 +1151,16 @@ void DoCreateBrowser(uint32_t wire_id, int w, int h, double dpr, uint32_t sid, void DoDisposeBrowser(uint32_t wire_id) { CEF_REQUIRE_UI_THREAD(); std::shared_ptr slot = LookupWireId(wire_id); - if (slot && slot->browser) slot->browser->GetHost()->CloseBrowser(true); + if (!slot) return; + if (slot->browser) { + slot->browser->GetHost()->CloseBrowser(true); + } else { + // H3: the async CreateBrowser hasn't bound the browser yet — record the close so + // OnAfterCreated closes it the instant it lands. Without this the create completes + // into a live orphan browser the Swift side has already forgotten (browsers[id] + // cleared), leaking a renderer + IOSurface until whole-host shutdown. + slot->close_requested = true; + } } void DoResize(const std::shared_ptr& slot, int w, int h, @@ -1493,6 +1545,15 @@ void DoKey(const std::shared_ptr& slot, int type, uint32_t modifiers, slot->browser->GetHost()->SendKeyEvent(ev); } +// C1: force a repaint. The host's first-present watchdog sends kOpInvalidate when a +// browser hasn't delivered its first frame within the deadline — re-requesting the +// frame self-heals a dropped/raced first paint instead of a permanently blank texture. 
+void DoInvalidate(const std::shared_ptr& slot) { + CEF_REQUIRE_UI_THREAD(); + if (slot && slot->browser && slot->browser->GetHost()) + slot->browser->GetHost()->Invalidate(PET_VIEW); +} + // Tear down the WHOLE process: close every browser, then quit the message loop. // Each browser's per-slot cleanup (maps, surface, retain-cycle break) runs in // OnBeforeClose as CEF processes the CloseBrowser(true). Sent when the host @@ -1517,7 +1578,14 @@ void IpcReadLoop() { if (!ReadAll(g_ipc_fd, hdr, 4)) break; uint32_t body_len = ReadU32BE(hdr); // Minimum valid body is 5 bytes (4 browserId + 1 op + 0 payload). - if (body_len < 5 || body_len > (64u << 20)) break; + // H9: a malformed/oversized length is a wire desync and tears down EVERY browser in + // this process — log it first so it isn't a silent, breadcrumb-less all-tiles exit + // (the IPC peer is trusted, so this only fires on a genuine framing bug). + if (body_len < 5 || body_len > (64u << 20)) { + fprintf(stderr, "[cef_host] rejecting malformed IPC frame, body_len=%u — exiting\n", + body_len); + break; + } std::vector body(body_len); if (!ReadAll(g_ipc_fd, body.data(), body_len)) break; uint32_t wire_id = ReadU32BE(body.data()); @@ -1699,6 +1767,10 @@ void IpcReadLoop() { if (!slot) break; CefPostTask(TID_UI, base::BindOnce(&DoResolveTargetId, slot)); break; + case kOpInvalidate: + if (!slot) break; + CefPostTask(TID_UI, base::BindOnce(&DoInvalidate, slot)); + break; case kOpImeCancel: if (!slot) break; CefPostTask(TID_UI, base::BindOnce(&DoImeCancel, slot)); @@ -2026,10 +2098,13 @@ int main(int argc, char* argv[]) { // (no concurrent SendFrame) and clear the fd so any late write is a no-op. { std::lock_guard lock(g_ipc_write_mutex); - if (g_ipc_fd >= 0) { - close(g_ipc_fd); - g_ipc_fd = -1; - } + // C3: store -1 FIRST (atomic exchange), THEN close — so a SendFrame that snapshots + // the fd under this lock never holds a value that's already closed/recycled. 
The + // GPU/compositor threads that call SendFrame aren't joined until CefShutdown below, + // so this ordering (not close-then-clear) is what makes a late paint write a safe + // no-op instead of a write into an unrelated recycled fd. + int fd = g_ipc_fd.exchange(-1); + if (fd >= 0) close(fd); } CefShutdown(); }