From 98903ddd61fbadf1baa4789e4d269ab6e26172fc Mon Sep 17 00:00:00 2001 From: Sebastian Hanss Date: Wed, 27 May 2026 13:08:52 +0200 Subject: [PATCH 01/10] fix: replace StartQuiz with GetRun to eliminate question-subset race StartQuiz assembled the question list by filtering this.state.questions (the frontend WS cache), which is populated incrementally as QuestionUpdateMessage responses arrive. When triggered from handleRemoteUpdates or SetQuiz before the cache was fully hydrated, only the questions received so far passed the filter, and QuizRunActor persisted that truncated list to MongoDB. A subsequent reload returned the same incomplete run. GetRun reads question IDs directly from this.state.quiz.groups (already loaded synchronously in SetQuiz), so the question list is always complete at run-creation time. The approved filter is preserved with a safe default: include any question not yet in the WS cache, since GetForUser is get-or-create and ignores the questions param if a run already exists. Shuffle logic is also moved into GetRun so it is applied on the initial get-or-create call. --- .../frontend/src/actors/CurrentQuizActor.ts | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/packages/frontend/src/actors/CurrentQuizActor.ts b/packages/frontend/src/actors/CurrentQuizActor.ts index 45dea603..58e60d8a 100644 --- a/packages/frontend/src/actors/CurrentQuizActor.ts +++ b/packages/frontend/src/actors/CurrentQuizActor.ts @@ -150,7 +150,7 @@ export class CurrentQuizActor extends StatefulActor [...acc, ...(g.questions ?? [])], [] as Id[]); + // Build question IDs from quiz metadata. Filter by approved when Question + // objects are already in the WS cache; if not yet loaded, include the + // question (safe default — GetForUser is get-or-create, so an existing + // run is returned unchanged and the questions param is ignored). + let questionIds: Id[] = (this.state.quiz?.groups ?? []) + .reduce((acc, g) => [...acc, ...(g.questions ?? [])], [] as Id[]) + .filter(q => { + const question = this.state.questions.find(qu => qu.uid === q); + return !question || question.approved; + }); + if (this.state.quiz.shuffleQuestions) { + questionIds = shuffle(Math.random)(questionIds); + } // IMPORTANT: quiz-scoped run actor (prefix + quizId), and use GetForUser (get-or-create) const run = await this.ask( @@ -799,7 +809,7 @@ export class CurrentQuizActor extends StatefulActor u.uid).orElse(toId("")); const quizId: Id = this.quiz.orElse(toId("")); From 6d75d235fb0a447ece6f23b90f455dbc72e2597a Mon Sep 17 00:00:00 2001 From: Sebastian Hanss Date: Wed, 27 May 2026 14:42:40 +0200 Subject: [PATCH 02/10] =?UTF-8?q?Fix=20question=20repetition=20glitch=20vi?= =?UTF-8?q?a=20optimistic=20counter=20increment=20When=20a=20student=20cli?= =?UTF-8?q?cked=20"Next=20question",=20the=20local=20answered/answers=20st?= =?UTF-8?q?ate=20was=20reset=20synchronously=20before=20the=20backend=20We?= =?UTF-8?q?bSocket=20round-trip=20completed.=20During=20that=20window=20ru?= =?UTF-8?q?n.counter=20was=20still=20the=20old=20value,=20so=20the=20old?= =?UTF-8?q?=20question=20reappeared=20with=20Submit=20re-enabled=20?= =?UTF-8?q?=E2=80=94=20allowing=20a=20second=20answer=20submission.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two complementary changes close the race: 1. CurrentQuizActor LogAnswer handler now performs an optimistic local updateState (counter, answers, correct, wrong) before sending the QuizRunActorMessages.Update to the backend. The actor's run state advances immediately, so React re-renders with the new counter and shows the next question without waiting for the WebSocket echo. The nextCounter value is captured before updateState to avoid a double-increment when the send() call reads this.state.run.counter. 2. RunningQuizTab ties the local state resets (answered, textAnswer, answers) to a useEffect on run?.counter rather than to the click handler. This means state cleanup is always driven by the actual question change, providing defence-in-depth for TEXT questions (which never set answered=true and previously had no Submit guard during the transition). --- packages/frontend/src/actors/CurrentQuizActor.ts | 13 ++++++++++++- .../src/components/quiz-tabs/RunningQuizTab.tsx | 13 ++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/packages/frontend/src/actors/CurrentQuizActor.ts b/packages/frontend/src/actors/CurrentQuizActor.ts index 58e60d8a..7897bc65 100644 --- a/packages/frontend/src/actors/CurrentQuizActor.ts +++ b/packages/frontend/src/actors/CurrentQuizActor.ts @@ -446,12 +446,23 @@ export class CurrentQuizActor extends StatefulActor { + if (draft.run) { + draft.run.counter = nextCounter; + draft.run.answers = answers as QuizRun["answers"]; + draft.run.correct = correct; + draft.run.wrong = wrong; + } + }); + this.send( `${actorUris.QuizRunActorPrefix}${this.quiz.orElse(toId("-"))}`, QuizRunActorMessages.Update({ uid: this.state.run.uid, answers, - counter: this.state.run.counter + 1, + counter: nextCounter, correct, wrong, }) diff --git a/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx b/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx index 35d2edaa..bd291742 100644 --- a/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx +++ b/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx @@ -1,4 +1,4 @@ -import { useState } from "react"; +import { useEffect, useState } from "react"; import { i18n } from "@lingui/core"; import MDEditor, { commands } from "@uiw/react-md-editor"; import "katex/dist/katex.css"; @@ -30,6 +30,13 @@ export const RunningQuizTab: React.FC<{ const [textAnswer, setTextAnswer] = useState(""); const [answers, setAnswers] = useState([]); const { run, questions: qData } = quizState; + + useEffect(() => { + setAnswered(false); + setTextAnswer(""); + setAnswers([]); + }, [run?.counter]); + console.log( "QUES", quizState.questions.length, @@ -71,10 +78,6 @@ export const RunningQuizTab: React.FC<{ const nextQuestion = () => { logQuestionClicked(); - - setAnswered(false); - setTextAnswer(""); - setAnswers([]); }; const updateAnswer = (index: number, value: boolean) => { From c9acdfa5453e9c6028c4dae30134e6547c6a5525 Mon Sep 17 00:00:00 2001 From: Sebastian Hanss Date: Wed, 10 Jun 2026 13:52:35 +0200 Subject: [PATCH 03/10] Fix question reset and repetition glitch via race-free GetRun MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both bugs traced to a single race: up to four GetRun messages fired concurrently during quiz startup (one from SetQuiz, up to three more from QuizUpdateMessage arrivals in handleRemoteUpdates). The handlers ran in parallel because ts-actors does not serialize message processing per actor (send dispatches via setTimeout + RxJS Subject, and the inbox's async handler yields at the first await without blocking the next dispatch). Each handler hit a non-atomic findOne + conditional insert in QuizRunActor.GetForUser, so each created a sibling run document with a distinct UID. The slowest GetRun completed 700-1300 ms later — after the student had begun answering — and its unconditional state replacement overwrote the optimistic run with a sibling at counter=0. The resulting counter regression triggered RunningQuizTab's useEffect, manifesting as: - the "Next question" button disappearing (Question Reset Bug), or - the previous question reappearing with Submit re-enabled (Question Repetition Glitch recurrence on this branch). Four complementary changes: 1. QuizRunActor.GetForUser: replace findOne + conditional insert with an atomic findOneAndUpdate({ $setOnInsert }, { upsert: true, returnDocument: "after" }). Only the actually-inserting call notifies collection subscribers. 2. QuizRunActor.beforeStart: create a unique index on (studentId, quizId) so concurrent upserts cannot race past document-level atomicity. Index creation is wrapped in try/catch so pre-existing duplicates from the legacy race don't block startup; the atomic upsert remains a strict improvement on its own. 3. CurrentQuizActor.GetRun: add a synchronous runFetchStarted flag set before the first await. JavaScript run-to-completion guarantees later concurrent invocations see the flag and bail out, deduplicating both the SetQuiz-dispatched and the handleRemoteUpdates-dispatched calls without a per-call-site guard. Error path resets the flag for retry. State replacement now uses incoming.counter >= s.run.counter so even a single GetRun cannot regress optimistic state. 4. CurrentQuizActor QuizRunUpdateMessage handler: defence-in-depth counter guard rejecting WS updates whose counter is below the current. With the optimistic increment in LogAnswer, the authoritative update arrives with an equal counter and is already a no-op via spread-merge; the guard only blocks stale regressions from out-of-order delivery. runFetchStarted (plus the previously-leaking runReady and questionsSubscribed) is reset in Reset, SetQuiz's new-quiz branch, and the QuizRunDeletedMessage handler. Deployment note: legacy duplicate (studentId, quizId) docs in MongoDB will block the unique index creation. Cleanup runbook is in coding/recapp/issues/Question reset Bug.md. --- packages/backend/src/actors/QuizRunActor.ts | 104 +++++++++++------- .../frontend/src/actors/CurrentQuizActor.ts | 62 +++++++++-- packages/frontend/src/pages/QuestionEdit.tsx | 1 + 3 files changed, 119 insertions(+), 48 deletions(-) diff --git a/packages/backend/src/actors/QuizRunActor.ts b/packages/backend/src/actors/QuizRunActor.ts index 8d9cb6ee..4a21c275 100644 --- a/packages/backend/src/actors/QuizRunActor.ts +++ b/packages/backend/src/actors/QuizRunActor.ts @@ -15,7 +15,6 @@ import { create } from "mutative"; import { identity, pick } from "rambda"; import { logger } from "../logger"; import { v4 } from "uuid"; -import { maybe } from "tsmonads"; type State = { cache: Map; @@ -53,6 +52,25 @@ export class QuizRunActor extends SubscribableActor { + try { + const db = await this.connector.db(); + await db + .collection(this.collectionName) + .createIndex({ studentId: 1, quizId: 1 }, { unique: true, name: "studentId_quizId_unique" }); + } catch (e) { + // Pre-existing duplicate (studentId, quizId) docs from the prior race condition + // will block this index creation. Continue without the index — the atomic upsert + // is still narrower than the previous read-then-create. Deduplicate and re-deploy + // to gain the strict guarantee. + this.logger.warn( + `QUIZRUNACTOR could not create unique index on (studentId, quizId): ${ + e instanceof Error ? e.message : String(e) + }` + ); + } + } + public async receive(from: ActorRef, message: QuizRunActorMessage): Promise { const [clientUserRole, clientUserId] = await this.determineRole(from); if (typeof message === "string" && message === "SHUTDOWN") { @@ -67,48 +85,52 @@ export class QuizRunActor extends SubscribableActor>(message, { GetForUser: async ({ studentId, questions }) => { + if (questions.length === 0) return undefined as any; const db = await this.connector.db(); - const mbRunId = maybe( - await db - .collection(this.collectionName) - .findOne({ studentId, quizId: this.uid }, { uid: 1, _id: 0 } as any) - ); - const result = mbRunId.match>( - async runId => { - const run = await this.getEntity(runId.uid); - // console.log("Found existing run", run); - this.logger.debug(`QUIZRUNACTOR found existing run present=${run ? "maybe" : "none"}`); - return run.match(identity, () => new Error()); - }, - async () => { - if (questions.length === 0) return undefined as any; - const run: QuizRun = { - uid: v4() as Id, - studentId, - quizId: this.uid, - counter: 0, - questions, - answers: [], - created: toTimestamp(), - updated: toTimestamp(), - correct: [], - wrong: [], - }; - await this.storeEntity(run); - for (const [subscriber, subscription] of this.state.collectionSubscribers) { - this.send( - subscriber, - new QuizRunUpdateMessage( - subscription.properties.length > 0 ? pick(subscription.properties, run) : run - ) - ); - } - // console.log("Created new run", run); - this.logger.info(`QUIZRUNACTOR created new run`); - return run; + const candidate: QuizRun = { + uid: v4() as Id, + studentId, + quizId: this.uid, + counter: 0, + questions, + answers: [], + created: toTimestamp(), + updated: toTimestamp(), + correct: [], + wrong: [], + }; + // Atomic upsert. Replaces the previous findOne + conditional insert, + // which allowed concurrent GetForUser calls to each create sibling + // run documents (Question Reset / Repetition glitch root cause). + const stored = (await db + .collection(this.collectionName) + .findOneAndUpdate( + { studentId, quizId: this.uid }, + { $setOnInsert: candidate }, + { upsert: true, returnDocument: "after" } + )) as unknown as QuizRun | null; + if (!stored) { + return new Error("Failed to upsert quiz run"); + } + if (stored.uid === candidate.uid) { + // We just inserted — notify collection subscribers using the + // in-memory candidate object (avoids leaking MongoDB's _id field). + for (const [subscriber, subscription] of this.state.collectionSubscribers) { + this.send( + subscriber, + new QuizRunUpdateMessage( + subscription.properties.length > 0 + ? pick(subscription.properties, candidate) + : candidate + ) + ); } - ); - return result; + this.logger.info(`QUIZRUNACTOR created new run`); + return candidate; + } + const existing = await this.getEntity(stored.uid); + this.logger.debug(`QUIZRUNACTOR returning existing run`); + return existing.match(identity, () => new Error()); }, Update: async run => { const existingRun = await this.getEntity(run.uid); diff --git a/packages/frontend/src/actors/CurrentQuizActor.ts b/packages/frontend/src/actors/CurrentQuizActor.ts index 7897bc65..6182d459 100644 --- a/packages/frontend/src/actors/CurrentQuizActor.ts +++ b/packages/frontend/src/actors/CurrentQuizActor.ts @@ -110,6 +110,7 @@ export type CurrentQuizState = { runReady: boolean; hasInitialQuestions: boolean; questionsSubscribed: boolean; + runFetchStarted: boolean; }; export class CurrentQuizActor extends StatefulActor { @@ -135,6 +136,7 @@ export class CurrentQuizActor extends StatefulActor { + const incomingCounter = (message.run as Partial).counter; + const currentCounter = draft.run?.counter ?? 0; + if ( + draft.run && + typeof incomingCounter === "number" && + incomingCounter < currentCounter + ) { + // Defence-in-depth: a WS update carrying a counter lower than the + // current (optimistic) state would regress run.counter and trigger + // RunningQuizTab's useEffect, resetting answered/answers and either + // re-enabling the previous question (repetition) or hiding the Next + // button (reset). Ignore it. + return; + } draft.run = { ...draft.run, ...message.run } as QuizRun; draft.result = { ...draft.result, ...message.run } as QuizRun; }); @@ -247,6 +263,7 @@ export class CurrentQuizActor extends StatefulActor { draft.run = undefined; + draft.runFetchStarted = false; }); return nothing(); } else if (message.tag === "StatisticsUpdateMessage") { @@ -331,10 +348,22 @@ export class CurrentQuizActor extends StatefulActor { + // Synchronous dedup. ts-actors does not serialize handler + // invocations per actor (Actor.send dispatches via setTimeout + + // RxJS Subject), so multiple GetRun messages queued by + // SetQuiz + handleRemoteUpdates would otherwise all start + // concurrent asks. The flag is set before the first await, so + // JS run-to-completion guarantees later invocations see it. + if (this.state.runFetchStarted) { + return (this.state.run ?? (undefined as unknown)) as QuizRun; + } + this.updateState(s => { s.runFetchStarted = true; }); + const studentId: Id = this.user.map(u => u.uid).orElse(toId("")); const quizId: Id = this.quiz.orElse(toId("")); @@ -355,14 +384,30 @@ export class CurrentQuizActor extends StatefulActor { s.runFetchStarted = false; }); + throw e; + } d.run({ quizId, studentIdHash: anonUserKey(String(studentId), String(quizId)), action: "ok" }); - this.updateState(s => { s.run = run as QuizRun; s.runReady = true; }); + this.updateState(s => { + // Counter guard. The backend atomic upsert removes the + // sibling-run race, but a late completion that races with + // LogAnswer's optimistic counter increment must not regress + // the visible state. + if (!s.run || (run?.counter ?? 0) >= (s.run.counter ?? 0)) { + s.run = run; + } + s.runReady = true; + }); // Subscribe & fetch questions exactly once, AFTER run is ready if (!this.state.questionsSubscribed) { @@ -372,7 +417,7 @@ export class CurrentQuizActor extends StatefulActor { const quiz: Quiz = await this.ask(actorUris.QuizActor, QuizActorMessages.Get(quizId)); @@ -787,6 +832,9 @@ export class CurrentQuizActor extends StatefulActor { runReady: false, hasInitialQuestions: false, questionsSubscribed: false, + runFetchStarted: false, }); const q = mbQuiz.map(q => q.quiz).orUndefined(); From 53b956c6b48904419f1c5b4089381a545daf200d Mon Sep 17 00:00:00 2001 From: Sebastian Hanss Date: Wed, 10 Jun 2026 14:44:13 +0200 Subject: [PATCH 04/10] fix(backend): reject expired sessions in bearerValid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bearerValid awaited SessionStore.GetSessionForUserId but discarded the result. SessionStore.sessionValid returns an Error *value* (not a rejection) for expired sessions, so the await completed normally and bearerValid resolved with the userId regardless. The "Accessed session … that expired …" warning was emitted but never seen by the caller — the WS handshake then merged into the stale session via StoreSession({ uid, actorSystem }) and proceeded, which is how a persistent 30-day cookie could serve a previous week's quiz despite the server-side session being expired. Check the result and reject with "Session expired" so the upstream authenticationMiddleware fails the handshake and the client falls through to its normal refresh/login flow. --- packages/backend/src/utils.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/backend/src/utils.ts b/packages/backend/src/utils.ts index e47aafc2..c45cb274 100644 --- a/packages/backend/src/utils.ts +++ b/packages/backend/src/utils.ts @@ -31,7 +31,10 @@ export const bearerValid = async (idTokenString: string): Promise => { .orUndefined(); try { if (userId) { - await system.ask(createActorUri("SessionStore"), SessionStoreMessages.GetSessionForUserId(userId)); + const result = await system.ask(createActorUri("SessionStore"), SessionStoreMessages.GetSessionForUserId(userId)); + if (result instanceof Error) { + return Promise.reject(new Error("Session expired")); + } return Promise.resolve(userId); } else { return Promise.reject(new Error("Unknown user")); From 9e12b66b026d87e95f86bfaf9151bdb10a32e454 Mon Sep 17 00:00:00 2001 From: Sebastian Hanss Date: Thu, 11 Jun 2026 10:41:54 +0200 Subject: [PATCH 05/10] fix(quiz): guard counter regression in SetQuiz/StartQuiz; instrument state.run Mirror GetRun's `incoming.counter >= current.counter` guard in the SetQuiz same-quiz branch and StartQuiz handler. Closes a defense-in-depth gap against late-returning GetUserRun/GetForUser races. Adds RUN_STATE_WRITE debugLog tag and instruments every state.run mutation in CurrentQuizActor + RunningQuizTab's counter useEffect. Backend QuizActor.GetUserRun logs returned runUid+counter; Clear() WARN includes quizId, deletedCount, subscriber counts, and caller. The 2026-06-10 failures under --workers=4 are not explained by these paths (Clear didn't fire in the failure window). The telemetry is to identify the actual mechanism in the next test run. --- packages/backend/src/actors/QuizActor.ts | 6 +- packages/backend/src/actors/QuizRunActor.ts | 11 +- .../frontend/src/actors/CurrentQuizActor.ts | 114 +++++++++++++++++- .../components/quiz-tabs/RunningQuizTab.tsx | 8 ++ packages/frontend/src/utils/debugLog.ts | 24 ++++ 5 files changed, 157 insertions(+), 6 deletions(-) diff --git a/packages/backend/src/actors/QuizActor.ts b/packages/backend/src/actors/QuizActor.ts index 3ede3f44..543ef3d9 100644 --- a/packages/backend/src/actors/QuizActor.ts +++ b/packages/backend/src/actors/QuizActor.ts @@ -284,10 +284,12 @@ export class QuizActor extends SubscribableActor { const db = await this.connector.db(); - const mbRun = maybe(await db.collection("quizruns").findOne({ studentId, quizId })); + const found = await db.collection("quizruns").findOne({ studentId, quizId }); + const mbRun = maybe(found); this.logger.debug( `GETUSERRUN studentId=${String(studentId)} quizId=${String(quizId)} ` + - `runPresent=${mbRun ? "maybe" : "none"}` + `runPresent=${found ? "yes" : "no"} ` + + `runUid=${found?.uid ?? "-"} counter=${found?.counter ?? "-"}` ); return mbRun.match(identity, () => new Error("No run for user")); }, diff --git a/packages/backend/src/actors/QuizRunActor.ts b/packages/backend/src/actors/QuizRunActor.ts index 4a21c275..38631fb0 100644 --- a/packages/backend/src/actors/QuizRunActor.ts +++ b/packages/backend/src/actors/QuizRunActor.ts @@ -162,7 +162,16 @@ export class QuizRunActor extends SubscribableActor { const db = await this.connector.db(); const result = await db.collection(this.collectionName).deleteMany({ quizId: this.uid }); - logger.warn(JSON.stringify(result)); + const runSubscriberCount = Array.from(this.state.subscribers.values()) + .reduce((acc, set) => acc + set.size, 0); + const collectionSubscriberCount = this.state.collectionSubscribers.size; + logger.warn( + `QUIZRUNACTOR_CLEAR quizId=${String(this.uid)} ` + + `deletedCount=${result.deletedCount} ` + + `runSubscribers=${runSubscriberCount} ` + + `collectionSubscribers=${collectionSubscriberCount} ` + + `from=${String((from as any)?.name ?? from)}` + ); this.state.cache = new Map(); this.state.subscribers.forEach(subscriberSet => subscriberSet.forEach(subscriber => this.send(subscriber, new QuizRunDeletedMessage())) diff --git a/packages/frontend/src/actors/CurrentQuizActor.ts b/packages/frontend/src/actors/CurrentQuizActor.ts index 6182d459..fd8b6ce2 100644 --- a/packages/frontend/src/actors/CurrentQuizActor.ts +++ b/packages/frontend/src/actors/CurrentQuizActor.ts @@ -243,6 +243,7 @@ export class CurrentQuizActor extends StatefulActor { const incomingCounter = (message.run as Partial).counter; + const beforeCounter = draft.run?.counter ?? null; const currentCounter = draft.run?.counter ?? 0; if ( draft.run && @@ -254,14 +255,35 @@ export class CurrentQuizActor extends StatefulActor { + d.runState({ + source: "QuizRunDeleted", + beforeCounter: draft.run?.counter ?? null, + afterCounter: null, + runUidBefore: draft.run?.uid, + }); draft.run = undefined; draft.runFetchStarted = false; }); @@ -334,6 +356,12 @@ export class CurrentQuizActor extends StatefulActor>(m, { Reset: async () => { this.updateState(draft => { + d.runState({ + source: "Reset", + beforeCounter: draft.run?.counter ?? null, + afterCounter: null, + runUidBefore: draft.run?.uid, + }); draft.quiz = {} as Quiz; draft.comments = []; draft.questions = []; @@ -403,8 +431,26 @@ export class CurrentQuizActor extends StatefulActor= (s.run.counter ?? 0)) { s.run = run; + d.runState({ + source: "GetRun", + beforeCounter, + afterCounter: s.run?.counter ?? null, + runUidBefore, + runUidAfter: s.run?.uid, + }); + } else { + d.runState({ + source: "GetRun", + beforeCounter, + afterCounter: beforeCounter, + runUidBefore, + blocked: true, + reason: "stale-counter", + }); } s.runReady = true; }); @@ -450,8 +496,30 @@ export class CurrentQuizActor extends StatefulActor { - draft.run = run; - draft.result = run; + const beforeCounter = draft.run?.counter ?? null; + const runUidBefore = draft.run?.uid; + // Same guard as GetRun: a delayed StartQuiz completion + // must not regress an already-advanced optimistic state. + if (!draft.run || (run?.counter ?? 0) >= (draft.run.counter ?? 0)) { + draft.run = run; + draft.result = run; + d.runState({ + source: "StartQuiz", + beforeCounter, + afterCounter: draft.run?.counter ?? null, + runUidBefore, + runUidAfter: draft.run?.uid, + }); + } else { + d.runState({ + source: "StartQuiz", + beforeCounter, + afterCounter: beforeCounter, + runUidBefore, + blocked: true, + reason: "stale-counter", + }); + } }); return unit(); @@ -495,10 +563,18 @@ export class CurrentQuizActor extends StatefulActor { if (draft.run) { + const beforeCounter = draft.run.counter; draft.run.counter = nextCounter; draft.run.answers = answers as QuizRun["answers"]; draft.run.correct = correct; draft.run.wrong = wrong; + d.runState({ + source: "LogAnswer", + beforeCounter, + afterCounter: nextCounter, + runUidBefore: draft.run.uid, + runUidAfter: draft.run.uid, + }); } }); @@ -794,7 +870,33 @@ export class CurrentQuizActor extends StatefulActor 0) { this.updateState(draft => { - draft.run = run as QuizRun; + // Counter guard parallels GetRun. Without this, a + // late-returning GetUserRun whose findOne saw the + // DB before LogAnswer's Update committed can + // regress state.run.counter against an optimistic + // state already populated by a parallel GetRun. + const incoming = run as QuizRun; + const beforeCounter = draft.run?.counter ?? null; + const runUidBefore = draft.run?.uid; + if (!draft.run || (incoming.counter ?? 0) >= (draft.run.counter ?? 0)) { + draft.run = incoming; + d.runState({ + source: "SetQuiz-same", + beforeCounter, + afterCounter: draft.run?.counter ?? null, + runUidBefore, + runUidAfter: draft.run?.uid, + }); + } else { + d.runState({ + source: "SetQuiz-same", + beforeCounter, + afterCounter: beforeCounter, + runUidBefore, + blocked: true, + reason: "stale-counter", + }); + } }); } else { this.send(this.ref, CurrentQuizMessages.StartQuiz()); @@ -825,6 +927,12 @@ export class CurrentQuizActor extends StatefulActor { + d.runState({ + source: "SetQuiz-different", + beforeCounter: draft.run?.counter ?? null, + afterCounter: null, + runUidBefore: draft.run?.uid, + }); draft.run = undefined; draft.result = undefined; draft.questionStats = undefined; diff --git a/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx b/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx index bd291742..dee299d8 100644 --- a/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx +++ b/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx @@ -18,6 +18,7 @@ import { isMultiChoiceAnsweredCorrectly } from "../../utils"; import { Trans } from "@lingui/react"; import { CHECK_SYMBOL, X_SYMBOL } from "../../constants/layout"; import { CORRECT_COLOR, WRONG_COLOR, CORRECT_COLOR_TEXT, WRONG_COLOR_TEXT } from "../../colorPalette"; +import { d } from "../../utils/debugLog"; export const RunningQuizTab: React.FC<{ isUserInTeachersList:boolean; @@ -32,6 +33,13 @@ export const RunningQuizTab: React.FC<{ const { run, questions: qData } = quizState; useEffect(() => { + d.runState({ + source: "useEffect-counter", + beforeCounter: null, + afterCounter: run?.counter ?? null, + runUidAfter: run?.uid, + reason: "counter-dep-fired", + }); setAnswered(false); setTextAnswer(""); setAnswers([]); diff --git a/packages/frontend/src/utils/debugLog.ts b/packages/frontend/src/utils/debugLog.ts index 629c6337..b46153d8 100644 --- a/packages/frontend/src/utils/debugLog.ts +++ b/packages/frontend/src/utils/debugLog.ts @@ -1,6 +1,7 @@ type LogTag = | "AUTH" | "RUN" + | "RUN_STATE_WRITE" | "LIST_REQUEST" | "LIST_RESULT" | "WS_OPEN" @@ -23,6 +24,28 @@ type RunLog = BaseLog & { error?: string; }; +// Tracks every write to CurrentQuizActor's state.run (or attempted write +// blocked by a counter guard). Use to diagnose counter regressions and +// to verify which code path produced any given state change. +type RunStateWriteLog = BaseLog & { + source: + | "GetRun" + | "StartQuiz" + | "SetQuiz-same" + | "SetQuiz-different" + | "LogAnswer" + | "QuizRunUpdate" + | "QuizRunDeleted" + | "Reset" + | "useEffect-counter"; + beforeCounter: number | null; // null = state.run was undefined + afterCounter: number | null; // null = state.run set/left as undefined + runUidBefore?: string; + runUidAfter?: string; + blocked?: boolean; // true = guard rejected the write + reason?: string; +}; + type ListRequestLog = BaseLog & { transport: "http" | "actor"; urlOrMsg?: string; @@ -72,6 +95,7 @@ export function dlog( export const d = { auth: (p: Omit) => dlog("AUTH", p), run: (p: Omit) => dlog("RUN", p), + runState: (p: Omit) => dlog("RUN_STATE_WRITE", p), listReq: (p: Omit) => dlog("LIST_REQUEST", p), listRes: (p: Omit) => dlog("LIST_RESULT", p), wsLife: (p: Omit) => dlog("WS_OPEN", p), From 5497b375c04c4718541748c818787a9929a36506 Mon Sep 17 00:00:00 2001 From: Sebastian Hanss Date: Thu, 11 Jun 2026 17:02:08 +0200 Subject: [PATCH 06/10] chore(frontend): remove dev console.log noise in RunningQuizTab Four console.log statements that fire on every render and on every radio click (one of them serializes the full quizState). Under CPU throttling these measurably extend the per-render cost and crowd the trace viewer. No behavior change. --- .../src/components/quiz-tabs/RunningQuizTab.tsx | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx b/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx index dee299d8..9960f74b 100644 --- a/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx +++ b/packages/frontend/src/components/quiz-tabs/RunningQuizTab.tsx @@ -45,25 +45,12 @@ export const RunningQuizTab: React.FC<{ setAnswers([]); }, [run?.counter]); - console.log( - "QUES", - quizState.questions.length, - "RUN", - quizState.run, - "ENTRY", - quizState.run?.counter, - "FOO", - quizState.questions[0] - ); - const questions = run?.questions.map(id => qData.find(q => q.uid === id)) ?? []; const currentQuestion = questions[run?.counter ?? 0]; const questionId = currentQuestion?.uid ?? toId(""); const questionText = questions.at(run?.counter ?? 0)?.text; const { rendered, isStale } = useRendered({ value: questionText ?? "" }); - console.log("ANSWERSTATE", quizState, run); - if (!quizState.run || !quizState.questions) { return null; } @@ -97,12 +84,10 @@ export const RunningQuizTab: React.FC<{ a[i] = false; } a[index] = value; - console.log("ANSWERS NEW", a, value); setAnswers(a); } else { const a = answersCopy; a[index] = value; - console.log("ANSWERS", a, value); setAnswers(a); } }; From 75cd149d105aed9a6c9cb48eb762fe6fe59ca095 Mon Sep 17 00:00:00 2001 From: Sebastian Hanss Date: Thu, 11 Jun 2026 18:19:59 +0200 Subject: [PATCH 07/10] Adds diagnostics for debugging --- packages/frontend/Dockerfile | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/packages/frontend/Dockerfile b/packages/frontend/Dockerfile index 56f45ff6..7dd7bfce 100644 --- a/packages/frontend/Dockerfile +++ b/packages/frontend/Dockerfile @@ -13,6 +13,21 @@ COPY packages/models/tsconfig*.json ./packages/models/ RUN npm ci +# DIAGNOSTIC: surface actor-crash stacks. ts-actors swallows the underlying +# Error in its supervisor warning; this rewrites the catch block to include +# the full stack and a separate console.error. Revert after the +# getuserrun-counter-regression investigation concludes. +RUN node -e " \ + const fs = require('fs'); \ + const p = '/app/node_modules/ts-actors/lib/src/ActorSystem.js'; \ + const s = fs.readFileSync(p, 'utf8'); \ + const oldLine = 'this.logger.warn(\`Unhandled exception in \${target.name}, applying strategy \${target.strategy}\`);'; \ + const newLines = 'this.logger.warn(\`Unhandled exception in \${target.name}, applying strategy \${target.strategy}: \${(e && e.stack) ? e.stack : String(e)}\`);\n console.error(\"[ts-actors crash]\", target.name, e);'; \ + if (!s.includes(oldLine)) { console.error('PATCH FAILED: target line not found in', p); process.exit(1); } \ + fs.writeFileSync(p, s.replace(oldLine, newLines)); \ + console.log('Patched ts-actors ActorSystem.js to surface error stacks'); \ +" + COPY packages/frontend ./packages/frontend COPY packages/models ./packages/models From 04cb46db61189de4ae794fdc831008721c8eff10 Mon Sep 17 00:00:00 2001 From: Sebastian Hanss Date: Thu, 11 Jun 2026 19:49:42 +0200 Subject: [PATCH 08/10] fix(frontend): guard UserStore.GetNames asks from supervisor-shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DistributedActorSystem.js:41 rejects timed-out asks with a plain string Promise.reject(...). When the awaiting handler doesn't catch, the rejection unwinds out of the receive method, the supervisor catches it, and applies the Shutdown strategy — taking down the whole CurrentQuiz (or LocalUser) actor and leaving the page frozen. Observed via the getuserrun-counter-regression Playwright runs under CPU + network throttle: the client-side actor inbox saturates, the 5s ask timer fires before the message is even dispatched to NATS, and the session dies. Both GetNames sites fetch cosmetic data (teacher display names) and fire on every incoming QuizUpdateMessage broadcast, so they were hitting the timeout repeatedly. Now they degrade to an empty array on failure; the next QuizUpdateMessage retries. A new RUN_STATE_WRITE source "AskFailure" is added so we can count post-fix timeout occurrences in the trace data. Out of scope: - ~18 other ask() sites have the same vulnerability. To be assessed case-by-case after we confirm this surgical fix eliminates the observed crash. - The string-rejection contract in DistributedActorSystem.js itself is the root cause; fixing that upstream would let callers use a single `instanceof Error` check. --- .../frontend/src/actors/CurrentQuizActor.ts | 26 ++++++++++++++++--- .../frontend/src/actors/LocalUserActor.ts | 18 ++++++++++--- packages/frontend/src/utils/debugLog.ts | 3 ++- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/packages/frontend/src/actors/CurrentQuizActor.ts b/packages/frontend/src/actors/CurrentQuizActor.ts index fd8b6ce2..29122472 100644 --- a/packages/frontend/src/actors/CurrentQuizActor.ts +++ b/packages/frontend/src/actors/CurrentQuizActor.ts @@ -1024,10 +1024,28 @@ export class CurrentQuizActor extends StatefulActor { - const names: Array<{ nickname?: string; username: string }> = await this.ask( - actorUris.UserStore, - UserStoreMessages.GetNames(this.state.quiz.teachers) - ); + // DistributedActorSystem rejects the ask Promise with a string on + // timeout; without the try/catch the rejection unwinds out of the + // handler and the supervisor shuts CurrentQuiz down (see + // getuserrun-counter-regression investigation). Names are cosmetic; + // degrade to empty rather than killing the session. + let names: Array<{ nickname?: string; username: string }> = []; + try { + const result = await this.ask( + actorUris.UserStore, + UserStoreMessages.GetNames(this.state.quiz.teachers) + ); + if (Array.isArray(result)) { + names = result; + } + } catch (e) { + d.runState({ + source: "AskFailure", + beforeCounter: null, + afterCounter: null, + reason: `GetTeacherNames ask failed: ${String(e)}`, + }); + } this.updateState(draft => { draft.teacherNames = names.map(n => n.nickname ? `${n.username} (${n.nickname})` : n.username diff --git a/packages/frontend/src/actors/LocalUserActor.ts b/packages/frontend/src/actors/LocalUserActor.ts index cf673477..0763b8fa 100644 --- a/packages/frontend/src/actors/LocalUserActor.ts +++ b/packages/frontend/src/actors/LocalUserActor.ts @@ -115,9 +115,21 @@ export class LocalUserActor extends StatefulActor { if (message.quiz.uid) { const isTeacher = message.quiz.teachers?.includes(this.state.user?.uid ?? toId("")); diff --git a/packages/frontend/src/utils/debugLog.ts b/packages/frontend/src/utils/debugLog.ts index b46153d8..3a04c002 100644 --- a/packages/frontend/src/utils/debugLog.ts +++ b/packages/frontend/src/utils/debugLog.ts @@ -37,7 +37,8 @@ type RunStateWriteLog = BaseLog & { | "QuizRunUpdate" | "QuizRunDeleted" | "Reset" - | "useEffect-counter"; + | "useEffect-counter" + | "AskFailure"; beforeCounter: number | null; // null = state.run was undefined afterCounter: number | null; // null = state.run set/left as undefined runUidBefore?: string; From 870162ba1f5008d8f5318b29010ace7f693decd4 Mon Sep 17 00:00:00 2001 From: Sebastian Hanss Date: Thu, 11 Jun 2026 21:00:22 +0200 Subject: [PATCH 09/10] fix(frontend): make CurrentQuiz + LocalUser actors resilient to handler exceptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ts-actors' default supervisor strategy is "Shutdown" — any unhandled throw from a message handler kills the whole actor. For long-lived, user-facing session actors that own UI state, this is the wrong default: one timed-out ask (string-rejected via DistributedActorSystem.js:41) takes the page down. Switch both CurrentQuizActor and LocalUserActor to "Resume". The supervisor catch block now becomes a no-op after logging (ActorSystem.js:246) — the warning + console.error from the existing Dockerfile patch still fire, so we don't lose diagnostic visibility, but the actor keeps processing the next message. This subsumes the GetTeacherNames try/catch shipped in the previous commit for the ask-timeout case, but the try/catch is still kept for the useful side benefit (graceful empty-array fallback so the next QuizUpdateMessage retries). --- packages/frontend/src/actors/CurrentQuizActor.ts | 7 +++++++ packages/frontend/src/actors/LocalUserActor.ts | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/packages/frontend/src/actors/CurrentQuizActor.ts b/packages/frontend/src/actors/CurrentQuizActor.ts index 29122472..f17546ba 100644 --- a/packages/frontend/src/actors/CurrentQuizActor.ts +++ b/packages/frontend/src/actors/CurrentQuizActor.ts @@ -114,6 +114,13 @@ export type CurrentQuizState = { }; export class CurrentQuizActor extends StatefulActor { + // Override the ts-actors default ("Shutdown"). A long-lived session actor + // shouldn't die from one handler exception (e.g. a timed-out ask raising + // the string-rejection contract from DistributedActorSystem.js:41). The + // supervisor still logs the warning + console.error; we just keep + // processing the next message instead of freezing the page. + strategy = "Resume" as const; + private quiz: Maybe = nothing(); private user: Maybe = nothing(); private firstListReported = false; // for debugging: emit a single LIST_RESULT when the list goes from 0 → N for the first time. diff --git a/packages/frontend/src/actors/LocalUserActor.ts b/packages/frontend/src/actors/LocalUserActor.ts index 0763b8fa..ad337a57 100644 --- a/packages/frontend/src/actors/LocalUserActor.ts +++ b/packages/frontend/src/actors/LocalUserActor.ts @@ -60,6 +60,13 @@ export type LocalUserState = { }; export class LocalUserActor extends StatefulActor { + // Override the ts-actors default ("Shutdown"). A long-lived session actor + // shouldn't die from one handler exception (e.g. a timed-out ask raising + // the string-rejection contract from DistributedActorSystem.js:41). The + // supervisor still logs the warning + console.error; we just keep + // processing the next message instead of freezing the page. + strategy = "Resume" as const; + constructor(name: string, system: ActorSystem) { super(name, system); this.state = { From 2ac8af0fc6b5cb9ac0ecd18f65f3039717ee64b0 Mon Sep 17 00:00:00 2001 From: Sebastian Hanss Date: Thu, 11 Jun 2026 21:57:21 +0200 Subject: [PATCH 10/10] Cleanup --- docker/docker-compose.prod.yaml | 4 ++-- packages/frontend/Dockerfile | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docker/docker-compose.prod.yaml b/docker/docker-compose.prod.yaml index b00131f6..077fa2b1 100644 --- a/docker/docker-compose.prod.yaml +++ b/docker/docker-compose.prod.yaml @@ -10,11 +10,11 @@ services: - VITE_API_URL=${BACKEND_URI:-http://localhost:3123} - VITE_APP_URL=${FRONTEND_URI:-http://localhost:5173} - VITE_INACTIVITY_LIMIT=1 - - VITE_DEBUG_RECAPP=1 # change to 0 for production + - VITE_DEBUG_RECAPP=0 # change to 1 for diagnostic builds environment: - FRONTEND_URI=${FRONTEND_URI} - BACKEND_URI=${BACKEND_URI} - - VITE_DEBUG_RECAPP=1 # change to 0 for production + - VITE_DEBUG_RECAPP=0 # change to 1 for diagnostic builds ports: - "5173:80" depends_on: diff --git a/packages/frontend/Dockerfile b/packages/frontend/Dockerfile index 7dd7bfce..a587f820 100644 --- a/packages/frontend/Dockerfile +++ b/packages/frontend/Dockerfile @@ -13,18 +13,19 @@ COPY packages/models/tsconfig*.json ./packages/models/ RUN npm ci -# DIAGNOSTIC: surface actor-crash stacks. ts-actors swallows the underlying -# Error in its supervisor warning; this rewrites the catch block to include -# the full stack and a separate console.error. Revert after the -# getuserrun-counter-regression investigation concludes. +# Surface actor-crash context. ts-actors swallows the underlying Error in +# its supervisor warning; this rewrites the catch block to include the +# full stack inline. Kept on for production so backend log inspection +# stays useful. Browser-side noise stays minimal — only the warn line is +# affected; no extra console.error per crash. RUN node -e " \ const fs = require('fs'); \ const p = '/app/node_modules/ts-actors/lib/src/ActorSystem.js'; \ const s = fs.readFileSync(p, 'utf8'); \ const oldLine = 'this.logger.warn(\`Unhandled exception in \${target.name}, applying strategy \${target.strategy}\`);'; \ - const newLines = 'this.logger.warn(\`Unhandled exception in \${target.name}, applying strategy \${target.strategy}: \${(e && e.stack) ? e.stack : String(e)}\`);\n console.error(\"[ts-actors crash]\", target.name, e);'; \ + const newLine = 'this.logger.warn(\`Unhandled exception in \${target.name}, applying strategy \${target.strategy}: \${(e && e.stack) ? e.stack : String(e)}\`);'; \ if (!s.includes(oldLine)) { console.error('PATCH FAILED: target line not found in', p); process.exit(1); } \ - fs.writeFileSync(p, s.replace(oldLine, newLines)); \ + fs.writeFileSync(p, s.replace(oldLine, newLine)); \ console.log('Patched ts-actors ActorSystem.js to surface error stacks'); \ "