From 6f23af4b271e6b5df6c76fbb775d9f694da0c330 Mon Sep 17 00:00:00 2001 From: Erick Date: Mon, 25 May 2026 12:01:53 -0700 Subject: [PATCH 1/6] fix(reward): clear rewardDirty flag on skip path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The triviality-gate skip path wrote reward.skipped=true but never cleared rewardDirty from meta_json. Since episodeRewardIsDirty() checks the rewardDirty object flag before the skip gate, skipped episodes with the flag set would re-enter the dirty scan on every bridge restart, scoring and re-skipping indefinitely. Normal scoring path already had rewardDirty: undefined — this mirrors that pattern in the skip branch. Co-Authored-By: Claude Sonnet 4.6 --- apps/memos-local-plugin/core/reward/reward.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/memos-local-plugin/core/reward/reward.ts b/apps/memos-local-plugin/core/reward/reward.ts index 1b6d3354c..937454fea 100644 --- a/apps/memos-local-plugin/core/reward/reward.ts +++ b/apps/memos-local-plugin/core/reward/reward.ts @@ -128,6 +128,7 @@ export function createRewardRunner(deps: RewardDeps): RewardRunner { trigger: input.trigger, skipped: true, }, + rewardDirty: undefined, }); } catch (err) { warnings.push({ From 0fb04158cec33aa023712c3c769106b1deb42777 Mon Sep 17 00:00:00 2001 From: Erick Date: Mon, 25 May 2026 12:13:38 -0700 Subject: [PATCH 2/6] fix(reward): set r_task=0 on skipped episodes to stop dirty-scan loop The skip path wrote reward.skipped=true but never called setRTask(), leaving r_task=NULL. For abandoned episodes episodeRewardIsDirty() falls through to the r_task==null check and returns true, causing those episodes to re-enter the dirty scan on every bridge start, get re-skipped, and loop indefinitely. Setting r_task=0 before updateMeta means the null-r_task branch in episodeRewardIsDirty() no longer fires, permanently clearing the episode from the dirty scan after its first skip. 
Co-Authored-By: Claude Sonnet 4.6 --- apps/memos-local-plugin/core/reward/reward.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/memos-local-plugin/core/reward/reward.ts b/apps/memos-local-plugin/core/reward/reward.ts index 937454fea..d2247932c 100644 --- a/apps/memos-local-plugin/core/reward/reward.ts +++ b/apps/memos-local-plugin/core/reward/reward.ts @@ -119,6 +119,7 @@ export function createRewardRunner(deps: RewardDeps): RewardRunner { try { const existingMeta = episode.meta ?? {}; const wasFinalized = existingMeta.closeReason === "finalized"; + deps.episodesRepo.setRTask(input.episodeId, 0); deps.episodesRepo.updateMeta(input.episodeId, { ...(wasFinalized ? {} : { closeReason: "abandoned", abandonReason: skipReason }), reward: { From 6a9c39876e5dfb1ddf1ddf003f45aa461210af27 Mon Sep 17 00:00:00 2001 From: Erick Date: Mon, 25 May 2026 12:38:45 -0700 Subject: [PATCH 3/6] fix(dirty-scan): paginate closed-episode scan to remove 500-row ceiling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clampLimit() in _helpers.ts caps list() at 500 regardless of the limit argument passed. The prior limit:1000 change (87165daf) was a no-op — both values hit the same ceiling, leaving episodes beyond rank 500 permanently invisible to the dirty scan. Replace both scan sites (startup + periodic) with collectDirtyClosedEpisodes(), which paginates in 500-row pages until exhausted. All closed episodes are now covered regardless of total count. This was also the root cause of the "dirty-17" mystery: those episodes were at ranks 536-924, outside the 500-row window. 
Co-Authored-By: Claude Sonnet 4.6 --- .../core/pipeline/memory-core.ts | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts index 4974ee16d..be65c680b 100644 --- a/apps/memos-local-plugin/core/pipeline/memory-core.ts +++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts @@ -589,9 +589,7 @@ export function createMemoryCore( if (nowMs - lastDirtyClosedScan < 30_000) return; lastDirtyClosedScan = nowMs; try { - const dirtyClosed = handle.repos.episodes - .list({ status: "closed", limit: 500 }) - .filter((ep) => !isLightweightEpisode(ep) && episodeRewardIsDirty(ep)); + const dirtyClosed = collectDirtyClosedEpisodes(); if (dirtyClosed.length > 0) { await recoverDirtyClosedEpisodes(dirtyClosed); } @@ -914,9 +912,7 @@ export function createMemoryCore( await recoverOpenEpisodesAsSessionEnd(stale); } } - const dirtyClosed = handle.repos.episodes - .list({ status: "closed", limit: 500 }) - .filter((ep) => !isLightweightEpisode(ep) && episodeRewardIsDirty(ep)); + const dirtyClosed = collectDirtyClosedEpisodes(); if (dirtyClosed.length > 0) { await recoverDirtyClosedEpisodes(dirtyClosed); } @@ -1262,6 +1258,21 @@ export function createMemoryCore( await handle.flush(); } + function collectDirtyClosedEpisodes(): (EpisodeRow & { meta?: Record })[] { + const dirty: (EpisodeRow & { meta?: Record })[] = []; + let offset = 0; + const pageSize = 500; + while (true) { + const page = handle.repos.episodes.list({ status: "closed", limit: pageSize, offset }); + for (const ep of page) { + if (episodeRewardIsDirty(ep)) dirty.push(ep); + } + if (page.length < pageSize) break; + offset += pageSize; + } + return dirty; + } + function episodeRewardIsDirty(ep: EpisodeRow & { meta?: Record }): boolean { const meta = ep.meta ?? 
{}; if (meta.lightweightMemory === true) return false; From 0e7109d06e87d5069d9bb68c3f78047634b665bd Mon Sep 17 00:00:00 2001 From: Erick Date: Sat, 30 May 2026 07:18:12 -0700 Subject: [PATCH 4/6] fix(reward): drain reward after dirty-closed recovery in lightweight mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit recoverDirtyClosedEpisodes relied on flush() → reward.drain() to fire R_human scoring after the capture pass. flush() returns early in lightweight mode (the default), so the reward subscriber's 30 s timer was cancelled by shutdown() before it fired — leaving traceCount permanently mismatched and the episode dirty on every restart. Fix: after flush() drains the capture pass, explicitly call rewardRunner.run() for any episode that episodeRewardIsDirty() still considers dirty — mirroring the pattern already used by recoverOpenEpisodesAsSessionEnd. A second flush() then drains downstream (L2 / L3 / skills). Regression test: dirty-reward recovery does not insert orphan traces — seeded episode with traceCount=1 and 2 trace IDs (one having a tool call whose endedAt differs from the trace ts, which produces an orphan step in runReflect). Verifies that: 1. trace_ids_json stays at 2 after recovery (orphan insert guard). 2. traceCount is updated to 2 after the first recovery pass. 3. A second restart does not re-score the episode (loop stopped). Also fixes the pre-existing test "rescoring closed episodes when traces were appended after the last reward" which failed for the same reason. 
Co-Authored-By: Claude Sonnet 4.6 --- .../core/pipeline/memory-core.ts | 12 ++ .../tests/unit/pipeline/memory-core.test.ts | 185 ++++++++++++++++++ 2 files changed, 197 insertions(+) diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts index be65c680b..c7276ac8a 100644 --- a/apps/memos-local-plugin/core/pipeline/memory-core.ts +++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts @@ -1237,6 +1237,7 @@ export function createMemoryCore( episodes: Array }>, ): Promise { log.info("init.dirty_closed_episodes.rescore", { count: episodes.length }); + const rescored: EpisodeId[] = []; for (const ep of episodes) { if (isLightweightEpisode(ep)) continue; const episodeId = ep.id as EpisodeId; @@ -1254,6 +1255,17 @@ export function createMemoryCore( episode: snapshot, closedBy: "finalized", }); + rescored.push(episodeId); + } + // Drain the capture pass (patches reflections + α onto existing traces). + await handle.flush(); + // In lightweight mode flush() returns before draining the reward + // subscriber. Explicitly run reward for any episode whose trace count + // still mismatches — mirrors the pattern in recoverOpenEpisodesAsSessionEnd. + for (const episodeId of rescored) { + if (episodeRewardIsDirty(handle.repos.episodes.getById(episodeId) ?? 
{} as never)) { + await handle.rewardRunner.run({ episodeId, feedback: [], trigger: "manual" }); + } } await handle.flush(); } diff --git a/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts b/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts index 88d5cbbd4..5d6f3237d 100644 --- a/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts +++ b/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts @@ -1456,4 +1456,189 @@ algorithm: expect(meta.reward?.traceCount).toBe(1); expect(meta.reward?.traceIds).toEqual(["tr_missing_reward"]); }); + + it("dirty-reward recovery does not insert orphan traces (regression: rescore loop guard)", async () => { + // Regression test for the rescore loop: + // When recoverDirtyClosedEpisodes re-emits episode.finalized, capture's + // runReflect used to insert new trace rows for "orphan steps" — steps + // whose timestamps didn't match any existing DB row. For recovered + // episodes this happens whenever a trace has tool calls with endedAt + // timestamps different from the trace's own ts, because the snapshot + // rebuilds a separate tool-role turn for each call. + // + // Without the guard the orphan insert grows trace_ids_json, keeping + // reward.traceCount != traceIds.length forever and looping on every + // bridge restart. The guard (meta.recoveryReason === "dirty_reward_rescore") + // skips the insert, so trace_ids_json stays stable and the episode + // stops appearing dirty after a single recovery pass. 
+ + home = await makeTmpHome({ agent: "openclaw" }); + + const seeder = await bootstrapMemoryCore({ + agent: "openclaw", + home: home.home, + config: home.config, + pkgVersion: "rescore-loop-seed", + }); + await seeder.init(); + await seeder.shutdown(); + + const Sqlite = (await import("better-sqlite3")).default; + const writeDb = new Sqlite(home.home.dbFile); + const BASE = Date.now() - 5_000; + + writeDb + .prepare( + `INSERT INTO sessions (id, agent, started_at, last_seen_at, meta_json) VALUES (?, ?, ?, ?, ?)`, + ) + .run("se_loop", "openclaw", BASE, BASE, "{}"); + + // Episode is dirty: traceCount=1 but trace_ids_json has 2 IDs. + writeDb + .prepare( + `INSERT INTO episodes (id, session_id, started_at, ended_at, trace_ids_json, r_task, status, meta_json) VALUES (?, ?, ?, ?, ?, ?, 'closed', ?)`, + ) + .run( + "ep_loop", + "se_loop", + BASE, + BASE + 1, + JSON.stringify(["tr_loop_a", "tr_loop_b"]), + 0.5, + JSON.stringify({ + closeReason: "finalized", + reward: { rHuman: 0.5, scoredAt: BASE - 1000, traceCount: 1 }, + }), + ); + + // tr_loop_a: plain text trace — no orphan risk. + writeDb + .prepare( + `INSERT INTO traces ( + id, episode_id, session_id, ts, user_text, agent_text, summary, + tool_calls_json, reflection, agent_thinking, value, alpha, r_human, + priority, tags_json, error_signatures_json, vec_summary, vec_action, + share_scope, share_target, shared_at, turn_id, schema_version + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NULL, NULL, NULL, NULL, NULL, ?, ?)`, + ) + .run( + "tr_loop_a", + "ep_loop", + "se_loop", + BASE, + "帮我分析一下这段Python代码的性能瓶颈,并给出优化建议。", + "这段代码的主要性能问题在于嵌套循环,时间复杂度是O(n²),可以用哈希表将其优化到O(n)。", + "Python代码性能分析", + "[]", + null, + null, + 0, + 0, + null, + 0.5, + "[]", + "[]", + BASE, + 1, + ); + + // tr_loop_b: trace with a tool call whose endedAt differs from the trace ts. 
+ // snapshotFromRecoveredEpisode creates a tool-role turn with ts=BASE+300, + // which does NOT appear in traceByTs (only BASE and BASE+100 are in the map). + // Without the guard this step is treated as an orphan and a new trace is + // inserted, growing trace_ids_json from 2 to 3 and keeping the episode dirty. + const toolCallWithDifferentTs = JSON.stringify([ + { + name: "bash", + input: { command: "python -c 'import cProfile; cProfile.run(\"main()\")'"}, + output: "ncalls tottime ... main 1 0.003", + endedAt: BASE + 300, + }, + ]); + writeDb + .prepare( + `INSERT INTO traces ( + id, episode_id, session_id, ts, user_text, agent_text, summary, + tool_calls_json, reflection, agent_thinking, value, alpha, r_human, + priority, tags_json, error_signatures_json, vec_summary, vec_action, + share_scope, share_target, shared_at, turn_id, schema_version + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NULL, NULL, NULL, NULL, NULL, ?, ?)`, + ) + .run( + "tr_loop_b", + "ep_loop", + "se_loop", + BASE + 100, + "请用cProfile验证一下", + "运行结果确认了瓶颈在内层循环,优化后耗时减少了约80%。", + "cProfile性能验证", + toolCallWithDifferentTs, + null, + null, + 0, + 0, + null, + 0.5, + "[]", + "[]", + BASE + 100, + 1, + ); + writeDb.close(); + + // First recovery: episode is dirty (traceCount=1 != ids_len=2). + core = await bootstrapMemoryCore({ + agent: "openclaw", + home: home.home, + config: home.config, + pkgVersion: "rescore-loop-recover-1", + }); + await core.init(); + await core.shutdown(); + core = null; + + const readDb1 = new Sqlite(home.home.dbFile, { readonly: true }); + const ep1 = readDb1 + .prepare("SELECT trace_ids_json, meta_json, r_task FROM episodes WHERE id = ?") + .get("ep_loop") as { trace_ids_json: string; meta_json: string; r_task: number | null } | undefined; + readDb1.close(); + + expect(ep1).toBeDefined(); + const ids1 = JSON.parse(ep1!.trace_ids_json) as string[]; + // Guard: no orphan trace was inserted during dirty-reward recovery. 
+ expect(ids1.length).toBe(2); + const meta1 = JSON.parse(ep1!.meta_json) as { + recoveryReason?: string; + reward?: { traceCount?: number }; + }; + expect(meta1.recoveryReason).toBe(RECOVERY_REASONS.DIRTY_REWARD_RESCORE); + // After recovery traceCount matches ids_len: episode is no longer dirty. + expect(meta1.reward?.traceCount).toBe(2); + + // Second recovery (simulates next bridge restart): episode should not + // be re-scored because traceCount(2) == trace_ids_json.length(2). + core = await bootstrapMemoryCore({ + agent: "openclaw", + home: home.home, + config: home.config, + pkgVersion: "rescore-loop-recover-2", + }); + await core.init(); + + const readDb2 = new Sqlite(home.home.dbFile, { readonly: true }); + const ep2 = readDb2 + .prepare("SELECT trace_ids_json, meta_json FROM episodes WHERE id = ?") + .get("ep_loop") as { trace_ids_json: string; meta_json: string } | undefined; + readDb2.close(); + + expect(ep2).toBeDefined(); + const ids2 = JSON.parse(ep2!.trace_ids_json) as string[]; + // Still 2 — no new orphan inserts on the second restart. + expect(ids2.length).toBe(2); + const meta2 = JSON.parse(ep2!.meta_json) as { + reward?: { traceCount?: number }; + }; + // traceCount unchanged: the episode was not re-scored. + expect(meta2.reward?.traceCount).toBe(2); + }); }); From 9e04ed6b592320aa70d9e983dbf74cd465c49793 Mon Sep 17 00:00:00 2001 From: Erick Date: Sat, 30 May 2026 14:42:03 -0700 Subject: [PATCH 5/6] fix(recovery): clear rewardDirty before recovery scoring to prevent crash-respawn loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit recoverDirtyClosedEpisodes() emits episode.finalized and awaits flush() which runs per-step capture reflection (potentially hundreds of LLM calls). 
If the daemon init watchdog fires (120 s) before flush() completes, the rewardDirty flag is never cleared by reward.ts — so the episode appears dirty on every subsequent startup and triggers the same scoring attempt, creating an infinite crash-respawn loop that hammers the configured LLM at ~5,500 calls/hour. Fix: clear rewardDirty in updateMeta before starting recovery. reward.ts already sets rewardDirty: undefined on successful scoring (idempotent); if the watchdog fires mid-scoring the flag is already gone, so the next startup finds the episode clean and init completes in milliseconds. Root cause of the incident: PR #8's 120 s init watchdog (which is itself correct) combined with a large episode (254 traces, 238 per-step reflection calls, ~160 s) that had rewardDirty set from a follow_up reopen. The episode was never able to finish scoring within the watchdog window. Co-Authored-By: Claude Sonnet 4.6 --- apps/memos-local-plugin/core/pipeline/memory-core.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts index c7276ac8a..167c31cbe 100644 --- a/apps/memos-local-plugin/core/pipeline/memory-core.ts +++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts @@ -1246,6 +1246,7 @@ export function createMemoryCore( closeReason: "finalized", recoveredAtStartup: endedAt, recoveryReason: "dirty_reward_rescore", + rewardDirty: undefined, }); const snapshot = snapshotFromRecoveredEpisode(ep, endedAt, { recoveryReason: "dirty_reward_rescore", From e3281cd703e6aad5ff060e88dbe6d94f61e4730a Mon Sep 17 00:00:00 2001 From: Erick Date: Sat, 30 May 2026 18:18:58 -0700 Subject: [PATCH 6/6] fix(recovery): prevent open-episode crash-respawn loop on watchdog interrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both recoverOpenEpisodesAsSessionEnd and recoverDirtyClosedEpisodes stamp recoveryReason=DIRTY_REWARD_RESCORE before emitting
episode.finalized. The condition-4 guard in episodeRewardIsDirty (the rTask == null branch) now excludes episodes with this reason, so a watchdog-killed scoring run (rTask=null, closeReason=finalized) no longer re-triggers rescoring on every subsequent startup. The same branch now also treats closeReason=abandoned episodes with rTask=null and at least one trace as dirty, unless they carry this recovery reason. Root cause: PR #8's initWatchdog (120s default) interrupted scoring for episodes with 80+ steps (~130s). The episode remained rTask=null with closeReason=finalized — matching condition 4 exactly — and looped at ~30 restarts/hour consuming ~5,400 Qwen calls/hour. Fixes #11 Co-Authored-By: Claude Sonnet 4.6 --- .../core/pipeline/memory-core.ts | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts index 167c31cbe..d05d02a9d 100644 --- a/apps/memos-local-plugin/core/pipeline/memory-core.ts +++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts @@ -1171,7 +1171,15 @@ export function createMemoryCore( continue; } - const snapshot = snapshotFromRecoveredEpisode(ep, endedAt); + // Pre-stamp before emitting finalized: if the watchdog fires mid-scoring, + // the next startup's condition-4 check will see DIRTY_REWARD_RESCORE and + // skip this episode rather than looping indefinitely. + handle.repos.episodes.updateMeta(episodeId, { + recoveryReason: RECOVERY_REASONS.DIRTY_REWARD_RESCORE, + }); + const snapshot = snapshotFromRecoveredEpisode(ep, endedAt, { + recoveryReason: RECOVERY_REASONS.DIRTY_REWARD_RESCORE, + }); debugStartupRecovery("H3", "startup_recovery_emit_finalized", { episodeId, sessionId: ep.sessionId, @@ -1298,7 +1306,14 @@ export function createMemoryCore( if ( ep.rTask == null && (ep.traceIds?.length ?? 0) > 0 && - (meta.closeReason === "finalized" || meta.recoveryReason === "missed_session_end") + // Episodes already attempted by a recovery path carry recoveryReason "dirty_reward_rescore".
+ // Excluding them prevents a crash-respawn loop when the watchdog fires + // mid-scoring and leaves rTask null: without this guard the next startup + // would re-pick the episode via closeReason="finalized" indefinitely. + meta.recoveryReason !== "dirty_reward_rescore" && + (meta.closeReason === "finalized" || + meta.closeReason === "abandoned" || + meta.recoveryReason === "missed_session_end") ) { return true; }