From 6f23af4b271e6b5df6c76fbb775d9f694da0c330 Mon Sep 17 00:00:00 2001
From: Erick <chiefmojo@chiefmojo.com>
Date: Mon, 25 May 2026 12:01:53 -0700
Subject: [PATCH 1/6] fix(reward): clear rewardDirty flag on skip path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The triviality-gate skip path wrote reward.skipped=true but never
cleared rewardDirty from meta_json. Since episodeRewardIsDirty()
checks the rewardDirty object flag before the skip gate, skipped
episodes with the flag set would re-enter the dirty scan on every
bridge restart, scoring and re-skipping indefinitely.

The normal scoring path already sets rewardDirty: undefined — this
mirrors that pattern in the skip branch.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 apps/memos-local-plugin/core/reward/reward.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/apps/memos-local-plugin/core/reward/reward.ts b/apps/memos-local-plugin/core/reward/reward.ts
index 1b6d3354c..937454fea 100644
--- a/apps/memos-local-plugin/core/reward/reward.ts
+++ b/apps/memos-local-plugin/core/reward/reward.ts
@@ -128,6 +128,7 @@ export function createRewardRunner(deps: RewardDeps): RewardRunner {
             trigger: input.trigger,
             skipped: true,
           },
+          rewardDirty: undefined,
         });
       } catch (err) {
         warnings.push({

From 0fb04158cec33aa023712c3c769106b1deb42777 Mon Sep 17 00:00:00 2001
From: Erick <chiefmojo@chiefmojo.com>
Date: Mon, 25 May 2026 12:13:38 -0700
Subject: [PATCH 2/6] fix(reward): set r_task=0 on skipped episodes to stop
 dirty-scan loop

The skip path wrote reward.skipped=true but never called setRTask(),
leaving r_task=NULL. For abandoned episodes episodeRewardIsDirty()
falls through to the r_task==null check and returns true, causing
those episodes to re-enter the dirty scan on every bridge start,
get re-skipped, and loop indefinitely.

Setting r_task=0 before updateMeta means the null-r_task branch in
episodeRewardIsDirty() no longer fires, permanently clearing the
episode from the dirty scan after its first skip.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 apps/memos-local-plugin/core/reward/reward.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/apps/memos-local-plugin/core/reward/reward.ts b/apps/memos-local-plugin/core/reward/reward.ts
index 937454fea..d2247932c 100644
--- a/apps/memos-local-plugin/core/reward/reward.ts
+++ b/apps/memos-local-plugin/core/reward/reward.ts
@@ -119,6 +119,7 @@ export function createRewardRunner(deps: RewardDeps): RewardRunner {
       try {
         const existingMeta = episode.meta ?? {};
         const wasFinalized = existingMeta.closeReason === "finalized";
+        deps.episodesRepo.setRTask(input.episodeId, 0);
         deps.episodesRepo.updateMeta(input.episodeId, {
           ...(wasFinalized ? {} : { closeReason: "abandoned", abandonReason: skipReason }),
           reward: {

From 6a9c39876e5dfb1ddf1ddf003f45aa461210af27 Mon Sep 17 00:00:00 2001
From: Erick <chiefmojo@chiefmojo.com>
Date: Mon, 25 May 2026 12:38:45 -0700
Subject: [PATCH 3/6] fix(dirty-scan): paginate closed-episode scan to remove
 500-row ceiling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

clampLimit() in _helpers.ts caps list() at 500 regardless of the
limit argument passed. The prior limit:1000 change (87165daf) was
a no-op — both values hit the same ceiling, leaving episodes beyond
rank 500 permanently invisible to the dirty scan.

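Replace both scan sites (startup + periodic) with collectDirtyClosedEpisodes(),
which pages through closed episodes 500 rows at a time and stops at the
first page shorter than 500. All closed episodes are now covered
regardless of total count.

The helper filters on episodeRewardIsDirty() only; the explicit
!isLightweightEpisode() filter from the old call sites is not repeated.
episodeRewardIsDirty() already returns false when meta.lightweightMemory
is true, and recoverDirtyClosedEpisodes() skips lightweight episodes.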

This was also the root cause of the "dirty-17" mystery: those episodes
were at ranks 536-924, outside the 500-row window.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../core/pipeline/memory-core.ts              | 23 ++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts
index 4974ee16d..be65c680b 100644
--- a/apps/memos-local-plugin/core/pipeline/memory-core.ts
+++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts
@@ -589,9 +589,7 @@ export function createMemoryCore(
     if (nowMs - lastDirtyClosedScan < 30_000) return;
     lastDirtyClosedScan = nowMs;
     try {
-      const dirtyClosed = handle.repos.episodes
-        .list({ status: "closed", limit: 500 })
-        .filter((ep) => !isLightweightEpisode(ep) && episodeRewardIsDirty(ep));
+      const dirtyClosed = collectDirtyClosedEpisodes();
       if (dirtyClosed.length > 0) {
         await recoverDirtyClosedEpisodes(dirtyClosed);
       }
@@ -914,9 +912,7 @@ export function createMemoryCore(
           await recoverOpenEpisodesAsSessionEnd(stale);
         }
       }
-      const dirtyClosed = handle.repos.episodes
-        .list({ status: "closed", limit: 500 })
-        .filter((ep) => !isLightweightEpisode(ep) && episodeRewardIsDirty(ep));
+      const dirtyClosed = collectDirtyClosedEpisodes();
       if (dirtyClosed.length > 0) {
         await recoverDirtyClosedEpisodes(dirtyClosed);
       }
@@ -1262,6 +1258,21 @@ export function createMemoryCore(
     await handle.flush();
   }
 
+  function collectDirtyClosedEpisodes(): (EpisodeRow & { meta?: Record<string, unknown> })[] {
+    const dirty: (EpisodeRow & { meta?: Record<string, unknown> })[] = [];
+    let offset = 0;
+    const pageSize = 500;
+    while (true) {
+      const page = handle.repos.episodes.list({ status: "closed", limit: pageSize, offset });
+      for (const ep of page) {
+        if (episodeRewardIsDirty(ep)) dirty.push(ep);
+      }
+      if (page.length < pageSize) break;
+      offset += pageSize;
+    }
+    return dirty;
+  }
+
   function episodeRewardIsDirty(ep: EpisodeRow & { meta?: Record<string, unknown> }): boolean {
     const meta = ep.meta ?? {};
     if (meta.lightweightMemory === true) return false;

From 0e7109d06e87d5069d9bb68c3f78047634b665bd Mon Sep 17 00:00:00 2001
From: Erick <chiefmojo@chiefmojo.com>
Date: Sat, 30 May 2026 07:18:12 -0700
Subject: [PATCH 4/6] fix(reward): drain reward after dirty-closed recovery in
 lightweight mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

recoverDirtyClosedEpisodes relied on flush() → reward.drain() to fire
R_human scoring after the capture pass. flush() returns early in
lightweight mode (the default), so the reward subscriber's 30 s timer
was cancelled by shutdown() before it fired — leaving traceCount
permanently mismatched and the episode dirty on every restart.

Fix: after flush() drains the capture pass, explicitly call
rewardRunner.run() for any episode that episodeRewardIsDirty() still
considers dirty — mirroring the pattern already used by
recoverOpenEpisodesAsSessionEnd. A second flush() then drains
downstream (L2 / L3 / skills).

Regression test "dirty-reward recovery does not insert orphan traces":
seeds a closed episode with reward.traceCount=1 and 2 trace IDs, one of
which has a tool call whose endedAt differs from the trace ts (this
produces an orphan step in runReflect). It verifies that:
  1. trace_ids_json stays at 2 entries after recovery (orphan insert guard).
  2. recoveryReason is stamped and reward.traceCount is updated to 2
     after the first recovery pass.
  3. After a second restart trace_ids_json and reward.traceCount are
     both still 2 (no orphan insert, loop stopped).

Also fixes the pre-existing test "rescoring closed episodes when traces
were appended after the last reward" which failed for the same reason.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../core/pipeline/memory-core.ts              |  12 ++
 .../tests/unit/pipeline/memory-core.test.ts   | 185 ++++++++++++++++++
 2 files changed, 197 insertions(+)

diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts
index be65c680b..c7276ac8a 100644
--- a/apps/memos-local-plugin/core/pipeline/memory-core.ts
+++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts
@@ -1237,6 +1237,7 @@ export function createMemoryCore(
     episodes: Array<EpisodeRow & { meta?: Record<string, unknown> }>,
   ): Promise<void> {
     log.info("init.dirty_closed_episodes.rescore", { count: episodes.length });
+    const rescored: EpisodeId[] = [];
     for (const ep of episodes) {
       if (isLightweightEpisode(ep)) continue;
       const episodeId = ep.id as EpisodeId;
@@ -1254,6 +1255,17 @@ export function createMemoryCore(
         episode: snapshot,
         closedBy: "finalized",
       });
+      rescored.push(episodeId);
+    }
+    // Drain the capture pass (patches reflections + α onto existing traces).
+    await handle.flush();
+    // In lightweight mode flush() returns before draining the reward
+    // subscriber. Explicitly run reward for any episode whose trace count
+    // still mismatches — mirrors the pattern in recoverOpenEpisodesAsSessionEnd.
+    for (const episodeId of rescored) {
+      if (episodeRewardIsDirty(handle.repos.episodes.getById(episodeId) ?? {} as never)) {
+        await handle.rewardRunner.run({ episodeId, feedback: [], trigger: "manual" });
+      }
     }
     await handle.flush();
   }
diff --git a/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts b/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts
index 88d5cbbd4..5d6f3237d 100644
--- a/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts
+++ b/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts
@@ -1456,4 +1456,189 @@ algorithm:
     expect(meta.reward?.traceCount).toBe(1);
     expect(meta.reward?.traceIds).toEqual(["tr_missing_reward"]);
   });
+
+  it("dirty-reward recovery does not insert orphan traces (regression: rescore loop guard)", async () => {
+    // Regression test for the rescore loop:
+    // When recoverDirtyClosedEpisodes re-emits episode.finalized, capture's
+    // runReflect used to insert new trace rows for "orphan steps" — steps
+    // whose timestamps didn't match any existing DB row.  For recovered
+    // episodes this happens whenever a trace has tool calls with endedAt
+    // timestamps different from the trace's own ts, because the snapshot
+    // rebuilds a separate tool-role turn for each call.
+    //
+    // Without the guard the orphan insert grows trace_ids_json, keeping
+    // reward.traceCount != traceIds.length forever and looping on every
+    // bridge restart.  The guard (meta.recoveryReason === "dirty_reward_rescore")
+    // skips the insert, so trace_ids_json stays stable and the episode
+    // stops appearing dirty after a single recovery pass.
+
+    home = await makeTmpHome({ agent: "openclaw" });
+
+    const seeder = await bootstrapMemoryCore({
+      agent: "openclaw",
+      home: home.home,
+      config: home.config,
+      pkgVersion: "rescore-loop-seed",
+    });
+    await seeder.init();
+    await seeder.shutdown();
+
+    const Sqlite = (await import("better-sqlite3")).default;
+    const writeDb = new Sqlite(home.home.dbFile);
+    const BASE = Date.now() - 5_000;
+
+    writeDb
+      .prepare(
+        `INSERT INTO sessions (id, agent, started_at, last_seen_at, meta_json) VALUES (?, ?, ?, ?, ?)`,
+      )
+      .run("se_loop", "openclaw", BASE, BASE, "{}");
+
+    // Episode is dirty: traceCount=1 but trace_ids_json has 2 IDs.
+    writeDb
+      .prepare(
+        `INSERT INTO episodes (id, session_id, started_at, ended_at, trace_ids_json, r_task, status, meta_json) VALUES (?, ?, ?, ?, ?, ?, 'closed', ?)`,
+      )
+      .run(
+        "ep_loop",
+        "se_loop",
+        BASE,
+        BASE + 1,
+        JSON.stringify(["tr_loop_a", "tr_loop_b"]),
+        0.5,
+        JSON.stringify({
+          closeReason: "finalized",
+          reward: { rHuman: 0.5, scoredAt: BASE - 1000, traceCount: 1 },
+        }),
+      );
+
+    // tr_loop_a: plain text trace — no orphan risk.
+    writeDb
+      .prepare(
+        `INSERT INTO traces (
+          id, episode_id, session_id, ts, user_text, agent_text, summary,
+          tool_calls_json, reflection, agent_thinking, value, alpha, r_human,
+          priority, tags_json, error_signatures_json, vec_summary, vec_action,
+          share_scope, share_target, shared_at, turn_id, schema_version
+        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NULL, NULL, NULL, NULL, NULL, ?, ?)`,
+      )
+      .run(
+        "tr_loop_a",
+        "ep_loop",
+        "se_loop",
+        BASE,
+        "帮我分析一下这段Python代码的性能瓶颈，并给出优化建议。",
+        "这段代码的主要性能问题在于嵌套循环，时间复杂度是O(n²)，可以用哈希表将其优化到O(n)。",
+        "Python代码性能分析",
+        "[]",
+        null,
+        null,
+        0,
+        0,
+        null,
+        0.5,
+        "[]",
+        "[]",
+        BASE,
+        1,
+      );
+
+    // tr_loop_b: trace with a tool call whose endedAt differs from the trace ts.
+    // snapshotFromRecoveredEpisode creates a tool-role turn with ts=BASE+300,
+    // which does NOT appear in traceByTs (only BASE and BASE+100 are in the map).
+    // Without the guard this step is treated as an orphan and a new trace is
+    // inserted, growing trace_ids_json from 2 to 3 and keeping the episode dirty.
+    const toolCallWithDifferentTs = JSON.stringify([
+      {
+        name: "bash",
+        input: { command: "python -c 'import cProfile; cProfile.run(\"main()\")'"},
+        output: "ncalls tottime ... main 1 0.003",
+        endedAt: BASE + 300,
+      },
+    ]);
+    writeDb
+      .prepare(
+        `INSERT INTO traces (
+          id, episode_id, session_id, ts, user_text, agent_text, summary,
+          tool_calls_json, reflection, agent_thinking, value, alpha, r_human,
+          priority, tags_json, error_signatures_json, vec_summary, vec_action,
+          share_scope, share_target, shared_at, turn_id, schema_version
+        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NULL, NULL, NULL, NULL, NULL, ?, ?)`,
+      )
+      .run(
+        "tr_loop_b",
+        "ep_loop",
+        "se_loop",
+        BASE + 100,
+        "请用cProfile验证一下",
+        "运行结果确认了瓶颈在内层循环，优化后耗时减少了约80%。",
+        "cProfile性能验证",
+        toolCallWithDifferentTs,
+        null,
+        null,
+        0,
+        0,
+        null,
+        0.5,
+        "[]",
+        "[]",
+        BASE + 100,
+        1,
+      );
+    writeDb.close();
+
+    // First recovery: episode is dirty (traceCount=1 != ids_len=2).
+    core = await bootstrapMemoryCore({
+      agent: "openclaw",
+      home: home.home,
+      config: home.config,
+      pkgVersion: "rescore-loop-recover-1",
+    });
+    await core.init();
+    await core.shutdown();
+    core = null;
+
+    const readDb1 = new Sqlite(home.home.dbFile, { readonly: true });
+    const ep1 = readDb1
+      .prepare("SELECT trace_ids_json, meta_json, r_task FROM episodes WHERE id = ?")
+      .get("ep_loop") as { trace_ids_json: string; meta_json: string; r_task: number | null } | undefined;
+    readDb1.close();
+
+    expect(ep1).toBeDefined();
+    const ids1 = JSON.parse(ep1!.trace_ids_json) as string[];
+    // Guard: no orphan trace was inserted during dirty-reward recovery.
+    expect(ids1.length).toBe(2);
+    const meta1 = JSON.parse(ep1!.meta_json) as {
+      recoveryReason?: string;
+      reward?: { traceCount?: number };
+    };
+    expect(meta1.recoveryReason).toBe(RECOVERY_REASONS.DIRTY_REWARD_RESCORE);
+    // After recovery traceCount matches ids_len: episode is no longer dirty.
+    expect(meta1.reward?.traceCount).toBe(2);
+
+    // Second recovery (simulates next bridge restart): episode should not
+    // be re-scored because traceCount(2) == trace_ids_json.length(2).
+    core = await bootstrapMemoryCore({
+      agent: "openclaw",
+      home: home.home,
+      config: home.config,
+      pkgVersion: "rescore-loop-recover-2",
+    });
+    await core.init();
+
+    const readDb2 = new Sqlite(home.home.dbFile, { readonly: true });
+    const ep2 = readDb2
+      .prepare("SELECT trace_ids_json, meta_json FROM episodes WHERE id = ?")
+      .get("ep_loop") as { trace_ids_json: string; meta_json: string } | undefined;
+    readDb2.close();
+
+    expect(ep2).toBeDefined();
+    const ids2 = JSON.parse(ep2!.trace_ids_json) as string[];
+    // Still 2 — no new orphan inserts on the second restart.
+    expect(ids2.length).toBe(2);
+    const meta2 = JSON.parse(ep2!.meta_json) as {
+      reward?: { traceCount?: number };
+    };
+    // traceCount unchanged: the episode was not re-scored.
+    expect(meta2.reward?.traceCount).toBe(2);
+  });
 });

From 9e04ed6b592320aa70d9e983dbf74cd465c49793 Mon Sep 17 00:00:00 2001
From: Erick <chiefmojo@chiefmojo.com>
Date: Sat, 30 May 2026 14:42:03 -0700
Subject: [PATCH 5/6] fix(recovery): clear rewardDirty before recovery scoring
 to prevent crash-respawn loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

recoverDirtyClosedEpisodes() emits episode.finalized and awaits flush()
which runs per-step capture reflection (potentially hundreds of LLM calls).
If the daemon init watchdog fires (120 s) before flush() completes, the
rewardDirty flag is never cleared by reward.ts — so the episode appears
dirty on every subsequent startup and triggers the same scoring attempt,
creating an infinite crash-respawn loop that hammers the configured LLM
at ~5 500 calls/hour.

Fix: clear rewardDirty in updateMeta before starting recovery. reward.ts
already sets rewardDirty: undefined on successful scoring (idempotent);
if the watchdog fires mid-scoring the flag is already gone, so the next
startup finds the episode clean and init completes in milliseconds.

Root cause of the incident: PR #8's 120 s init watchdog (correct) combined
with a large episode (254 traces, 238 per-step reflection calls, ~160 s)
that had rewardDirty set from a follow_up reopen. The episode was never
able to finish scoring within the watchdog window.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 apps/memos-local-plugin/core/pipeline/memory-core.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts
index c7276ac8a..167c31cbe 100644
--- a/apps/memos-local-plugin/core/pipeline/memory-core.ts
+++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts
@@ -1246,6 +1246,7 @@ export function createMemoryCore(
         closeReason: "finalized",
         recoveredAtStartup: endedAt,
         recoveryReason: "dirty_reward_rescore",
+        rewardDirty: undefined,
       });
       const snapshot = snapshotFromRecoveredEpisode(ep, endedAt, {
         recoveryReason: "dirty_reward_rescore",

From e3281cd703e6aad5ff060e88dbe6d94f61e4730a Mon Sep 17 00:00:00 2001
From: Erick <chiefmojo@chiefmojo.com>
Date: Sat, 30 May 2026 18:18:58 -0700
Subject: [PATCH 6/6] fix(recovery): prevent open-episode crash-respawn loop on
 watchdog interrupt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both recoverOpenEpisodesAsSessionEnd and recoverDirtyClosedEpisodes stamp
recoveryReason=DIRTY_REWARD_RESCORE before emitting episode.finalized.
The condition-4 guard in episodeRewardIsDirty now excludes episodes with
this reason, so a watchdog-killed scoring run (rTask=null, closeReason=
finalized) no longer re-triggers rescoring on every subsequent startup.

Condition 4 is also widened to match closeReason=abandoned, in addition
to closeReason=finalized and recoveryReason=missed_session_end.

Root cause: PR #8's initWatchdog (120s default) interrupted scoring for
episodes with 80+ steps (~130s). The episode remained rTask=null with
closeReason=finalized — matching condition 4 exactly — and looped at
~30 restarts/hour consuming ~5,400 Qwen calls/hour.

Fixes #11

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../core/pipeline/memory-core.ts              | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts
index 167c31cbe..d05d02a9d 100644
--- a/apps/memos-local-plugin/core/pipeline/memory-core.ts
+++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts
@@ -1171,7 +1171,15 @@ export function createMemoryCore(
           continue;
         }
 
-        const snapshot = snapshotFromRecoveredEpisode(ep, endedAt);
+        // Pre-stamp before emitting finalized: if the watchdog fires mid-scoring,
+        // the next startup's condition-4 check will see DIRTY_REWARD_RESCORE and
+        // skip this episode rather than looping indefinitely.
+        handle.repos.episodes.updateMeta(episodeId, {
+          recoveryReason: RECOVERY_REASONS.DIRTY_REWARD_RESCORE,
+        });
+        const snapshot = snapshotFromRecoveredEpisode(ep, endedAt, {
+          recoveryReason: RECOVERY_REASONS.DIRTY_REWARD_RESCORE,
+        });
         debugStartupRecovery("H3", "startup_recovery_emit_finalized", {
           episodeId,
           sessionId: ep.sessionId,
@@ -1298,7 +1306,14 @@ export function createMemoryCore(
     if (
       ep.rTask == null &&
       (ep.traceIds?.length ?? 0) > 0 &&
-      (meta.closeReason === "finalized" || meta.recoveryReason === "missed_session_end")
+      // Episodes already attempted by a recovery path carry recoveryReason "dirty_reward_rescore".
+      // Excluding them prevents a crash-respawn loop when the watchdog fires
+      // mid-scoring and leaves rTask null: without this guard the next startup
+      // would re-pick the episode via closeReason="finalized" indefinitely.
+      meta.recoveryReason !== "dirty_reward_rescore" &&
+      (meta.closeReason === "finalized" ||
+        meta.closeReason === "abandoned" ||
+        meta.recoveryReason === "missed_session_end")
     ) {
       return true;
     }