From 6599ef87abf4a1eb4a9af4d3b73756d195056255 Mon Sep 17 00:00:00 2001 From: Erick Date: Sun, 31 May 2026 11:47:25 -0700 Subject: [PATCH] feat(reward): bypass exchange-count gate for cron episodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cron jobs always produce exactly 1 user↔agent exchange — the task prompt plus one reply — so minExchangesForCompletion: 2 zero-scores every cron episode before content is even evaluated. This starves L2 induction of signal after the bridge stabilises. Adds `cronSentinels` to RewardConfig (schema, defaults, types). When the first user turn starts with a sentinel prefix, check 1 (exchange count) is skipped; content/triviality checks still apply. Default sentinel covers the Hermes cron prompt. The `snapshot.meta?.initialUserText` fallback handles episodes scored during recovery when turns aren't materialised. If the field is absent the episode falls back to the old skip behaviour — no false positives. Co-Authored-By: Claude Sonnet 4.6 --- .../core/config/defaults.ts | 1 + apps/memos-local-plugin/core/config/schema.ts | 8 +++ apps/memos-local-plugin/core/reward/reward.ts | 33 +++++++++-- apps/memos-local-plugin/core/reward/types.ts | 8 +++ .../unit/reward/reward.integration.test.ts | 55 +++++++++++++++++++ 5 files changed, 101 insertions(+), 4 deletions(-) diff --git a/apps/memos-local-plugin/core/config/defaults.ts b/apps/memos-local-plugin/core/config/defaults.ts index 1cf2d2cf6..1f2b9bd91 100644 --- a/apps/memos-local-plugin/core/config/defaults.ts +++ b/apps/memos-local-plugin/core/config/defaults.ts @@ -135,6 +135,7 @@ export const DEFAULT_CONFIG: ResolvedConfig = { minContentCharsForCompletion: 40, toolHeavyRatio: 0.7, minAssistantCharsForToolHeavy: 80, + cronSentinels: ["[IMPORTANT: You are running as a scheduled cron job"], }, l2Induction: { minSimilarity: 0.65, diff --git a/apps/memos-local-plugin/core/config/schema.ts b/apps/memos-local-plugin/core/config/schema.ts index 7c9ff193b..cc3ef4b80 100644 ---
a/apps/memos-local-plugin/core/config/schema.ts +++ b/apps/memos-local-plugin/core/config/schema.ts @@ -212,6 +212,14 @@ const AlgorithmSchema = Type.Object({ * scored normally even if tool calls dominate. Default 80. */ minAssistantCharsForToolHeavy: NumberInRange(80, 0, 10_000), + /** + * User-turn prefixes that identify cron/scheduled episodes. When + * the first user turn starts with any of these, the exchange-count + * gate is bypassed. Default: Hermes cron sentinel. + */ + cronSentinels: Type.Array(Type.String(), { + default: ["[IMPORTANT: You are running as a scheduled cron job"], + }), }, { default: {} }), l2Induction: Type.Object({ /** Cosine ≥ this to associate a new trace with an existing L2 policy. */ diff --git a/apps/memos-local-plugin/core/reward/reward.ts b/apps/memos-local-plugin/core/reward/reward.ts index 1b6d3354c..51791c918 100644 --- a/apps/memos-local-plugin/core/reward/reward.ts +++ b/apps/memos-local-plugin/core/reward/reward.ts @@ -392,7 +392,7 @@ function looksLikeTrivialContent(text: string): boolean { function decideSkipReason( snapshot: import("../session/types.js").EpisodeSnapshot, traces: readonly TraceRow[], - cfg: Pick, + cfg: Pick, ): string | null { // Prefer the live snapshot's turn list; fall back to traces when the // snapshot came from a SQLite row (no turns materialised). @@ -443,9 +443,34 @@ function decideSkipReason( // 1. Not enough real conversation turns (need at least N user-assistant exchanges) const exchanges = Math.min(userTurns, assistantTurns); if (exchanges < cfg.minExchangesForCompletion) { - return ( - `对话轮次不足(${exchanges} 轮),需要至少 ${cfg.minExchangesForCompletion} 轮完整的问答交互才能生成摘要。` - ); + // Cron episodes always have exactly 1 user turn (the task prompt) so this + // gate always fires for them. Bypass it when the first user content starts + // with a known cron sentinel — cron jobs are inherently substantive, and + // the content/triviality checks below provide the real substance filter. 
+ // + // Fallback: if snapshot.turns is empty (recovery path), we also check + // snapshot.meta?.initialUserText. This field is set by the pipeline on + // episode creation and is generally reliable, but could be absent or + // stale in unusual recovery scenarios — if so, the episode falls through + // to the old "skip" behavior (no false positives, just a missed score). + const sentinels = cfg.cronSentinels ?? []; + if (sentinels.length > 0) { + const firstUserContent = + userContents[0] ?? + (snapshot.meta?.initialUserText as string | undefined) ?? + ""; + if (sentinels.some((s) => firstUserContent.startsWith(s))) { + // Cron episode — skip the exchange-count gate, fall through to content checks. + } else { + return ( + `对话轮次不足(${exchanges} 轮),需要至少 ${cfg.minExchangesForCompletion} 轮完整的问答交互才能生成摘要。` + ); + } + } else { + return ( + `对话轮次不足(${exchanges} 轮),需要至少 ${cfg.minExchangesForCompletion} 轮完整的问答交互才能生成摘要。` + ); + } } // 2. No user messages at all diff --git a/apps/memos-local-plugin/core/reward/types.ts b/apps/memos-local-plugin/core/reward/types.ts index 0e545ef0e..e1aa8e941 100644 --- a/apps/memos-local-plugin/core/reward/types.ts +++ b/apps/memos-local-plugin/core/reward/types.ts @@ -64,6 +64,14 @@ export interface RewardConfig { * that the tool-heavy heuristic would otherwise skip. Default 80. */ minAssistantCharsForToolHeavy: number; + /** + * User-turn prefixes that identify cron/scheduled episodes. When the + * first user turn starts with any of these prefixes, the exchange-count + * gate (minExchangesForCompletion) is bypassed — cron jobs are + * inherently substantive regardless of turn count. Content and + * triviality checks still apply. Default: Hermes cron sentinel. 
+ */ + cronSentinels?: string[]; } // ─── User feedback inputs ────────────────────────────────────────────────── diff --git a/apps/memos-local-plugin/tests/unit/reward/reward.integration.test.ts b/apps/memos-local-plugin/tests/unit/reward/reward.integration.test.ts index 238713a29..b0692b487 100644 --- a/apps/memos-local-plugin/tests/unit/reward/reward.integration.test.ts +++ b/apps/memos-local-plugin/tests/unit/reward/reward.integration.test.ts @@ -376,4 +376,59 @@ describe("reward/integration", () => { expect(res.feedbackCount).toBe(2); expect(res.rHuman).toBeGreaterThan(0); }); + + it("cron episode with 1 exchange is not skipped when sentinel matches", async () => { + const sid = "s_cron_1"; + const eid = "ep_cron_1"; + seedEpisode(handle, eid, sid, ["tr_cron_1"]); + seedTrace(handle, "tr_cron_1", eid, sid, { + userText: + "[IMPORTANT: You are running as a scheduled cron job. Please review recent activity and write a reflection card.", + agentText: + "Reviewed the last 48 hours of conversation traces and wrote a reflection card to ~/Faye/memory/reflections/2026-05-31.md. 
Commit succeeded.", + }); + + const events: RewardEvent[] = []; + const bus = createRewardEventBus(); + bus.onAny((e) => events.push(e)); + + const runner = createRewardRunner({ + tracesRepo: handle.repos.traces, + episodesRepo: handle.repos.episodes, + feedbackRepo: handle.repos.feedback, + llm: fakeLlm({ + completeJson: { + "reward.reward.r_human.v3": { + goal_achievement: 0.8, + process_quality: 0.8, + user_satisfaction: 0.8, + label: "success", + reason: "cron reflection card written and committed", + }, + }, + }), + bus, + cfg: { + ...cfg(), + minExchangesForCompletion: 2, + minContentCharsForCompletion: 40, + cronSentinels: ["[IMPORTANT: You are running as a scheduled cron job"], + }, + now: () => NOW, + }); + + const res = await runner.run({ + episodeId: eid as unknown as Parameters[0]["episodeId"], + feedback: [], + trigger: "implicit_fallback", + }); + + // Must reach the LLM scorer, not be abandoned as trivial. + expect(res.humanScore.source).toBe("llm"); + expect(res.rHuman).toBeGreaterThan(0); + expect(events.some((e) => e.kind === "reward.updated")).toBe(true); + // Must NOT be heuristic-skipped (reward.scored with source=heuristic means skipped). + const scoredEvent = events.find((e) => e.kind === "reward.scored"); + expect((scoredEvent as { source?: string } | undefined)?.source).not.toBe("heuristic"); + }); });