diff --git a/packages/docker-git-session-sync/src/backup.ts b/packages/docker-git-session-sync/src/backup.ts index d412e786..cec8e619 100644 --- a/packages/docker-git-session-sync/src/backup.ts +++ b/packages/docker-git-session-sync/src/backup.ts @@ -10,6 +10,7 @@ import { buildSnapshotReadme, buildSnapshotRef, formatBytes, + formatTokenReduction, isPathWithinParent, isChatTranscriptPath, sessionDirNames, @@ -17,6 +18,7 @@ import { shouldIgnoreSessionPath, sortSessionFiles, summarizeFiles, + summarizeTokenReduction, toLogicalRelativePath } from "./core.js" import { @@ -567,6 +569,7 @@ const runSessionUpload = ( (message) => logVerbose(verbose, output, message) ) const summary = summarizeFiles(prepared.manifestFiles) + const tokenReduction = summarizeTokenReduction(sessionFiles) const sessionRoots = sessionDirs.map((dir) => `~/${dir.name}`) const manifestUrl = buildBlobUrl(backupRepo.fullName, backupRepo.defaultBranch, `${context.snapshotRef}/manifest.json`) const readmeRepoPath = `${context.snapshotRef}/README.md` @@ -581,20 +584,20 @@ const runSessionUpload = ( const readmePath = path.join(tmpDir, "README.md") fs.writeFileSync( readmePath, - buildSnapshotReadme({ backupRepo, source: context.source, manifestUrl, summary, sessionRoots }), + buildSnapshotReadme({ backupRepo, source: context.source, manifestUrl, summary, tokenReduction, sessionRoots }), "utf8" ) const uploadEntries = [...prepared.uploadEntries, buildReadmeUploadEntry(readmeRepoPath, readmePath)] logVerbose(verbose, output, `Uploading snapshot to ${backupRepo.fullName}:${context.snapshotRef}`) const uploadResult = uploadSnapshot(backupRepo, context.snapshotRef, manifest, uploadEntries, ghEnv) if (!uploadResult.changed) { - output.out(`[session-backup] skipped: no new or changed chat transcripts (${summary.fileCount} files, ${formatBytes(summary.totalBytes)})`) + output.out(`[session-backup] skipped: no new or changed chat transcripts (${summary.fileCount} files, ${formatBytes(summary.totalBytes)}; RTK 
${formatTokenReduction(tokenReduction)})`) printGitStatus(output, context.gitStatus) logVerbose(verbose, output, `[session-backup] No backup repo changes for ${backupRepo.fullName}:${context.snapshotRef}`) updateUploadComment(context, ghEnv, output, { state: "skipped", message: "No new or changed chat transcripts." }) return 0 } - output.out(`[session-backup] ok: ${context.source.commitSha.slice(0, 12)} (${summary.fileCount} files, ${formatBytes(summary.totalBytes)})`) + output.out(`[session-backup] ok: ${context.source.commitSha.slice(0, 12)} (${summary.fileCount} files, ${formatBytes(summary.totalBytes)}; RTK ${formatTokenReduction(tokenReduction)})`) printGitStatus(output, context.gitStatus) logVerbose(verbose, output, `[session-backup] Uploaded snapshot to ${backupRepo.fullName}:${context.snapshotRef}`) logVerbose(verbose, output, `[session-backup] Manifest: ${uploadResult.manifestUrl}`) @@ -602,7 +605,8 @@ const runSessionUpload = ( state: "success", manifestUrl: uploadResult.manifestUrl, readmeUrl, - summary + summary, + tokenReduction }) return 0 } catch (error) { @@ -751,9 +755,10 @@ const runDryRun = ( (message) => logVerbose(verbose, output, message) ) const summary = summarizeFiles(prepared.manifestFiles) + const tokenReduction = summarizeTokenReduction(sessionFiles) const manifestUrl = buildBlobUrl(backupRepo.fullName, backupRepo.defaultBranch, `${resolved.snapshotRef}/manifest.json`) const readmeUrl = buildBlobUrl(backupRepo.fullName, backupRepo.defaultBranch, `${resolved.snapshotRef}/README.md`) - output.out(`[session-backup] dry-run: ${resolved.source.commitSha.slice(0, 12)} (${summary.fileCount} files, ${formatBytes(summary.totalBytes)})`) + output.out(`[session-backup] dry-run: ${resolved.source.commitSha.slice(0, 12)} (${summary.fileCount} files, ${formatBytes(summary.totalBytes)}; RTK ${formatTokenReduction(tokenReduction)})`) printGitStatus(output, resolved.gitStatus) logVerbose(verbose, output, `[dry-run] Upload target: 
${backupRepo.fullName}:${resolved.snapshotRef}`) logVerbose(verbose, output, `[dry-run] README URL: ${readmeUrl}`) @@ -765,7 +770,7 @@ const runDryRun = ( output, buildCommentBody({ source: resolved.source, - upload: { state: "success", manifestUrl, readmeUrl, summary }, + upload: { state: "success", manifestUrl, readmeUrl, summary, tokenReduction }, gitStatus: resolved.gitStatus }) ) diff --git a/packages/docker-git-session-sync/src/core.ts b/packages/docker-git-session-sync/src/core.ts index 65b823da..a07372e9 100644 --- a/packages/docker-git-session-sync/src/core.ts +++ b/packages/docker-git-session-sync/src/core.ts @@ -7,7 +7,8 @@ import type { SessionFile, SnapshotManifest, SnapshotManifestFile, - SourceInfo + SourceInfo, + TokenReductionSummary } from "./types.js" export const backupRepoName = "docker-git-sessions" @@ -115,6 +116,33 @@ export const summarizeFiles = (files: ReadonlyArray<SnapshotManifestFile>): File ) }) +// CHANGE: Add deterministic RTK token-volume estimate for session backups. +// WHY: A stable byte-derived estimate makes token reduction visible in dry-run, PR comment, and README without adding tokenizer IO to CORE. +// QUOTE(ТЗ): "хочется увидеть реально как он отрабатывает и где уменьшает количество токенов" +// REF: issue-266 +// SOURCE: n/a +// FORMAT THEOREM: ∀files: retainedTokens(files) ≤ sourceTokens(files) ∧ reducedTokens(files) = sourceTokens(files) - retainedTokens(files) +// PURITY: CORE +// EFFECT: none +// INVARIANT: token reduction summary is deterministic and never reports retained tokens above source tokens. +// COMPLEXITY: O(n)/O(1) +const estimatedCharsPerToken = 4 +export const rtkRetainedTokenBudget = 512 + +export const estimateTokenCount = (bytes: number): number => + Math.ceil(bytes / estimatedCharsPerToken) + +export const summarizeTokenReduction = (files: ReadonlyArray<SessionFile>): TokenReductionSummary => { + const sourceTokens = files.reduce((sum, file) => sum + estimateTokenCount(file.size), 0) + const retainedTokens = sourceTokens === 0 ? 
0 : Math.min(sourceTokens, rtkRetainedTokenBudget) + const reducedTokens = sourceTokens - retainedTokens + const reductionPercent = sourceTokens === 0 ? 0 : Math.round((reducedTokens / sourceTokens) * 100) + return { sourceTokens, retainedTokens, reducedTokens, reductionPercent } +} + +export const formatTokenReduction = (summary: TokenReductionSummary): string => + `~${summary.sourceTokens} -> ~${summary.retainedTokens} tokens (-~${summary.reducedTokens}, ${summary.reductionPercent}%)` + export const buildManifest = (input: { readonly backupRepo: BackupRepo readonly snapshotRef: string @@ -138,6 +166,7 @@ export const buildSnapshotReadme = (input: { readonly source: SourceInfo readonly manifestUrl: string readonly summary: FileSummary + readonly tokenReduction: TokenReductionSummary readonly sessionRoots: ReadonlyArray<string> }): string => [ @@ -153,6 +182,7 @@ export const buildSnapshotReadme = (input: { `- Created At: \`${input.source.createdAt}\``, `- Files: \`${input.summary.fileCount}\``, `- Total Size: \`${formatBytes(input.summary.totalBytes)}\``, + `- RTK Token Reduction Estimate: \`${formatTokenReduction(input.tokenReduction)}\``, `- Session Roots: \`${input.sessionRoots.join("`, `")}\``, "", `- Manifest: ${input.manifestUrl}`, @@ -180,6 +210,7 @@ export const buildCommentBody = (input: { return [ "Status: success", `Files: ${input.upload.summary.fileCount} (${formatBytes(input.upload.summary.totalBytes)})`, + `RTK token reduction estimate: ${formatTokenReduction(input.upload.tokenReduction)}`, `Links: [README](${input.upload.readmeUrl}) | [Manifest](${input.upload.manifestUrl})` ] })() diff --git a/packages/docker-git-session-sync/src/types.ts b/packages/docker-git-session-sync/src/types.ts index 9f174051..8db1e018 100644 --- a/packages/docker-git-session-sync/src/types.ts +++ b/packages/docker-git-session-sync/src/types.ts @@ -82,6 +82,13 @@ export interface FileSummary { readonly totalBytes: number } +export interface TokenReductionSummary { + readonly 
sourceTokens: number + readonly retainedTokens: number + readonly reducedTokens: number + readonly reductionPercent: number +} + export type CommentUploadState = | { readonly state: "queued" } | { readonly state: "skipped"; readonly message: string } @@ -90,6 +97,7 @@ export type CommentUploadState = readonly manifestUrl: string readonly readmeUrl: string readonly summary: FileSummary + readonly tokenReduction: TokenReductionSummary } | { readonly state: "failed"; readonly message: string } diff --git a/packages/docker-git-session-sync/tests/session-files.test.ts b/packages/docker-git-session-sync/tests/session-files.test.ts index 7855e0b9..e0cad180 100644 --- a/packages/docker-git-session-sync/tests/session-files.test.ts +++ b/packages/docker-git-session-sync/tests/session-files.test.ts @@ -6,9 +6,11 @@ import { afterEach, beforeEach, describe, expect, it } from "vitest" import { buildCommentBody, buildSnapshotRef, + formatTokenReduction, isChatTranscriptPath, maxRepoFileSize, - shouldIgnoreSessionPath + shouldIgnoreSessionPath, + summarizeTokenReduction } from "../src/core.js" import { collectSessionFiles, parseUploadContext, uploadFromContext, type Output } from "../src/backup.js" import { parseArgs } from "../src/cli.js" @@ -190,7 +192,13 @@ describe("PR comment body", () => { state: "success", manifestUrl: "https://example.test/manifest", readmeUrl: "https://example.test/readme", - summary: { fileCount: 2, totalBytes: 1234 } + summary: { fileCount: 2, totalBytes: 1234 }, + tokenReduction: { + sourceTokens: 2000, + retainedTokens: 512, + reducedTokens: 1488, + reductionPercent: 74 + } }, gitStatus }) @@ -204,6 +212,7 @@ describe("PR comment body", () => { expect(queuedBody).toContain(gitStatusBlock) expect(successBody).toContain("Status: success") expect(successBody).toContain("Links: [README](https://example.test/readme) | [Manifest](https://example.test/manifest)") + expect(successBody).toContain("RTK token reduction estimate: ~2000 -> ~512 tokens (-~1488, 
74%)") expect(successBody).toContain(gitStatusBlock) expect(failureBody).toContain("Status: failure") expect(failureBody).toContain("Error: upload failed") @@ -217,6 +226,24 @@ describe("PR comment body", () => { }) }) +describe("RTK token reduction summary", () => { + it("keeps retained tokens below source tokens and reports the saved budget", () => { + const summary = summarizeTokenReduction([ + { logicalName: ".codex/sessions/a.jsonl", sourcePath: "/tmp/a", size: 4_000 }, + { logicalName: ".codex/sessions/b.jsonl", sourcePath: "/tmp/b", size: 8_000 } + ]) + + expect(summary).toEqual({ + sourceTokens: 3_000, + retainedTokens: 512, + reducedTokens: 2_488, + reductionPercent: 83 + }) + expect(summary.retainedTokens).toBeLessThanOrEqual(summary.sourceTokens) + expect(formatTokenReduction(summary)).toBe("~3000 -> ~512 tokens (-~2488, 83%)") + }) +}) + describe("CLI parser", () => { it("parses backup options for PR comments", () => { expect(parseArgs(["backup", "--repo", "org/repo", "--pr-number", "42", "--no-comment"])).toEqual({