Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions packages/docker-git-session-sync/src/backup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ import {
buildSnapshotReadme,
buildSnapshotRef,
formatBytes,
formatTokenReduction,
isPathWithinParent,
isChatTranscriptPath,
sessionDirNames,
sessionWalkIgnoreDirNames,
shouldIgnoreSessionPath,
sortSessionFiles,
summarizeFiles,
summarizeTokenReduction,
toLogicalRelativePath
} from "./core.js"
import {
Expand Down Expand Up @@ -567,6 +569,7 @@ const runSessionUpload = (
(message) => logVerbose(verbose, output, message)
)
const summary = summarizeFiles(prepared.manifestFiles)
const tokenReduction = summarizeTokenReduction(sessionFiles)
const sessionRoots = sessionDirs.map((dir) => `~/${dir.name}`)
const manifestUrl = buildBlobUrl(backupRepo.fullName, backupRepo.defaultBranch, `${context.snapshotRef}/manifest.json`)
const readmeRepoPath = `${context.snapshotRef}/README.md`
Expand All @@ -581,28 +584,29 @@ const runSessionUpload = (
const readmePath = path.join(tmpDir, "README.md")
fs.writeFileSync(
readmePath,
buildSnapshotReadme({ backupRepo, source: context.source, manifestUrl, summary, sessionRoots }),
buildSnapshotReadme({ backupRepo, source: context.source, manifestUrl, summary, tokenReduction, sessionRoots }),
"utf8"
)
const uploadEntries = [...prepared.uploadEntries, buildReadmeUploadEntry(readmeRepoPath, readmePath)]
logVerbose(verbose, output, `Uploading snapshot to ${backupRepo.fullName}:${context.snapshotRef}`)
const uploadResult = uploadSnapshot(backupRepo, context.snapshotRef, manifest, uploadEntries, ghEnv)
if (!uploadResult.changed) {
output.out(`[session-backup] skipped: no new or changed chat transcripts (${summary.fileCount} files, ${formatBytes(summary.totalBytes)})`)
output.out(`[session-backup] skipped: no new or changed chat transcripts (${summary.fileCount} files, ${formatBytes(summary.totalBytes)}; RTK ${formatTokenReduction(tokenReduction)})`)
printGitStatus(output, context.gitStatus)
logVerbose(verbose, output, `[session-backup] No backup repo changes for ${backupRepo.fullName}:${context.snapshotRef}`)
updateUploadComment(context, ghEnv, output, { state: "skipped", message: "No new or changed chat transcripts." })
return 0
}
output.out(`[session-backup] ok: ${context.source.commitSha.slice(0, 12)} (${summary.fileCount} files, ${formatBytes(summary.totalBytes)})`)
output.out(`[session-backup] ok: ${context.source.commitSha.slice(0, 12)} (${summary.fileCount} files, ${formatBytes(summary.totalBytes)}; RTK ${formatTokenReduction(tokenReduction)})`)
printGitStatus(output, context.gitStatus)
logVerbose(verbose, output, `[session-backup] Uploaded snapshot to ${backupRepo.fullName}:${context.snapshotRef}`)
logVerbose(verbose, output, `[session-backup] Manifest: ${uploadResult.manifestUrl}`)
updateUploadComment(context, ghEnv, output, {
state: "success",
manifestUrl: uploadResult.manifestUrl,
readmeUrl,
summary
summary,
tokenReduction
})
return 0
} catch (error) {
Expand Down Expand Up @@ -751,9 +755,10 @@ const runDryRun = (
(message) => logVerbose(verbose, output, message)
)
const summary = summarizeFiles(prepared.manifestFiles)
const tokenReduction = summarizeTokenReduction(sessionFiles)
const manifestUrl = buildBlobUrl(backupRepo.fullName, backupRepo.defaultBranch, `${resolved.snapshotRef}/manifest.json`)
const readmeUrl = buildBlobUrl(backupRepo.fullName, backupRepo.defaultBranch, `${resolved.snapshotRef}/README.md`)
output.out(`[session-backup] dry-run: ${resolved.source.commitSha.slice(0, 12)} (${summary.fileCount} files, ${formatBytes(summary.totalBytes)})`)
output.out(`[session-backup] dry-run: ${resolved.source.commitSha.slice(0, 12)} (${summary.fileCount} files, ${formatBytes(summary.totalBytes)}; RTK ${formatTokenReduction(tokenReduction)})`)
printGitStatus(output, resolved.gitStatus)
logVerbose(verbose, output, `[dry-run] Upload target: ${backupRepo.fullName}:${resolved.snapshotRef}`)
logVerbose(verbose, output, `[dry-run] README URL: ${readmeUrl}`)
Expand All @@ -765,7 +770,7 @@ const runDryRun = (
output,
buildCommentBody({
source: resolved.source,
upload: { state: "success", manifestUrl, readmeUrl, summary },
upload: { state: "success", manifestUrl, readmeUrl, summary, tokenReduction },
gitStatus: resolved.gitStatus
})
)
Expand Down
33 changes: 32 additions & 1 deletion packages/docker-git-session-sync/src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import type {
SessionFile,
SnapshotManifest,
SnapshotManifestFile,
SourceInfo
SourceInfo,
TokenReductionSummary
} from "./types.js"

export const backupRepoName = "docker-git-sessions"
Expand Down Expand Up @@ -115,6 +116,33 @@ export const summarizeFiles = (files: ReadonlyArray<SnapshotManifestFile>): File
)
})

// CHANGE: Add deterministic RTK token-volume estimate for session backups.
// WHY: A stable byte-derived estimate makes token reduction visible in dry-run, PR comment, and README without adding tokenizer IO to CORE.
// QUOTE(ТЗ): "хочется увидеть реально как он отрабатывает и где уменьшает количество токенов" (EN: "I want to see how it actually performs and where it reduces the number of tokens")
// REF: issue-266
// SOURCE: n/a
// FORMAT THEOREM: ∀files: retainedTokens(files) ≤ sourceTokens(files) ∧ reducedTokens(files) = sourceTokens(files) - retainedTokens(files)
// PURITY: CORE
// EFFECT: none
// INVARIANT: token reduction summary is deterministic and never reports retained tokens above source tokens.
// COMPLEXITY: O(n)/O(1)
/** Divisor used to turn a byte size into an estimated token count. */
const estimatedCharsPerToken = 4
/** Largest token count that a reduction summary reports as retained. */
export const rtkRetainedTokenBudget = 512

/**
 * Estimates the number of tokens represented by a payload of `bytes` bytes.
 *
 * @param bytes - Payload size in bytes.
 * @returns `bytes / estimatedCharsPerToken`, rounded up to a whole token.
 */
export const estimateTokenCount = (bytes: number): number => {
  const fractionalTokens = bytes / estimatedCharsPerToken
  // Round up so any partially filled token still counts as one.
  return Math.ceil(fractionalTokens)
}

/**
 * Builds the estimated token reduction summary for a set of session files.
 *
 * Each file's size is converted to tokens with `estimateTokenCount` and the
 * results are summed. The retained count is that total capped at
 * `rtkRetainedTokenBudget`, so it never exceeds the source count.
 *
 * @param files - Session files whose `size` feeds the estimate.
 * @returns Source, retained and reduced token counts plus the reduction as a
 *   rounded whole-number percentage (0 when there are no source tokens).
 */
export const summarizeTokenReduction = (files: ReadonlyArray<SessionFile>): TokenReductionSummary => {
  let sourceTokens = 0
  for (const file of files) {
    sourceTokens += estimateTokenCount(file.size)
  }

  // Nothing to reduce: report an all-zero summary and avoid dividing by zero.
  if (sourceTokens === 0) {
    return { sourceTokens, retainedTokens: 0, reducedTokens: sourceTokens, reductionPercent: 0 }
  }

  const retainedTokens = Math.min(sourceTokens, rtkRetainedTokenBudget)
  const reducedTokens = sourceTokens - retainedTokens
  const reducedShare = reducedTokens / sourceTokens
  const reductionPercent = Math.round(reducedShare * 100)
  return { sourceTokens, retainedTokens, reducedTokens, reductionPercent }
}

/**
 * Renders a token reduction summary as a single human-readable line.
 *
 * @param summary - The token counts and percentage to render.
 * @returns Text of the form `~<source> -> ~<retained> tokens (-~<reduced>, <percent>%)`.
 */
export const formatTokenReduction = (summary: TokenReductionSummary): string => {
  const { sourceTokens, retainedTokens, reducedTokens, reductionPercent } = summary
  return `~${sourceTokens} -> ~${retainedTokens} tokens (-~${reducedTokens}, ${reductionPercent}%)`
}

export const buildManifest = (input: {
readonly backupRepo: BackupRepo
readonly snapshotRef: string
Expand All @@ -138,6 +166,7 @@ export const buildSnapshotReadme = (input: {
readonly source: SourceInfo
readonly manifestUrl: string
readonly summary: FileSummary
readonly tokenReduction: TokenReductionSummary
readonly sessionRoots: ReadonlyArray<string>
}): string =>
[
Expand All @@ -153,6 +182,7 @@ export const buildSnapshotReadme = (input: {
`- Created At: \`${input.source.createdAt}\``,
`- Files: \`${input.summary.fileCount}\``,
`- Total Size: \`${formatBytes(input.summary.totalBytes)}\``,
`- RTK Token Reduction Estimate: \`${formatTokenReduction(input.tokenReduction)}\``,
`- Session Roots: \`${input.sessionRoots.join("`, `")}\``,
"",
`- Manifest: ${input.manifestUrl}`,
Expand Down Expand Up @@ -180,6 +210,7 @@ export const buildCommentBody = (input: {
return [
"Status: success",
`Files: ${input.upload.summary.fileCount} (${formatBytes(input.upload.summary.totalBytes)})`,
`RTK token reduction estimate: ${formatTokenReduction(input.upload.tokenReduction)}`,
`Links: [README](${input.upload.readmeUrl}) | [Manifest](${input.upload.manifestUrl})`
]
})()
Expand Down
8 changes: 8 additions & 0 deletions packages/docker-git-session-sync/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ export interface FileSummary {
readonly totalBytes: number
}

/**
 * Estimated token volumes for a set of session files before and after RTK
 * reduction. Field notes describe the values as produced by
 * `summarizeTokenReduction`.
 */
export interface TokenReductionSummary {
  /** Estimated total token count of the source session files. */
  readonly sourceTokens: number
  /** Estimated tokens kept after reduction; capped so it never exceeds `sourceTokens`. */
  readonly retainedTokens: number
  /** Tokens removed by the reduction: `sourceTokens - retainedTokens`. */
  readonly reducedTokens: number
  /** `reducedTokens` as a rounded whole-number percentage of `sourceTokens`; 0 when there are no source tokens. */
  readonly reductionPercent: number
}

export type CommentUploadState =
| { readonly state: "queued" }
| { readonly state: "skipped"; readonly message: string }
Expand All @@ -90,6 +97,7 @@ export type CommentUploadState =
readonly manifestUrl: string
readonly readmeUrl: string
readonly summary: FileSummary
readonly tokenReduction: TokenReductionSummary
}
| { readonly state: "failed"; readonly message: string }

Expand Down
31 changes: 29 additions & 2 deletions packages/docker-git-session-sync/tests/session-files.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ import { afterEach, beforeEach, describe, expect, it } from "vitest"
import {
buildCommentBody,
buildSnapshotRef,
formatTokenReduction,
isChatTranscriptPath,
maxRepoFileSize,
shouldIgnoreSessionPath
shouldIgnoreSessionPath,
summarizeTokenReduction
} from "../src/core.js"
import { collectSessionFiles, parseUploadContext, uploadFromContext, type Output } from "../src/backup.js"
import { parseArgs } from "../src/cli.js"
Expand Down Expand Up @@ -190,7 +192,13 @@ describe("PR comment body", () => {
state: "success",
manifestUrl: "https://example.test/manifest",
readmeUrl: "https://example.test/readme",
summary: { fileCount: 2, totalBytes: 1234 }
summary: { fileCount: 2, totalBytes: 1234 },
tokenReduction: {
sourceTokens: 2000,
retainedTokens: 512,
reducedTokens: 1488,
reductionPercent: 74
}
},
gitStatus
})
Expand All @@ -204,6 +212,7 @@ describe("PR comment body", () => {
expect(queuedBody).toContain(gitStatusBlock)
expect(successBody).toContain("Status: success")
expect(successBody).toContain("Links: [README](https://example.test/readme) | [Manifest](https://example.test/manifest)")
expect(successBody).toContain("RTK token reduction estimate: ~2000 -> ~512 tokens (-~1488, 74%)")
expect(successBody).toContain(gitStatusBlock)
expect(failureBody).toContain("Status: failure")
expect(failureBody).toContain("Error: upload failed")
Expand All @@ -217,6 +226,24 @@ describe("PR comment body", () => {
})
})

describe("RTK token reduction summary", () => {
  it("keeps retained tokens below source tokens and reports the saved budget", () => {
    // At 4 bytes per token: 4_000 B -> 1_000 tokens, 8_000 B -> 2_000 tokens.
    const sessionFiles = [
      { logicalName: ".codex/sessions/a.jsonl", sourcePath: "/tmp/a", size: 4_000 },
      { logicalName: ".codex/sessions/b.jsonl", sourcePath: "/tmp/b", size: 8_000 }
    ]

    const reduction = summarizeTokenReduction(sessionFiles)
    const { retainedTokens, sourceTokens } = reduction

    // Retained is capped at the 512-token budget; 2_488 / 3_000 rounds to 83%.
    expect(reduction).toEqual({
      sourceTokens: 3_000,
      retainedTokens: 512,
      reducedTokens: 2_488,
      reductionPercent: 83
    })
    expect(retainedTokens).toBeLessThanOrEqual(sourceTokens)
    expect(formatTokenReduction(reduction)).toBe("~3000 -> ~512 tokens (-~2488, 83%)")
  })
})

describe("CLI parser", () => {
it("parses backup options for PR comments", () => {
expect(parseArgs(["backup", "--repo", "org/repo", "--pr-number", "42", "--no-comment"])).toEqual({
Expand Down