Fix model ID and clean up parallelism comments

jahooma · claude · jahooma · commit af14ae3fac47 · 2026-03-30T13:08:45.000-07:00
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/evalbuff/src/docs-optimizer.ts b/evalbuff/src/docs-optimizer.ts
@@ -340,13 +340,8 @@ export function revertDocEdit(
 /**
  * Compare scores to determine if a doc edit improved things.
  *
- * With parallelism=1, score variance is very high (often 3+ points on
- * the same task). To avoid rejecting good docs due to noise:
- * - Require only small improvement to accept (0.3 threshold)
- * - Require large decline to reject (1.5 threshold) — benefit of the doubt
- *
- * With higher parallelism, averages are more stable so we can use
- * tighter thresholds.
+ * With parallelism=5, averages are reasonably stable. A 0.3 threshold
+ * catches real improvements without being too sensitive to noise.
  */
 export function compareScores(
   oldScore: number,
diff --git a/evalbuff/src/llm.ts b/evalbuff/src/llm.ts
@@ -12,7 +12,7 @@ import { generateText } from 'ai'
 
 const anthropic = createAnthropic()
 
-const DEFAULT_MODEL = 'claude-sonnet-4-6-20250415'
+const DEFAULT_MODEL = 'claude-sonnet-4-6'
 
 /**
  * Generate a task prompt from a commit diff using the LLM API directly.
diff --git a/evalbuff/src/run-evalbuff.ts b/evalbuff/src/run-evalbuff.ts
@@ -473,8 +473,7 @@ async function improveDocs(opts: {
 
     if (comparison === 'improved' || comparison === 'same') {
       // 'improved' = clear signal the doc helps
-      // 'same' = within noise range — keep it (benefit of the doubt,
-      //   especially at low parallelism where variance is high)
+      // 'same' = within noise range — keep it (benefit of the doubt)
       const reason = comparison === 'improved' ? 'score improved' : 'within noise range, keeping'
       console.log(`  Keeping doc: ${docSuggestion.suggestedDocPath} (${reason})`)
       docsKept.push({