diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/cli.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/cli.rs
index e1bc6f32..bae29a2e 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark/cli.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/cli.rs
@@ -51,6 +51,12 @@ pub(super) struct RunArgs {
 	/// Skip loading the real-world external adapter coverage manifest.
 	#[arg(long)]
 	pub(super) skip_external_adapter_manifest: bool,
+	/// Optional same-corpus quantitative product manifest to merge into the report.
+	#[arg(long, value_name = "FILE")]
+	pub(super) quantitative_product_manifest: Option<PathBuf>,
+	/// Optional audit manifest proving the current quantitative row's held-out/leakage gates.
+	#[arg(long, value_name = "FILE")]
+	pub(super) quantitative_audit_manifest: Option<PathBuf>,
 }
 
 #[derive(Debug, Parser)]
@@ -63,9 +69,69 @@ pub(super) struct PublishArgs {
 	pub(super) out: Option<PathBuf>,
 }
 
+#[derive(Debug, Parser)]
+pub(super) struct ExportQuantitativeProductManifestArgs { // options of `Command::ExportQuantitativeProductManifest`
+	/// Generated real_world_job JSON report to export.
+	#[arg(long, value_name = "FILE", default_value = DEFAULT_REPORT_PATH)]
+	pub(super) report: PathBuf,
+	/// Write product manifest JSON to this file. Omit to print to stdout.
+	#[arg(long, value_name = "FILE")]
+	pub(super) out: Option<PathBuf>,
+	/// Stable manifest id. Defaults to <run_id>-quantitative-product-manifest.
+	#[arg(long)]
+	pub(super) manifest_id: Option<String>,
+	/// Override the exported product name.
+	#[arg(long)]
+	pub(super) product: Option<String>,
+	/// Override the exported adapter id.
+	#[arg(long)]
+	pub(super) adapter_id: Option<String>,
+	/// Override the exported adapter name.
+	#[arg(long)]
+	pub(super) adapter_name: Option<String>,
+}
+
+#[derive(Debug, Parser)]
+pub(super) struct ExportQuantitativeAuditManifestArgs { // options of `Command::ExportQuantitativeAuditManifest`
+	/// Fixture file or directory containing current product-runtime real_world_job outputs.
+	#[arg(long, value_name = "PATH", default_value = DEFAULT_FIXTURE_PATH)]
+	pub(super) fixtures: PathBuf,
+	/// Write audit manifest JSON to this file. Omit to print to stdout.
+	#[arg(long, value_name = "FILE")]
+	pub(super) out: Option<PathBuf>,
+	/// Stable run id that the audit manifest is allowed to attest.
+	#[arg(long, default_value = DEFAULT_RUN_ID)]
+	pub(super) run_id: String,
+	/// Stable manifest id. Defaults to <run_id>-quantitative-audit-manifest.
+	#[arg(long)]
+	pub(super) manifest_id: Option<String>,
+	/// Product name for the current row.
+	#[arg(long, default_value = "ELF")]
+	pub(super) product: String,
+	/// Adapter id for the current row.
+	#[arg(long, default_value = DEFAULT_ADAPTER_ID)]
+	pub(super) adapter_id: String,
+	/// Mark the current row as held-out only when query ids were locked before runtime.
+	#[arg(long)]
+	pub(super) held_out: bool,
+	/// Mark the current row as leakage audited only when runtime inputs excluded answers/qrels.
+	#[arg(long)]
+	pub(super) leakage_audited: bool,
+	/// Audit control string. Repeat for multiple controls.
+	#[arg(long = "control")] // the flag is `--control`, not `--controls`
+	pub(super) controls: Vec<String>,
+	/// Claim boundary recorded in the audit manifest.
+	#[arg(long)]
+	pub(super) claim_boundary: Option<String>,
+}
+
 #[derive(Debug, Subcommand)]
 #[command(rename_all = "kebab")]
 pub(super) enum Command {
+	/// Export a quantitative audit manifest for the current fixture set.
+	ExportQuantitativeAuditManifest(ExportQuantitativeAuditManifestArgs),
+	/// Export the primary quantitative row as a reusable product manifest.
+	ExportQuantitativeProductManifest(ExportQuantitativeProductManifestArgs),
 	/// Parse and score real_world_job fixtures, then emit a JSON report.
 	Run(RunArgs),
 	/// Render Markdown from a generated real_world_job JSON report.
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/commands.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/commands.rs
index 91dc476f..a151e6da 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark/commands.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/commands.rs
@@ -1,7 +1,8 @@
 use crate::{
-	AdapterReport, BTreeSet, CaptureIntegrationReport, CorpusProfile, OffsetDateTime, Path,
-	PathBuf, PrivateCorpusRedaction, PublishArgs, REPORT_SCHEMA, RealWorldJob, RealWorldReport,
-	Result, Rfc3339, RunArgs, TypedStatus, VERSION, eyre, fs,
+	AdapterReport, BTreeSet, CaptureIntegrationReport, CorpusProfile,
+	ExportQuantitativeAuditManifestArgs, ExportQuantitativeProductManifestArgs, OffsetDateTime,
+	Path, PathBuf, PrivateCorpusRedaction, PublishArgs, QuantitativeReportInput, REPORT_SCHEMA,
+	RealWorldJob, RealWorldReport, Result, Rfc3339, RunArgs, TypedStatus, VERSION, eyre, fs,
 };
 
 pub(super) fn run_command(args: RunArgs) -> Result<()> {
@@ -20,6 +21,27 @@ pub(super) fn publish_command(args: PublishArgs) -> Result<()> {
 	write_or_print(args.out.as_deref(), markdown.as_str())
 }
 
+/// Reads the report JSON at `args.report` and writes or prints its product manifest as JSON.
+pub(super) fn export_quantitative_product_manifest_command(
+	args: ExportQuantitativeProductManifestArgs,
+) -> Result<()> {
+	let report_text = fs::read_to_string(&args.report)?;
+	let parsed: RealWorldReport = serde_json::from_str(report_text.as_str())?;
+	let product_manifest = crate::quantitative_product_manifest_from_report(&parsed, &args)?;
+	let rendered = serde_json::to_string_pretty(&product_manifest)?;
+	write_or_print(args.out.as_deref(), rendered.as_str())
+}
+
+/// Loads jobs from `args.fixtures` and writes or prints their audit manifest as JSON.
+pub(super) fn export_quantitative_audit_manifest_command(
+	args: ExportQuantitativeAuditManifestArgs,
+) -> Result<()> {
+	let fixture_jobs = load_jobs(&args.fixtures)?;
+	let audit_manifest = crate::quantitative_audit_manifest_from_jobs(&fixture_jobs, &args)?;
+	let rendered = serde_json::to_string_pretty(&audit_manifest)?;
+	write_or_print(args.out.as_deref(), rendered.as_str())
+}
+
 fn load_jobs(path: &Path) -> Result<Vec<RealWorldJob>> {
 	let paths = fixture_paths(path)?;
 	let mut jobs = Vec::with_capacity(paths.len());
@@ -103,16 +125,29 @@ fn build_report(jobs: &[RealWorldJob], args: &RunArgs) -> Result<RealWorldReport
 	)?;
 	let scoreboard = crate::scoreboard_report(jobs, &job_reports, &summary, &external_adapters);
 	let operational_evidence = crate::operational_evidence_report(jobs, &job_reports);
+	let adapter = adapter_report(args)?;
+	let generated_at = OffsetDateTime::now_utc().format(&Rfc3339)?;
+	let quantitative_scoreboard = crate::quantitative_scoreboard_report(QuantitativeReportInput {
+		run_id: args.run_id.as_str(),
+		generated_at: generated_at.as_str(),
+		adapter: &adapter,
+		source_jobs: jobs,
+		jobs: &job_reports,
+		summary: &summary,
+		product_manifest_path: args.quantitative_product_manifest.as_deref(),
+		audit_manifest_path: args.quantitative_audit_manifest.as_deref(),
+	})?;
 
 	Ok(RealWorldReport {
 		schema: REPORT_SCHEMA.to_string(),
 		run_id: args.run_id.clone(),
-		generated_at: OffsetDateTime::now_utc().format(&Rfc3339)?,
+		generated_at,
 		runner_version: VERSION.to_string(),
 		corpus_profile: corpus_profile(jobs),
-		adapter: adapter_report(args)?,
+		adapter,
 		scoreboard,
 		operational_evidence,
+		quantitative_scoreboard,
 		external_adapters,
 		capture_integration: capture_integration_report(jobs),
 		summary,
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/fixtures.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/fixtures.rs
index 32a5eb13..ad8dd669 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark/fixtures.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/fixtures.rs
@@ -87,6 +87,8 @@ pub(super) struct ExpectedAnswer {
 	pub(super) must_not_include: Vec<String>,
 	#[serde(default)]
 	pub(super) evidence_links: BTreeMap<String, EvidenceLink>,
+	#[serde(default)]
+	pub(super) relevance_judgments: Vec<RelevanceJudgment>,
 	pub(super) answer_type: String,
 	#[serde(default)]
 	pub(super) accepted_alternates: Vec<Value>,
@@ -96,6 +98,13 @@ pub(super) struct ExpectedAnswer {
 	pub(super) requires_refusal: bool,
 }
 
+#[derive(Debug, Deserialize)]
+pub(super) struct RelevanceJudgment { // one entry of `ExpectedAnswer::relevance_judgments`
+	pub(super) evidence_id: String,
+	#[serde(default = "default_relevance_grade")] // `grade` falls back to 1.0 when absent
+	pub(super) grade: f64,
+}
+
 #[derive(Debug, Deserialize)]
 pub(super) struct RequiredEvidence {
 	pub(super) evidence_id: String,
@@ -250,3 +259,7 @@ pub(super) struct AdapterResponse {
 	pub(super) answer: ProducedAnswer,
 	pub(super) consolidation: Option<ConsolidationFixture>,
 }
+
+fn default_relevance_grade() -> f64 {
+	1.0_f64 // serde fallback used when a relevance judgment omits `grade`
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/main.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/main.rs
index 9815886f..dc77d8f0 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark/main.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/main.rs
@@ -16,6 +16,8 @@ mod job_reports;
 mod markdown;
 mod operational;
 mod operational_reports;
+mod quantitative;
+mod quantitative_reports;
 mod recovery;
 mod report_root;
 mod scoreboard;
@@ -49,7 +51,10 @@ use artifacts::{
 	WorkJournalNextStepArtifact, WorkJournalReadbackArtifact, WorkJournalRejectedOptionArtifact,
 	WorkJournalWhereStoppedArtifact,
 };
-use cli::{Args, Command, PublishArgs, RunArgs};
+use cli::{
+	Args, Command, ExportQuantitativeAuditManifestArgs, ExportQuantitativeProductManifestArgs,
+	PublishArgs, RunArgs,
+};
 use diagnostic_reports::{
 	OperatorDebugEvidence, OperatorUxGap, TraceExplainability, TraceStageExplainability,
 };
@@ -84,6 +89,15 @@ use operational_reports::{
 	OperationalEvidenceReport, OperationalEvidenceTierReport, OperationalLatencyReport,
 	OperationalResourceSummary,
 };
+use quantitative::{
+	QuantitativeReportInput, quantitative_audit_manifest_from_jobs,
+	quantitative_product_manifest_from_report, quantitative_scoreboard_report,
+};
+use quantitative_reports::{
+	QuantitativeAuditArtifact, QuantitativeAuditManifest, QuantitativeBenchmarkControls,
+	QuantitativeBenchmarkReport, QuantitativeBenchmarkRow, QuantitativeConfidenceInterval,
+	QuantitativePerQueryRow, QuantitativeProductManifest,
+};
 use report_root::RealWorldReport;
 use scoreboard::scoreboard_report;
 use scoreboard_reports::{
@@ -167,6 +181,10 @@ fn main() -> Result<()> {
 	color_eyre::install()?;
 
 	match Args::parse().command {
+		Command::ExportQuantitativeAuditManifest(args) =>
+			commands::export_quantitative_audit_manifest_command(args),
+		Command::ExportQuantitativeProductManifest(args) =>
+			commands::export_quantitative_product_manifest_command(args),
 		Command::Run(args) => commands::run_command(args),
 		Command::Publish(args) => commands::publish_command(args),
 	}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/markdown.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/markdown.rs
index 36f9dba6..68bcb12a 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark/markdown.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/markdown.rs
@@ -6,6 +6,7 @@ mod followups;
 mod header;
 mod jobs;
 mod operational;
+mod quantitative;
 mod scoreboard;
 mod trace;
 
@@ -16,9 +17,9 @@ use crate::{
 	AdapterScenarioJudgment, AdapterSource, AdapterStatusCounts, AdapterSuiteCoverage, CostReport,
 	DEFAULT_ADAPTER_BEHAVIOR, EvolutionJobReport, ExternalAdapterReport, KnowledgeSummary,
 	MemorySummaryReport, OperatorDebugEvidence, OperatorUxGap, ProactiveBriefSummaryReport,
-	RealWorldReport, ReportSummary, SCOREBOARD_EVIDENCE_CLASSES, ScenarioOutcomeCounts,
-	ScenarioPositionCounts, ScheduledMemorySummaryReport, ScoreboardReport, ScoreboardRow,
-	TraceExplainability, WorkContinuitySummaryReport,
+	QuantitativeBenchmarkRow, RealWorldReport, ReportSummary, SCOREBOARD_EVIDENCE_CLASSES,
+	ScenarioOutcomeCounts, ScenarioPositionCounts, ScheduledMemorySummaryReport, ScoreboardReport,
+	ScoreboardRow, TraceExplainability, WorkContinuitySummaryReport,
 	formatting::{
 		adapter_status_str, round3, scenario_comparison_outcome_str, status_str,
 		trace_failure_stage,
@@ -32,6 +33,7 @@ pub(super) fn render_markdown(report: &RealWorldReport, report_path: &Path) -> S
 
 	self::header::render_markdown_header(&mut out, report, report_path.as_str());
 	self::scoreboard::render_markdown_scoreboard(&mut out, report);
+	self::quantitative::render_markdown_quantitative_scoreboard(&mut out, report);
 	self::operational::render_markdown_operational_evidence(&mut out, report);
 	self::adapters::render_markdown_external_adapters(&mut out, report);
 	self::adapters::render_markdown_capture_integration(&mut out, report);
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/markdown/quantitative.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/markdown/quantitative.rs
new file mode 100644
index 00000000..1c3ec195
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/markdown/quantitative.rs
@@ -0,0 +1,84 @@
+use crate::markdown::{self, QuantitativeBenchmarkRow, RealWorldReport};
+
+/// Appends the "Quantitative Benchmark Report" Markdown section for `report` to `out`.
+///
+/// Emits nothing when the scoreboard schema string is empty. Otherwise writes the summary
+/// bullets, one table row per scoreboard row, and the optional "Metrics not encoded" list.
+pub(super) fn render_markdown_quantitative_scoreboard(out: &mut String, report: &RealWorldReport) {
+	let board = &report.quantitative_scoreboard;
+
+	if board.schema.is_empty() {
+		return;
+	}
+
+	let controls = &board.controls;
+	let k_values = board.k_values.iter().map(usize::to_string).collect::<Vec<_>>().join(", ");
+
+	out.push_str("## Quantitative Benchmark Report\n\n");
+	out.push_str(concat!(
+		"Quantitative rows expose ranking metrics and their claim controls. ",
+		"Fixture-backed rows verify benchmark mechanics; leaderboard claims require explicit qrels, ",
+		"enough queries, and leakage controls.\n\n"
+	));
+	out.push_str(&format!("- Schema: `{}`\n", markdown::md_inline(board.schema.as_str())));
+	out.push_str(&format!("- Corpus: `{}`\n", markdown::md_inline(board.corpus_id.as_str())));
+	out.push_str(&format!("- k values: `{}`\n", markdown::md_inline(k_values.as_str())));
+	out.push_str(&format!(
+		"- Ranking queries: `{}` of `{}`; explicit-qrel queries: `{}`\n",
+		controls.current_ranking_query_count,
+		controls.current_query_count,
+		controls.current_explicit_qrel_query_count
+	));
+	out.push_str(&format!(
+		"- Leaderboard claim allowed: `{}`\n",
+		controls.leaderboard_claim_allowed
+	));
+	out.push_str(&format!(
+		"- Claim boundary: {}\n\n",
+		markdown::md_cell(board.claim_boundary.as_str())
+	));
+	out.push_str(concat!(
+		"| Product | State | Evidence | Qrels | Sample | Ranking Queries | Recall@5 | ",
+		"Precision@5 | MRR | nDCG@5 | AP | Leaderboard |\n",
+		"| --- | --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- |\n"
+	));
+
+	for row in &board.rows {
+		let metric = |name: &str| quantitative_metric(row, name);
+
+		out.push_str(&format!(
+			"| {} | `{}` | `{}` | `{}` | `{}` | `{}` | {} | {} | {} | {} | {} | `{}` |\n",
+			markdown::md_cell(row.product.as_str()),
+			markdown::md_inline(row.result_state.as_str()),
+			markdown::md_inline(row.evidence_class.as_str()),
+			markdown::md_inline(row.qrel_source.as_str()),
+			row.sample_size,
+			row.ranking_query_count,
+			metric("recall_at_5"),
+			metric("precision_at_5"),
+			metric("mrr"),
+			metric("ndcg_at_5"),
+			metric("average_precision"),
+			row.leaderboard_eligible
+		));
+	}
+
+	if board.metrics_not_encoded.is_empty() {
+		return;
+	}
+
+	out.push_str("\nMetrics not encoded:\n");
+
+	for name in &board.metrics_not_encoded {
+		out.push_str(&format!("- `{}`\n", markdown::md_inline(name.as_str())));
+	}
+
+	out.push('\n');
+}
+
+/// Formats the named metric of `row` as inline code; a missing or `None` value gives `n/a`.
+fn quantitative_metric(row: &QuantitativeBenchmarkRow, metric: &str) -> String {
+	let Some(Some(score)) = row.metrics.get(metric) else { return "`n/a`".to_string() };
+
+	format!("`{}`", markdown::round3(*score))
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative.rs
new file mode 100644
index 00000000..4032c770
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative.rs
@@ -0,0 +1,118 @@
+mod audit_manifest;
+mod metrics;
+mod product_manifest;
+mod report;
+
+pub(super) use self::{
+	audit_manifest::quantitative_audit_manifest_from_jobs,
+	product_manifest::quantitative_product_manifest_from_report,
+	report::{QuantitativeReportInput, quantitative_scoreboard_report},
+};
+
+use self::audit_manifest::QuantitativeAuditEvidence;
+use crate::{AdapterReport, BTreeSet, JobReport, RealWorldJob, ReportSummary};
+
+const QUANTITATIVE_SCOREBOARD_SCHEMA: &str = "elf.agent_memory_quantitative_benchmark/v1";
+const QUANTITATIVE_PRODUCT_MANIFEST_SCHEMA: &str =
+	"elf.agent_memory_quantitative_product_manifest/v1";
+const QUANTITATIVE_AUDIT_MANIFEST_SCHEMA: &str = "elf.agent_memory_quantitative_audit_manifest/v1";
+const REQUIRED_HELD_OUT_AUDIT_CONTROL: &str = "query_ids_locked_before_product_runtime";
+const REQUIRED_QREL_LEAKAGE_AUDIT_CONTROL: &str =
+	"product_runtime_did_not_receive_expected_answers_or_qrels";
+const REQUIRED_CANDIDATE_LEAKAGE_AUDIT_CONTROL: &str =
+	"ranked_candidates_emitted_by_product_runtime";
+const QUANTITATIVE_K_VALUES: &[usize] = &[1, 3, 5, 10]; // presumably the @k cutoffs for ranking metrics; confirm in `metrics`
+const MIN_LEADERBOARD_QUERY_COUNT: usize = 30; // minimum `sample_size` for a leaderboard-eligible row
+const WILSON_95_Z: f64 = 1.959963984540054; // two-sided 95% standard-normal quantile
+const QUANTITATIVE_ROW_CLAIM_BOUNDARY: &str = concat!(
+	"Quantitative metrics are bounded to this generated report. ",
+	"Fixture-backed rows prove benchmark mechanics, not product-runtime or leaderboard claims."
+);
+
+/// Lists the metric names this report does not encode: two fixed entries plus at most one
+/// import-related entry chosen from the imported row and per-query counts.
+fn quantitative_metrics_not_encoded(
+	imported_row_count: usize,
+	imported_per_query_count: usize,
+) -> Vec<String> {
+	let mut not_encoded = vec!["paired_significance", "audit_manifest_validation"];
+
+	match (imported_row_count, imported_per_query_count) {
+		(0, _) => not_encoded.push("external_product_manifest_import"),
+		(_, 0) => not_encoded.push("imported_product_per_query_rows"),
+		_ => {},
+	}
+
+	not_encoded.into_iter().map(String::from).collect()
+}
+
+/// Returns the one corpus id shared by `source_jobs`; zero or several ids give `"mixed"`.
+fn quantitative_corpus_id(source_jobs: &[RealWorldJob]) -> String {
+	let distinct: BTreeSet<&str> =
+		source_jobs.iter().map(|job| job.corpus.corpus_id.as_str()).collect();
+	match (distinct.len(), distinct.iter().next()) {
+		(1, Some(only)) => (*only).to_string(),
+		_ => "mixed".to_string(),
+	}
+}
+
+/// Returns the one suite id shared by `jobs`; zero or several suite ids give `"mixed"`.
+fn quantitative_suite_id(jobs: &[JobReport]) -> String {
+	let distinct: BTreeSet<&str> = jobs.iter().map(|job| job.suite_id.as_str()).collect();
+	let mut remaining = distinct.into_iter();
+	match (remaining.next(), remaining.next()) {
+		(Some(only), None) => only.to_string(),
+		_ => "mixed".to_string(),
+	}
+}
+
+/// Maps the summary counters to one result-state label. Counters are checked in a fixed
+/// order (unsupported claim, wrong result, incomplete, blocked, not encoded) and the first
+/// one greater than zero wins; `"pass"` is returned when none of them is.
+fn quantitative_result_state(summary: &ReportSummary) -> &'static str {
+	match summary {
+		s if s.unsupported_claim > 0 => "unsupported_claim",
+		s if s.wrong_result > 0 => "wrong_result",
+		s if s.incomplete > 0 => "incomplete",
+		s if s.blocked > 0 => "blocked",
+		s if s.not_encoded > 0 => "not_encoded",
+		_ => "pass",
+	}
+}
+
+/// Classifies the evidence behind the current row. The adapter behavior and the jobs'
+/// operational evidence tiers are checked in a fixed order; the fallback is fixture-backed.
+fn quantitative_evidence_class(adapter: &AdapterReport, jobs: &[JobReport]) -> &'static str {
+	let has_tier = |tier: &str| jobs.iter().any(|job| job.operational_evidence_tier == tier);
+
+	match adapter.behavior.as_str() {
+		"live_real_world_adapter" => "live_real_world",
+		_ if has_tier("private_corpus") => "private_corpus",
+		_ if has_tier("provider_backed") => "provider_backed",
+		behavior if behavior.contains("public_proxy") => "public_proxy",
+		_ => "fixture_backed",
+	}
+}
+
+/// Decides whether a row may be used for leaderboard claims. Every gate must hold: comparable
+/// metrics, live real-world evidence, enough queries that all rank against explicit qrels,
+/// and an audit manifest with a non-blank id attesting held-out and leakage-audited status.
+fn quantitative_row_leaderboard_eligible(
+	evidence_class: &str,
+	sample_size: usize,
+	ranking_query_count: usize,
+	explicit_qrel_query_count: usize,
+	metric_comparable: bool,
+	audit_evidence: &QuantitativeAuditEvidence,
+) -> bool {
+	let has_manifest_id =
+		audit_evidence.audit_manifest_id.as_deref().is_some_and(|id| !id.trim().is_empty());
+	let counts_aligned = sample_size >= MIN_LEADERBOARD_QUERY_COUNT
+		&& ranking_query_count == sample_size
+		&& explicit_qrel_query_count == ranking_query_count;
+	let audited = audit_evidence.held_out && audit_evidence.leakage_audited && has_manifest_id;
+
+	metric_comparable && evidence_class == "live_real_world" && counts_aligned && audited
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest.rs
new file mode 100644
index 00000000..01f7e463
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest.rs
@@ -0,0 +1,31 @@
+mod artifacts;
+mod evidence;
+mod export;
+mod validation;
+
+pub(crate) use self::export::quantitative_audit_manifest_from_jobs;
+
+use crate::{Path, RealWorldJob, Result};
+
+pub(super) struct QuantitativeAuditContext<'a> { // borrowed inputs handed to audit-manifest validation
+	pub(super) run_id: &'a str,
+	pub(super) corpus_id: &'a str,
+	pub(super) product: &'a str,
+	pub(super) adapter_id: &'a str,
+	pub(super) source_jobs: &'a [RealWorldJob],
+	pub(super) ranking_query_count: usize,
+	pub(super) explicit_qrel_query_count: usize,
+}
+
+pub(super) struct QuantitativeAuditEvidence { // audit gates read by the leaderboard eligibility check
+	pub(super) held_out: bool,
+	pub(super) leakage_audited: bool,
+	pub(super) audit_manifest_id: Option<String>, // `None` when no audit manifest path was supplied
+}
+
+pub(super) fn quantitative_audit_evidence(
+	path: Option<&Path>,
+	context: QuantitativeAuditContext<'_>,
+) -> Result<QuantitativeAuditEvidence> {
+	evidence::quantitative_audit_evidence(path, context) // forwards unchanged to the `evidence` submodule
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts.rs
new file mode 100644
index 00000000..855af455
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts.rs
@@ -0,0 +1,8 @@
+mod digest;
+mod paths;
+mod validation;
+
+pub(super) use self::{
+	digest::fixture_path_digest, paths::audit_artifact_display_path,
+	validation::validate_quantitative_audit_artifacts,
+};
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/digest.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/digest.rs
new file mode 100644
index 00000000..d87860d9
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/digest.rs
@@ -0,0 +1,39 @@
+mod paths;
+
+use crate::{Path, Result, fs};
+
+/// Computes a BLAKE3 hex digest for a fixture file or a fixture directory.
+///
+/// A single file is hashed under its file name, falling back to `"fixture"` when the name
+/// is missing or not valid UTF-8. Any other path is expanded with `audit_fixture_paths`,
+/// and each listed file is hashed under its `/`-separated path relative to `path` (or its
+/// full path if it is not below `path`).
+///
+/// Errors from listing or reading files are returned to the caller.
+pub(in crate::quantitative::audit_manifest) fn fixture_path_digest(path: &Path) -> Result<String> {
+	let mut hasher = blake3::Hasher::new();
+
+	if path.is_file() {
+		let label = path.file_name().and_then(|name| name.to_str()).unwrap_or("fixture");
+
+		hash_fixture_file(path, label, &mut hasher)?;
+	} else {
+		for fixture in paths::audit_fixture_paths(path)? {
+			let shown = fixture.strip_prefix(path).unwrap_or(fixture.as_path());
+			let label = shown.to_string_lossy().replace('\\', "/");
+
+			hash_fixture_file(fixture.as_path(), label.as_str(), &mut hasher)?;
+		}
+	}
+
+	Ok(hasher.finalize().to_hex().to_string())
+}
+
+/// Feeds `logical_path`, a NUL byte, the bytes of the file at `path`, and a NUL into `hasher`.
+fn hash_fixture_file(path: &Path, logical_path: &str, hasher: &mut blake3::Hasher) -> Result<()> {
+	let nul: &[u8] = b"\0";
+
+	hasher.update(logical_path.as_bytes()).update(nul).update(&fs::read(path)?).update(nul);
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/digest/paths.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/digest/paths.rs
new file mode 100644
index 00000000..a7ba276c
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/digest/paths.rs
@@ -0,0 +1,31 @@
+use crate::{Path, PathBuf, Result, fs};
+
+/// Collects fixture paths under `path` via `collect_audit_fixture_paths`, in sorted order.
+pub(super) fn audit_fixture_paths(path: &Path) -> Result<Vec<PathBuf>> {
+	let mut collected = Vec::new();
+
+	collect_audit_fixture_paths(path, &mut collected)?;
+	collected.sort();
+
+	Ok(collected)
+}
+
+/// Gathers `path` itself when it is a file, else every `.json` file in the tree below it.
+fn collect_audit_fixture_paths(path: &Path, paths: &mut Vec<PathBuf>) -> Result<()> {
+	if path.is_file() {
+		paths.push(path.to_path_buf());
+		return Ok(());
+	}
+	for entry in fs::read_dir(path)? {
+		let child = entry?.path();
+		let is_json = child.extension().is_some_and(|ext| ext.to_str() == Some("json"));
+
+		match (child.is_dir(), is_json) {
+			(true, _) => collect_audit_fixture_paths(child.as_path(), paths)?,
+			(false, true) => paths.push(child),
+			(false, false) => {},
+		}
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/paths.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/paths.rs
new file mode 100644
index 00000000..3dd15d54
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/paths.rs
@@ -0,0 +1,35 @@
+use std::env;
+
+use crate::{Path, PathBuf};
+
+/// Renders `path` with `/` separators. An absolute path that lies under the current
+/// directory is shown relative to it; any other path is shown as given. The current
+/// directory is only queried for absolute paths.
+pub(in crate::quantitative::audit_manifest) fn audit_artifact_display_path(path: &Path) -> String {
+	let cwd = if path.is_absolute() { env::current_dir().ok() } else { None };
+	let shown = match cwd.as_deref() {
+		Some(base) => path.strip_prefix(base).unwrap_or(path),
+		None => path,
+	};
+
+	shown.to_string_lossy().replace('\\', "/")
+}
+
+/// Resolves a manifest-recorded artifact path: absolute paths are returned unchanged, and
+/// a relative path is tried under the current directory, then (if that does not exist)
+/// under the parent of `manifest_path` when it has one.
+pub(super) fn resolve_quantitative_audit_artifact_path(
+	manifest_path: &Path,
+	artifact_path: &str,
+) -> PathBuf {
+	let recorded = Path::new(artifact_path);
+	if recorded.is_absolute() {
+		return recorded.to_path_buf();
+	}
+	let from_cwd =
+		env::current_dir().map_or_else(|_| recorded.to_path_buf(), |cwd| cwd.join(recorded));
+	match manifest_path.parent() {
+		Some(parent) if !from_cwd.exists() => parent.join(recorded),
+		_ => from_cwd,
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/validation.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/validation.rs
new file mode 100644
index 00000000..21c5e7bb
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/validation.rs
@@ -0,0 +1,20 @@
+mod digest;
+mod fields;
+
+use crate::{Path, QuantitativeAuditManifest, Result, eyre};
+
+/// Requires at least one artifact in `manifest`, then checks each artifact's fields and
+/// digest in order, stopping at the first failure. `path` is used in error messages.
+pub(in crate::quantitative::audit_manifest) fn validate_quantitative_audit_artifacts(
+	manifest: &QuantitativeAuditManifest,
+	path: &Path,
+) -> Result<()> {
+	if manifest.artifacts.is_empty() {
+		return Err(eyre::eyre!("{} has no quantitative audit artifacts.", path.display()));
+	}
+
+	manifest.artifacts.iter().try_for_each(|artifact| {
+		fields::validate_audit_artifact_fields(path, artifact)?;
+		digest::validate_audit_artifact_digest(path, artifact)
+	})
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/validation/digest.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/validation/digest.rs
new file mode 100644
index 00000000..e6af0f61
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/validation/digest.rs
@@ -0,0 +1,33 @@
+use crate::{
+	Path, QuantitativeAuditArtifact, Result, eyre,
+	quantitative::audit_manifest::artifacts::{digest, paths},
+};
+
+/// Recomputes one artifact's digest and compares it with the manifest entry, ignoring ASCII
+/// case: field validation accepts upper-case hex, but the recomputed digest is lower-case.
+pub(super) fn validate_audit_artifact_digest(
+	path: &Path,
+	artifact: &QuantitativeAuditArtifact,
+) -> Result<()> {
+	let artifact_path =
+		paths::resolve_quantitative_audit_artifact_path(path, artifact.path.as_str());
+	let actual = digest::fixture_path_digest(artifact_path.as_path()).map_err(|err| {
+		eyre::eyre!(
+			"{} artifact {} could not be digested at {}: {err}",
+			path.display(),
+			artifact.role,
+			artifact_path.display()
+		)
+	})?;
+	if !actual.eq_ignore_ascii_case(artifact.sha256.as_str()) {
+		return Err(eyre::eyre!(
+			"{} artifact {} sha256 mismatch for {}: manifest {}, actual {}.",
+			path.display(),
+			artifact.role,
+			artifact_path.display(),
+			artifact.sha256,
+			actual
+		));
+	}
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/validation/fields.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/validation/fields.rs
new file mode 100644
index 00000000..af6c149c
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/artifacts/validation/fields.rs
@@ -0,0 +1,26 @@
+use crate::{Path, QuantitativeAuditArtifact, Result, eyre};
+
+/// Rejects an artifact with a blank role, path or digest, or a digest that is not 64 hex digits.
+pub(super) fn validate_audit_artifact_fields(
+	path: &Path,
+	artifact: &QuantitativeAuditArtifact,
+) -> Result<()> {
+	let required = [artifact.role.as_str(), artifact.path.as_str(), artifact.sha256.as_str()];
+	if required.iter().any(|field| field.trim().is_empty()) {
+		return Err(eyre::eyre!(
+			"{} has an incomplete quantitative audit artifact.",
+			path.display()
+		));
+	}
+	let digest = artifact.sha256.as_str();
+	if digest.len() != 64 || digest.chars().any(|ch| !ch.is_ascii_hexdigit()) {
+		return Err(eyre::eyre!(
+			"{} artifact {} has invalid sha256 digest {}.",
+			path.display(),
+			artifact.role,
+			artifact.sha256
+		));
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/evidence.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/evidence.rs
new file mode 100644
index 00000000..f9b2e0d4
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/evidence.rs
@@ -0,0 +1,31 @@
+use crate::{
+	Path, QuantitativeAuditManifest, Result, eyre, fs,
+	quantitative::audit_manifest::{
+		QuantitativeAuditContext, QuantitativeAuditEvidence, validation,
+	},
+};
+
+/// Loads and validates the optional audit manifest. With no `path`, both gates stay closed
+/// and no manifest id is reported. Read and parse failures both name the manifest path.
+pub(super) fn quantitative_audit_evidence(
+	path: Option<&Path>,
+	context: QuantitativeAuditContext<'_>,
+) -> Result<QuantitativeAuditEvidence> {
+	let Some(path) = path else {
+		let (held_out, leakage_audited, audit_manifest_id) = (false, false, None);
+		return Ok(QuantitativeAuditEvidence { held_out, leakage_audited, audit_manifest_id });
+	};
+	let raw = fs::read_to_string(path).map_err(|err| {
+		eyre::eyre!("Failed to read quantitative audit manifest {}: {err}", path.display())
+	})?;
+	let manifest = serde_json::from_str::<QuantitativeAuditManifest>(&raw).map_err(|err| {
+		eyre::eyre!("Failed to parse quantitative audit manifest {}: {err}", path.display())
+	})?;
+	validation::validate_quantitative_audit_manifest(&manifest, path, context)?;
+
+	Ok(QuantitativeAuditEvidence {
+		held_out: manifest.held_out,
+		leakage_audited: manifest.leakage_audited,
+		audit_manifest_id: Some(manifest.manifest_id),
+	})
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export.rs
new file mode 100644
index 00000000..6b23ccfa
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export.rs
@@ -0,0 +1,36 @@
+mod claim_boundary;
+mod identity;
+mod manifest;
+
+use crate::{
+	ExportQuantitativeAuditManifestArgs, QuantitativeAuditManifest, RealWorldJob, Result,
+	quantitative::audit_manifest::{QuantitativeAuditContext, validation},
+};
+
+/// Builds the audit manifest for `jobs` from the CLI arguments and validates it before
+/// returning it.
+///
+/// Product and adapter id are trimmed first; the identity check rejects empty values.
+pub(crate) fn quantitative_audit_manifest_from_jobs(
+	jobs: &[RealWorldJob],
+	args: &ExportQuantitativeAuditManifestArgs,
+) -> Result<QuantitativeAuditManifest> {
+	let (product, adapter_id) = (args.product.trim(), args.adapter_id.trim());
+
+	identity::validate_audit_export_identity(product, adapter_id)?;
+
+	let manifest = manifest::quantitative_audit_manifest(jobs, args, product, adapter_id)?;
+	let context = QuantitativeAuditContext {
+		run_id: args.run_id.as_str(),
+		corpus_id: manifest.corpus_id.as_str(),
+		product,
+		adapter_id,
+		source_jobs: jobs,
+		ranking_query_count: manifest.ranking_query_count,
+		explicit_qrel_query_count: manifest.explicit_qrel_query_count,
+	};
+
+	validation::validate_quantitative_audit_manifest(&manifest, args.fixtures.as_path(), context)?;
+
+	Ok(manifest)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export/claim_boundary.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export/claim_boundary.rs
new file mode 100644
index 00000000..3d572c61
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export/claim_boundary.rs
@@ -0,0 +1,27 @@
+use crate::ExportQuantitativeAuditManifestArgs;
+
+/// Returns the claim boundary text for an exported audit manifest.
+///
+/// An explicit `args.claim_boundary` is used verbatim. Otherwise one of two
+/// default texts is chosen, depending on whether either audit flag
+/// (`held_out` or `leakage_audited`) is set.
+pub(super) fn quantitative_audit_claim_boundary(
+	args: &ExportQuantitativeAuditManifestArgs,
+) -> String {
+	if let Some(claim_boundary) = &args.claim_boundary {
+		return claim_boundary.clone();
+	}
+
+	let default_boundary = match (args.held_out, args.leakage_audited) {
+		(false, false) => concat!(
+			"Diagnostic audit manifest binds the current product-runtime fixture set to ",
+			"query ids and counts, but it does not prove held-out or leakage-audited status."
+		),
+		_ => concat!(
+			"Audit manifest supplied by operator; runner validates run/corpus/product/",
+			"adapter/count/query-id/artifact bindings before opening row gates."
+		),
+	};
+
+	default_boundary.to_string()
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export/identity.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export/identity.rs
new file mode 100644
index 00000000..872da0e6
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export/identity.rs
@@ -0,0 +1,14 @@
+use crate::{Result, eyre};
+
+/// Rejects an audit export whose `product` or `adapter_id` is empty.
+///
+/// No trimming happens here; the strings are checked exactly as passed in.
+pub(super) fn validate_audit_export_identity(product: &str, adapter_id: &str) -> Result<()> {
+	let has_identity = !product.is_empty() && !adapter_id.is_empty();
+
+	if has_identity {
+		Ok(())
+	} else {
+		Err(eyre::eyre!("quantitative audit export requires product and adapter_id."))
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export/manifest.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export/manifest.rs
new file mode 100644
index 00000000..dad5a99e
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/export/manifest.rs
@@ -0,0 +1,54 @@
+use crate::{
+	ExportQuantitativeAuditManifestArgs, QuantitativeAuditArtifact, QuantitativeAuditManifest,
+	RealWorldJob, Result,
+	quantitative::{
+		self, QUANTITATIVE_AUDIT_MANIFEST_SCHEMA,
+		audit_manifest::{artifacts, export::claim_boundary},
+		metrics,
+	},
+};
+
+/// Assembles the audit manifest for `jobs` from the export `args`.
+///
+/// `product` and `adapter_id` are stored as given. When `args.manifest_id` is absent
+/// the id defaults to `<run_id>-quantitative-audit-manifest`. The only artifact
+/// recorded is the fixture path together with its digest.
+///
+/// # Errors
+///
+/// Returns the error from `artifacts::fixture_path_digest` if the digest fails.
+pub(super) fn quantitative_audit_manifest(
+	jobs: &[RealWorldJob],
+	args: &ExportQuantitativeAuditManifestArgs,
+	product: &str,
+	adapter_id: &str,
+) -> Result<QuantitativeAuditManifest> {
+	let fixtures = args.fixtures.as_path();
+	let manifest_id = match &args.manifest_id {
+		Some(manifest_id) => manifest_id.clone(),
+		None => format!("{}-quantitative-audit-manifest", args.run_id),
+	};
+	let fixture_artifact = QuantitativeAuditArtifact {
+		role: "product_runtime_fixtures".to_string(),
+		path: artifacts::audit_artifact_display_path(fixtures),
+		sha256: artifacts::fixture_path_digest(fixtures)?,
+	};
+
+	Ok(QuantitativeAuditManifest {
+		schema: QUANTITATIVE_AUDIT_MANIFEST_SCHEMA.to_string(),
+		manifest_id,
+		run_id: args.run_id.clone(),
+		corpus_id: quantitative::quantitative_corpus_id(jobs),
+		product: product.to_string(),
+		adapter_id: adapter_id.to_string(),
+		held_out: args.held_out,
+		leakage_audited: args.leakage_audited,
+		sample_size: jobs.len(),
+		ranking_query_count: metrics::ranking_query_count(jobs),
+		explicit_qrel_query_count: metrics::explicit_qrel_query_count(jobs),
+		query_ids: metrics::ranking_query_ids(jobs).into_iter().map(str::to_string).collect(),
+		controls: args.controls.clone(),
+		artifacts: vec![fixture_artifact],
+		claim_boundary: claim_boundary::quantitative_audit_claim_boundary(args),
+	})
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation.rs
new file mode 100644
index 00000000..5a37d191
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation.rs
@@ -0,0 +1,27 @@
+mod controls;
+mod identity;
+mod queries;
+
+use crate::{
+	Path, QuantitativeAuditManifest, Result,
+	quantitative::audit_manifest::{QuantitativeAuditContext, artifacts},
+};
+
+/// Runs every audit-manifest check against `manifest`, stopping at the first failure.
+///
+/// Checks run in this order: identity, query ids, controls, artifacts. `path` is
+/// forwarded to each of them.
+pub(super) fn validate_quantitative_audit_manifest(
+	manifest: &QuantitativeAuditManifest,
+	path: &Path,
+	context: QuantitativeAuditContext<'_>,
+) -> Result<()> {
+	let source_jobs = context.source_jobs;
+
+	identity::validate_quantitative_audit_identity(manifest, path, &context)?;
+	queries::validate_quantitative_audit_query_ids(manifest, path, source_jobs)?;
+	controls::validate_quantitative_audit_controls(manifest, path)?;
+	artifacts::validate_quantitative_audit_artifacts(manifest, path)?;
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/controls.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/controls.rs
new file mode 100644
index 00000000..9b15c1ae
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/controls.rs
@@ -0,0 +1,55 @@
+use crate::{
+	BTreeSet, Path, QuantitativeAuditManifest, Result, eyre,
+	quantitative::{
+		REQUIRED_CANDIDATE_LEAKAGE_AUDIT_CONTROL, REQUIRED_HELD_OUT_AUDIT_CONTROL,
+		REQUIRED_QREL_LEAKAGE_AUDIT_CONTROL,
+	},
+};
+
+/// Checks that the manifest's audit claims are backed by its declared controls.
+///
+/// `held_out` requires the held-out control, `leakage_audited` requires both the
+/// qrel and candidate leakage controls, and either claim requires a `claim_boundary`
+/// that is not blank. `path` only appears in the error messages.
+///
+/// # Errors
+///
+/// Returns an error describing the first of those requirements that is not met.
+pub(super) fn validate_quantitative_audit_controls(
+	manifest: &QuantitativeAuditManifest,
+	path: &Path,
+) -> Result<()> {
+	let declared = manifest.controls.iter().map(String::as_str).collect::<BTreeSet<_>>();
+	let declares = |control: &str| declared.contains(control);
+
+	if manifest.held_out && !declares(REQUIRED_HELD_OUT_AUDIT_CONTROL) {
+		return Err(eyre::eyre!(
+			"{} marks held_out=true without required control {}.",
+			path.display(),
+			REQUIRED_HELD_OUT_AUDIT_CONTROL
+		));
+	}
+
+	let leakage_controls_declared = declares(REQUIRED_QREL_LEAKAGE_AUDIT_CONTROL)
+		&& declares(REQUIRED_CANDIDATE_LEAKAGE_AUDIT_CONTROL);
+
+	if manifest.leakage_audited && !leakage_controls_declared {
+		return Err(eyre::eyre!(
+			"{} marks leakage_audited=true without required controls {} and {}.",
+			path.display(),
+			REQUIRED_QREL_LEAKAGE_AUDIT_CONTROL,
+			REQUIRED_CANDIDATE_LEAKAGE_AUDIT_CONTROL
+		));
+	}
+
+	let claims_audit_status = manifest.held_out || manifest.leakage_audited;
+
+	if claims_audit_status && manifest.claim_boundary.trim().is_empty() {
+		return Err(eyre::eyre!(
+			"{} marks audit controls true but has an empty claim_boundary.",
+			path.display()
+		));
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity.rs
new file mode 100644
index 00000000..6444cdea
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity.rs
@@ -0,0 +1,17 @@
+mod context;
+mod schema;
+
+use crate::{
+	Path, QuantitativeAuditManifest, Result, quantitative::audit_manifest::QuantitativeAuditContext,
+};
+
+/// Validates the manifest's identity: the schema check runs first, then the check
+/// against the current `context`. The first error is returned.
+pub(super) fn validate_quantitative_audit_identity(
+	manifest: &QuantitativeAuditManifest,
+	path: &Path,
+	context: &QuantitativeAuditContext<'_>,
+) -> Result<()> {
+	schema::validate_quantitative_audit_schema(manifest, path)
+		.and_then(|()| context::validate_quantitative_audit_context(manifest, path, context))
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/context.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/context.rs
new file mode 100644
index 00000000..1d6be494
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/context.rs
@@ -0,0 +1,18 @@
+mod counts;
+mod fields;
+
+use crate::{
+	Path, QuantitativeAuditManifest, Result, quantitative::audit_manifest::QuantitativeAuditContext,
+};
+
+/// Checks the manifest against the current run `context`: identifying fields
+/// first, then counts. The first mismatch is returned as an error.
+pub(super) fn validate_quantitative_audit_context(
+	manifest: &QuantitativeAuditManifest,
+	path: &Path,
+	context: &QuantitativeAuditContext<'_>,
+) -> Result<()> {
+	fields::validate_quantitative_audit_context_fields(manifest, path, context)?;
+
+	counts::validate_quantitative_audit_context_counts(manifest, path, context)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/context/counts.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/context/counts.rs
new file mode 100644
index 00000000..a9e61f1f
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/context/counts.rs
@@ -0,0 +1,44 @@
+use crate::{
+	Path, QuantitativeAuditManifest, Result, eyre,
+	quantitative::audit_manifest::QuantitativeAuditContext,
+};
+
+/// Compares the manifest's counts with the current run, in this order:
+/// `sample_size` (against the number of source jobs), `ranking_query_count`,
+/// and `explicit_qrel_query_count`.
+///
+/// # Errors
+///
+/// Returns an error naming the first count that differs.
+pub(super) fn validate_quantitative_audit_context_counts(
+	manifest: &QuantitativeAuditManifest,
+	path: &Path,
+	context: &QuantitativeAuditContext<'_>,
+) -> Result<()> {
+	ensure_count_matches(path, "sample_size", manifest.sample_size, context.source_jobs.len())?;
+	ensure_count_matches(
+		path,
+		"ranking_query_count",
+		manifest.ranking_query_count,
+		context.ranking_query_count,
+	)?;
+
+	ensure_count_matches(
+		path,
+		"explicit_qrel_query_count",
+		manifest.explicit_qrel_query_count,
+		context.explicit_qrel_query_count,
+	)
+}
+
+/// Fails with the count-mismatch message for `field` unless `actual == expected`.
+fn ensure_count_matches<T>(path: &Path, field: &str, actual: T, expected: T) -> Result<()>
+where
+	T: PartialEq + std::fmt::Display,
+{
+	if actual == expected {
+		return Ok(());
+	}
+
+	Err(eyre::eyre!("{} has {} {}, expected {}.", path.display(), field, actual, expected))
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/context/fields.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/context/fields.rs
new file mode 100644
index 00000000..1b39ccad
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/context/fields.rs
@@ -0,0 +1,44 @@
+use crate::{
+	Path, QuantitativeAuditManifest, Result, eyre,
+	quantitative::audit_manifest::QuantitativeAuditContext,
+};
+
+/// Checks that the manifest names the same run, corpus, and product/adapter row as
+/// `context`. `run_id` is compared first, then `corpus_id`, then the
+/// product/adapter pair.
+///
+/// # Errors
+///
+/// Returns an error for the first comparison that fails.
+pub(super) fn validate_quantitative_audit_context_fields(
+	manifest: &QuantitativeAuditManifest,
+	path: &Path,
+	context: &QuantitativeAuditContext<'_>,
+) -> Result<()> {
+	let mismatch = |field: &str, actual: &str, expected: &str| {
+		eyre::eyre!("{} has {} {}, expected {}.", path.display(), field, actual, expected)
+	};
+
+	if manifest.run_id != context.run_id {
+		return Err(mismatch("run_id", &manifest.run_id, context.run_id));
+	}
+	if manifest.corpus_id != context.corpus_id {
+		return Err(mismatch("corpus_id", &manifest.corpus_id, context.corpus_id));
+	}
+
+	let same_row =
+		manifest.product == context.product && manifest.adapter_id == context.adapter_id;
+
+	if !same_row {
+		return Err(eyre::eyre!(
+			"{} has product {}:{} but current row is {}:{}.",
+			path.display(),
+			manifest.product,
+			manifest.adapter_id,
+			context.product,
+			context.adapter_id
+		));
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/schema.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/schema.rs
new file mode 100644
index 00000000..f288eeba
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/identity/schema.rs
@@ -0,0 +1,33 @@
+use crate::{
+	Path, QuantitativeAuditManifest, Result, eyre, quantitative::QUANTITATIVE_AUDIT_MANIFEST_SCHEMA,
+};
+
+/// Checks that the manifest declares the expected schema string and that its
+/// `manifest_id` is not blank (empty after trimming).
+///
+/// # Errors
+///
+/// Returns an error for a schema mismatch first, otherwise for a blank id.
+pub(super) fn validate_quantitative_audit_schema(
+	manifest: &QuantitativeAuditManifest,
+	path: &Path,
+) -> Result<()> {
+	let schema_matches = manifest.schema == QUANTITATIVE_AUDIT_MANIFEST_SCHEMA;
+
+	if !schema_matches {
+		return Err(eyre::eyre!(
+			"{} has schema {}, expected {}.",
+			path.display(),
+			manifest.schema,
+			QUANTITATIVE_AUDIT_MANIFEST_SCHEMA
+		));
+	}
+
+	let manifest_id = manifest.manifest_id.trim();
+
+	if manifest_id.is_empty() {
+		return Err(eyre::eyre!("{} has an empty manifest_id.", path.display()));
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/queries.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/queries.rs
new file mode 100644
index 00000000..9910b436
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/audit_manifest/validation/queries.rs
@@ -0,0 +1,37 @@
+use crate::{
+	BTreeSet, Path, QuantitativeAuditManifest, RealWorldJob, Result, eyre, quantitative::metrics,
+};
+
+/// Checks that `manifest.query_ids` lists exactly the ranked query ids of
+/// `source_jobs`, each one once.
+///
+/// # Errors
+///
+/// Returns an error when an id is listed more than once, or when the listed set
+/// differs from the current one; the latter message names the missing and extra ids.
+pub(super) fn validate_quantitative_audit_query_ids(
+	manifest: &QuantitativeAuditManifest,
+	path: &Path,
+	source_jobs: &[RealWorldJob],
+) -> Result<()> {
+	let current = metrics::ranking_query_ids(source_jobs);
+	let listed = manifest.query_ids.iter().map(String::as_str).collect::<BTreeSet<_>>();
+
+	// A set smaller than the list means at least one id was repeated.
+	if listed.len() != manifest.query_ids.len() {
+		return Err(eyre::eyre!("{} has duplicate quantitative audit query_ids.", path.display()));
+	}
+	if listed == current {
+		return Ok(());
+	}
+
+	let missing = current.difference(&listed).copied().collect::<Vec<_>>();
+	let extra = listed.difference(&current).copied().collect::<Vec<_>>();
+
+	Err(eyre::eyre!(
+		"{} audit query_ids do not match current ranked-query set; missing: {:?}, extra: {:?}.",
+		path.display(),
+		missing,
+		extra
+	))
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics.rs
new file mode 100644
index 00000000..6ee91f58
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics.rs
@@ -0,0 +1,15 @@
+mod aggregate;
+mod per_query;
+mod ranking;
+
+pub(super) use self::{
+	aggregate::{
+		aggregate_confidence_intervals, aggregate_denominators, aggregate_metric_states,
+		aggregate_metrics,
+	},
+	per_query::quantitative_per_query_rows,
+	ranking::{
+		aggregate_qrel_source, explicit_qrel_query_count, ranked_candidate_source,
+		ranking_coverage_state, ranking_query_count, ranking_query_ids,
+	},
+};
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate.rs
new file mode 100644
index 00000000..992201a6
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate.rs
@@ -0,0 +1,36 @@
+mod confidence;
+mod denominators;
+mod metrics;
+mod names;
+mod states;
+
+use crate::{BTreeMap, QuantitativeConfidenceInterval, QuantitativePerQueryRow};
+
+/// Aggregates per-query metric values; forwards to `metrics::aggregate_metrics`.
+pub(in crate::quantitative) fn aggregate_metrics(
+	rows: &[QuantitativePerQueryRow],
+) -> BTreeMap<String, Option<f64>> {
+	metrics::aggregate_metrics(rows)
+}
+
+/// Builds per-metric state labels; forwards to `states::aggregate_metric_states`.
+pub(in crate::quantitative) fn aggregate_metric_states(
+	result_state: &str,
+	metric_comparable: bool,
+) -> BTreeMap<String, String> {
+	states::aggregate_metric_states(result_state, metric_comparable)
+}
+
+/// Totals per-metric denominators; forwards to `denominators::aggregate_denominators`.
+pub(in crate::quantitative) fn aggregate_denominators(
+	rows: &[QuantitativePerQueryRow],
+) -> BTreeMap<String, usize> {
+	denominators::aggregate_denominators(rows)
+}
+
+/// Builds per-metric intervals; forwards to `confidence::aggregate_confidence_intervals`.
+pub(in crate::quantitative) fn aggregate_confidence_intervals(
+	rows: &[QuantitativePerQueryRow],
+) -> BTreeMap<String, QuantitativeConfidenceInterval> {
+	confidence::aggregate_confidence_intervals(rows)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/confidence.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/confidence.rs
new file mode 100644
index 00000000..2a454bdc
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/confidence.rs
@@ -0,0 +1,27 @@
+mod rates;
+mod wilson;
+
+use crate::{BTreeMap, QuantitativeConfidenceInterval, QuantitativePerQueryRow};
+
+/// Builds a Wilson confidence interval for each rate metric over `rows`.
+///
+/// A metric whose summed denominator is zero gets no entry. The numerator is
+/// capped at the denominator before the interval is computed.
+pub(super) fn aggregate_confidence_intervals(
+	rows: &[QuantitativePerQueryRow],
+) -> BTreeMap<String, QuantitativeConfidenceInterval> {
+	rates::rate_metric_names()
+		.into_iter()
+		.filter_map(|metric| {
+			let (numerator, denominator) =
+				rates::aggregate_rate_numerator_denominator(rows, metric.as_str());
+
+			(denominator > 0).then(|| {
+				let interval =
+					wilson::wilson_confidence_interval(numerator.min(denominator), denominator);
+
+				(metric, interval)
+			})
+		})
+		.collect()
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/confidence/rates.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/confidence/rates.rs
new file mode 100644
index 00000000..4cfb3b7f
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/confidence/rates.rs
@@ -0,0 +1,35 @@
+use crate::{QuantitativePerQueryRow, quantitative::QUANTITATIVE_K_VALUES};
+
+/// Names of the rate metrics: `recall_at_k`, `precision_at_k`, and `success_at_k`
+/// for every `k` in `QUANTITATIVE_K_VALUES`, grouped by `k`.
+pub(super) fn rate_metric_names() -> Vec<String> {
+	QUANTITATIVE_K_VALUES
+		.iter()
+		.flat_map(|k| {
+			[format!("recall_at_{k}"), format!("precision_at_{k}"), format!("success_at_{k}")]
+		})
+		.collect()
+}
+
+/// Sums, over `rows`, the numerator and denominator recorded for `metric`.
+///
+/// A row is skipped when it has no value for the metric, no denominator for it, or
+/// a zero denominator. Each row's numerator is recovered by multiplying its value
+/// by its denominator and rounding to the nearest integer.
+pub(super) fn aggregate_rate_numerator_denominator(
+	rows: &[QuantitativePerQueryRow],
+	metric: &str,
+) -> (usize, usize) {
+	rows.iter()
+		.filter_map(|row| {
+			let value = row.metrics.get(metric).copied().flatten()?;
+			let row_denominator =
+				row.denominators.get(metric).copied().filter(|count| *count != 0)?;
+			let row_numerator = (value * row_denominator as f64).round() as usize;
+
+			Some((row_numerator, row_denominator))
+		})
+		.fold((0, 0), |(numerator, denominator), (row_numerator, row_denominator)| {
+			(numerator + row_numerator, denominator + row_denominator)
+		})
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/confidence/wilson.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/confidence/wilson.rs
new file mode 100644
index 00000000..99c3029d
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/confidence/wilson.rs
@@ -0,0 +1,37 @@
+use crate::{QuantitativeConfidenceInterval, formatting, quantitative::WILSON_95_Z};
+
+/// Computes the Wilson score interval for `numerator` successes out of
+/// `denominator` trials, using `WILSON_95_Z` as the z value.
+///
+/// `numerator` is capped at `denominator` so the observed proportion never
+/// exceeds 1. With zero trials the interval is the uninformative `[0, 1]`, which
+/// is also the limit of the Wilson formula as the trial count approaches zero.
+/// Both bounds are clamped to `[0, 1]` and passed through `formatting::round3`.
+pub(super) fn wilson_confidence_interval(
+	numerator: usize,
+	denominator: usize,
+) -> QuantitativeConfidenceInterval {
+	let numerator = numerator.min(denominator);
+	let (lower, upper) = if denominator == 0 {
+		// Dividing by a zero trial count would make the proportion NaN.
+		(0.0, 1.0)
+	} else {
+		let n = denominator as f64;
+		let p = numerator as f64 / n;
+		let z2 = WILSON_95_Z * WILSON_95_Z;
+		let scale = 1.0 + z2 / n;
+		let center = (p + z2 / (2.0 * n)) / scale;
+		let half_width = WILSON_95_Z * (p * (1.0 - p) / n + z2 / (4.0 * n * n)).sqrt() / scale;
+
+		((center - half_width).clamp(0.0, 1.0), (center + half_width).clamp(0.0, 1.0))
+	};
+
+	QuantitativeConfidenceInterval {
+		method: "wilson_score".to_string(),
+		confidence: 0.95,
+		lower: formatting::round3(lower),
+		upper: formatting::round3(upper),
+		numerator,
+		denominator,
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/denominators.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/denominators.rs
new file mode 100644
index 00000000..3ddd044f
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/denominators.rs
@@ -0,0 +1,29 @@
+use crate::{BTreeMap, QuantitativePerQueryRow, quantitative::QUANTITATIVE_K_VALUES};
+
+/// Totals the per-query denominators of every quantitative metric across `rows`.
+///
+/// The result has one entry per `recall_at_k`, `precision_at_k`, and `success_at_k`
+/// (for each `k` in `QUANTITATIVE_K_VALUES`) plus `mrr`, `ndcg_at_5`, and
+/// `average_precision`. A metric that no row reports totals zero.
+pub(super) fn aggregate_denominators(rows: &[QuantitativePerQueryRow]) -> BTreeMap<String, usize> {
+	let mut denominators = BTreeMap::new();
+
+	for k in QUANTITATIVE_K_VALUES {
+		for family in ["recall", "precision", "success"] {
+			let metric = format!("{family}_at_{k}");
+			let total = sum_per_query_denominator(rows, &metric);
+
+			denominators.insert(metric, total);
+		}
+	}
+	for metric in ["mrr", "ndcg_at_5", "average_precision"] {
+		denominators.insert(metric.to_string(), sum_per_query_denominator(rows, metric));
+	}
+
+	denominators
+}
+
+/// Adds up `metric`'s denominator over `rows`, counting rows without one as zero.
+fn sum_per_query_denominator(rows: &[QuantitativePerQueryRow], metric: &str) -> usize {
+	rows.iter().map(|row| row.denominators.get(metric).copied().unwrap_or(0)).sum()
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/metrics.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/metrics.rs
new file mode 100644
index 00000000..db17c0c1
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/metrics.rs
@@ -0,0 +1,31 @@
+use crate::{
+	BTreeMap, QuantitativePerQueryRow, formatting, quantitative::metrics::aggregate::names,
+};
+
+/// Averages each metric over the rows that report a value for it.
+///
+/// Every name from `names::quantitative_metric_names` is present in the result and
+/// maps to `None` when no row has a value for it; metrics found only in `rows` are
+/// included too. Averages are passed through `formatting::round3`.
+pub(super) fn aggregate_metrics(rows: &[QuantitativePerQueryRow]) -> BTreeMap<String, Option<f64>> {
+	let mut totals = BTreeMap::<String, (f64, usize)>::new();
+
+	for (metric, value) in rows.iter().flat_map(|row| &row.metrics) {
+		let Some(value) = value else {
+			continue;
+		};
+		let total = totals.entry(metric.clone()).or_default();
+
+		total.0 += *value;
+		total.1 += 1;
+	}
+
+	let mut metrics: BTreeMap<String, Option<f64>> =
+		names::quantitative_metric_names().into_iter().map(|metric| (metric, None)).collect();
+
+	metrics.extend(totals.into_iter().map(|(metric, (sum, count))| {
+		(metric, (count > 0).then(|| formatting::round3(sum / count as f64)))
+	}));
+
+	metrics
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/names.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/names.rs
new file mode 100644
index 00000000..90055feb
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/names.rs
@@ -0,0 +1,15 @@
+use crate::quantitative::QUANTITATIVE_K_VALUES;
+
+/// Lists every quantitative metric name: the three `*_at_k` metrics for each `k` in
+/// `QUANTITATIVE_K_VALUES`, followed by `mrr`, `ndcg_at_5`, and `average_precision`.
+pub(super) fn quantitative_metric_names() -> Vec<String> {
+	let fixed = ["mrr", "ndcg_at_5", "average_precision"].map(str::to_string);
+
+	QUANTITATIVE_K_VALUES
+		.iter()
+		.flat_map(|k| {
+			[format!("recall_at_{k}"), format!("precision_at_{k}"), format!("success_at_{k}")]
+		})
+		.chain(fixed)
+		.collect()
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/states.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/states.rs
new file mode 100644
index 00000000..c9f631bb
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/aggregate/states.rs
@@ -0,0 +1,22 @@
+use crate::{BTreeMap, quantitative::QUANTITATIVE_K_VALUES};
+
+/// Assigns one state label to every quantitative metric name.
+///
+/// The label is `result_state` when `metric_comparable` is true and
+/// `"not_encoded"` otherwise.
+pub(super) fn aggregate_metric_states(
+	result_state: &str,
+	metric_comparable: bool,
+) -> BTreeMap<String, String> {
+	let state = if metric_comparable { result_state } else { "not_encoded" };
+	let fixed = ["mrr", "ndcg_at_5", "average_precision"].map(str::to_string);
+
+	QUANTITATIVE_K_VALUES
+		.iter()
+		.flat_map(|k| {
+			[format!("recall_at_{k}"), format!("precision_at_{k}"), format!("success_at_{k}")]
+		})
+		.chain(fixed)
+		.map(|metric| (metric, state.to_string()))
+		.collect()
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query.rs
new file mode 100644
index 00000000..1c1bf433
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query.rs
@@ -0,0 +1,31 @@
+mod evidence;
+mod query_metrics;
+mod row;
+
+use crate::{JobReport, QuantitativePerQueryRow, RealWorldJob};
+
+/// Builds one per-query row for each `(source_job, job)` pair.
+///
+/// The two slices are paired by position, and pairing stops at the end of the
+/// shorter slice, so any unmatched trailing entries produce no row.
+pub(in crate::quantitative) fn quantitative_per_query_rows(
+	source_jobs: &[RealWorldJob],
+	jobs: &[JobReport],
+	corpus_id: &str,
+	evidence_class: &str,
+	adapter_id: &str,
+) -> Vec<QuantitativePerQueryRow> {
+	let mut rows = Vec::with_capacity(source_jobs.len().min(jobs.len()));
+
+	for (source_job, job) in source_jobs.iter().zip(jobs) {
+		rows.push(row::quantitative_per_query_row(
+			source_job,
+			job,
+			corpus_id,
+			evidence_class,
+			adapter_id,
+		));
+	}
+
+	rows
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/evidence.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/evidence.rs
new file mode 100644
index 00000000..1a13fac2
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/evidence.rs
@@ -0,0 +1,36 @@
+use crate::{BTreeMap, JobReport, RealWorldJob};
+
+/// Maps evidence ids to relevance grades for one job.
+///
+/// Explicit relevance judgments on `source_job` are used when there are any.
+/// Otherwise every expected evidence id of `job` gets grade `1.0`.
+pub(super) fn relevance_grades(
+	source_job: &RealWorldJob,
+	job: &JobReport,
+) -> BTreeMap<String, f64> {
+	let judgments = &source_job.expected_answer.relevance_judgments;
+
+	if judgments.is_empty() {
+		return job
+			.expected_evidence
+			.iter()
+			.map(|evidence| (evidence.evidence_id.clone(), 1.0))
+			.collect();
+	}
+
+	judgments.iter().map(|judgment| (judgment.evidence_id.clone(), judgment.grade)).collect()
+}
+
+/// Names where a job's relevance grades came from.
+///
+/// Returns `"explicit_qrels"` when `source_job` has relevance judgments; otherwise
+/// `"not_encoded"` if `empty` is true and `"expected_evidence_fallback"` if not.
+pub(super) fn qrel_source(source_job: &RealWorldJob, empty: bool) -> &'static str {
+	let has_explicit_qrels = !source_job.expected_answer.relevance_judgments.is_empty();
+
+	match (has_explicit_qrels, empty) {
+		(true, _) => "explicit_qrels",
+		(false, true) => "not_encoded",
+		(false, false) => "expected_evidence_fallback",
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics.rs
new file mode 100644
index 00000000..6685aa6e
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics.rs
@@ -0,0 +1,38 @@
+mod denominators;
+mod ranking;
+mod relevance;
+
+pub(super) use self::{denominators::per_query_denominators, relevance::positive_qrel_count};
+
+use crate::{BTreeMap, quantitative::QUANTITATIVE_K_VALUES};
+
+/// Computes every per-query metric for one ranked candidate list.
+///
+/// For each `k` in `QUANTITATIVE_K_VALUES` the result holds `recall_at_k`,
+/// `precision_at_k`, and `success_at_k`; it also holds `mrr`, `ndcg_at_5`, and
+/// `average_precision`. `success_at_k` is always `Some` and is `1.0` only when the
+/// relevant-at-`k` count is nonzero and at least one positive qrel exists.
+pub(super) fn per_query_metrics(
+	candidates: &[String],
+	relevance: &BTreeMap<String, f64>,
+) -> BTreeMap<String, Option<f64>> {
+	// The positive-qrel count does not depend on `k`, so compute it once.
+	let positive_count = positive_qrel_count(relevance);
+	let mut metrics = BTreeMap::new();
+
+	for &k in QUANTITATIVE_K_VALUES {
+		let relevant_at_k = relevance::relevant_at_k(candidates, relevance, k);
+		let succeeded = relevant_at_k > 0 && positive_count > 0;
+
+		metrics.insert(format!("recall_at_{k}"), relevance::rate(relevant_at_k, positive_count));
+		metrics.insert(format!("precision_at_{k}"), relevance::rate(relevant_at_k, k));
+		metrics.insert(format!("success_at_{k}"), Some(if succeeded { 1.0 } else { 0.0 }));
+	}
+
+	metrics.insert("mrr".to_string(), ranking::reciprocal_rank(candidates, relevance));
+	metrics.insert("ndcg_at_5".to_string(), ranking::ndcg_at_k(candidates, relevance, 5));
+	metrics
+		.insert("average_precision".to_string(), ranking::average_precision(candidates, relevance));
+
+	metrics
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/denominators.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/denominators.rs
new file mode 100644
index 00000000..7ef22bc8
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/denominators.rs
@@ -0,0 +1,32 @@
+use crate::{BTreeMap, quantitative::QUANTITATIVE_K_VALUES};
+
+/// Records the denominator behind each per-query metric.
+///
+/// For each `k`: `recall_at_k` uses `expected_relevant_count`, `precision_at_k`
+/// uses `k`, and `success_at_k` uses `1`. `mrr` and `average_precision` use
+/// `expected_relevant_count`, `ndcg_at_5` uses it capped at 5, and the extra
+/// `candidate_count` entry stores `candidate_count` itself.
+pub(in crate::quantitative::metrics::per_query) fn per_query_denominators(
+	candidate_count: usize,
+	expected_relevant_count: usize,
+) -> BTreeMap<String, usize> {
+	let mut denominators = QUANTITATIVE_K_VALUES
+		.iter()
+		.flat_map(|&k| {
+			[
+				(format!("recall_at_{k}"), expected_relevant_count),
+				(format!("precision_at_{k}"), k),
+				(format!("success_at_{k}"), 1),
+			]
+		})
+		.collect::<BTreeMap<_, _>>();
+
+	denominators.extend([
+		("mrr".to_string(), expected_relevant_count),
+		("ndcg_at_5".to_string(), expected_relevant_count.min(5)),
+		("average_precision".to_string(), expected_relevant_count),
+		("candidate_count".to_string(), candidate_count),
+	]);
+
+	denominators
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking.rs
new file mode 100644
index 00000000..e9d7dbf7
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking.rs
@@ -0,0 +1,32 @@
+mod average_precision;
+mod ndcg;
+mod reciprocal_rank;
+
+use crate::BTreeMap;
+
+/// Reciprocal rank of `candidates` against `relevance`; forwards to
+/// `reciprocal_rank::reciprocal_rank`.
+pub(super) fn reciprocal_rank(
+	candidates: &[String],
+	relevance: &BTreeMap<String, f64>,
+) -> Option<f64> {
+	reciprocal_rank::reciprocal_rank(candidates, relevance)
+}
+
+/// nDCG over the first `k` candidates; forwards to `ndcg::ndcg_at_k`.
+pub(super) fn ndcg_at_k(
+	candidates: &[String],
+	relevance: &BTreeMap<String, f64>,
+	k: usize,
+) -> Option<f64> {
+	ndcg::ndcg_at_k(candidates, relevance, k)
+}
+
+/// Average precision of `candidates` against `relevance`; forwards to
+/// `average_precision::average_precision`.
+pub(super) fn average_precision(
+	candidates: &[String],
+	relevance: &BTreeMap<String, f64>,
+) -> Option<f64> {
+	average_precision::average_precision(candidates, relevance)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking/average_precision.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking/average_precision.rs
new file mode 100644
index 00000000..13c196ca
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking/average_precision.rs
@@ -0,0 +1,34 @@
+use crate::{BTreeMap, BTreeSet, quantitative::metrics::per_query::query_metrics};
+
+/// Computes average precision of `candidates` against `relevance`.
+///
+/// Returns `None` when no grade in `relevance` is positive. Only the first
+/// occurrence of a candidate is considered; a hit at 1-based position `p` (counted
+/// over the full list, repeats included) adds `hits so far / p`. The sum is divided
+/// by the number of positive grades.
+pub(super) fn average_precision(
+	candidates: &[String],
+	relevance: &BTreeMap<String, f64>,
+) -> Option<f64> {
+	let positive_count = query_metrics::positive_qrel_count(relevance);
+
+	if positive_count == 0 {
+		return None;
+	}
+
+	let mut seen = BTreeSet::new();
+	let mut hits = 0_usize;
+	let mut precision_sum = 0.0;
+
+	for (index, candidate) in candidates.iter().map(String::as_str).enumerate() {
+		let first_occurrence = seen.insert(candidate);
+		let relevant = relevance.get(candidate).is_some_and(|grade| *grade > 0.0);
+
+		if first_occurrence && relevant {
+			hits += 1;
+			precision_sum += hits as f64 / (index + 1) as f64;
+		}
+	}
+
+	Some(precision_sum / positive_count as f64)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking/ndcg.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking/ndcg.rs
new file mode 100644
index 00000000..540d2f66
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking/ndcg.rs
@@ -0,0 +1,50 @@
+use crate::{BTreeMap, BTreeSet, quantitative::metrics::per_query::query_metrics};
+
+/// Computes nDCG over the first `k` entries of `candidates`.
+///
+/// Returns `None` when `relevance` has no positive grade. Gains are the grades
+/// themselves (negative or missing grades count as zero), discounted by
+/// `log2(rank + 1)` with 1-based ranks. The ideal ordering is the positive grades
+/// sorted in descending order.
+///
+/// A candidate id that repeats within the top `k` earns its gain only at its first
+/// occurrence; later repeats keep their rank slot but contribute nothing. Counting
+/// them again could push the score above 1, and it matches how
+/// `average_precision` skips repeated candidates.
+pub(super) fn ndcg_at_k(
+	candidates: &[String],
+	relevance: &BTreeMap<String, f64>,
+	k: usize,
+) -> Option<f64> {
+	if query_metrics::positive_qrel_count(relevance) == 0 {
+		return None;
+	}
+
+	let mut seen = BTreeSet::new();
+	let dcg = candidates
+		.iter()
+		.take(k)
+		.enumerate()
+		.map(|(index, candidate)| {
+			let gain = if seen.insert(candidate.as_str()) {
+				relevance.get(candidate.as_str()).copied().unwrap_or(0.0).max(0.0)
+			} else {
+				0.0
+			};
+
+			gain / ((index + 2) as f64).log2()
+		})
+		.sum::<f64>();
+	let mut ideal = relevance.values().copied().filter(|grade| *grade > 0.0).collect::<Vec<_>>();
+
+	ideal.sort_by(|left, right| right.total_cmp(left));
+
+	let idcg = ideal
+		.iter()
+		.take(k)
+		.enumerate()
+		.map(|(index, grade)| grade / ((index + 2) as f64).log2())
+		.sum::<f64>();
+
+	Some(if idcg > 0.0 { dcg / idcg } else { 0.0 })
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking/reciprocal_rank.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking/reciprocal_rank.rs
new file mode 100644
index 00000000..99956367
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/ranking/reciprocal_rank.rs
@@ -0,0 +1,24 @@
+use crate::{BTreeMap, quantitative::metrics::per_query::query_metrics};
+
+/// Computes the reciprocal rank of the first relevant candidate.
+///
+/// Returns `None` when `relevance` has no positive grade, `Some(0.0)` when no
+/// candidate has a positive grade, and otherwise `Some(1 / rank)` for the 1-based
+/// rank of the first candidate whose grade is positive.
+pub(super) fn reciprocal_rank(
+	candidates: &[String],
+	relevance: &BTreeMap<String, f64>,
+) -> Option<f64> {
+	if query_metrics::positive_qrel_count(relevance) == 0 {
+		return None;
+	}
+
+	let is_relevant =
+		|candidate: &String| relevance.get(candidate.as_str()).is_some_and(|grade| *grade > 0.0);
+	let score = match candidates.iter().position(is_relevant) {
+		Some(index) => 1.0 / (index + 1) as f64,
+		None => 0.0,
+	};
+
+	Some(score)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/relevance.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/relevance.rs
new file mode 100644
index 00000000..a3644eb1
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/query_metrics/relevance.rs
@@ -0,0 +1,38 @@
+use crate::{BTreeMap, formatting};
+
+/// Counts the qrel entries whose grade is strictly positive.
+pub(in crate::quantitative::metrics::per_query) fn positive_qrel_count(
+	relevance: &BTreeMap<String, f64>,
+) -> usize {
+	relevance.values().copied().filter(|grade| *grade > 0.0).count()
+}
+
+/// Counts how many of the first `k` candidates have a strictly positive grade.
+///
+/// Candidates that are missing from `relevance` count as not relevant.
+pub(super) fn relevant_at_k(
+	candidates: &[String],
+	relevance: &BTreeMap<String, f64>,
+	k: usize,
+) -> usize {
+	let mut hits = 0;
+
+	for candidate in candidates.iter().take(k) {
+		if relevance.get(candidate.as_str()).is_some_and(|grade| *grade > 0.0) {
+			hits += 1;
+		}
+	}
+
+	hits
+}
+
+/// Ratio `numerator / denominator`, or `None` when the denominator is zero.
+///
+/// The ratio is passed through `formatting::round3` before it is returned.
+pub(super) fn rate(numerator: usize, denominator: usize) -> Option<f64> {
+	if denominator == 0 {
+		return None;
+	}
+
+	Some(formatting::round3(numerator as f64 / denominator as f64))
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/row.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/row.rs
new file mode 100644
index 00000000..7378fd72
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/row.rs
@@ -0,0 +1,47 @@
+mod basis;
+
+use crate::{
+	JobReport, QuantitativePerQueryRow, RealWorldJob, formatting,
+	quantitative::QUANTITATIVE_ROW_CLAIM_BOUNDARY,
+};
+
+/// Builds the per-query scoreboard row for one job.
+///
+/// Counts, metrics, metric states, and denominators come from
+/// `basis::quantitative_per_query_row_basis`; identity fields are copied from
+/// `job` and the arguments. The product is always recorded as `"ELF"`.
+pub(super) fn quantitative_per_query_row(
+	source_job: &RealWorldJob,
+	job: &JobReport,
+	corpus_id: &str,
+	evidence_class: &str,
+	adapter_id: &str,
+) -> QuantitativePerQueryRow {
+	let basis::QuantitativePerQueryRowBasis {
+		positive_relevance_count,
+		candidate_count,
+		qrel_source,
+		relevance_grade_sum,
+		metrics,
+		metric_states,
+		denominators,
+	} = basis::quantitative_per_query_row_basis(source_job, job);
+
+	QuantitativePerQueryRow {
+		job_id: job.job_id.clone(),
+		suite: job.suite_id.clone(),
+		evidence_class: evidence_class.to_owned(),
+		source_manifest_corpus_id: Some(corpus_id.to_owned()),
+		result_state: formatting::status_str(job.status).to_string(),
+		expected_relevant_count: positive_relevance_count,
+		candidate_count,
+		qrel_source,
+		relevance_grade_sum,
+		product: "ELF".to_owned(),
+		adapter_id: adapter_id.to_owned(),
+		metrics,
+		metric_states,
+		denominators,
+		claim_boundary: QUANTITATIVE_ROW_CLAIM_BOUNDARY.to_string(),
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/row/basis.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/row/basis.rs
new file mode 100644
index 00000000..42ed6323
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/row/basis.rs
@@ -0,0 +1,62 @@
+mod states;
+
+use crate::{
+	BTreeMap, JobReport, RealWorldJob, formatting,
+	quantitative::metrics::per_query::{evidence, query_metrics},
+	scoring,
+};
+
+/// Intermediate values from which one per-query scoreboard row is assembled.
+pub(super) struct QuantitativePerQueryRowBasis {
+	/// Result of `query_metrics::positive_qrel_count` over the relevance grades.
+	pub(super) positive_relevance_count: usize,
+	/// Length of the produced evidence order for the source job.
+	pub(super) candidate_count: usize,
+	/// Label returned by `evidence::qrel_source`.
+	pub(super) qrel_source: String,
+	/// Sum of all relevance grades, rounded by `formatting::round3`.
+	pub(super) relevance_grade_sum: f64,
+	/// Metric values keyed by metric name.
+	pub(super) metrics: BTreeMap<String, Option<f64>>,
+	/// One state label per key of `metrics`.
+	pub(super) metric_states: BTreeMap<String, String>,
+	/// Output of `query_metrics::per_query_denominators`.
+	pub(super) denominators: BTreeMap<String, usize>,
+}
+
+/// Derives the relevance, candidate, and metric data for one job.
+///
+/// Relevance grades come from `evidence::relevance_grades` and the candidate
+/// ranking from `scoring::produced_evidence_order`; the remaining fields are
+/// derived from them, from `source_job`, and from the job status.
+pub(super) fn quantitative_per_query_row_basis(
+	source_job: &RealWorldJob,
+	job: &JobReport,
+) -> QuantitativePerQueryRowBasis {
+	let relevance = evidence::relevance_grades(source_job, job);
+	let candidates = scoring::produced_evidence_order(source_job);
+	let positive_relevance_count = query_metrics::positive_qrel_count(&relevance);
+	let metrics = query_metrics::per_query_metrics(candidates.as_slice(), &relevance);
+	let candidate_count = candidates.len();
+	let result_state = formatting::status_str(job.status);
+	let metric_states = states::per_query_metric_states(
+		metrics.keys(),
+		positive_relevance_count,
+		candidate_count,
+		result_state,
+	);
+	let qrel_source = evidence::qrel_source(source_job, relevance.is_empty()).to_string();
+	let relevance_grade_sum = formatting::round3(relevance.values().sum::<f64>());
+	let denominators =
+		query_metrics::per_query_denominators(candidate_count, positive_relevance_count);
+
+	QuantitativePerQueryRowBasis {
+		positive_relevance_count,
+		candidate_count,
+		qrel_source,
+		relevance_grade_sum,
+		metrics,
+		metric_states,
+		denominators,
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/row/basis/states.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/row/basis/states.rs
new file mode 100644
index 00000000..7c987253
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/per_query/row/basis/states.rs
@@ -0,0 +1,17 @@
+use crate::BTreeMap;
+
+/// Assigns one shared state label to every metric name.
+///
+/// The label is `"not_encoded"` when there are no positive qrels or no
+/// candidates; otherwise it is `result_state`.
+pub(super) fn per_query_metric_states<'a>(
+	metric_names: impl Iterator<Item = &'a String>,
+	positive_relevance_count: usize,
+	candidate_count: usize,
+	result_state: &str,
+) -> BTreeMap<String, String> {
+	let scorable = positive_relevance_count > 0 && candidate_count > 0;
+	let label = if scorable { result_state } else { "not_encoded" };
+
+	metric_names.map(|name| (name.clone(), label.to_owned())).collect()
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking.rs
new file mode 100644
index 00000000..6805ca30
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking.rs
@@ -0,0 +1,10 @@
+mod counts;
+mod coverage;
+mod qrels;
+mod queries;
+
+pub(in crate::quantitative) use self::{
+	counts::{explicit_qrel_query_count, ranking_query_count, ranking_query_ids},
+	coverage::{ranked_candidate_source, ranking_coverage_state},
+	qrels::aggregate_qrel_source,
+};
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/counts.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/counts.rs
new file mode 100644
index 00000000..c8dd4408
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/counts.rs
@@ -0,0 +1,33 @@
+use crate::{BTreeSet, RealWorldJob, quantitative::metrics::ranking::queries};
+
+/// Ids of the source jobs that `queries::is_ranking_query` accepts.
+///
+/// Ids are gathered into a set, so a repeated job id is reported once.
+pub(in crate::quantitative) fn ranking_query_ids(source_jobs: &[RealWorldJob]) -> BTreeSet<&str> {
+	source_jobs
+		.iter()
+		.filter(|job| queries::is_ranking_query(job))
+		.map(|job| job.job_id.as_str())
+		.collect()
+}
+
+/// Number of distinct ranking query ids in `source_jobs`.
+pub(in crate::quantitative) fn ranking_query_count(source_jobs: &[RealWorldJob]) -> usize {
+	ranking_query_ids(source_jobs).len()
+}
+
+/// Number of distinct ranking queries that carry explicit relevance judgments.
+///
+/// Only jobs accepted by `queries::is_ranking_query` are counted and ids are
+/// de-duplicated, so the result never exceeds `ranking_query_count`. Counting
+/// every job with judgments would let jobs outside the ranking set skew
+/// equality checks of this count against the ranking query count.
+pub(in crate::quantitative) fn explicit_qrel_query_count(source_jobs: &[RealWorldJob]) -> usize {
+	source_jobs
+		.iter()
+		.filter(|job| !job.expected_answer.relevance_judgments.is_empty())
+		.filter(|job| queries::is_ranking_query(job))
+		.map(|job| job.job_id.as_str())
+		.collect::<BTreeSet<_>>()
+		.len()
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/coverage.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/coverage.rs
new file mode 100644
index 00000000..eb419d40
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/coverage.rs
@@ -0,0 +1,31 @@
+use crate::ReportSummary;
+
+/// Classifies ranking coverage from the query counts and the report summary.
+///
+/// * `"not_encoded"` when `ranking_query_count` is zero.
+/// * `"complete"` when it equals `source_job_count` and `summary.not_encoded`
+///   is zero.
+/// * `"partial_coverage"` otherwise.
+pub(in crate::quantitative) fn ranking_coverage_state(
+	summary: &ReportSummary,
+	source_job_count: usize,
+	ranking_query_count: usize,
+) -> &'static str {
+	if ranking_query_count == 0 {
+		return "not_encoded";
+	}
+
+	let every_job_ranked = ranking_query_count == source_job_count;
+
+	if every_job_ranked && summary.not_encoded == 0 { "complete" } else { "partial_coverage" }
+}
+
+/// Label for where ranked candidates come from.
+///
+/// `"not_encoded"` without ranking queries, `"produced_evidence_order"` otherwise.
+pub(in crate::quantitative) fn ranked_candidate_source(ranking_query_count: usize) -> &'static str {
+	match ranking_query_count {
+		0 => "not_encoded",
+		_ => "produced_evidence_order",
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/qrels.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/qrels.rs
new file mode 100644
index 00000000..9b5c3daa
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/qrels.rs
@@ -0,0 +1,17 @@
+/// Summarizes which qrel source backs the ranking queries.
+///
+/// * `"not_encoded"` when there are no ranking queries.
+/// * `"explicit_qrels"` when both counts are equal.
+/// * `"expected_evidence_fallback"` when no query has explicit qrels.
+/// * `"mixed"` for any other combination.
+pub(in crate::quantitative) fn aggregate_qrel_source(
+	ranking_query_count: usize,
+	explicit_qrel_query_count: usize,
+) -> &'static str {
+	match (ranking_query_count, explicit_qrel_query_count) {
+		(0, _) => "not_encoded",
+		(ranked, explicit) if explicit == ranked => "explicit_qrels",
+		(_, 0) => "expected_evidence_fallback",
+		_ => "mixed",
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/queries.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/queries.rs
new file mode 100644
index 00000000..8ada5678
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/metrics/ranking/queries.rs
@@ -0,0 +1,59 @@
+use crate::{BTreeMap, RealWorldJob, scoring};
+
+/// Whether `job` counts as a ranking query.
+///
+/// A job qualifies when it has at least one ranking relevance grade and the
+/// ranking was attempted (see `ranking_query_attempted`).
+pub(super) fn is_ranking_query(job: &RealWorldJob) -> bool {
+	if ranking_relevance_grades(job).is_empty() {
+		return false;
+	}
+
+	ranking_query_attempted(job)
+}
+
+/// Relevance grades used to decide whether a job is a ranking query.
+///
+/// With explicit relevance judgments, only those graded above zero are kept.
+/// Without any, each required evidence item whose requirement is `cite`,
+/// `use`, or `explain` is given grade `1.0`.
+fn ranking_relevance_grades(source_job: &RealWorldJob) -> BTreeMap<String, f64> {
+	let judgments = &source_job.expected_answer.relevance_judgments;
+
+	if judgments.is_empty() {
+		return source_job
+			.required_evidence
+			.iter()
+			.filter(|evidence| matches!(evidence.requirement.as_str(), "cite" | "use" | "explain"))
+			.map(|evidence| (evidence.evidence_id.clone(), 1.0))
+			.collect();
+	}
+
+	judgments
+		.iter()
+		.filter(|judgment| judgment.grade > 0.0)
+		.map(|judgment| (judgment.evidence_id.clone(), judgment.grade))
+		.collect()
+}
+
+/// Whether the job shows that a ranking was attempted.
+///
+/// True when the job produced any evidence order. Otherwise the adapter
+/// response must exist, its trace must contain a `live_adapter.retrieve`
+/// stage, and its latency must be finite and positive.
+fn ranking_query_attempted(job: &RealWorldJob) -> bool {
+	if !scoring::produced_evidence_order(job).is_empty() {
+		return true;
+	}
+
+	let Some(response) = job.corpus.adapter_response.as_ref() else {
+		return false;
+	};
+	let answer = &response.answer;
+	let retrieve_traced = answer.trace_explainability.as_ref().is_some_and(|trace| {
+		trace.stages.iter().any(|stage| stage.stage_name == "live_adapter.retrieve")
+	});
+
+	retrieve_traced
+		&& answer.latency_ms.is_some_and(|latency| latency.is_finite() && latency > 0.0)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest.rs
new file mode 100644
index 00000000..4cd8b6c0
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest.rs
@@ -0,0 +1,19 @@
+mod export;
+mod import;
+mod validation;
+
+pub(crate) use self::export::quantitative_product_manifest_from_report;
+
+use crate::{Path, QuantitativeProductManifest, Result};
+
+/// Loads the optional quantitative product manifest for `corpus_id`.
+///
+/// Delegates to `import::quantitative_product_manifest`, which returns the
+/// default manifest when `path` is `None` and otherwise reads, parses, and
+/// validates the file.
+pub(super) fn quantitative_product_manifest(
+	path: Option<&Path>,
+	corpus_id: &str,
+) -> Result<QuantitativeProductManifest> {
+	import::quantitative_product_manifest(path, corpus_id)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export.rs
new file mode 100644
index 00000000..d72509f8
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export.rs
@@ -0,0 +1,38 @@
+mod identity;
+mod manifest;
+mod rows;
+mod source;
+
+use crate::{
+	ExportQuantitativeProductManifestArgs, QuantitativeProductManifest, REPORT_SCHEMA,
+	RealWorldReport, Result, eyre, quantitative::product_manifest::validation,
+};
+
+/// Builds a validated quantitative product manifest from a generated report.
+///
+/// # Errors
+///
+/// Fails when the report schema is not `REPORT_SCHEMA`, when the manifest
+/// cannot be assembled from the report and `args`, or when the assembled
+/// manifest does not pass product-manifest validation against its own corpus id.
+pub(crate) fn quantitative_product_manifest_from_report(
+	report: &RealWorldReport,
+	args: &ExportQuantitativeProductManifestArgs,
+) -> Result<QuantitativeProductManifest> {
+	let report_path = args.report.as_path();
+
+	if report.schema != REPORT_SCHEMA {
+		return Err(eyre::eyre!(
+			"{} has schema {}, expected {REPORT_SCHEMA}.",
+			report_path.display(),
+			report.schema
+		));
+	}
+
+	let manifest = manifest::quantitative_product_manifest(report, args)?;
+	let corpus_id = manifest.corpus_id.as_str();
+
+	validation::validate_quantitative_product_manifest(&manifest, report_path, corpus_id)?;
+
+	Ok(manifest)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/identity.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/identity.rs
new file mode 100644
index 00000000..4f1f6453
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/identity.rs
@@ -0,0 +1,32 @@
+use crate::{ExportQuantitativeProductManifestArgs, Result, eyre};
+
+/// Checks that the export identity is complete and names an external product.
+///
+/// # Errors
+///
+/// Fails when `product`, `adapter_id`, or `adapter_name` is empty, or when
+/// `product` is exactly `"ELF"`. Messages are prefixed with the report path.
+pub(super) fn validate_export_identity(
+	args: &ExportQuantitativeProductManifestArgs,
+	product: &str,
+	adapter_id: &str,
+	adapter_name: &str,
+) -> Result<()> {
+	let report_path = args.report.display();
+	let incomplete = [product, adapter_id, adapter_name].iter().any(|field| field.is_empty());
+
+	if incomplete {
+		return Err(eyre::eyre!(
+			"{} cannot export an incomplete quantitative product identity.",
+			report_path
+		));
+	}
+	if product == "ELF" {
+		return Err(eyre::eyre!(
+			"{} exports product ELF; use --product for external product manifest exports.",
+			report_path
+		));
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/manifest.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/manifest.rs
new file mode 100644
index 00000000..592cb19f
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/manifest.rs
@@ -0,0 +1,50 @@
+use crate::{
+	ExportQuantitativeProductManifestArgs, QuantitativeProductManifest, RealWorldReport, Result,
+	quantitative::{
+		QUANTITATIVE_PRODUCT_MANIFEST_SCHEMA,
+		product_manifest::export::{identity, rows, source},
+	},
+};
+
+/// Assembles the export manifest for the report's quantitative product row.
+///
+/// The identity resolved by `source::product_export_identity` is validated,
+/// then applied to one product row and to the per-query rows that match the
+/// source product and adapter. The manifest id defaults to
+/// `<run_id>-quantitative-product-manifest` when `args.manifest_id` is unset.
+pub(super) fn quantitative_product_manifest(
+	report: &RealWorldReport,
+	args: &ExportQuantitativeProductManifestArgs,
+) -> Result<QuantitativeProductManifest> {
+	let source::ProductExportIdentity {
+		row: source_row,
+		source_product,
+		source_adapter_id,
+		product,
+		adapter_id,
+		adapter_name,
+	} = source::product_export_identity(report, args)?;
+
+	identity::validate_export_identity(args, product, adapter_id, adapter_name)?;
+
+	let row = rows::exported_product_row(source_row, product, adapter_id, adapter_name);
+	let per_query_rows = rows::exported_per_query_rows(
+		report,
+		source_product,
+		source_adapter_id,
+		product,
+		adapter_id,
+	);
+	let manifest_id = match &args.manifest_id {
+		Some(manifest_id) => manifest_id.clone(),
+		None => format!("{}-quantitative-product-manifest", report.run_id),
+	};
+
+	Ok(QuantitativeProductManifest {
+		schema: QUANTITATIVE_PRODUCT_MANIFEST_SCHEMA.to_string(),
+		manifest_id,
+		corpus_id: report.quantitative_scoreboard.corpus_id.clone(),
+		rows: vec![row],
+		per_query_rows,
+	})
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/rows.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/rows.rs
new file mode 100644
index 00000000..e29f4f74
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/rows.rs
@@ -0,0 +1,4 @@
+mod per_query;
+mod product;
+
+pub(super) use self::{per_query::exported_per_query_rows, product::exported_product_row};
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/rows/per_query.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/rows/per_query.rs
new file mode 100644
index 00000000..fcc61d9e
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/rows/per_query.rs
@@ -0,0 +1,44 @@
+use crate::{QuantitativePerQueryRow, RealWorldReport};
+
+/// Copies the report's per-query rows for one source product and adapter.
+///
+/// Rows whose `product` and `adapter_id` equal the source pair are cloned and
+/// re-labelled with the exported `product` and `adapter_id`; all other rows
+/// are left out.
+pub(in crate::quantitative::product_manifest::export) fn exported_per_query_rows(
+	report: &RealWorldReport,
+	source_product: &str,
+	source_adapter_id: &str,
+	product: &str,
+	adapter_id: &str,
+) -> Vec<QuantitativePerQueryRow> {
+	let mut exported = Vec::new();
+
+	for row in &report.quantitative_scoreboard.per_query_rows {
+		if row.product == source_product && row.adapter_id == source_adapter_id {
+			exported.push(exported_per_query_row(row, product, adapter_id));
+		}
+	}
+
+	exported
+}
+
+/// Clones one per-query row under the exported identity.
+///
+/// The claim boundary is replaced with the fixed export notice.
+fn exported_per_query_row(
+	source_row: &QuantitativePerQueryRow,
+	product: &str,
+	adapter_id: &str,
+) -> QuantitativePerQueryRow {
+	QuantitativePerQueryRow {
+		product: product.to_string(),
+		adapter_id: adapter_id.to_string(),
+		claim_boundary: concat!(
+			"Exported from generated report per-query quantitative evidence; ",
+			"import does not relax paired-significance or leaderboard gates."
+		)
+		.to_string(),
+		..source_row.clone()
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/rows/product.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/rows/product.rs
new file mode 100644
index 00000000..2551c2ff
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/rows/product.rs
@@ -0,0 +1,25 @@
+use crate::QuantitativeBenchmarkRow;
+
+/// Clones the source product row under the exported identity.
+///
+/// `product`, `adapter_id`, and `adapter_name` are overwritten and the claim
+/// boundary is replaced with the fixed export notice; every other field is
+/// copied from `source_row`.
+pub(in crate::quantitative::product_manifest::export) fn exported_product_row(
+	source_row: &QuantitativeBenchmarkRow,
+	product: &str,
+	adapter_id: &str,
+	adapter_name: &str,
+) -> QuantitativeBenchmarkRow {
+	QuantitativeBenchmarkRow {
+		product: product.to_string(),
+		adapter_id: adapter_id.to_string(),
+		adapter_name: adapter_name.to_string(),
+		claim_boundary: concat!(
+			"Exported from a generated real_world_job_report quantitative row; ",
+			"import remains subject to same-corpus, per-query, explicit-qrel, and leaderboard gates."
+		)
+		.to_string(),
+		..source_row.clone()
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/source.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/source.rs
new file mode 100644
index 00000000..6a3b7ed9
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/export/source.rs
@@ -0,0 +1,48 @@
+use crate::{
+	ExportQuantitativeProductManifestArgs, QuantitativeBenchmarkRow, RealWorldReport, Result, eyre,
+};
+
+/// Source row plus the identity under which it will be exported.
+pub(super) struct ProductExportIdentity<'report> {
+	/// First quantitative row of the report.
+	pub(super) row: &'report QuantitativeBenchmarkRow,
+	/// Product recorded on the source row.
+	pub(super) source_product: &'report str,
+	/// Adapter id recorded on the source row.
+	pub(super) source_adapter_id: &'report str,
+	/// Exported product: the trimmed CLI override, else the trimmed source value.
+	pub(super) product: &'report str,
+	/// Exported adapter id, resolved the same way as `product`.
+	pub(super) adapter_id: &'report str,
+	/// Exported adapter name, resolved the same way as `product`.
+	pub(super) adapter_name: &'report str,
+}
+
+/// Resolves the export identity from the report's first quantitative row.
+///
+/// Each of `args.product`, `args.adapter_id`, and `args.adapter_name` replaces
+/// the matching source-row value when set; the chosen value is trimmed.
+///
+/// # Errors
+///
+/// Fails when the report has no quantitative rows.
+pub(super) fn product_export_identity<'report>(
+	report: &'report RealWorldReport,
+	args: &'report ExportQuantitativeProductManifestArgs,
+) -> Result<ProductExportIdentity<'report>> {
+	let Some(row) = report.quantitative_scoreboard.rows.first() else {
+		return Err(eyre::eyre!("{} has no quantitative product row.", args.report.display()));
+	};
+	let source_product = row.product.as_str();
+	let source_adapter_id = row.adapter_id.as_str();
+	let source_adapter_name = row.adapter_name.as_str();
+
+	Ok(ProductExportIdentity {
+		row,
+		source_product,
+		source_adapter_id,
+		product: args.product.as_deref().unwrap_or(source_product).trim(),
+		adapter_id: args.adapter_id.as_deref().unwrap_or(source_adapter_id).trim(),
+		adapter_name: args.adapter_name.as_deref().unwrap_or(source_adapter_name).trim(),
+	})
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/import.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/import.rs
new file mode 100644
index 00000000..12df9a92
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/import.rs
@@ -0,0 +1,48 @@
+use crate::{
+	Path, QuantitativeProductManifest, Result, eyre, fs, quantitative::product_manifest::validation,
+};
+
+/// Reads, parses, and validates the optional quantitative product manifest.
+///
+/// Returns the default manifest when `path` is `None`. Rows that omit
+/// `source_manifest_corpus_id` inherit the manifest-level `corpus_id` before
+/// validation runs.
+///
+/// # Errors
+///
+/// Fails when the file cannot be read, is not valid manifest JSON, or does
+/// not validate against `corpus_id`. Read and parse errors name the path.
+pub(super) fn quantitative_product_manifest(
+	path: Option<&Path>,
+	corpus_id: &str,
+) -> Result<QuantitativeProductManifest> {
+	let Some(path) = path else {
+		return Ok(QuantitativeProductManifest::default());
+	};
+	// A bare I/O error does not say which file failed, so attach the path here
+	// just as the parse error below does.
+	let raw = fs::read_to_string(path).map_err(|err| {
+		eyre::eyre!("Failed to read quantitative product manifest {}: {err}", path.display())
+	})?;
+	let mut manifest =
+		serde_json::from_str::<QuantitativeProductManifest>(&raw).map_err(|err| {
+			eyre::eyre!("Failed to parse quantitative product manifest {}: {err}", path.display())
+		})?;
+
+	populate_source_manifest_corpus_ids(&mut manifest);
+
+	validation::validate_quantitative_product_manifest(&manifest, path, corpus_id)?;
+
+	Ok(manifest)
+}
+
+/// Fills each missing row-level `source_manifest_corpus_id` with the
+/// manifest-level `corpus_id`; values already present are left untouched.
+fn populate_source_manifest_corpus_ids(manifest: &mut QuantitativeProductManifest) {
+	for row in &mut manifest.rows {
+		row.source_manifest_corpus_id.get_or_insert_with(|| manifest.corpus_id.clone());
+	}
+	for row in &mut manifest.per_query_rows {
+		row.source_manifest_corpus_id.get_or_insert_with(|| manifest.corpus_id.clone());
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation.rs
new file mode 100644
index 00000000..fe86d636
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation.rs
@@ -0,0 +1,50 @@
+mod rows;
+
+use crate::{
+	Path, QuantitativeProductManifest, Result, eyre,
+	quantitative::QUANTITATIVE_PRODUCT_MANIFEST_SCHEMA,
+};
+
+/// Validates a quantitative product manifest against the expected corpus.
+///
+/// Manifest-level checks run first (schema, non-blank id, matching corpus id,
+/// at least one row), followed by the product-row, per-query-row, and
+/// ranked-row-evidence checks in `rows`.
+///
+/// # Errors
+///
+/// Returns the first failing check; messages start with `path`.
+pub(super) fn validate_quantitative_product_manifest(
+	manifest: &QuantitativeProductManifest,
+	path: &Path,
+	corpus_id: &str,
+) -> Result<()> {
+	let shown_path = path.display();
+
+	if manifest.schema != QUANTITATIVE_PRODUCT_MANIFEST_SCHEMA {
+		return Err(eyre::eyre!(
+			"{} has schema {}, expected {QUANTITATIVE_PRODUCT_MANIFEST_SCHEMA}.",
+			shown_path,
+			manifest.schema
+		));
+	}
+	if manifest.manifest_id.trim().is_empty() {
+		return Err(eyre::eyre!("{} has an empty manifest_id.", shown_path));
+	}
+	if manifest.corpus_id != corpus_id {
+		return Err(eyre::eyre!(
+			"{} has corpus_id {}, expected same-corpus {}.",
+			shown_path,
+			manifest.corpus_id,
+			corpus_id
+		));
+	}
+	if manifest.rows.is_empty() {
+		return Err(eyre::eyre!("{} declares no quantitative product rows.", shown_path));
+	}
+
+	rows::validate_quantitative_product_rows(manifest, path, corpus_id)?;
+	rows::validate_quantitative_per_query_rows(manifest, path, corpus_id)?;
+
+	rows::validate_ranked_row_evidence(manifest, path)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows.rs
new file mode 100644
index 00000000..36009dfa
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows.rs
@@ -0,0 +1,33 @@
+mod per_query;
+mod product;
+mod ranking;
+
+use crate::{Path, QuantitativeProductManifest, Result};
+
+/// Validates every product row; see `product::validate_quantitative_product_rows`.
+pub(super) fn validate_quantitative_product_rows(
+	manifest: &QuantitativeProductManifest,
+	path: &Path,
+	corpus_id: &str,
+) -> Result<()> {
+	product::validate_quantitative_product_rows(manifest, path, corpus_id)
+}
+
+/// Validates every per-query row; see
+/// `per_query::validate_quantitative_per_query_rows`.
+pub(super) fn validate_quantitative_per_query_rows(
+	manifest: &QuantitativeProductManifest,
+	path: &Path,
+	corpus_id: &str,
+) -> Result<()> {
+	per_query::validate_quantitative_per_query_rows(manifest, path, corpus_id)
+}
+
+/// Checks ranked product rows against the per-query rows; see
+/// `ranking::validate_ranked_row_evidence`.
+pub(super) fn validate_ranked_row_evidence(
+	manifest: &QuantitativeProductManifest,
+	path: &Path,
+) -> Result<()> {
+	ranking::validate_ranked_row_evidence(manifest, path)
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query.rs
new file mode 100644
index 00000000..12dc5508
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query.rs
@@ -0,0 +1,23 @@
+mod identity;
+
+use crate::{BTreeSet, Path, QuantitativeProductManifest, Result};
+
+/// Validates the identity of every per-query row in the manifest.
+///
+/// Each row is checked against the set of `(product, adapter_id)` pairs taken
+/// from the manifest's product rows. Validation stops at the first failure.
+pub(super) fn validate_quantitative_per_query_rows(
+	manifest: &QuantitativeProductManifest,
+	path: &Path,
+	corpus_id: &str,
+) -> Result<()> {
+	let product_keys: BTreeSet<(&str, &str)> = manifest
+		.rows
+		.iter()
+		.map(|row| (row.product.as_str(), row.adapter_id.as_str()))
+		.collect();
+
+	manifest.per_query_rows.iter().try_for_each(|row| {
+		identity::validate_per_query_row_identity(path, row, &product_keys, corpus_id)
+	})
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity.rs
new file mode 100644
index 00000000..737e869e
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity.rs
@@ -0,0 +1,23 @@
+mod corpus;
+mod fields;
+mod product;
+
+use crate::{BTreeSet, Path, QuantitativePerQueryRow, Result};
+
+/// Runs the identity checks for one per-query row, stopping at the first failure.
+///
+/// In order: required fields are non-blank, a product row with the same
+/// `(product, adapter_id)` exists in `row_keys`, and the row's source corpus
+/// id equals `corpus_id`.
+pub(super) fn validate_per_query_row_identity(
+	path: &Path,
+	row: &QuantitativePerQueryRow,
+	row_keys: &BTreeSet<(&str, &str)>,
+	corpus_id: &str,
+) -> Result<()> {
+	fields::validate_complete_per_query_row(path, row)?;
+	product::validate_matching_product_row(path, row, row_keys)?;
+	corpus::validate_same_corpus_per_query_row(path, row, corpus_id)?;
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity/corpus.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity/corpus.rs
new file mode 100644
index 00000000..45d0c11c
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity/corpus.rs
@@ -0,0 +1,24 @@
+use crate::{Path, QuantitativePerQueryRow, Result, eyre};
+
+/// Requires the per-query row's `source_manifest_corpus_id` to equal `corpus_id`.
+///
+/// A row with no source corpus id fails the check as well.
+pub(super) fn validate_same_corpus_per_query_row(
+	path: &Path,
+	row: &QuantitativePerQueryRow,
+	corpus_id: &str,
+) -> Result<()> {
+	let same_corpus = row.source_manifest_corpus_id.as_deref() == Some(corpus_id);
+
+	if same_corpus {
+		return Ok(());
+	}
+
+	Err(eyre::eyre!(
+		"{} per-query row {}:{} is not same-corpus {}.",
+		path.display(),
+		row.product,
+		row.adapter_id,
+		corpus_id
+	))
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity/fields.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity/fields.rs
new file mode 100644
index 00000000..049614f1
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity/fields.rs
@@ -0,0 +1,29 @@
+use crate::{Path, QuantitativePerQueryRow, Result, eyre};
+
+/// Requires the listed text fields of a per-query row to be non-blank.
+///
+/// Checked fields: `job_id`, `suite`, `evidence_class`, `result_state`,
+/// `product`, `adapter_id`, and `qrel_source`. Whitespace-only counts as blank.
+pub(super) fn validate_complete_per_query_row(
+	path: &Path,
+	row: &QuantitativePerQueryRow,
+) -> Result<()> {
+	let required = [
+		row.job_id.trim(),
+		row.suite.trim(),
+		row.evidence_class.trim(),
+		row.result_state.trim(),
+		row.product.trim(),
+		row.adapter_id.trim(),
+		row.qrel_source.trim(),
+	];
+
+	if required.iter().any(|field| field.is_empty()) {
+		return Err(eyre::eyre!(
+			"{} has an incomplete quantitative per-query product row.",
+			path.display()
+		));
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity/product.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity/product.rs
new file mode 100644
index 00000000..dfed81b1
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/per_query/identity/product.rs
@@ -0,0 +1,21 @@
+use crate::{BTreeSet, Path, QuantitativePerQueryRow, Result, eyre};
+
+/// Requires `row_keys` to contain the per-query row's `(product, adapter_id)` pair.
+pub(super) fn validate_matching_product_row(
+	path: &Path,
+	row: &QuantitativePerQueryRow,
+	row_keys: &BTreeSet<(&str, &str)>,
+) -> Result<()> {
+	let key = (row.product.as_str(), row.adapter_id.as_str());
+
+	if row_keys.contains(&key) {
+		return Ok(());
+	}
+
+	Err(eyre::eyre!(
+		"{} per-query row {}:{} has no matching product row.",
+		path.display(),
+		row.product,
+		row.adapter_id
+	))
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/product.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/product.rs
new file mode 100644
index 00000000..ac009d59
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/product.rs
@@ -0,0 +1,26 @@
+mod identity;
+mod leaderboard;
+
+use crate::{Path, QuantitativeProductManifest, Result};
+
+/// Validates every product row of the manifest, stopping at the first failure.
+///
+/// Each row must pass the identity check; rows flagged `leaderboard_eligible`
+/// must additionally pass the leaderboard control check.
+pub(super) fn validate_quantitative_product_rows(
+	manifest: &QuantitativeProductManifest,
+	path: &Path,
+	corpus_id: &str,
+) -> Result<()> {
+	for row in &manifest.rows {
+		identity::validate_product_row_identity(path, row, corpus_id)?;
+
+		if !row.leaderboard_eligible {
+			continue;
+		}
+
+		leaderboard::validate_leaderboard_eligible_product_row(path, row)?;
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/product/identity.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/product/identity.rs
new file mode 100644
index 00000000..5dd82465
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/product/identity.rs
@@ -0,0 +1,43 @@
+use crate::{Path, QuantitativeBenchmarkRow, Result, eyre};
+
+/// Validates the identity of one imported product row.
+///
+/// # Errors
+///
+/// Fails when the row names product `ELF`, when `product`, `adapter_id`,
+/// `adapter_name`, `suite`, `evidence_class`, or `result_state` is blank, or
+/// when `source_manifest_corpus_id` differs from `corpus_id`.
+pub(super) fn validate_product_row_identity(
+	path: &Path,
+	row: &QuantitativeBenchmarkRow,
+	corpus_id: &str,
+) -> Result<()> {
+	// Compare the trimmed name, as the blank checks below do, so a padded
+	// value such as "ELF " cannot slip past the self-row guard.
+	if row.product.trim() == "ELF" {
+		return Err(eyre::eyre!(
+			"{} quantitative product manifest must not inject ELF self rows.",
+			path.display()
+		));
+	}
+	if row.product.trim().is_empty()
+		|| row.adapter_id.trim().is_empty()
+		|| row.adapter_name.trim().is_empty()
+		|| row.suite.trim().is_empty()
+		|| row.evidence_class.trim().is_empty()
+		|| row.result_state.trim().is_empty()
+	{
+		return Err(eyre::eyre!("{} has an incomplete quantitative product row.", path.display()));
+	}
+	if row.source_manifest_corpus_id.as_deref() != Some(corpus_id) {
+		return Err(eyre::eyre!(
+			"{} row {}:{} is not same-corpus {}.",
+			path.display(),
+			row.product,
+			row.adapter_id,
+			corpus_id
+		));
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/product/leaderboard.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/product/leaderboard.rs
new file mode 100644
index 00000000..e5f76ae2
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/product/leaderboard.rs
@@ -0,0 +1,36 @@
+use crate::{
+	Path, QuantitativeBenchmarkRow, Result, eyre, quantitative::MIN_LEADERBOARD_QUERY_COUNT,
+};
+
+/// Verifies the controls required of a row marked `leaderboard_eligible`.
+///
+/// All of the following must hold: evidence class `live_real_world`, a sample
+/// size of at least `MIN_LEADERBOARD_QUERY_COUNT`, a ranking query count equal
+/// to the sample size, an explicit-qrel query count equal to the ranking
+/// query count, `held_out` and `leakage_audited` both set, and a non-blank
+/// `audit_manifest_id`.
+pub(super) fn validate_leaderboard_eligible_product_row(
+	path: &Path,
+	row: &QuantitativeBenchmarkRow,
+) -> Result<()> {
+	let has_audit_manifest_id =
+		row.audit_manifest_id.as_deref().is_some_and(|id| !id.trim().is_empty());
+	let controls_met = row.evidence_class == "live_real_world"
+		&& row.sample_size >= MIN_LEADERBOARD_QUERY_COUNT
+		&& row.ranking_query_count == row.sample_size
+		&& row.explicit_qrel_query_count == row.ranking_query_count
+		&& row.held_out
+		&& row.leakage_audited
+		&& has_audit_manifest_id;
+
+	if controls_met {
+		return Ok(());
+	}
+
+	Err(eyre::eyre!(
+		"{} row {}:{} is marked leaderboard_eligible without the required live/product-runtime, query-count, explicit-qrel, held-out, leakage-audit, and audit-manifest controls.",
+		path.display(),
+		row.product,
+		row.adapter_id
+	))
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/ranking.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/ranking.rs
new file mode 100644
index 00000000..8206e54b
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/product_manifest/validation/rows/ranking.rs
@@ -0,0 +1,33 @@
+use crate::{Path, QuantitativeProductManifest, Result, eyre};
+
+/// Ensures each row that declares ranked queries is backed by enough per-query rows.
+pub(super) fn validate_ranked_row_evidence(
+	manifest: &QuantitativeProductManifest,
+	path: &Path,
+) -> Result<()> {
+	// Rows that declare no ranked queries need no per-query evidence.
+	for row in manifest.rows.iter().filter(|row| row.ranking_query_count > 0) {
+		let declared = row.ranking_query_count;
+		let available = manifest
+			.per_query_rows
+			.iter()
+			.filter(|candidate| candidate.product == row.product)
+			.filter(|candidate| candidate.adapter_id == row.adapter_id)
+			.count();
+
+		if available >= declared {
+			continue;
+		}
+
+		return Err(eyre::eyre!(
+			"{} row {}:{} declares {} ranked queries but only {} per-query rows.",
+			path.display(),
+			row.product,
+			row.adapter_id,
+			declared,
+			available
+		));
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report.rs
new file mode 100644
index 00000000..08b4b84a
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report.rs
@@ -0,0 +1,53 @@
+mod controls;
+mod imported;
+mod input;
+mod row;
+
+pub(crate) use self::input::QuantitativeReportInput;
+
+use crate::{
+	QuantitativeBenchmarkReport, Result,
+	quantitative::{self, QUANTITATIVE_K_VALUES, QUANTITATIVE_SCOREBOARD_SCHEMA},
+};
+
+/// Builds the quantitative scoreboard from the current row plus any imported manifest rows.
+pub(crate) fn quantitative_scoreboard_report(
+	input: QuantitativeReportInput<'_>,
+) -> Result<QuantitativeBenchmarkReport> {
+	let current = row::current_quantitative_row(&input)?;
+	let imported =
+		imported::imported_quantitative_rows(input.product_manifest_path, &current.corpus_id)?;
+
+	// The current row and its per-query rows come first; imported ones are appended after.
+	let rows: Vec<_> = std::iter::once(current.row).chain(imported.rows).collect();
+	let per_query_rows: Vec<_> =
+		current.per_query_rows.into_iter().chain(imported.per_query_rows).collect();
+
+	// A leaderboard claim needs at least two eligible rows in the merged set.
+	let eligible_row_count = rows.iter().filter(|row| row.leaderboard_eligible).count();
+	let controls = controls::quantitative_benchmark_controls(
+		&input,
+		current.ranking_query_count,
+		current.explicit_qrel_query_count,
+		eligible_row_count >= 2,
+	);
+
+	Ok(QuantitativeBenchmarkReport {
+		schema: QUANTITATIVE_SCOREBOARD_SCHEMA.to_string(),
+		generated_at: input.generated_at.to_string(),
+		corpus_id: current.corpus_id,
+		k_values: QUANTITATIVE_K_VALUES.to_vec(),
+		rows,
+		per_query_rows,
+		metrics_not_encoded: quantitative::quantitative_metrics_not_encoded(
+			imported.row_count,
+			imported.per_query_count,
+		),
+		controls,
+		claim_boundary: concat!(
+			"Do not convert fixture mechanics, missing explicit qrels, ",
+			"or partial candidate coverage into product leaderboard claims."
+		)
+		.to_string(),
+	})
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/controls.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/controls.rs
new file mode 100644
index 00000000..78d4b723
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/controls.rs
@@ -0,0 +1,26 @@
+use crate::{
+	QuantitativeBenchmarkControls,
+	quantitative::{MIN_LEADERBOARD_QUERY_COUNT, report::QuantitativeReportInput},
+};
+
+pub(super) fn quantitative_benchmark_controls(
+	input: &QuantitativeReportInput<'_>,
+	ranking_query_count: usize,
+	explicit_qrel_query_count: usize,
+	leaderboard_claim_allowed: bool,
+) -> QuantitativeBenchmarkControls {
+	let leakage_control =
+		"held_out_or_leakage_audited_runtime_rows_required_before_leaderboard_claims".to_owned();
+	QuantitativeBenchmarkControls {
+		leaderboard_claim_allowed,
+		current_query_count: input.source_jobs.len(), // every source job counts as one query
+		current_ranking_query_count: ranking_query_count,
+		current_explicit_qrel_query_count: explicit_qrel_query_count,
+		minimum_query_count_for_leaderboard: MIN_LEADERBOARD_QUERY_COUNT,
+		same_corpus_required: true, // the requirement flags are fixed, not derived from `input`
+		same_task_required: true,
+		ranked_candidates_required_for_ranking_metrics: true,
+		explicit_relevance_judgments_required_for_leaderboard: true,
+		leakage_control,
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/imported.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/imported.rs
new file mode 100644
index 00000000..2b2a2515
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/imported.rs
@@ -0,0 +1,27 @@
+use crate::{
+	Path, QuantitativeBenchmarkRow, QuantitativePerQueryRow, Result, quantitative::product_manifest,
+};
+
+pub(super) struct ImportedQuantitativeRows {
+	pub(super) rows: Vec<QuantitativeBenchmarkRow>, // aggregate rows taken from the product manifest
+	pub(super) per_query_rows: Vec<QuantitativePerQueryRow>, // per-query rows from the same manifest
+	pub(super) row_count: usize, // length of `rows` at load time
+	pub(super) per_query_count: usize, // length of `per_query_rows` at load time
+}
+
+/// Loads the optional product manifest and returns its rows together with their counts.
+///
+/// Any error from `quantitative_product_manifest` is propagated unchanged.
+pub(super) fn imported_quantitative_rows(
+	product_manifest_path: Option<&Path>,
+	corpus_id: &str,
+) -> Result<ImportedQuantitativeRows> {
+	let manifest =
+		product_manifest::quantitative_product_manifest(product_manifest_path, corpus_id)?;
+
+	let (rows, per_query_rows) = (manifest.rows, manifest.per_query_rows);
+	// Counts are taken before the vectors are moved into the result.
+	let (row_count, per_query_count) = (rows.len(), per_query_rows.len());
+
+	Ok(ImportedQuantitativeRows { rows, per_query_rows, row_count, per_query_count })
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/input.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/input.rs
new file mode 100644
index 00000000..c4412050
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/input.rs
@@ -0,0 +1,12 @@
+use crate::{AdapterReport, JobReport, Path, RealWorldJob, ReportSummary};
+
+pub(crate) struct QuantitativeReportInput<'a> {
+	pub(crate) run_id: &'a str, // forwarded to the audit context as the run identifier
+	pub(crate) generated_at: &'a str, // copied verbatim into the report's `generated_at`
+	pub(crate) adapter: &'a AdapterReport, // supplies the current row's adapter id and name
+	pub(crate) source_jobs: &'a [RealWorldJob], // its length is reported as the current query count
+	pub(crate) jobs: &'a [JobReport], // its length is reported as the current row's sample size
+	pub(crate) summary: &'a ReportSummary, // feeds the result-state and coverage-state helpers
+	pub(crate) product_manifest_path: Option<&'a Path>, // optional manifest of rows to import
+	pub(crate) audit_manifest_path: Option<&'a Path>, // optional audit manifest for the current row
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row.rs
new file mode 100644
index 00000000..ee420902
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row.rs
@@ -0,0 +1,53 @@
+mod audit_gates;
+mod basis;
+mod benchmark_row;
+mod query_counts;
+
+use crate::{
+	QuantitativeBenchmarkRow, QuantitativePerQueryRow, Result,
+	quantitative::report::{
+		QuantitativeReportInput, row::benchmark_row::QuantitativeBenchmarkRowInput,
+	},
+};
+
+pub(super) struct CurrentQuantitativeRow {
+	pub(super) corpus_id: String, // corpus id computed for the current run
+	pub(super) row: QuantitativeBenchmarkRow, // aggregate row for the current run
+	pub(super) per_query_rows: Vec<QuantitativePerQueryRow>, // per-query rows that `row` aggregates
+	pub(super) ranking_query_count: usize, // same value that is written to `row.ranking_query_count`
+	pub(super) explicit_qrel_query_count: usize, // same value as `row.explicit_qrel_query_count`
+}
+
+/// Derives the current run's scoreboard row together with its per-query rows and counts.
+pub(super) fn current_quantitative_row(
+	input: &QuantitativeReportInput<'_>,
+) -> Result<CurrentQuantitativeRow> {
+	let basis = basis::quantitative_row_basis(input);
+	let gates = audit_gates::quantitative_audit_gates(
+		input,
+		&basis.corpus_id,
+		basis.evidence_class,
+		basis.ranking_query_count,
+		basis.explicit_qrel_query_count,
+		basis.metric_comparable,
+	)?;
+	let row = benchmark_row::quantitative_benchmark_row(QuantitativeBenchmarkRowInput {
+		input,
+		corpus_id: &basis.corpus_id,
+		evidence_class: basis.evidence_class,
+		per_query_rows: &basis.per_query_rows,
+		ranking_query_count: basis.ranking_query_count,
+		explicit_qrel_query_count: basis.explicit_qrel_query_count,
+		metric_comparable: basis.metric_comparable,
+		result_state: basis.result_state,
+		audit_evidence: gates.audit_evidence,
+		leaderboard_eligible: gates.leaderboard_eligible,
+	});
+	Ok(CurrentQuantitativeRow {
+		row,
+		ranking_query_count: basis.ranking_query_count,
+		explicit_qrel_query_count: basis.explicit_qrel_query_count,
+		corpus_id: basis.corpus_id,
+		per_query_rows: basis.per_query_rows,
+	})
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/audit_gates.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/audit_gates.rs
new file mode 100644
index 00000000..31d2ddee
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/audit_gates.rs
@@ -0,0 +1,45 @@
+use crate::{
+	Result,
+	quantitative::{
+		self,
+		audit_manifest::{self, QuantitativeAuditContext, QuantitativeAuditEvidence},
+		report::QuantitativeReportInput,
+	},
+};
+
+pub(super) struct QuantitativeAuditGates {
+	pub(super) audit_evidence: QuantitativeAuditEvidence, // result of `quantitative_audit_evidence`
+	pub(super) leaderboard_eligible: bool, // result of `quantitative_row_leaderboard_eligible`
+}
+
+/// Resolves audit evidence for the current row and derives its leaderboard eligibility.
+pub(super) fn quantitative_audit_gates(
+	input: &QuantitativeReportInput<'_>,
+	corpus_id: &str,
+	evidence_class: &str,
+	ranking_query_count: usize,
+	explicit_qrel_query_count: usize,
+	metric_comparable: bool,
+) -> Result<QuantitativeAuditGates> {
+	let context = QuantitativeAuditContext {
+		run_id: input.run_id,
+		corpus_id,
+		product: "ELF",
+		adapter_id: input.adapter.adapter_id.as_str(),
+		source_jobs: input.source_jobs,
+		ranking_query_count,
+		explicit_qrel_query_count,
+	};
+	let audit_evidence =
+		audit_manifest::quantitative_audit_evidence(input.audit_manifest_path, context)?;
+	let leaderboard_eligible = quantitative::quantitative_row_leaderboard_eligible(
+		evidence_class,
+		input.source_jobs.len(),
+		ranking_query_count,
+		explicit_qrel_query_count,
+		metric_comparable,
+		&audit_evidence,
+	);
+
+	Ok(QuantitativeAuditGates { audit_evidence, leaderboard_eligible })
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/basis.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/basis.rs
new file mode 100644
index 00000000..0f1a7e47
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/basis.rs
@@ -0,0 +1,41 @@
+use crate::{
+	QuantitativePerQueryRow,
+	quantitative::{
+		self, metrics,
+		report::{QuantitativeReportInput, row::query_counts},
+	},
+};
+
+pub(super) struct QuantitativeRowBasis {
+	pub(super) corpus_id: String, // from `quantitative_corpus_id(source_jobs)`
+	pub(super) evidence_class: &'static str, // from `quantitative_evidence_class(adapter, jobs)`
+	pub(super) per_query_rows: Vec<QuantitativePerQueryRow>, // from `metrics::quantitative_per_query_rows`
+	pub(super) ranking_query_count: usize, // count reported by `quantitative_query_counts`
+	pub(super) explicit_qrel_query_count: usize, // count reported by `quantitative_query_counts`
+	pub(super) metric_comparable: bool, // true when `ranking_query_count` is greater than zero
+	pub(super) result_state: &'static str, // from `quantitative_result_state(summary)`
+}
+
+/// Computes the per-query rows and the derived counts that the current row is built from.
+pub(super) fn quantitative_row_basis(input: &QuantitativeReportInput<'_>) -> QuantitativeRowBasis {
+	let corpus_id = quantitative::quantitative_corpus_id(input.source_jobs);
+	let evidence_class = quantitative::quantitative_evidence_class(input.adapter, input.jobs);
+	let per_query_rows = metrics::quantitative_per_query_rows(
+		input.source_jobs,
+		input.jobs,
+		corpus_id.as_str(),
+		evidence_class,
+		input.adapter.adapter_id.as_str(),
+	);
+	let counts = query_counts::quantitative_query_counts(&per_query_rows);
+
+	QuantitativeRowBasis {
+		metric_comparable: counts.ranking_query_count > 0, // one rankable query is enough
+		ranking_query_count: counts.ranking_query_count,
+		explicit_qrel_query_count: counts.explicit_qrel_query_count,
+		result_state: quantitative::quantitative_result_state(input.summary),
+		corpus_id,
+		evidence_class,
+		per_query_rows,
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/benchmark_row.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/benchmark_row.rs
new file mode 100644
index 00000000..4b8b2e31
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/benchmark_row.rs
@@ -0,0 +1,59 @@
+mod input;
+
+pub(super) use self::input::QuantitativeBenchmarkRowInput;
+
+use crate::{
+	QuantitativeBenchmarkRow,
+	quantitative::{self, QUANTITATIVE_ROW_CLAIM_BOUNDARY, metrics},
+};
+
+/// Builds the aggregate scoreboard row for the current run from precomputed inputs.
+///
+/// The product is always reported as `ELF`; adapter identity comes from the report input,
+/// while counts, audit flags, and eligibility are copied from `row_input` as given.
+pub(super) fn quantitative_benchmark_row(
+	row_input: QuantitativeBenchmarkRowInput<'_, '_>,
+) -> QuantitativeBenchmarkRow {
+	let report = row_input.input;
+	let audit = row_input.audit_evidence;
+	let comparable = row_input.metric_comparable;
+	let ranked = row_input.ranking_query_count;
+	let explicit = row_input.explicit_qrel_query_count;
+	let per_query = row_input.per_query_rows;
+	let coverage = metrics::ranking_coverage_state(
+		report.summary,
+		report.source_jobs.len(),
+		ranked,
+	);
+
+	QuantitativeBenchmarkRow {
+		product: "ELF".to_string(),
+		adapter_id: report.adapter.adapter_id.clone(),
+		adapter_name: report.adapter.name.clone(),
+		suite: quantitative::quantitative_suite_id(report.jobs),
+		evidence_class: row_input.evidence_class.to_string(),
+		source_manifest_corpus_id: Some(row_input.corpus_id.to_string()),
+		result_state: row_input.result_state.to_string(),
+		// `comparable` and `metric_comparable` carry the same flag.
+		comparable,
+		metric_comparable: comparable,
+		leaderboard_eligible: row_input.leaderboard_eligible,
+		held_out: audit.held_out,
+		leakage_audited: audit.leakage_audited,
+		audit_manifest_id: audit.audit_manifest_id,
+		// Fixture-backed evidence is flagged as regression-only.
+		fixture_regression_only: row_input.evidence_class == "fixture_backed",
+		// Sample size counts job reports, while the coverage state above counts source jobs.
+		sample_size: report.jobs.len(),
+		ranking_query_count: ranked,
+		ranking_coverage_state: coverage.to_string(),
+		ranked_candidate_source: metrics::ranked_candidate_source(ranked).to_string(),
+		qrel_source: metrics::aggregate_qrel_source(ranked, explicit).to_string(),
+		explicit_qrel_query_count: explicit,
+		metrics: metrics::aggregate_metrics(per_query),
+		metric_states: metrics::aggregate_metric_states(row_input.result_state, comparable),
+		denominators: metrics::aggregate_denominators(per_query),
+		confidence_intervals: metrics::aggregate_confidence_intervals(per_query),
+		claim_boundary: QUANTITATIVE_ROW_CLAIM_BOUNDARY.to_string(),
+	}
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/benchmark_row/input.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/benchmark_row/input.rs
new file mode 100644
index 00000000..a8e3f96a
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/benchmark_row/input.rs
@@ -0,0 +1,17 @@
+use crate::{
+	QuantitativePerQueryRow,
+	quantitative::{audit_manifest::QuantitativeAuditEvidence, report::QuantitativeReportInput},
+};
+
+pub(in crate::quantitative::report::row) struct QuantitativeBenchmarkRowInput<'a, 'b> {
+	pub(in crate::quantitative::report::row) input: &'a QuantitativeReportInput<'b>, // run-level input
+	pub(in crate::quantitative::report::row) corpus_id: &'a str, // becomes `source_manifest_corpus_id`
+	pub(in crate::quantitative::report::row) evidence_class: &'a str, // "fixture_backed" => regression-only
+	pub(in crate::quantitative::report::row) per_query_rows: &'a [QuantitativePerQueryRow], // aggregated
+	pub(in crate::quantitative::report::row) ranking_query_count: usize,
+	pub(in crate::quantitative::report::row) explicit_qrel_query_count: usize,
+	pub(in crate::quantitative::report::row) metric_comparable: bool, // also written to `comparable`
+	pub(in crate::quantitative::report::row) result_state: &'a str,
+	pub(in crate::quantitative::report::row) audit_evidence: QuantitativeAuditEvidence, // audit flags + id
+	pub(in crate::quantitative::report::row) leaderboard_eligible: bool,
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/query_counts.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/query_counts.rs
new file mode 100644
index 00000000..12632f0a
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative/report/row/query_counts.rs
@@ -0,0 +1,21 @@
+use crate::QuantitativePerQueryRow;
+
+pub(super) struct QuantitativeQueryCounts {
+	pub(super) ranking_query_count: usize, // rows with candidate_count > 0 and expected_relevant_count > 0
+	pub(super) explicit_qrel_query_count: usize, // rows whose qrel_source is "explicit_qrels"
+}
+
+/// Counts, in one pass, the rows usable for ranking metrics and the rows with explicit qrels.
+pub(super) fn quantitative_query_counts(
+	per_query_rows: &[QuantitativePerQueryRow],
+) -> QuantitativeQueryCounts {
+	let (mut ranked, mut explicit) = (0_usize, 0_usize);
+	for row in per_query_rows {
+		// A query is rankable only with at least one candidate and one expected relevant item.
+		let rankable = row.candidate_count > 0 && row.expected_relevant_count > 0;
+		ranked += usize::from(rankable);
+		explicit += usize::from(row.qrel_source == "explicit_qrels");
+	}
+
+	QuantitativeQueryCounts { ranking_query_count: ranked, explicit_qrel_query_count: explicit }
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports.rs
new file mode 100644
index 00000000..a3bff704
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports.rs
@@ -0,0 +1,12 @@
+mod audit;
+mod benchmark;
+mod product;
+
+pub(crate) use self::{
+	audit::{QuantitativeAuditArtifact, QuantitativeAuditManifest},
+	benchmark::{
+		QuantitativeBenchmarkControls, QuantitativeBenchmarkReport, QuantitativeBenchmarkRow,
+		QuantitativeConfidenceInterval, QuantitativePerQueryRow,
+	},
+	product::QuantitativeProductManifest,
+};
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/audit.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/audit.rs
new file mode 100644
index 00000000..4b2ce584
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/audit.rs
@@ -0,0 +1,29 @@
+use crate::{Deserialize, Serialize};
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub(crate) struct QuantitativeAuditManifest {
+	pub(crate) schema: String, // tests expect "elf.agent_memory_quantitative_audit_manifest/v1"
+	pub(crate) manifest_id: String,
+	pub(crate) run_id: String, // NOTE(review): presumably must match the audited run — confirm in loader
+	pub(crate) corpus_id: String,
+	pub(crate) product: String,
+	pub(crate) adapter_id: String,
+	pub(crate) held_out: bool, // set to true when exported with `--held-out` (see tests)
+	pub(crate) leakage_audited: bool, // set to true when exported with `--leakage-audited` (see tests)
+	pub(crate) sample_size: usize,
+	pub(crate) ranking_query_count: usize,
+	pub(crate) explicit_qrel_query_count: usize,
+	pub(crate) query_ids: Vec<String>, // tests assert one id per ranking query in exported manifests
+	#[serde(default)] // may be omitted from the JSON; then deserializes as an empty list
+	pub(crate) controls: Vec<String>,
+	#[serde(default)] // may be omitted from the JSON; then deserializes as an empty list
+	pub(crate) artifacts: Vec<QuantitativeAuditArtifact>,
+	pub(crate) claim_boundary: String,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub(crate) struct QuantitativeAuditArtifact {
+	pub(crate) role: String, // NOTE(review): free-form role label as far as this file shows — confirm
+	pub(crate) path: String,
+	pub(crate) sha256: String, // NOTE(review): presumably the hex SHA-256 of the file at `path` — confirm
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark.rs
new file mode 100644
index 00000000..50d36ff1
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark.rs
@@ -0,0 +1,11 @@
+mod confidence;
+mod controls;
+mod per_query;
+mod report;
+mod row;
+
+pub(crate) use self::{
+	confidence::QuantitativeConfidenceInterval, controls::QuantitativeBenchmarkControls,
+	per_query::QuantitativePerQueryRow, report::QuantitativeBenchmarkReport,
+	row::QuantitativeBenchmarkRow,
+};
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/confidence.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/confidence.rs
new file mode 100644
index 00000000..7a3da458
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/confidence.rs
@@ -0,0 +1,11 @@
+use crate::{Deserialize, Serialize};
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+pub(crate) struct QuantitativeConfidenceInterval {
+	pub(crate) method: String, // NOTE(review): name of the interval method; allowed values not shown here
+	pub(crate) confidence: f64, // NOTE(review): presumably a level such as 0.95 — confirm with producer
+	pub(crate) lower: f64,
+	pub(crate) upper: f64,
+	pub(crate) numerator: usize,
+	pub(crate) denominator: usize,
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/controls.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/controls.rs
new file mode 100644
index 00000000..1e8ea05f
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/controls.rs
@@ -0,0 +1,15 @@
+use crate::{Deserialize, Serialize};
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+pub(crate) struct QuantitativeBenchmarkControls {
+	pub(crate) same_corpus_required: bool, // the report builder always writes `true`
+	pub(crate) same_task_required: bool, // the report builder always writes `true`
+	pub(crate) ranked_candidates_required_for_ranking_metrics: bool, // always `true` from the builder
+	pub(crate) explicit_relevance_judgments_required_for_leaderboard: bool, // always `true` from the builder
+	pub(crate) minimum_query_count_for_leaderboard: usize, // written from MIN_LEADERBOARD_QUERY_COUNT
+	pub(crate) current_query_count: usize, // number of source jobs in the current run
+	pub(crate) current_ranking_query_count: usize,
+	pub(crate) current_explicit_qrel_query_count: usize,
+	pub(crate) leaderboard_claim_allowed: bool, // true when at least two rows are leaderboard-eligible
+	pub(crate) leakage_control: String,
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/per_query.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/per_query.rs
new file mode 100644
index 00000000..35ce6d6f
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/per_query.rs
@@ -0,0 +1,20 @@
+use crate::{BTreeMap, Deserialize, Serialize};
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+pub(crate) struct QuantitativePerQueryRow {
+	pub(crate) job_id: String,
+	pub(crate) suite: String,
+	pub(crate) evidence_class: String,
+	pub(crate) source_manifest_corpus_id: Option<String>,
+	pub(crate) result_state: String,
+	pub(crate) expected_relevant_count: usize, // must be > 0 for the row to count as a ranking query
+	pub(crate) candidate_count: usize, // must be > 0 for the row to count as a ranking query
+	pub(crate) qrel_source: String, // "explicit_qrels" rows count toward the explicit-qrel total
+	pub(crate) relevance_grade_sum: f64,
+	pub(crate) product: String, // with `adapter_id`, matches this row to its aggregate row
+	pub(crate) adapter_id: String,
+	pub(crate) metrics: BTreeMap<String, Option<f64>>, // NOTE(review): `None` presumably = not computable
+	pub(crate) metric_states: BTreeMap<String, String>,
+	pub(crate) denominators: BTreeMap<String, usize>,
+	pub(crate) claim_boundary: String,
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/report.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/report.rs
new file mode 100644
index 00000000..1a57e138
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/report.rs
@@ -0,0 +1,19 @@
+use crate::{
+	Deserialize, QuantitativeBenchmarkControls, QuantitativeBenchmarkRow, QuantitativePerQueryRow,
+	Serialize,
+};
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+pub(crate) struct QuantitativeBenchmarkReport {
+	pub(crate) schema: String, // written from QUANTITATIVE_SCOREBOARD_SCHEMA
+	pub(crate) generated_at: String, // copied from the report input
+	pub(crate) corpus_id: String, // corpus id of the current run's row
+	pub(crate) k_values: Vec<usize>, // written from QUANTITATIVE_K_VALUES
+	pub(crate) rows: Vec<QuantitativeBenchmarkRow>, // current row first, then imported rows
+	#[serde(default)] // may be omitted from the JSON; then deserializes as an empty list
+	pub(crate) per_query_rows: Vec<QuantitativePerQueryRow>,
+	#[serde(default)] // may be omitted from the JSON; then deserializes as an empty list
+	pub(crate) metrics_not_encoded: Vec<String>,
+	pub(crate) controls: QuantitativeBenchmarkControls,
+	pub(crate) claim_boundary: String,
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/row.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/row.rs
new file mode 100644
index 00000000..cdef9042
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/benchmark/row.rs
@@ -0,0 +1,31 @@
+use crate::{BTreeMap, Deserialize, QuantitativeConfidenceInterval, Serialize};
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+pub(crate) struct QuantitativeBenchmarkRow {
+	pub(crate) product: String, // "ELF" for the row generated from the current run
+	pub(crate) adapter_id: String,
+	pub(crate) adapter_name: String,
+	pub(crate) suite: String,
+	pub(crate) evidence_class: String, // leaderboard validation requires "live_real_world"
+	pub(crate) source_manifest_corpus_id: Option<String>,
+	pub(crate) result_state: String,
+	pub(crate) comparable: bool, // the current-run builder writes the same value as `metric_comparable`
+	pub(crate) metric_comparable: bool,
+	pub(crate) leaderboard_eligible: bool,
+	pub(crate) held_out: bool, // required by leaderboard validation
+	pub(crate) leakage_audited: bool, // required by leaderboard validation
+	pub(crate) audit_manifest_id: Option<String>, // must be non-blank for leaderboard validation
+	pub(crate) fixture_regression_only: bool, // current-run builder: evidence_class == "fixture_backed"
+	pub(crate) sample_size: usize, // current-run builder: number of job reports
+	pub(crate) ranking_query_count: usize, // leaderboard validation requires equality with `sample_size`
+	pub(crate) ranking_coverage_state: String,
+	pub(crate) ranked_candidate_source: String,
+	pub(crate) qrel_source: String,
+	pub(crate) explicit_qrel_query_count: usize, // leaderboard validation: equals `ranking_query_count`
+	pub(crate) metrics: BTreeMap<String, Option<f64>>,
+	pub(crate) metric_states: BTreeMap<String, String>,
+	pub(crate) denominators: BTreeMap<String, usize>,
+	#[serde(default)] // may be omitted from the JSON; then deserializes as an empty map
+	pub(crate) confidence_intervals: BTreeMap<String, QuantitativeConfidenceInterval>,
+	pub(crate) claim_boundary: String,
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/product.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/product.rs
new file mode 100644
index 00000000..efc5c357
--- /dev/null
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/quantitative_reports/product.rs
@@ -0,0 +1,12 @@
+use crate::{Deserialize, QuantitativeBenchmarkRow, QuantitativePerQueryRow, Serialize};
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+pub(crate) struct QuantitativeProductManifest {
+	pub(crate) schema: String,
+	pub(crate) manifest_id: String,
+	pub(crate) corpus_id: String, // NOTE(review): presumably must equal the current corpus id — confirm
+	#[serde(default)] // may be omitted from the JSON; then deserializes as an empty list
+	pub(crate) rows: Vec<QuantitativeBenchmarkRow>,
+	#[serde(default)] // may be omitted from the JSON; then deserializes as an empty list
+	pub(crate) per_query_rows: Vec<QuantitativePerQueryRow>,
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/report_root.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/report_root.rs
index 9ee62f1e..797eb2ba 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark/report_root.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/report_root.rs
@@ -1,7 +1,8 @@
 use crate::{
 	AdapterReport, CaptureIntegrationReport, Deserialize, EvolutionSummary, ExternalAdapterSection,
-	FollowUpReport, JobReport, OperationalEvidenceReport, PrivateCorpusRedaction, ReportSummary,
-	ScoreboardReport, Serialize, SuiteReport, UnsupportedClaimReport,
+	FollowUpReport, JobReport, OperationalEvidenceReport, PrivateCorpusRedaction,
+	QuantitativeBenchmarkReport, ReportSummary, ScoreboardReport, Serialize, SuiteReport,
+	UnsupportedClaimReport,
 };
 
 #[derive(Debug, Deserialize, Serialize)]
@@ -17,6 +18,8 @@ pub(super) struct RealWorldReport {
 	#[serde(default)]
 	pub(super) operational_evidence: OperationalEvidenceReport,
 	#[serde(default)]
+	pub(super) quantitative_scoreboard: QuantitativeBenchmarkReport,
+	#[serde(default)]
 	pub(super) external_adapters: ExternalAdapterSection,
 	pub(super) capture_integration: CaptureIntegrationReport,
 	pub(super) summary: ReportSummary,
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/scoring.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/scoring.rs
index 088a8842..2f0f34a7 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark/scoring.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/scoring.rs
@@ -27,6 +27,10 @@ pub(super) fn job_report(job: &RealWorldJob, scoring: JobScoring) -> JobReport {
 	reports::job_report(job, scoring)
 }
 
+pub(super) fn produced_evidence_order(job: &RealWorldJob) -> Vec<String> { // ids of the job's produced evidence
+	self::answers::ordered_produced_evidence_ids(self::answers::produced_answer(job)) // order is the helper's
+}
+
 pub(super) fn score_job(job: &RealWorldJob) -> JobScoring {
 	let answer = self::answers::produced_answer(job);
 	let produced_evidence = self::answers::produced_evidence_ids(answer);
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark/scoring/answers.rs b/apps/elf-eval/src/bin/real_world_job_benchmark/scoring/answers.rs
index 3e60e5b1..1e2d85ed 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark/scoring/answers.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark/scoring/answers.rs
@@ -61,28 +61,7 @@ pub(super) fn trap_ids_used(
 		.collect()
 }
 
-fn synthetic_answer(job: &RealWorldJob) -> &ProducedAnswer {
-	let _ = job;
-
-	static EMPTY_ANSWER: std::sync::OnceLock<ProducedAnswer> = std::sync::OnceLock::new();
-
-	EMPTY_ANSWER.get_or_init(|| ProducedAnswer {
-		content: String::new(),
-		claims: Vec::new(),
-		evidence_ids: Vec::new(),
-		pages: Vec::new(),
-		memory_summaries: Vec::new(),
-		proactive_briefs: Vec::new(),
-		scheduled_tasks: Vec::new(),
-		work_journal_readbacks: Vec::new(),
-		recovery_drills: Vec::new(),
-		latency_ms: None,
-		cost: None,
-		trace_explainability: None,
-	})
-}
-
-fn ordered_produced_evidence_ids(answer: &ProducedAnswer) -> Vec<String> {
+pub(super) fn ordered_produced_evidence_ids(answer: &ProducedAnswer) -> Vec<String> {
 	let mut seen = BTreeSet::new();
 	let mut evidence = Vec::new();
 
@@ -180,6 +159,27 @@ fn ordered_produced_evidence_ids(answer: &ProducedAnswer) -> Vec<String> {
 	evidence
 }
 
+/// Returns a shared empty answer, built once on first use; `job` is intentionally unused.
+fn synthetic_answer(job: &RealWorldJob) -> &ProducedAnswer {
+	static EMPTY_ANSWER: std::sync::OnceLock<ProducedAnswer> = std::sync::OnceLock::new();
+
+	let _ = job;
+	EMPTY_ANSWER.get_or_init(|| ProducedAnswer {
+		content: String::new(),
+		claims: Vec::new(),
+		evidence_ids: Vec::new(),
+		pages: Vec::new(),
+		memory_summaries: Vec::new(),
+		proactive_briefs: Vec::new(),
+		scheduled_tasks: Vec::new(),
+		work_journal_readbacks: Vec::new(),
+		recovery_drills: Vec::new(),
+		latency_ms: None,
+		cost: None,
+		trace_explainability: None,
+	})
+}
+
 fn push_ordered_evidence(
 	evidence: &mut Vec<String>,
 	seen: &mut BTreeSet<String>,
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 6df392ce..6aa5cecb 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -20,6 +20,7 @@
 #[path = "real_world_job_benchmark/operator_debug.rs"] mod operator_debug;
 #[path = "real_world_job_benchmark/proactive_brief.rs"] mod proactive_brief;
 #[path = "real_world_job_benchmark/production_ops.rs"] mod production_ops;
+#[path = "real_world_job_benchmark/quantitative.rs"] mod quantitative;
 #[path = "real_world_job_benchmark/recall_debug_reports.rs"] mod recall_debug_reports;
 #[path = "real_world_job_benchmark/retrieval.rs"] mod retrieval;
 #[path = "real_world_job_benchmark/root_aggregate.rs"] mod root_aggregate;
diff --git a/apps/elf-eval/tests/real_world_job_benchmark/markdown_rendering_generated.rs b/apps/elf-eval/tests/real_world_job_benchmark/markdown_rendering_generated.rs
index f5a395c8..dc83515a 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark/markdown_rendering_generated.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark/markdown_rendering_generated.rs
@@ -38,6 +38,9 @@ fn generated_json_report_renders_markdown() -> Result<()> {
 	assert!(markdown.contains("# Real-World Job Benchmark Report"));
 	assert!(markdown.contains("work_resume"));
 	assert!(markdown.contains("Capture And Integration Coverage"));
+	assert!(markdown.contains("Quantitative Benchmark Report"));
+	assert!(markdown.contains("leaderboard claims require explicit qrels"));
+	assert!(markdown.contains("| ELF | `pass` | `fixture_backed`"));
 	assert!(markdown.contains("External Adapter Coverage"));
 	assert!(markdown.contains("live-baseline-only"));
 	assert!(markdown.contains("live real-world"));
diff --git a/apps/elf-eval/tests/real_world_job_benchmark/quantitative.rs b/apps/elf-eval/tests/real_world_job_benchmark/quantitative.rs
new file mode 100644
index 00000000..9bcc07c8
--- /dev/null
+++ b/apps/elf-eval/tests/real_world_job_benchmark/quantitative.rs
@@ -0,0 +1,49 @@
+#[path = "quantitative/audit_manifest.rs"] mod audit_manifest;
+#[path = "quantitative/contracts.rs"] mod contracts;
+#[path = "quantitative/metrics.rs"] mod metrics;
+#[path = "quantitative/product_manifest.rs"] mod product_manifest;
+
+use std::{path::Path, process::Command};
+
+use color_eyre::Result;
+use serde_json::Value;
+
+use crate::support;
+
+/// Runs the benchmark over the adversarial fixtures with a product manifest; parses stdout JSON.
+fn run_report_with_quantitative_manifest(manifest_path: &Path) -> Result<Value> {
+	let mut runner = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"));
+
+	runner.arg("run").arg("--fixtures").arg(support::adversarial_quality_fixture_dir());
+	runner.arg("--quantitative-product-manifest").arg(manifest_path);
+
+	let output = runner.output()?;
+
+	assert!(
+		output.status.success(),
+		"real_world_job runner failed: {}",
+		String::from_utf8_lossy(&output.stderr)
+	);
+
+	Ok(serde_json::from_slice(&output.stdout)?)
+}
+
+/// Runs the benchmark over the adversarial fixtures with an audit manifest and explicit run id.
+///
+/// Panics if the runner exits unsuccessfully; otherwise parses its stdout as JSON.
+fn run_report_with_quantitative_audit(manifest_path: &Path, run_id: &str) -> Result<Value> {
+	let mut runner = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"));
+
+	runner.arg("run").arg("--fixtures").arg(support::adversarial_quality_fixture_dir());
+	runner.arg("--run-id").arg(run_id);
+	runner.arg("--quantitative-audit-manifest").arg(manifest_path);
+
+	let output = runner.output()?;
+	assert!(
+		output.status.success(),
+		"real_world_job runner failed: {}",
+		String::from_utf8_lossy(&output.stderr)
+	);
+
+	Ok(serde_json::from_slice(&output.stdout)?)
+}
diff --git a/apps/elf-eval/tests/real_world_job_benchmark/quantitative/audit_manifest.rs b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/audit_manifest.rs
new file mode 100644
index 00000000..5d8777cd
--- /dev/null
+++ b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/audit_manifest.rs
@@ -0,0 +1,110 @@
+use std::{
+	env, fs,
+	process::{self, Command},
+};
+
+use color_eyre::{Result, eyre};
+use serde_json::Value;
+
+use crate::support;
+
+/// Exports an audit manifest with the held-out and leakage-audited flags, checks its shape,
+/// then re-imports it under the same run id and checks the first scoreboard row's gates.
+#[test]
+fn quantitative_audit_manifest_exports_and_opens_current_row_gates() -> Result<()> {
+	let temp_dir =
+		env::temp_dir().join(format!("elf-quantitative-audit-manifest-test-{}", process::id()));
+	let manifest_path = temp_dir.join("audit-manifest.json");
+
+	fs::create_dir_all(&temp_dir)?;
+
+	let export = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"))
+		.arg("export-quantitative-audit-manifest")
+		.arg("--fixtures")
+		.arg(support::adversarial_quality_fixture_dir())
+		.arg("--out")
+		.arg(&manifest_path)
+		.arg("--run-id")
+		.arg("audit-import-test")
+		.arg("--held-out")
+		.arg("--leakage-audited")
+		.arg("--control")
+		.arg("query_ids_locked_before_product_runtime")
+		.arg("--control")
+		.arg("product_runtime_did_not_receive_expected_answers_or_qrels")
+		.arg("--control")
+		.arg("ranked_candidates_emitted_by_product_runtime")
+		.output()?;
+	let export_stderr = String::from_utf8_lossy(&export.stderr);
+
+	assert!(export.status.success(), "quantitative audit export failed: {export_stderr}");
+
+	let manifest = support::load_json(&manifest_path)?;
+
+	assert_eq!(
+		manifest.pointer("/schema").and_then(Value::as_str),
+		Some("elf.agent_memory_quantitative_audit_manifest/v1")
+	);
+	assert_eq!(manifest.pointer("/held_out").and_then(Value::as_bool), Some(true));
+	assert_eq!(manifest.pointer("/leakage_audited").and_then(Value::as_bool), Some(true));
+	// Compare `Option`s so a missing `ranking_query_count` fails instead of defaulting to 0.
+	assert_eq!(
+		manifest.pointer("/ranking_query_count").and_then(Value::as_u64),
+		Some(support::array_at(&manifest, "/query_ids")?.len() as u64)
+	);
+
+	let imported = super::run_report_with_quantitative_audit(&manifest_path, "audit-import-test")?;
+	let row = support::array_at(&imported, "/quantitative_scoreboard/rows")?
+		.first()
+		.ok_or_else(|| eyre::eyre!("missing quantitative row"))?;
+
+	assert_eq!(row.pointer("/held_out").and_then(Value::as_bool), Some(true));
+	assert_eq!(row.pointer("/leakage_audited").and_then(Value::as_bool), Some(true));
+	assert_eq!(
+		row.pointer("/audit_manifest_id").and_then(Value::as_str),
+		Some("audit-import-test-quantitative-audit-manifest")
+	);
+	assert_eq!(row.pointer("/leaderboard_eligible").and_then(Value::as_bool), Some(false));
+
+	Ok(())
+}
+
+/// Exports an audit manifest for run id `audit-import-test`, then checks that importing it into
+/// a run with a different id fails and that stderr contains "expected different-run".
+#[test]
+fn quantitative_audit_manifest_rejects_wrong_run_id_imports() -> Result<()> {
+	let scratch_dir =
+		env::temp_dir().join(format!("elf-quantitative-audit-manifest-run-test-{}", process::id()));
+	let manifest_path = scratch_dir.join("audit-manifest.json");
+
+	fs::create_dir_all(&scratch_dir)?;
+
+	let exported = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"))
+		.arg("export-quantitative-audit-manifest")
+		.arg("--fixtures")
+		.arg(support::adversarial_quality_fixture_dir())
+		.arg("--out")
+		.arg(&manifest_path)
+		.arg("--run-id")
+		.arg("audit-import-test")
+		.output()?;
+	let export_stderr = String::from_utf8_lossy(&exported.stderr);
+
+	assert!(exported.status.success(), "quantitative audit export failed: {export_stderr}");
+
+	let rejected = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"))
+		.arg("run")
+		.arg("--fixtures")
+		.arg(support::adversarial_quality_fixture_dir())
+		.arg("--run-id")
+		.arg("different-run")
+		.arg("--quantitative-audit-manifest")
+		.arg(&manifest_path)
+		.output()?;
+	let rejection_stderr = String::from_utf8_lossy(&rejected.stderr);
+
+	assert!(!rejected.status.success());
+	assert!(rejection_stderr.contains("expected different-run"));
+
+	Ok(())
+}
diff --git a/apps/elf-eval/tests/real_world_job_benchmark/quantitative/contracts.rs b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/contracts.rs
new file mode 100644
index 00000000..fc158b77
--- /dev/null
+++ b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/contracts.rs
@@ -0,0 +1,127 @@
+use color_eyre::Result;
+use serde_json::Value;
+
+use crate::support;
+
+/// Checks the top-level `quantitative_scoreboard` fields of the adversarial-quality report
+/// (schema, timestamp, `k` values, and the two `controls` entries), then delegates to the row
+/// and per-query contract assertions.
+#[test]
+fn adversarial_quality_report_exposes_quantitative_scoreboard() -> Result<()> {
+	let report = support::run_json_report_from(support::adversarial_quality_fixture_dir())?;
+	let expected_k_values = vec![Value::from(1), Value::from(3), Value::from(5), Value::from(10)];
+	let claim_allowed =
+		report.pointer("/quantitative_scoreboard/controls/leaderboard_claim_allowed");
+	let query_count = report.pointer("/quantitative_scoreboard/controls/current_query_count");
+
+	assert_eq!(
+		report.pointer("/quantitative_scoreboard/schema").and_then(Value::as_str),
+		Some("elf.agent_memory_quantitative_benchmark/v1")
+	);
+	assert_eq!(
+		report.pointer("/quantitative_scoreboard/generated_at").and_then(Value::as_str),
+		report.pointer("/generated_at").and_then(Value::as_str)
+	);
+	assert_eq!(
+		report.pointer("/quantitative_scoreboard/k_values").and_then(Value::as_array),
+		Some(&expected_k_values)
+	);
+	assert_eq!(claim_allowed.and_then(Value::as_bool), Some(false));
+	assert_eq!(
+		query_count.and_then(Value::as_u64),
+		report.pointer("/summary/job_count").and_then(Value::as_u64)
+	);
+
+	assert_quantitative_row_contract(&report)?;
+	assert_quantitative_per_query_contract(&report)?;
+
+	Ok(())
+}
+
+/// Asserts that the report carries exactly one scoreboard row and that the row's identity,
+/// gate flags, per-metric values/states/denominators, and `@5` confidence intervals are present.
+fn assert_quantitative_row_contract(report: &Value) -> Result<()> {
+	let rows = support::array_at(report, "/quantitative_scoreboard/rows")?;
+
+	assert_eq!(rows.len(), 1);
+
+	let row = &rows[0];
+
+	for (field, expected) in [
+		("/product", "ELF"),
+		("/adapter_id", "fixture_smoke"),
+		("/suite", "adversarial_quality"),
+		("/evidence_class", "fixture_backed"),
+		("/result_state", "pass"),
+		("/ranking_coverage_state", "complete"),
+		("/ranked_candidate_source", "produced_evidence_order"),
+		("/qrel_source", "expected_evidence_fallback"),
+	] {
+		assert_eq!(row.pointer(field).and_then(Value::as_str), Some(expected));
+	}
+	for (field, expected) in [
+		("/comparable", true),
+		("/metric_comparable", true),
+		("/leaderboard_eligible", false),
+		("/fixture_regression_only", true),
+	] {
+		assert_eq!(row.pointer(field).and_then(Value::as_bool), Some(expected));
+	}
+
+	assert_eq!(row.pointer("/explicit_qrel_query_count").and_then(Value::as_u64), Some(0));
+
+	for metric in [
+		"recall_at_1",
+		"precision_at_1",
+		"success_at_1",
+		"recall_at_5",
+		"precision_at_5",
+		"success_at_5",
+		"mrr",
+		"ndcg_at_5",
+		"average_precision",
+	] {
+		let value = row.pointer(&format!("/metrics/{metric}"));
+		let state = row.pointer(&format!("/metric_states/{metric}"));
+		let denominator = row.pointer(&format!("/denominators/{metric}"));
+
+		assert!(value.and_then(Value::as_f64).is_some());
+		assert_eq!(state.and_then(Value::as_str), Some("pass"));
+		assert!(denominator.and_then(Value::as_u64).is_some());
+	}
+	for metric in ["recall_at_5", "precision_at_5", "success_at_5"] {
+		let interval = row.pointer(&format!("/confidence_intervals/{metric}"));
+		let method = interval.and_then(|value| value.pointer("/method"));
+		let confidence = interval.and_then(|value| value.pointer("/confidence"));
+		let denominator = interval.and_then(|value| value.pointer("/denominator"));
+
+		assert_eq!(method.and_then(Value::as_str), Some("wilson_score"));
+		assert_eq!(confidence.and_then(Value::as_f64), Some(0.95));
+		assert!(denominator.and_then(Value::as_u64).is_some());
+	}
+
+	Ok(())
+}
+
+/// Checks one per-query row per job (a missing `/summary/job_count` fails) and each row's shape.
+fn assert_quantitative_per_query_contract(report: &Value) -> Result<()> {
+	let rows = support::array_at(report, "/quantitative_scoreboard/per_query_rows")?;
+	let job_count = report.pointer("/summary/job_count").and_then(Value::as_u64);
+
+	assert_eq!(Some(rows.len() as u64), job_count);
+
+	for row in rows {
+		assert_eq!(row.pointer("/evidence_class").and_then(Value::as_str), Some("fixture_backed"));
+		assert_eq!(
+			row.pointer("/qrel_source").and_then(Value::as_str),
+			Some("expected_evidence_fallback")
+		);
+		assert!(row.pointer("/candidate_count").and_then(Value::as_u64).is_some());
+		assert!(row.pointer("/expected_relevant_count").and_then(Value::as_u64).is_some());
+		for metric in ["recall_at_5", "precision_at_5", "ndcg_at_5", "average_precision"] {
+			assert!(row.pointer(&format!("/metrics/{metric}")).is_some());
+		}
+	}
+
+	Ok(())
+}
diff --git a/apps/elf-eval/tests/real_world_job_benchmark/quantitative/metrics.rs b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/metrics.rs
new file mode 100644
index 00000000..3b9262a0
--- /dev/null
+++ b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/metrics.rs
@@ -0,0 +1,53 @@
+use std::{env, fs, process};
+
+use color_eyre::{Result, eyre};
+use serde_json::Value;
+
+use crate::support;
+
+/// Rewrites one fixture so the answer lists `old-provider-note` before `current-provider-report`
+/// and only the latter carries an explicit relevance judgment, then checks that recall@1,
+/// recall@3, MRR, and average precision reflect that candidate order.
+#[test]
+fn explicit_qrels_preserve_candidate_order_for_ranking_metrics() -> Result<()> {
+	let fixture_path =
+		support::adversarial_quality_fixture_dir().join("conflicting_source_authority.json");
+	let mut job = serde_json::from_str::<Value>(&fs::read_to_string(fixture_path)?)?;
+	let judgments =
+		serde_json::json!([{ "evidence_id": "current-provider-report", "grade": 1.0 }]);
+
+	support::set_json_pointer(
+		&mut job,
+		"/corpus/adapter_response/answer/evidence_ids",
+		serde_json::json!(["old-provider-note", "current-provider-report"]),
+	)?;
+
+	job.pointer_mut("/expected_answer")
+		.and_then(Value::as_object_mut)
+		.ok_or_else(|| eyre::eyre!("missing expected_answer object"))?
+		.insert("relevance_judgments".to_string(), judgments);
+
+	let scratch_dir = env::temp_dir().join(format!("elf-explicit-qrel-order-test-{}", process::id()));
+
+	fs::create_dir_all(&scratch_dir)?;
+	fs::write(scratch_dir.join("explicit_qrel_order.json"), serde_json::to_vec_pretty(&job)?)?;
+
+	let report = support::run_json_report_from(scratch_dir)?;
+	let rows = support::array_at(&report, "/quantitative_scoreboard/rows")?;
+	let row = rows.first().ok_or_else(|| eyre::eyre!("missing quantitative row"))?;
+	let per_query_rows = support::array_at(&report, "/quantitative_scoreboard/per_query_rows")?;
+	let per_query = per_query_rows.first().ok_or_else(|| eyre::eyre!("missing per-query row"))?;
+
+	assert_eq!(row.pointer("/qrel_source").and_then(Value::as_str), Some("explicit_qrels"));
+	assert_eq!(row.pointer("/explicit_qrel_query_count").and_then(Value::as_u64), Some(1));
+	assert_eq!(row.pointer("/metrics/recall_at_1").and_then(Value::as_f64), Some(0.0));
+	assert_eq!(row.pointer("/metrics/recall_at_3").and_then(Value::as_f64), Some(1.0));
+	assert_eq!(row.pointer("/metrics/mrr").and_then(Value::as_f64), Some(0.5));
+	assert_eq!(row.pointer("/metrics/average_precision").and_then(Value::as_f64), Some(0.5));
+	assert_eq!(row.pointer("/denominators/recall_at_5").and_then(Value::as_u64), Some(1));
+	assert_eq!(per_query.pointer("/qrel_source").and_then(Value::as_str), Some("explicit_qrels"));
+	assert_eq!(per_query.pointer("/metrics/mrr").and_then(Value::as_f64), Some(0.5));
+	assert_eq!(per_query.pointer("/denominators/recall_at_5").and_then(Value::as_u64), Some(1));
+
+	Ok(())
+}
diff --git a/apps/elf-eval/tests/real_world_job_benchmark/quantitative/product_manifest.rs b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/product_manifest.rs
new file mode 100644
index 00000000..054e70f3
--- /dev/null
+++ b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/product_manifest.rs
@@ -0,0 +1,68 @@
+#[path = "product_manifest/export.rs"] mod export;
+#[path = "product_manifest/validation.rs"] mod validation;
+
+use std::{
+	env, fs,
+	path::PathBuf,
+	process::{self, Command},
+};
+
+use color_eyre::Result;
+use serde_json::Value;
+
+use crate::support;
+
+struct ProductManifestPaths { // Scratch locations shared by the product-manifest tests.
+	temp_dir: PathBuf, // Per-process directory under the system temp dir.
+	report_path: PathBuf, // `report.json` inside `temp_dir`.
+	manifest_path: PathBuf, // Manifest file inside `temp_dir`; the file name is caller-chosen.
+}
+
+/// Builds the scratch paths for one test: a `{temp_name}-{pid}` directory under the system temp
+/// dir, its `report.json`, and `manifest_file` inside it. Nothing is created on disk here.
+fn product_manifest_paths(temp_name: &str, manifest_file: &str) -> ProductManifestPaths {
+	let temp_dir = env::temp_dir().join(format!("{temp_name}-{}", process::id()));
+	let report_path = temp_dir.join("report.json");
+	let manifest_path = temp_dir.join(manifest_file);
+
+	ProductManifestPaths { temp_dir, report_path, manifest_path }
+}
+
+/// Runs the adversarial-quality report, creates `paths.temp_dir`, and writes the report as
+/// pretty-printed JSON to `paths.report_path`.
+fn write_adversarial_report(paths: &ProductManifestPaths) -> Result<()> {
+	let report = support::run_json_report_from(support::adversarial_quality_fixture_dir())?;
+
+	fs::create_dir_all(&paths.temp_dir)?;
+	Ok(fs::write(&paths.report_path, serde_json::to_vec_pretty(&report)?)?)
+}
+
+/// Writes the adversarial-quality report, then exports it as a product manifest to
+/// `paths.manifest_path` with the product, adapter id, and adapter name overridden.
+///
+/// Panics if the export command exits unsuccessfully.
+fn export_synthetic_rival_manifest(paths: &ProductManifestPaths) -> Result<()> {
+	write_adversarial_report(paths)?;
+
+	let mut exporter = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"));
+
+	exporter.arg("export-quantitative-product-manifest");
+	exporter.arg("--report").arg(&paths.report_path);
+	exporter.arg("--out").arg(&paths.manifest_path);
+	// Relabel the exported rows as a synthetic rival; the sibling export test expects an export
+	// without these overrides to be rejected with "exports product ELF".
+	exporter.arg("--product").arg("Synthetic Rival");
+	exporter.arg("--adapter-id").arg("synthetic_rival");
+	exporter.arg("--adapter-name").arg("Synthetic Rival adapter");
+
+	let export = exporter.output()?;
+	let stderr = String::from_utf8_lossy(&export.stderr);
+
+	assert!(export.status.success(), "product manifest export failed: {stderr}");
+
+	Ok(())
+}
+
+fn run_report_with_manifest(paths: &ProductManifestPaths) -> Result<Value> {
+	super::run_report_with_quantitative_manifest(&paths.manifest_path)
+} // Forwards `paths.manifest_path` to the parent module's manifest runner.
diff --git a/apps/elf-eval/tests/real_world_job_benchmark/quantitative/product_manifest/export.rs b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/product_manifest/export.rs
new file mode 100644
index 00000000..d56f2bd7
--- /dev/null
+++ b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/product_manifest/export.rs
@@ -0,0 +1,73 @@
+use std::process::Command;
+
+use color_eyre::Result;
+use serde_json::Value;
+
+use crate::support;
+
+/// Exports the synthetic-rival product manifest and checks its schema and overridden identity
+/// fields, then re-imports it and expects two scoreboard rows, a rival row with the overridden
+/// product name, no `external_product_manifest_import` entry under `metrics_not_encoded`, and
+/// at least one rival per-query row.
+#[test]
+fn quantitative_product_manifest_exports_and_reimports_same_corpus_rows() -> Result<()> {
+	let paths = super::product_manifest_paths(
+		"elf-quantitative-product-manifest-test",
+		"synthetic-rival-product-manifest.json",
+	);
+
+	super::export_synthetic_rival_manifest(&paths)?;
+
+	let manifest = support::load_json(&paths.manifest_path)?;
+	let product = manifest.pointer("/rows/0/product").and_then(Value::as_str);
+	let adapter_id = manifest.pointer("/per_query_rows/0/adapter_id").and_then(Value::as_str);
+
+	assert_eq!(
+		manifest.pointer("/schema").and_then(Value::as_str),
+		Some("elf.agent_memory_quantitative_product_manifest/v1")
+	);
+	assert_eq!(product, Some("Synthetic Rival"));
+	assert_eq!(adapter_id, Some("synthetic_rival"));
+
+	let imported = super::run_report_with_manifest(&paths)?;
+	let rows = support::array_at(&imported, "/quantitative_scoreboard/rows")?;
+	let rival = support::find_by_field(rows, "/adapter_id", "synthetic_rival")?;
+	let per_query_rows = support::array_at(&imported, "/quantitative_scoreboard/per_query_rows")?;
+	let has_rival_query_row = per_query_rows
+		.iter()
+		.any(|row| row.pointer("/adapter_id").and_then(Value::as_str) == Some("synthetic_rival"));
+
+	assert_eq!(rows.len(), 2);
+	assert_eq!(rival.pointer("/product").and_then(Value::as_str), Some("Synthetic Rival"));
+	assert!(!support::array_contains_str(
+		&imported,
+		"/quantitative_scoreboard/metrics_not_encoded",
+		"external_product_manifest_import"
+	)?);
+	assert!(has_rival_query_row);
+
+	Ok(())
+}
+
+/// Exporting the report without overrides must fail with stderr containing "exports product ELF".
+#[test]
+fn quantitative_product_manifest_export_rejects_elf_self_rows() -> Result<()> {
+	let paths = super::product_manifest_paths(
+		"elf-quantitative-product-manifest-elf-test",
+		"elf-product-manifest.json",
+	);
+
+	super::write_adversarial_report(&paths)?;
+
+	let mut exporter = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"));
+
+	exporter.arg("export-quantitative-product-manifest");
+	exporter.arg("--report").arg(&paths.report_path).arg("--out").arg(&paths.manifest_path);
+
+	let rejected = exporter.output()?;
+
+	assert!(!rejected.status.success());
+	assert!(String::from_utf8_lossy(&rejected.stderr).contains("exports product ELF"));
+
+	Ok(())
+}
diff --git a/apps/elf-eval/tests/real_world_job_benchmark/quantitative/product_manifest/validation.rs b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/product_manifest/validation.rs
new file mode 100644
index 00000000..e4e302b3
--- /dev/null
+++ b/apps/elf-eval/tests/real_world_job_benchmark/quantitative/product_manifest/validation.rs
@@ -0,0 +1,64 @@
+use std::{fs, process::Command};
+
+use color_eyre::Result;
+
+use crate::support;
+
+/// A manifest whose `corpus_id` was rewritten must be rejected with "expected same-corpus".
+#[test]
+fn quantitative_product_manifest_rejects_cross_corpus_imports() -> Result<()> {
+	let paths = super::product_manifest_paths(
+		"elf-quantitative-product-manifest-corpus-test",
+		"wrong-corpus-product-manifest.json",
+	);
+
+	super::export_synthetic_rival_manifest(&paths)?;
+
+	let mut manifest = support::load_json(&paths.manifest_path)?;
+
+	support::set_json_pointer(&mut manifest, "/corpus_id", serde_json::json!("wrong-corpus"))?;
+	fs::write(&paths.manifest_path, serde_json::to_vec_pretty(&manifest)?)?;
+
+	let mut runner = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"));
+
+	runner.arg("run").arg("--fixtures").arg(support::adversarial_quality_fixture_dir());
+	runner.arg("--quantitative-product-manifest").arg(&paths.manifest_path);
+
+	let rejected = runner.output()?;
+
+	assert!(!rejected.status.success());
+	assert!(String::from_utf8_lossy(&rejected.stderr).contains("expected same-corpus"));
+
+	Ok(())
+}
+
+/// Empties `per_query_rows` in an otherwise valid exported manifest and expects the import run
+/// to fail with stderr containing "ranked queries but only 0", i.e. ranked rows are not accepted
+/// without their per-query evidence.
+#[test]
+fn quantitative_product_manifest_rejects_ranked_rows_without_per_query_evidence() -> Result<()> {
+	let paths = super::product_manifest_paths(
+		"elf-quantitative-product-manifest-per-query-test",
+		"missing-per-query-product-manifest.json",
+	);
+
+	super::export_synthetic_rival_manifest(&paths)?;
+
+	let mut manifest = support::load_json(&paths.manifest_path)?;
+
+	support::set_json_pointer(&mut manifest, "/per_query_rows", serde_json::json!([]))?;
+	fs::write(&paths.manifest_path, serde_json::to_vec_pretty(&manifest)?)?;
+
+	let mut runner = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"));
+
+	runner.arg("run").arg("--fixtures").arg(support::adversarial_quality_fixture_dir());
+	runner.arg("--quantitative-product-manifest").arg(&paths.manifest_path);
+
+	let rejected = runner.output()?;
+	let stderr = String::from_utf8_lossy(&rejected.stderr);
+
+	assert!(!rejected.status.success());
+	assert!(stderr.contains("ranked queries but only 0"));
+
+	Ok(())
+}
diff --git a/docs/spec/agent_memory_knowledge_system_v1.md b/docs/spec/agent_memory_knowledge_system_v1.md
index 35d18ca8..070df71f 100644
--- a/docs/spec/agent_memory_knowledge_system_v1.md
+++ b/docs/spec/agent_memory_knowledge_system_v1.md
@@ -272,8 +272,7 @@ Repository-native validation is authoritative.
   docs are validation-ready.
 - Before a PR handoff or any push that refreshes a PR head, run the registered
   Decodex workflow gate: `cargo make fmt`, `cargo make lint-fix`, then
-  `cargo make checks`. In this Makefile tree, `checks` aliases the repo-native
-  aggregate `check` task.
+  `cargo make check`.
 - If a phase changes commands, schemas, config, runtime behavior, status semantics,
   or benchmark claims, update the owning docs and include drift evidence as required
   by `docs/policy.md`.
diff --git a/docs/spec/agent_memory_quantitative_benchmark_v1.md b/docs/spec/agent_memory_quantitative_benchmark_v1.md
index 5974e4bf..265a71c1 100644
--- a/docs/spec/agent_memory_quantitative_benchmark_v1.md
+++ b/docs/spec/agent_memory_quantitative_benchmark_v1.md
@@ -1,216 +1,608 @@
 ---
 type: Spec
 title: "Agent Memory Quantitative Benchmark v1"
-description: "Define the public quantitative competitor scoreboard row contract and claim boundaries."
+description: "Define quantitative same-corpus memory benchmark metrics, formulas, evidence classes, and claim boundaries."
 resource: docs/spec/agent_memory_quantitative_benchmark_v1.md
 status: active
 authority: normative
 owner: spec
-last_verified: 2026-06-27
+last_verified: 2026-06-23
 tags:
   - docs
   - spec
   - benchmarking
   - agent-memory
-source_refs:
-  - XY-1098
-  - XY-1120
+source_refs: []
 code_refs:
+  - Makefile.toml
+  - makefiles/benchmark-memory-a.toml
+  - makefiles/benchmark-memory-b.toml
+  - scripts/materialize-explicit-qrels.py
+  - scripts/real-world-explicit-qrels.sh
+  - scripts/real-world-docker.sh
+  - scripts/real-world-live-explicit-qrels.sh
+  - apps/elf-eval/src/app.rs
   - apps/elf-eval/src/bin/real_world_job_benchmark/main.rs
-  - apps/elf-eval/tests/real_world_job_benchmark.rs
+  - apps/elf-eval/fixtures/real_world_memory/p1_closeout/source_candidate_approval_recall.json
+  - apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
 related:
+  - docs/spec/agent_memory_knowledge_system_v1.md
   - docs/spec/real_world_agent_memory_benchmark_v1.md
-  - docs/evidence/benchmarking/2026-06-27-public-quantitative-competitor-scoreboard-report.md
+  - docs/evidence/benchmarking/2026-06-23-p4-quality-hardening-productization-readiness-report.md
+  - docs/evidence/benchmarking/2026-06-23-p3-competitor-strength-absorption-report.md
 drift_watch:
   - docs/spec/agent_memory_quantitative_benchmark_v1.md
+  - Makefile.toml
+  - makefiles/benchmark-memory-a.toml
+  - makefiles/benchmark-memory-b.toml
+  - scripts/materialize-explicit-qrels.py
+  - scripts/real-world-explicit-qrels.sh
+  - scripts/real-world-docker.sh
+  - scripts/real-world-live-explicit-qrels.sh
+  - docs/spec/agent_memory_knowledge_system_v1.md
   - docs/spec/real_world_agent_memory_benchmark_v1.md
   - apps/elf-eval/src/bin/real_world_job_benchmark/main.rs
-  - apps/elf-eval/fixtures/report_snapshots/2026-06-27-public-quantitative-competitor-scoreboard-report.json
+  - apps/elf-eval/src/app.rs
+  - docs/evidence/benchmarking/index.md
 ---
 # Agent Memory Quantitative Benchmark v1
 
-Purpose: Define the public quantitative competitor scoreboard row contract and claim
-boundaries.
+Purpose: Define the quantitative scoreboard that must sit beside ELF's existing
+typed real-world memory benchmark reports.
 Status: normative
-Read this when: You are implementing, validating, or publishing the public
-competitor-quality scoreboard for agent memory systems.
-Not this document: Real-world job fixture schema, Work Journal behavior, operational
-runbooks, or external adapter setup procedures.
-Defines: `elf.quality_scoreboard/v1` quantitative rows, metrics, comparability gates,
-typed non-pass behavior, and optimization-direction metadata.
-
-## Scope
-
-The quantitative scoreboard turns `real_world_job` reports and external adapter
-manifest records into public product rows. It is a row-level evidence contract, not a
-universal leaderboard. It is allowed to say which metrics are proven for a row, which
-competitor strengths remain visible, and which evidence is missing before a row can be
-treated as comparable.
-
-This contract applies to reports with schema `elf.quality_scoreboard/v1`.
-
-## Scoreboard Report
-
-A report MUST include:
-
-- `schema`: exactly `elf.quality_scoreboard/v1`.
-- `result_states`: the public row-state enum.
-- `evidence_classes`: the public evidence-class enum.
-- `metric_basis`: the ranking basis used for retrieval metrics.
-- `retrieval_k`: the `k` used for recall, precision, MRR, and nDCG.
-- typed non-pass counts and visible typed non-pass states for encoded jobs, external
-  adapter rows, and the aggregate report.
-- evidence-class counts.
-- bounded encoded-job and aggregate summary claims.
-- `unqualified_win_claim_allowed`, which MUST be `false` when any typed non-pass row
-  or non-comparable row exists.
-- `claim_boundary`, a human-readable statement that prevents typed blockers or
-  fixture-only evidence from becoming broad superiority claims.
-- `rows`: one row for ELF plus one row for each tracked external product represented
-  by the loaded adapter manifest.
-- `optimization_roadmap`: concrete next optimization directions derived from missing
-  row evidence, not from hidden assumptions.
-
-## Public Row States
+Read this when: You are adding or reviewing recall, freshness, update, delete,
+expiry, latency, cost, or competitor-comparison metrics for agent memory systems.
+Not this document: A finished benchmark report, a claim that current results beat
+every competitor, or a replacement for typed non-pass outcome reporting.
+Defines: `elf.agent_memory_quantitative_benchmark/v1`, required metric families,
+formulas, denominators, evidence classes, comparability rules, and minimum report
+rows.
+
+## Core Rule
+
+Quantitative memory comparison must measure the exact behavior users care about:
+finding the right evidence, using current facts, suppressing stale or deleted facts,
+showing citations, and staying within latency/cost/resource bounds.
+
+A report must not use broad product labels such as "best memory" or "beats OpenKB"
+unless the specific metric row is same-corpus, same-task, same-evidence-class,
+same-candidate-source, same-denominator, and leaderboard eligible. Typed non-pass
+states remain first-class results.
+
+## Evidence Classes
+
+Every quantitative row must declare one evidence class:
+
+| Evidence class | Meaning | Comparable for leaderboard |
+| --- | --- | --- |
+| `fixture_backed` | Checked-in fixture scored by ELF's runner. | Only against other fixture rows with the same corpus and task. |
+| `live_baseline` | Docker-contained baseline or smoke run that may not execute real-world answer jobs. | No, unless the report states the exact same scored task. |
+| `live_real_world` | Runtime executed the same real-world job prompt and produced scored answer artifacts. | Yes, when same-corpus and same-task. |
+| `public_proxy` | Local proxy contract based on public docs or expected artifact shape, not a product runtime. | No product leaderboard claim. |
+| `private_corpus` | Operator-owned private corpus with publishable bounded metrics only. | Yes, only for private-corpus rows with matching policy. |
+| `provider_backed` | Provider credentials/models were used and cost/latency are measured. | Yes, only against rows with equivalent provider boundary. |
+| `research_gate` | Research-only, blocked, or reference-only evidence. | No. |
+| `mixed_evidence` | Aggregate row blends multiple evidence classes. | No; split rows before leaderboard use. |
+
+## Result States
+
+Every row must declare one result state:
 
 | State | Meaning |
 | --- | --- |
-| `pass` | The row has a scored pass under its evidence class. A pass is comparable only when every comparability gate is also true. |
-| `wrong_result` | The adapter or job reached the behavioral check but selected the wrong answer, evidence, lifecycle state, or action. |
-| `incomplete` | Setup, build, parse, adapter wiring, or runtime execution did not reach the behavioral check. |
-| `blocked` | The row cannot be completed safely without missing credentials, private input, durable runtime integration, Docker evidence, or manual product setup. |
-| `not_tested` | No benchmark execution or comparable adapter output exists for the row. |
-| `not_encoded` | The suite, scoring dimension, or adapter path is not implemented in the runner. |
-| `not_comparable` | The row has useful evidence but lacks one or more required comparability gates, so it must not be used as a product-runtime comparison pass. |
-| `unsupported_claim` | The row or source report made a substantive claim not supported by corpus evidence, source refs, or report metadata. |
-
-`not_comparable` is a public row state only. It is not a `real_world_job` status and
-must not be written back into job or suite outcome fields.
+| `pass` | The metric is measured and meets the row threshold. |
+| `wrong_result` | The task ran but selected the wrong answer, wrong evidence, or wrong lifecycle state. |
+| `incomplete` | Some required artifacts exist, but the metric denominator is not fully satisfied. |
+| `blocked` | Required setup, credentials, corpus, exported artifact, or product readback is missing. |
+| `not_encoded` | The adapter or benchmark does not implement this metric. |
+| `not_comparable` | A metric exists but evidence class, corpus, task, or denominator differs. |
+| `unsupported_claim` | The output makes a claim that the evidence cannot support. |
+
+Metric states are separate from row result states. A metric state of `measured`
+means the denominator is non-zero and the row has no typed non-pass state; it does
+not mean the value passed a leaderboard threshold. If the row result is
+`blocked`, `wrong_result`, `incomplete`, `not_encoded`, or `unsupported_claim`,
+metric states for measured values must inherit that non-pass state.
+
+Metric states may also use `partial_coverage` when a formula is computable for
+some queries but the row lacks full ranked-candidate coverage or the minimum query
+count required for leaderboard use. `partial_coverage` values are useful regression
+evidence, not product-ranking proof.
+
+## Retrieval Metrics
+
+Retrieval metrics apply when a job has relevance labels and an ordered candidate
+list. The report must name `k` for every `@k` metric. A row must also declare whether
+ranked candidates came from a product/runtime trace or a fixture trace; fixture traces
+are formula smoke tests unless the compared product emitted the same artifact shape.
+Explicit qrels live in `expected_answer.relevance_judgments` as
+`{ "evidence_id": "...", "grade": 0.0 }` records. If a legacy fixture omits qrels,
+the runner may derive binary relevance from required evidence for regression use,
+but that row must expose `qrel_source = expected_evidence_fallback` and must not
+become leaderboard eligible.
+
+`cargo make real-world-memory-explicit-qrels` is the deterministic qrel
+materialization command for fixture-mechanics evidence. It derives positive qrels
+from checked-in `expected_answer.evidence_links` and `required_evidence`, preserves
+existing explicit zero-grade judgments, and leaves unmentioned corpus evidence
+unjudged instead of converting it into synthetic negative labels. Its optional
+oracle ranked candidates are allowed only to prove metric mechanics; they are not
+product-runtime retrieval evidence and cannot satisfy leaderboard runtime, held-out,
+or leakage-audit gates.
+
+`cargo make real-world-memory-live-explicit-qrels` is the current product-runtime
+bridge from deterministic qrel materialization to ELF/qmd live adapter scoring. It
+must materialize explicit qrels with `--ranked-candidates-source none`, then let
+the live adapters emit their own runtime ranked candidates. This command can close
+the `qrel_source` gap for product-runtime rows, but it does not itself prove
+held-out status, leakage audit status, or clean leaderboard eligibility.
+
+| Metric | Formula | Required fields |
+| --- | --- | --- |
+| `recall_at_k` | `relevant_returned_in_top_k / expected_relevant_count` | relevance labels, explicit `ranked_candidate_evidence_ids`, `k` |
+| `precision_at_k` | `relevant_returned_in_top_k / k` | ordered candidates, relevance labels, `k` |
+| `mrr` | `1 / rank(first_relevant)` or `0` when no relevant item appears | ordered candidates, relevance labels |
+| `ndcg_at_k` | `dcg_at_k / ideal_dcg_at_k` using graded relevance when available, binary otherwise | ordered candidates, relevance grades, `k` |
+| `map` | Mean of per-query average precision values | ordered candidates, relevance labels |
+| `average_precision` | Per-query sum of precision at each relevant hit divided by expected relevant count | ordered candidates, relevance labels |
+| `success_at_k` | Query has at least one relevant candidate in the top `k` | ordered candidates, relevance labels, `k` |
+| `expected_evidence_recall` | `produced_required_evidence_count / required_evidence_count` | required evidence map, produced evidence ids |
+| `citation_coverage` | `claims_with_valid_citation / claims_requiring_citation` | claim list, citation validation result |
+| `source_ref_coverage` | `claims_with_valid_source_ref / claims_requiring_source_ref` | source-ref validation result |
+
+Retrieval metrics must not count redacted, excluded, deleted, expired, unreadable, or
+non-captured source spans as relevant current evidence. Such candidates may be
+reported separately as historical or diagnostic rows.
+
+## Memory Lifecycle Metrics
+
+Memory lifecycle metrics apply to jobs that encode state changes over time.
+
+| Metric | Formula | What it proves |
+| --- | --- | --- |
+| `update_correctness_rate` | `jobs_selecting_current_superseding_fact / update_jobs` | New facts replace old facts for current answers. |
+| `stale_suppression_rate` | `stale_facts_not_used_as_current / stale_fact_opportunities` | Stale facts do not pollute current answers. |
+| `delete_suppression_rate` | `deleted_or_tombstoned_facts_not_used / delete_opportunities` | Deleted or tombstoned facts do not reappear as current context. |
+| `expiry_suppression_rate` | `expired_facts_not_used / expiry_opportunities` | TTL or time-bounded facts are suppressed after expiry. |
+| `rollback_readback_rate` | `rollback_events_with_readback / rollback_events_expected` | Rollback and prior versions remain auditable. |
+| `history_readback_rate` | `history_events_readable / history_events_expected` | Add, update, ignore, reject, delete, restore, and derived transitions are visible. |
+| `contradiction_resolution_rate` | `contradictions_resolved_to_current_supported_answer / contradiction_opportunities` | Mutually inconsistent memories are resolved with current source support instead of arbitrary retrieval order. |
+
+The denominator must be explicit. A benchmark with no delete jobs must report
+`delete_suppression_rate = not_encoded`, not `1.000`.
+
+## Answer Safety Metrics
+
+| Metric | Formula |
+| --- | --- |
+| `unsupported_claim_rate` | `unsupported_claim_count / answer_claim_count` |
+| `stale_answer_rate` | `answers_using_stale_fact_as_current / answered_jobs` |
+| `hallucinated_evidence_rate` | `citations_not_in_candidate_or_source_set / citation_count` |
+| `redaction_leak_count` | Count of private, excluded, or redacted spans surfaced in public output. |
+| `irrelevant_context_ratio` | `irrelevant_context_items / returned_context_items` |
+| `scope_violation_count` | Count of unreadable cross-scope or grant-violating rows returned. |
 
-## Evidence Classes
+Zero values are meaningful only when the denominator is non-zero and the checked row
+actually exercises the failure mode.
+
+## Operational Metrics
 
-| Evidence class | Meaning |
+| Metric | Required unit |
 | --- | --- |
-| `fixture_backed` | Checked-in fixtures were scored. This is regression evidence, not live product-runtime evidence. |
-| `live_baseline` | Docker live-baseline retrieval or lifecycle evidence exists, but the row is not a real-world product-runtime scoreboard pass. |
-| `live_real_world` | A live adapter executed real-world job paths and emitted typed outcomes. |
-| `research_gate` | Research, source mapping, setup, credential, or resource gates are recorded before fair scoring can run. |
-
-## Row Fields
-
-Each `rows[]` entry MUST include:
-
-- `product_id` and `product_name`.
-- `row_source`: stable source label, such as `elf_report` or
-  `external_adapter_manifest`.
-- `evidence_class`.
-- `result_state`.
-- `comparable`: true only when all comparability gates are satisfied and the row has a
-  pass state with quantitative metrics.
-- comparability gates:
-  - `same_corpus`
-  - `source_id_mapped`
-  - `held_out`
-  - `leakage_audited`
-  - `product_runtime`
-  - `container_digest_identified`
-- `metrics`.
-- `strengths`: product strengths supported by the row source.
-- `weaknesses`: typed weaknesses, blockers, or non-pass evidence from the row source.
-- `next_evidence`: row-level evidence needed before the row can become comparable.
-- `source_provenance`: bounded source pointers to the input report, adapter record, or
-  suite records.
-
-`same_corpus = true` requires positive row evidence that the product or checked-in
-adapter is mapped to the benchmark corpus. A blocker sentence that says same-corpus
-evidence is missing is not sufficient. A typed same-corpus setup-blocker adapter may
-set this gate to true only when its source provenance identifies the intended shared
-benchmark corpus and the remaining blocker is runtime/source-id output, not corpus
-selection.
-
-## Metrics
-
-The `metrics` object MUST include `retrieval`, `lifecycle`, `answer_safety`,
-`operations`, and `coverage` sub-objects.
-
-`retrieval` MUST include:
-
-- `k`.
-- `metric_basis`.
-- `recall_at_k`, `precision_at_k`, `mrr`, and `ndcg`, or `null` when the row lacks
-  ranked produced evidence.
-- `expected_evidence_recall`.
-- `citation_source_ref_coverage`.
-- matched, total, and produced evidence counts.
-
-For `metric_basis = "produced_evidence_order"`, ranked retrieval metrics use the
-ordered `produced_evidence` list in the scored job output as the retrieved list.
-Expected evidence ids are the relevance set. Relevance is binary. `recall_at_k` and
-`precision_at_k` use the first `k` produced evidence ids. MRR is reciprocal rank of
-the first relevant produced evidence id. nDCG uses binary gains with the ideal DCG
-bounded by `min(k, expected_evidence_total)`.
-
-`lifecycle` MUST include:
-
-- stale suppression rate and counts.
-- update correctness rate and counts.
-- delete correctness rate and counts.
-- rollback/history readback rate and counts.
-
-`answer_safety` MUST include:
-
-- unsupported-claim rate and count.
-- stale-answer rate and count.
-- hallucinated-evidence rate when measurable.
-- redaction leak count.
-- irrelevant-context ratio.
-
-`operations` MUST include:
-
-- mean latency in milliseconds when measured.
-- total cost when cost accounting exists.
-- resource-envelope status, encoded job count, and pass count.
-
-`coverage` MUST include:
-
-- job count.
-- encoded suite count.
-- pass count.
-- typed non-pass count.
-- source-ref coverage.
-- evidence coverage.
-- evidence class.
-
-## Comparability Rules
-
-A row is comparable only when all of the following are true:
-
-- `same_corpus = true`.
-- `source_id_mapped = true`.
-- `held_out = true`.
-- `leakage_audited = true`.
-- `product_runtime = true`.
-- `container_digest_identified = true`.
-- `result_state = "pass"`.
-- `recall_at_k`, `precision_at_k`, `mrr`, and `ndcg` are present.
-
-If any required gate is false, the report MUST set `comparable = false`, add a
-specific `next_evidence` entry for each missing gate, and avoid any win, parity, or
-rank claim for that row. If an otherwise passing row is missing a required gate, the
-public row state SHOULD be `not_comparable` so the report is explicit about the
-reason no product-runtime comparison claim is allowed.
-
-## Report Claim Rules
-
-- A row with `fixture_backed`, `live_baseline`, or `research_gate` evidence MUST NOT
-  be described as a comparable product-runtime pass.
-- A row with `blocked`, `incomplete`, `not_tested`, `not_encoded`, `not_comparable`,
-  or `unsupported_claim` MUST remain visible as a non-pass row.
-- External competitors MUST have either comparable product-runtime evidence or an
-  explicit typed non-pass/blocker row with source provenance.
-- Missing Docker image digest evidence is a blocker for comparability, even if a live
-  adapter executed.
-- Public-proxy, fixture-only, local-mock, diagnostic, blocked, and not-encoded rows
-  MUST NOT be promoted into universal product superiority claims.
-- Optimization direction MUST be tied to row-level `next_evidence`, metrics, or typed
-  non-pass states.
+| `ingestion_success_rate` | successful ingested records / records submitted |
+| `indexing_coverage` | indexed records or spans / ingestible records or spans |
+| `source_id_mapping_coverage` | returned candidates or generated claims mapped to benchmark source ids / candidates or claims requiring mapping |
+| `query_latency_p50_ms`, `query_latency_p95_ms`, `query_latency_p99_ms` | milliseconds |
+| `ingest_latency_ms` | milliseconds from submitted source to durable ingest acknowledgement |
+| `update_propagation_latency_ms` | milliseconds from write/apply/delete to searchable/readable effect |
+| `cold_start_recovery_seconds` | seconds |
+| `restore_seconds` | seconds |
+| `index_rebuild_seconds` | seconds |
+| `cost_usd` | USD with input/output token counts where applicable |
+| `available_context_token_count` | tokens available in the source corpus or memory store for the query |
+| `answer_context_token_count` | tokens supplied to the answering model or final answer context |
+| `context_token_efficiency` | `answer_context_token_count / available_context_token_count` |
+| `resource_envelope_status` | one of `pass`, `blocked`, `incomplete`, or `not_encoded` |
+
+Provider-backed rows must include model/provider identifiers or must remain
+`not_comparable`. Fixture zero-cost rows must not imply hosted provider cost.
+
+## Quantitative Scoreboard Schema
+
+Reports that implement this spec must emit:
+
+```json
+{
+  "schema": "elf.agent_memory_quantitative_benchmark/v1",
+  "generated_at": "...",
+  "corpus_id": "...",
+  "k_values": [1, 3, 5, 10],
+  "rows": [
+    {
+      "product": "ELF",
+      "adapter_id": "elf_live_real_world",
+      "adapter_name": "ELF live real-world",
+      "suite": "memory_evolution",
+      "evidence_class": "live_real_world",
+      "result_state": "pass",
+      "comparable": true,
+      "metric_comparable": true,
+      "leaderboard_eligible": false,
+      "held_out": false,
+      "leakage_audited": false,
+      "audit_manifest_id": null,
+      "fixture_regression_only": false,
+      "sample_size": 40,
+      "ranking_query_count": 40,
+      "ranking_coverage_state": "measured",
+      "ranked_candidate_source": "runtime_trace",
+      "qrel_source": "explicit_qrels",
+      "explicit_qrel_query_count": 40,
+      "metrics": {
+        "recall_at_5": 1.0,
+        "precision_at_5": 0.4,
+        "mrr": 1.0,
+        "ndcg_at_5": 1.0,
+        "map": 1.0,
+        "average_precision": 1.0,
+        "success_at_5": 1.0,
+        "explicit_qrel_query_coverage": 1.0,
+        "relevance_judgment_count": 80,
+        "relevance_grade_sum": 160,
+        "update_correctness_rate": 1.0,
+        "stale_suppression_rate": 1.0,
+        "delete_suppression_rate": 1.0,
+        "expected_evidence_recall": 1.0,
+        "unsupported_claim_rate": 0.0,
+        "stale_answer_rate": 0.0
+      },
+      "metric_states": {
+        "recall_at_5": "measured",
+        "precision_at_5": "measured",
+        "mrr": "measured",
+        "ndcg_at_5": "measured",
+        "average_precision": "measured",
+        "map": "measured",
+        "success_at_5": "measured"
+      },
+      "denominators": {
+        "recall_at_5": 80,
+        "precision_at_5": 200,
+        "map": 40,
+        "success_at_5": 40,
+        "update_correctness_rate": 2,
+        "delete_suppression_rate": 1,
+        "stale_answer_rate": 40
+      },
+      "confidence_intervals": {
+        "recall_at_5": {
+          "method": "wilson_score",
+          "confidence": 0.95,
+          "lower": 0.954,
+          "upper": 1.0,
+          "numerator": 80,
+          "denominator": 80
+        }
+      },
+      "claim_boundary": "Comparable only against same-corpus live_real_world rows."
+    }
+  ],
+  "per_query_rows": [
+    {
+      "job_id": "memory-evolution-001",
+      "suite": "memory_evolution",
+      "evidence_class": "live_real_world",
+      "result_state": "pass",
+      "expected_relevant_count": 2,
+      "candidate_count": 8,
+      "qrel_source": "explicit_qrels",
+      "relevance_grade_sum": 4.0,
+      "product": "ELF",
+      "adapter_id": "elf_live_real_world",
+      "metrics": {
+        "recall_at_5": 1.0,
+        "precision_at_5": 0.4,
+        "mrr": 1.0,
+        "ndcg_at_5": 1.0,
+        "average_precision": 1.0,
+        "success_at_5": 1.0
+      },
+      "metric_states": {
+        "recall_at_5": "measured",
+        "precision_at_5": "measured",
+        "mrr": "measured",
+        "ndcg_at_5": "measured",
+        "average_precision": "measured",
+        "success_at_5": "measured"
+      },
+      "denominators": {
+        "recall_at_5": 2,
+        "precision_at_5": 5,
+        "mrr": 1,
+        "ndcg_at_5": 1,
+        "average_precision": 1,
+        "success_at_5": 1
+      }
+    }
+  ],
+  "ablation_rows": [
+    {
+      "product": "ELF",
+      "adapter_id": "elf_live_real_world",
+      "ablation_id": "raw_vector",
+      "job_id": "memory-evolution-001",
+      "suite": "memory_evolution",
+      "evidence_class": "live_real_world",
+      "result_state": "pass",
+      "candidate_source": "runtime_trace_ablation",
+      "qrel_source": "explicit_qrels",
+      "expected_relevant_count": 2,
+      "candidate_count": 8,
+      "metrics": {
+        "recall_at_5": 0.5,
+        "precision_at_5": 0.2,
+        "mrr": 0.5,
+        "ndcg_at_5": 0.62,
+        "average_precision": 0.5,
+        "success_at_5": 1.0
+      },
+      "metric_states": {
+        "recall_at_5": "measured",
+        "precision_at_5": "measured",
+        "mrr": "measured",
+        "ndcg_at_5": "measured",
+        "average_precision": "measured",
+        "success_at_5": "measured"
+      },
+      "denominators": {
+        "recall_at_5": 2,
+        "precision_at_5": 5,
+        "mrr": 1,
+        "ndcg_at_5": 1,
+        "average_precision": 1,
+        "success_at_5": 1
+      },
+      "claim_boundary": "Ablation rows score explicitly supplied candidate orderings for diagnosis; they are not separate product-runtime rows unless the evidence class and candidate source say so."
+    }
+  ],
+  "significance": {
+    "method": "exact_two_sided_sign_test_on_same_query_metric_deltas",
+    "state": "not_encoded_single_product_row",
+    "eligible": false,
+    "minimum_paired_query_count": 30,
+    "comparable_product_row_count": 1,
+    "paired_query_count": 0,
+    "comparisons": [],
+    "ablation_comparisons": [
+      {
+        "comparison_scope": "ablation",
+        "baseline_id": "raw_vector",
+        "candidate_id": "governed_memory",
+        "baseline_product": "raw_vector",
+        "candidate_product": "governed_memory",
+        "metric": "ndcg_at_5",
+        "paired_query_count": 1,
+        "state": "measured",
+        "effect_mean": 0.311,
+        "p_value": 1.0,
+        "win_count": 1,
+        "loss_count": 0,
+        "tie_count": 0
+      }
+    ],
+    "claim_boundary": "Pairwise wins require at least two leaderboard-eligible rows with same-query per-query metrics; otherwise p-values and win claims stay not encoded."
+  },
+  "leakage_audit": {
+    "state": "not_leaderboard_eligible",
+    "held_out": false,
+    "leakage_audited": false,
+    "corpus_profile": "synthetic",
+    "evidence_class": "fixture_backed",
+    "qrel_source": "explicit_qrels",
+    "fixture_regression_only": true,
+    "ranking_coverage_state": "partial_coverage",
+    "leaderboard_blocking_reasons": [
+      "fixture_regression_only",
+      "insufficient_query_count",
+      "no_held_out_manifest",
+      "no_leakage_audit_manifest",
+      "not_live_real_world",
+      "ranking_coverage_not_measured"
+    ],
+    "claim_boundary": "Held-out and leakage-audit fields are explicit gates; fixture or non-audited rows cannot become public leaderboard evidence by omission."
+  },
+  "non_comparable_rows": [
+    {
+      "product": "VectifyAI PageIndex",
+      "adapter_id": "pageindex_public_proxy_contract",
+      "result_state": "not_comparable",
+      "reason": "public_proxy evidence class; no PageIndex product runtime output"
+    }
+  ],
+  "controls": {
+    "same_corpus_required": true,
+    "same_task_required": true,
+    "same_evidence_class_required": true,
+    "same_budget_required": true,
+    "ranked_candidates_required_for_ranking_metrics": true,
+    "raw_ranked_candidate_artifacts_required": true,
+    "held_out_or_leakage_audited_required": true,
+    "explicit_relevance_judgments_required_for_leaderboard": true,
+    "per_query_rows_required_for_significance": true,
+    "minimum_query_count_for_leaderboard": 30,
+    "current_query_count": 40,
+    "current_ranking_query_count": 40,
+    "current_explicit_qrel_query_count": 40,
+    "comparable_product_row_count": 1,
+    "leaderboard_claim_allowed": false,
+    "statistical_significance": "not_encoded_until_at_least_two_same-corpus comparable product rows meet minimum query count, full ranking coverage, and explicit qrels",
+    "uncertainty_reporting": "single-row rates include Wilson 95% confidence intervals; competitor win claims require same-query paired significance over per-query rows.",
+    "leakage_control": "fixture rows are not public leaderboard proof; current product leaderboard rows require held-out and leakage-audited status plus an audit manifest id."
+  }
+}
+```
+
+## External Product Row Import
+
+`real_world_job_benchmark run` may accept an optional
+`--quantitative-product-manifest` file when a competitor adapter has already
+materialized same-corpus product-runtime rows outside the current ELF fixture run.
+The manifest schema is `elf.agent_memory_quantitative_product_manifest/v1`.
+Generated reports infer the quantitative row `product` from the external adapter
+manifest entry matching `--adapter-id`, with `--product` available only as an
+explicit override for old or ad hoc reports.
+
+Use `real_world_job_benchmark export-quantitative-product-manifest --report
+<report.json>` to derive this manifest from a generated `elf.real_world_job_report/v1`
+instead of hand-writing metric rows. The export command copies the report's primary
+aggregate row and matching per-query rows, rejects `ELF` self rows, and then runs
+the same manifest validation used by import. The live qmd adapter sweep writes
+`qmd-quantitative-product-manifest.json` and a combined
+`elf-qmd-quantitative-report.json` so the same-corpus qmd row is visible in
+`quantitative_scoreboard.rows` when fresh live artifacts exist.
+
+```json
+{
+  "schema": "elf.agent_memory_quantitative_product_manifest/v1",
+  "manifest_id": "qmd-live-real-world-2026-06-23",
+  "corpus_id": "...same value as quantitative_scoreboard.corpus_id...",
+  "rows": [
+    {
+      "product": "qmd",
+      "adapter_id": "qmd_live_real_world",
+      "held_out": false,
+      "leakage_audited": false,
+      "audit_manifest_id": null,
+      "metrics": {
+        "recall_at_5": 0.75,
+        "ndcg_at_5": 0.601,
+        "average_precision": 0.608
+      },
+      "metric_states": {
+        "recall_at_5": "measured",
+        "ndcg_at_5": "measured",
+        "average_precision": "measured"
+      }
+    }
+  ],
+  "per_query_rows": [
+    {
+      "product": "qmd",
+      "adapter_id": "qmd_live_real_world",
+      "job_id": "...",
+      "metrics": {
+        "recall_at_5": 0.75,
+        "ndcg_at_5": 0.601,
+        "average_precision": 0.608
+      },
+      "metric_states": {
+        "recall_at_5": "measured",
+        "ndcg_at_5": "measured",
+        "average_precision": "measured"
+      }
+    }
+  ]
+}
+```
+
+The runner must reject imported rows unless:
+
+- the manifest `corpus_id` exactly matches the current scoreboard `corpus_id`
+- each `(product, adapter_id)` matches an external adapter manifest record
+- the product is not `ELF`
+- aggregate rows and per-query rows carry the paired-comparison metrics
+  `recall_at_5`, `ndcg_at_5`, and `average_precision`
+- ranked aggregate rows have at least `ranking_query_count` matching per-query rows
+
+Imported rows replace the matching `non_comparable_rows` entry, but they do not
+automatically authorize leaderboard claims. A row marked `leaderboard_eligible`
+must also be product-runtime evidence with `result_state = pass`, minimum ranked
+query coverage, `ranked_candidate_source = runtime_trace`, `qrel_source =
+explicit_qrels`, enough explicit qrels for every ranked query, `held_out = true`,
+`leakage_audited = true`, and a non-empty `audit_manifest_id`. Despite the `or` in
+the `held_out_or_leakage_audited_required` control name, the current runner requires
+both gates before an imported row can remain marked leaderboard eligible. This keeps
+hand-written, public-proxy, or non-audited rows from becoming hidden wins.
+
+## Minimum Rows For P6
+
+The first implementation issue after this spec must produce a machine-readable
+`quantitative_scoreboard` from `real_world_job_benchmark`. The initial runner row may
+calculate ranking metrics only when the fixture or adapter emits explicit
+`ranked_candidate_evidence_ids`; otherwise it must mark those metrics
+`not_encoded`. If only a subset of queries emits ranked candidates, ranking metrics
+must use `partial_coverage` and must not make the row leaderboard eligible. It must
+publish metric states, denominators, sample size, ranked query count, per-query rows,
+explicit-qrel coverage, qrel source, Wilson 95% intervals for measured or partial
+rate metrics, ablation rows for explicitly supplied candidate orderings, diagnostic
+ablation pairwise comparisons with exact two-sided sign-test p-values,
+paired-significance gating state for product rows, held-out/leakage audit state, and
+controls so missing rows cannot become hidden wins. The runner may also import
+same-corpus external quantitative product rows through
+`elf.agent_memory_quantitative_product_manifest/v1`; this is an adapter artifact
+boundary, not a manual scoring exemption. It must also keep unimplemented but
+required production-memory measures visible as `not_encoded`, including source-id
+mapping coverage, ingestion/indexing coverage, contradiction resolution,
+propagation latency, and context-token efficiency.
+
+The full P6 scoreboard must produce rows for:
+
+- ELF fixture-backed memory authority and knowledge workspace jobs.
+- ELF live-real-world retrieval and memory-evolution jobs where artifacts exist.
+- qmd live-real-world retrieval/debug rows where artifacts exist.
+- mem0/OpenMemory local SDK history/export rows where artifacts exist.
+- Honcho rows as typed same-corpus blockers plus `research_gate`/`not_comparable`
+  external-adapter rows until peer/session outputs, background reasoning artifacts,
+  source-id mapped search/chat/context results, and token/context efficiency
+  measures exist for the same corpus.
+- PageIndex/OpenKB rows as `blocked` or `not_comparable` until actual product
+  artifacts exist.
+- Letta, OpenViking, Graphiti/Zep, RAGFlow, GraphRAG, and LightRAG rows as
+  `blocked`, `not_encoded`, or `not_comparable` unless same-corpus product artifacts
+  are checked in.
+
+## Research Alignment
+
+This benchmark contract is aligned with established retrieval and memory-evaluation
+practice, but it is not itself a public leaderboard until the controls permit one:
+
+- BEIR-style retrieval evaluation requires a shared corpus/query/qrels format and
+  rank-aware metrics such as nDCG@k, MAP, and success@k for comparable retrieval
+  claims.
+- RAGAS-style RAG evaluation separates retrieval context recall/precision from
+  answer faithfulness and response quality.
+- LoCoMo-style memory evaluation shows that long-term memory requires temporal,
+  multi-session, summarization, and event-grounded reasoning slices, not only
+  single-turn retrieval.
+- Production memory comparisons must report token/cost/latency budgets; Mem0's
+  public benchmark framing treats accuracy, token cost, and latency as coupled
+  production dimensions.
+- Honcho's public docs and benchmark materials position it as reasoning-first
+  memory with peer/session representations, background reasoning/dreaming, LongMem,
+  LoCoMo, BEAM, and token-efficiency framing. ELF must treat those as required
+  benchmark surfaces, not as same-corpus product results, until a Honcho adapter
+  emits source-id mapped artifacts on the benchmark corpus.
+- Scientific comparison requires held-out and leakage-audited corpora with audit
+  manifest ids, explicit qrels, raw per-query rows, repeated or paired comparable
+  runs, confidence intervals for single-row estimates, and paired product-row
+  significance tests before a leaderboard claim is allowed. Ablation pairwise tests
+  are diagnostic optimization evidence, not product leaderboard evidence.
+
+## Claim Boundaries
+
+Allowed:
+
+- "ELF has measured evidence recall, source-ref coverage, stale suppression, and
+  update/delete correctness for the rows shown."
+- "Product X is not comparable on metric Y because evidence class, corpus, or
+  product artifact coverage differs."
+- "Product X beats ELF on metric Y" only when both rows are same-corpus,
+  same-evidence-class, same-task, and comparable.
+
+Not allowed:
+
+- Claiming that a fixture-backed pass beats a provider-backed or product-runtime row.
+- Using a public-proxy pass as proof of PageIndex, OpenKB, hosted memory,
+  provider-backed, or private-corpus product quality.
+- Reporting a metric with a missing denominator as `1.000`.
+- Treating a `blocked`, `not_encoded`, or `not_comparable` row as a win by omission.
diff --git a/makefiles/benchmark-core.toml b/makefiles/benchmark-core.toml
index 02c94349..55243485 100644
--- a/makefiles/benchmark-core.toml
+++ b/makefiles/benchmark-core.toml
@@ -1,95 +1,8 @@
-# Rust workspace tasks: Benchmark core, baseline, and operator tasks.
-
-# Rust workspace tasks: Benchmark.
-
-# Benchmark
-# | task                                       | type      | cwd |
-# | ------------------------------------------ | --------- | --- |
-# | baseline-backfill-100k-docker              | command   |     |
-# | baseline-backfill-10k-docker               | command   |     |
-# | baseline-backfill-docker                   | command   |     |
-# | baseline-live-docker                       | command   |     |
-# | baseline-live-report                       | command   |     |
-# | baseline-production-private                | command   |     |
-# | baseline-production-private-addendum       | command   |     |
-# | baseline-production-synthetic              | command   |     |
-# | baseline-soak-docker                       | command   |     |
-# | local-agent-loop                           | command   |     |
-# | openmemory-ui-export-readback              | command   |     |
-# | parity-docker                              | command   |     |
-# | real-world-first-generation-oss            | composite |     |
-# | real-world-first-generation-oss-json       | command   |     |
-# | real-world-first-generation-oss-report     | command   |     |
-# | real-world-job-operator-ux                 | composite |     |
-# | real-world-job-operator-ux-json            | command   |     |
-# | real-world-job-operator-ux-live-adapters   | command   |     |
-# | real-world-job-operator-ux-report          | command   |     |
-# | real-world-memory                          | composite |     |
-# | real-world-memory-adversarial-quality      | composite |     |
-# | real-world-memory-adversarial-quality-json | command   |     |
-# | real-world-memory-adversarial-quality-report | command |     |
-# | real-world-memory-consolidation            | composite |     |
-# | real-world-memory-consolidation-json       | command   |     |
-# | real-world-memory-consolidation-report     | command   |     |
-# | real-world-memory-p1-closeout              | composite |     |
-# | real-world-memory-p1-closeout-json         | command   |     |
-# | real-world-memory-p1-closeout-report       | command   |     |
-# | real-world-memory-p4-production-readiness  | composite |     |
-# | real-world-memory-p4-production-readiness-json | command |     |
-# | real-world-memory-p4-production-readiness-report | command |     |
-# | real-world-memory-p4-quality-hardening-closeout | composite |     |
-# | real-world-memory-p2-knowledge-closeout    | composite |     |
-# | real-world-memory-core-archival            | composite |     |
-# | real-world-memory-core-archival-json       | command   |     |
-# | real-world-memory-core-archival-report     | command   |     |
-# | real-world-memory-context-trajectory       | composite |     |
-# | real-world-memory-context-trajectory-json  | command   |     |
-# | real-world-memory-context-trajectory-report | command   |     |
-# | real-world-memory-evolution                | composite |     |
-# | real-world-memory-evolution-json           | command   |     |
-# | real-world-memory-evolution-report         | command   |     |
-# | real-world-memory-graph-rag                | composite |     |
-# | real-world-memory-graph-rag-json           | command   |     |
-# | real-world-memory-graph-rag-report         | command   |     |
-# | real-world-memory-json                     | command   |     |
-# | real-world-memory-knowledge                | composite |     |
-# | real-world-memory-knowledge-json           | command   |     |
-# | real-world-memory-knowledge-report         | command   |     |
-# | real-world-memory-live-adapters            | command   |     |
-# | real-world-memory-live-consolidation       | command   |     |
-# | real-world-memory-live-knowledge           | command   |     |
-# | real-world-memory-mem0-openmemory-letta    | composite |     |
-# | real-world-memory-mem0-openmemory-letta-json | command |     |
-# | real-world-memory-mem0-openmemory-letta-report | command |     |
-# | real-world-memory-pageindex-openkb         | composite |     |
-# | real-world-memory-pageindex-openkb-json    | command   |     |
-# | real-world-memory-pageindex-openkb-report  | command   |     |
-# | real-world-memory-proactive-brief          | composite |     |
-# | real-world-memory-proactive-brief-json     | command   |     |
-# | real-world-memory-proactive-brief-report   | command   |     |
-# | real-world-memory-production-ops           | composite |     |
-# | real-world-memory-production-ops-json      | command   |     |
-# | real-world-memory-production-ops-report    | command   |     |
-# | real-world-memory-project-decisions        | composite |     |
-# | real-world-memory-project-decisions-json   | command   |     |
-# | real-world-memory-project-decisions-report | command   |     |
-# | real-world-memory-quantitative-scoreboard  | composite |     |
-# | real-world-memory-quantitative-scoreboard-json | command |     |
-# | real-world-memory-quantitative-scoreboard-report | command |     |
-# | real-world-memory-report                   | command   |     |
-# | real-world-memory-retrieval                | composite |     |
-# | real-world-memory-retrieval-json           | command   |     |
-# | real-world-memory-retrieval-report         | command   |     |
-# | real-world-memory-scheduled                | composite |     |
-# | real-world-memory-scheduled-json           | command   |     |
-# | real-world-memory-scheduled-report         | command   |     |
-# | real-world-memory-service-native-dreaming  | command   |     |
-# | real-world-memory-summary                  | composite |     |
-# | real-world-memory-summary-json             | command   |     |
-# | real-world-memory-summary-report           | command   |     |
-# | real-world-memory-work-continuity          | composite |     |
-# | real-world-memory-work-continuity-json     | command   |     |
-# | real-world-memory-work-continuity-report   | command   |     |
+# Rust workspace tasks: benchmark core, baseline, and operator commands.
+#
+# Keep long task listings out of comments. `cargo make --list-all-steps` is the
+# source for the complete task index, while this file owns only non-sharded
+# benchmark commands.
 
 [tasks.baseline-backfill-100k-docker]
 workspace = false
diff --git a/makefiles/benchmark-memory-a.toml b/makefiles/benchmark-memory-a.toml
index a7063ca4..3f09c7d4 100644
--- a/makefiles/benchmark-memory-a.toml
+++ b/makefiles/benchmark-memory-a.toml
@@ -1,4 +1,4 @@
-# Rust workspace tasks: Benchmark real-world memory tasks, first half.
+# Rust workspace tasks: real-world memory benchmark fixtures A-G.
 
 [tasks.real-world-memory]
 workspace = false
@@ -364,6 +364,13 @@ args = [
 	"tmp/real-world-memory/evolution-report.md",
 ]
 
+[tasks.real-world-memory-explicit-qrels]
+workspace = false
+command = "bash"
+args = [
+	"scripts/real-world-explicit-qrels.sh",
+]
+
 [tasks.real-world-memory-graph-rag]
 workspace = false
 dependencies = [
diff --git a/makefiles/benchmark-memory-b.toml b/makefiles/benchmark-memory-b.toml
index 8657bb36..3b47da39 100644
--- a/makefiles/benchmark-memory-b.toml
+++ b/makefiles/benchmark-memory-b.toml
@@ -1,4 +1,4 @@
-# Rust workspace tasks: Benchmark real-world memory tasks, second half.
+# Rust workspace tasks: real-world memory benchmark fixtures K-W and aggregate runners.
 
 [tasks.real-world-memory-json]
 workspace = false
@@ -251,6 +251,14 @@ args = [
 	"memory-live-consolidation",
 ]
 
+[tasks.real-world-memory-live-explicit-qrels]
+workspace = false
+command = "bash"
+args = [
+	"scripts/real-world-docker.sh",
+	"memory-live-explicit-qrels",
+]
+
 [tasks.real-world-memory-live-knowledge]
 workspace = false
 command = "bash"
@@ -678,12 +686,3 @@ args = [
 	"--out",
 	"tmp/real-world-memory/memory-summary/report.md",
 ]
-
-# Check
-# | task             | type      | cwd |
-# | ---------------- | --------- | --- |
-# | check            | composite |     |
-# | check-docs       | command   |     |
-# | check-rust       | command   |     |
-# | check-trace-gate | command   |     |
-# | checks           | composite |     |
diff --git a/makefiles/check.toml b/makefiles/check.toml
index 5756ac55..c6ab6569 100644
--- a/makefiles/check.toml
+++ b/makefiles/check.toml
@@ -1,14 +1,5 @@
 # Rust workspace tasks: Check.
 
-# Check
-# | task             | type      | cwd |
-# | ---------------- | --------- | --- |
-# | check            | composite |     |
-# | check-docs       | command   |     |
-# | check-rust       | command   |     |
-# | check-trace-gate | command   |     |
-# | checks           | composite |     |
-
 [tasks.check]
 clear = true
 workspace = false
@@ -43,15 +34,3 @@ command = "bash"
 args = [
 	"scripts/trace-gate.sh",
 ]
-
-[tasks.checks]
-workspace = false
-dependencies = [
-	"check",
-]
-
-# Clean
-# | task                       | type    | cwd |
-# | -------------------------- | ------- | --- |
-# | clean-baseline-live-docker | command |     |
-# | clean-parity-docker        | command |     |
diff --git a/makefiles/clean.toml b/makefiles/clean.toml
index 7fc71c62..bf899af0 100644
--- a/makefiles/clean.toml
+++ b/makefiles/clean.toml
@@ -1,11 +1,5 @@
 # Rust workspace tasks: Clean.
 
-# Clean
-# | task                       | type    | cwd |
-# | -------------------------- | ------- | --- |
-# | clean-baseline-live-docker | command |     |
-# | clean-parity-docker        | command |     |
-
 [tasks.clean-baseline-live-docker]
 workspace = false
 command = "docker"
@@ -29,13 +23,3 @@ args = [
 	"-v",
 	"--remove-orphans",
 ]
-
-# Format
-# | task           | type      | cwd |
-# | -------------- | --------- | --- |
-# | fmt            | composite |     |
-# | fmt-check      | composite |     |
-# | fmt-rust       | command   |     |
-# | fmt-rust-check | extend    |     |
-# | fmt-toml       | command   |     |
-# | fmt-toml-check | extend    |     |
diff --git a/makefiles/format.toml b/makefiles/format.toml
index e214c216..8046cfb9 100644
--- a/makefiles/format.toml
+++ b/makefiles/format.toml
@@ -1,15 +1,5 @@
 # Rust workspace tasks: Format.
 
-# Format
-# | task           | type      | cwd |
-# | -------------- | --------- | --- |
-# | fmt            | composite |     |
-# | fmt-check      | composite |     |
-# | fmt-rust       | command   |     |
-# | fmt-rust-check | extend    |     |
-# | fmt-toml       | command   |     |
-# | fmt-toml-check | extend    |     |
-
 [tasks.fmt]
 workspace = false
 dependencies = [
@@ -45,10 +35,3 @@ args = [
 	"fmt",
 	"--check",
 ]
-
-# Lint
-# | task        | type      | cwd |
-# | ----------- | --------- | --- |
-# | lint        | composite |     |
-# | lint-rust   | command   |     |
-# | lint-vstyle | command   |     |
diff --git a/makefiles/lint-fix.toml b/makefiles/lint-fix.toml
index 5aada462..aa2f8a4f 100644
--- a/makefiles/lint-fix.toml
+++ b/makefiles/lint-fix.toml
@@ -1,12 +1,5 @@
 # Rust workspace tasks: Lint Fix.
 
-# Lint Fix
-# | task            | type      | cwd |
-# | --------------- | --------- | --- |
-# | lint-fix        | composite |     |
-# | lint-fix-rust   | command   |     |
-# | lint-fix-vstyle | command   |     |
-
 [tasks.lint-fix]
 workspace = false
 dependencies = [
@@ -55,15 +48,3 @@ args = [
 	"--all-features",
 	"--strict",
 ]
-
-# Research
-# | task                                    | type      | cwd |
-# | --------------------------------------- | --------- | --- |
-# | external-memory-radar                   | command   |     |
-# | external-memory-radar-artifact          | composite |     |
-# | external-memory-radar-artifact-json     | command   |     |
-# | external-memory-radar-artifact-validate | command   |     |
-# | external-memory-radar-dry-run           | composite |     |
-# | external-memory-radar-dry-run-json      | command   |     |
-# | external-memory-radar-dry-run-validate  | command   |     |
-# | external-memory-radar-validate          | command   |     |
diff --git a/makefiles/lint.toml b/makefiles/lint.toml
index 1cedd668..a09517af 100644
--- a/makefiles/lint.toml
+++ b/makefiles/lint.toml
@@ -1,12 +1,5 @@
 # Rust workspace tasks: Lint.
 
-# Lint
-# | task        | type      | cwd |
-# | ----------- | --------- | --- |
-# | lint        | composite |     |
-# | lint-rust   | command   |     |
-# | lint-vstyle | command   |     |
-
 [tasks.lint]
 workspace = false
 dependencies = [
@@ -52,10 +45,3 @@ args = [
 	"--workspace",
 	"--all-features",
 ]
-
-# Lint Fix
-# | task            | type      | cwd |
-# | --------------- | --------- | --- |
-# | lint-fix        | composite |     |
-# | lint-fix-rust   | command   |     |
-# | lint-fix-vstyle | command   |     |
diff --git a/makefiles/research.toml b/makefiles/research.toml
index 1c9db279..45b5770c 100644
--- a/makefiles/research.toml
+++ b/makefiles/research.toml
@@ -1,17 +1,5 @@
 # Rust workspace tasks: Research.
 
-# Research
-# | task                                    | type      | cwd |
-# | --------------------------------------- | --------- | --- |
-# | external-memory-radar                   | command   |     |
-# | external-memory-radar-artifact          | composite |     |
-# | external-memory-radar-artifact-json     | command   |     |
-# | external-memory-radar-artifact-validate | command   |     |
-# | external-memory-radar-dry-run           | composite |     |
-# | external-memory-radar-dry-run-json      | command   |     |
-# | external-memory-radar-dry-run-validate  | command   |     |
-# | external-memory-radar-validate          | command   |     |
-
 [tasks.external-memory-radar]
 workspace = false
 command = "cargo"
@@ -127,16 +115,3 @@ args = [
 	"--cursor",
 	"apps/elf-eval/fixtures/external_memory_pattern_radar/cursor.json",
 ]
-
-# Smoke
-# | task                               | type      | cwd |
-# | ---------------------------------- | --------- | --- |
-# | smoke-graphify-docker-graph-report | command   |     |
-# | smoke-graphiti-zep-docker-temporal | command   |     |
-# | smoke-graphrag-docker              | command   |     |
-# | smoke-letta-core-archive-export-readback | command   |     |
-# | smoke-lightrag-docker-context      | command   |     |
-# | smoke-ragflow-docker               | command   |     |
-# | smoke-real-world-job               | composite |     |
-# | smoke-real-world-job-json          | command   |     |
-# | smoke-real-world-job-report        | command   |     |
diff --git a/makefiles/smoke.toml b/makefiles/smoke.toml
index 88c4e494..43b9874d 100644
--- a/makefiles/smoke.toml
+++ b/makefiles/smoke.toml
@@ -1,18 +1,5 @@
 # Rust workspace tasks: Smoke.
 
-# Smoke
-# | task                               | type      | cwd |
-# | ---------------------------------- | --------- | --- |
-# | smoke-graphify-docker-graph-report | command   |     |
-# | smoke-graphiti-zep-docker-temporal | command   |     |
-# | smoke-graphrag-docker              | command   |     |
-# | smoke-letta-core-archive-export-readback | command   |     |
-# | smoke-lightrag-docker-context      | command   |     |
-# | smoke-ragflow-docker               | command   |     |
-# | smoke-real-world-job               | composite |     |
-# | smoke-real-world-job-json          | command   |     |
-# | smoke-real-world-job-report        | command   |     |
-
 [tasks.smoke-graphify-docker-graph-report]
 workspace = false
 command = "bash"
@@ -102,12 +89,3 @@ args = [
 	"--out",
 	"tmp/real-world-job/real-world-job-smoke-report.md",
 ]
-
-# Test
-# | task                  | type      | cwd |
-# | --------------------- | --------- | --- |
-# | test                  | composite |     |
-# | test-e2e              | command   |     |
-# | test-rust             | command   |     |
-# | test-rust-all         | command   |     |
-# | test-rust-integration | command   |     |
diff --git a/makefiles/test.toml b/makefiles/test.toml
index 4245ab58..9ee899d8 100644
--- a/makefiles/test.toml
+++ b/makefiles/test.toml
@@ -1,14 +1,5 @@
 # Rust workspace tasks: Test.
 
-# Test
-# | task                  | type      | cwd |
-# | --------------------- | --------- | --- |
-# | test                  | composite |     |
-# | test-e2e              | command   |     |
-# | test-rust             | command   |     |
-# | test-rust-all         | command   |     |
-# | test-rust-integration | command   |     |
-
 [tasks.test]
 clear = true
 workspace = false
diff --git a/scripts/materialize-explicit-qrels.py b/scripts/materialize-explicit-qrels.py
new file mode 100755
index 00000000..779abd2f
--- /dev/null
+++ b/scripts/materialize-explicit-qrels.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""Generate explicit relevance-judgment fixtures from real-world job fixtures."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import shutil
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+
+SCHEMA = "elf.real_world_explicit_qrel_materialization/v1"
+JOB_SCHEMA = "elf.real_world_job/v1"
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options that control qrel materialization."""
+    description = (
+        "Copy real_world_job fixtures and derive expected_answer.relevance_judgments "
+        "from checked-in evidence_links/required_evidence."
+    )
+    cli = argparse.ArgumentParser(description=description)
+    add = cli.add_argument
+
+    # Required locations: input fixtures, generated fixtures, and the summary file.
+    add("--fixtures", required=True, type=Path, help="Input fixture directory.")
+    add("--out-fixtures", required=True, type=Path, help="Generated fixture directory.")
+    add("--summary-out", required=True, type=Path, help="Write materialization summary JSON.")
+
+    # Optional switches that shape what is written into each generated job.
+    add(
+        "--ranked-candidates-source",
+        choices=["none", "oracle"],
+        default="none",
+        help="Optionally add fixture-trace ranked candidates ordered by qrel grade.",
+    )
+    add(
+        "--profile",
+        choices=["preserve", "generated_public"],
+        default="preserve",
+        help="Preserve original corpus profile or mark generated jobs as generated_public.",
+    )
+    add(
+        "--exclude-without-positive-qrels",
+        action="store_true",
+        help="Do not copy job JSON files that have no positive derived qrels.",
+    )
+    add(
+        "--overwrite",
+        action="store_true",
+        help="Replace existing relevance_judgments instead of preserving explicit grades.",
+    )
+
+    return cli.parse_args()
+
+
+def read_json(path: Path) -> Any:
+    """Return the value decoded from the UTF-8 JSON document at ``path``."""
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def write_json(path: Path, value: Any) -> None:
+    """Write ``value`` to ``path`` as 2-space-indented JSON plus a trailing newline."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open("w", encoding="utf-8") as sink:
+        sink.write(json.dumps(value, indent=2, sort_keys=False) + "\n")
+
+
+def stable_unique(values: list[str]) -> list[str]:
+    """Drop falsy entries and repeats while keeping first-seen order.
+
+    Args:
+        values: Candidate strings, possibly with duplicates or empties.
+    Returns:
+        The truthy entries of ``values``, each appearing exactly once.
+    """
+    return list(dict.fromkeys(entry for entry in values if entry))
+
+
+def evidence_link_ids(value: Any) -> list[str]:
+    """Return ids from a link value: a str yields [value], a list yields its
+    str members, and any other type yields []."""
+    if isinstance(value, str):
+        return [value]
+    candidates = value if isinstance(value, list) else []
+    return [entry for entry in candidates if isinstance(entry, str)]
+
+
+def corpus_evidence_ids(job: dict[str, Any]) -> list[str]:
+    """List string ``evidence_id`` values of the job's corpus items, in corpus order."""
+    entries = job.get("corpus", {}).get("items", [])
+    dict_entries = (entry for entry in entries if isinstance(entry, dict))
+    candidate_ids = (entry.get("evidence_id") for entry in dict_entries)
+    return [found for found in candidate_ids if isinstance(found, str)]
+
+
+def derive_positive_grades(job: dict[str, Any]) -> dict[str, float]:
+    """Grade ids from ``evidence_links`` 2.0 and other ``required_evidence`` ids 1.0."""
+    linked = job.get("expected_answer", {}).get("evidence_links", {})
+    derived: dict[str, float] = {}
+    for link_value in linked.values():
+        derived.update((linked_id, 2.0) for linked_id in evidence_link_ids(link_value))
+    for requirement in job.get("required_evidence", []):
+        required_id = requirement.get("evidence_id") if isinstance(requirement, dict) else None
+        if isinstance(required_id, str):
+            # An id that already carries the evidence-link grade keeps it.
+            derived.setdefault(required_id, 1.0)
+
+    return derived
+
+
+def existing_qrel_grades(job: dict[str, Any]) -> dict[str, float]:
+    """Collect grades already stored in ``expected_answer.relevance_judgments``.
+    Entries need a string ``evidence_id`` and a numeric ``grade`` (default 1.0); last one wins."""
+    judgments = job.get("expected_answer", {}).get("relevance_judgments", [])
+    usable = (entry for entry in judgments if isinstance(entry, dict))
+    graded = ((entry.get("evidence_id"), entry.get("grade", 1.0)) for entry in usable)
+    return {
+        key: float(raw)
+        for key, raw in graded
+        if isinstance(key, str) and isinstance(raw, (int, float))
+    }
+
+
+def materialized_qrels(job: dict[str, Any], overwrite: bool) -> list[dict[str, Any]]:
+    """Build the job's relevance judgments, restricted to evidence present in its corpus.
+
+    Grades derived from evidence links/required evidence are the base; unless
+    ``overwrite`` is set, grades already recorded on the job replace them. Returns
+    ``[]`` when no emitted judgment is positive, so callers can treat an empty
+    list as "no positive qrels" even if the only positives point outside the corpus.
+    """
+    grades = derive_positive_grades(job)
+    if not overwrite:
+        grades.update(existing_qrel_grades(job))
+    corpus_ids = corpus_evidence_ids(job)
+    qrels = [{"evidence_id": eid, "grade": grades[eid]} for eid in corpus_ids if eid in grades]
+    # Test positivity after the corpus filter so out-of-corpus positives cannot count.
+    return qrels if any(judgment["grade"] > 0.0 for judgment in qrels) else []
+
+
+def ranked_candidates_from_qrels(qrels: list[dict[str, Any]]) -> list[str]:
+    """Order judged evidence ids by descending grade, breaking ties by id.
+
+    Judgments whose ``evidence_id`` is missing or falsy are left out of the result.
+    """
+
+    def rank(entry: dict[str, Any]) -> tuple[float, str]:
+        # Negating the grade lets one ascending sort put higher grades first.
+        return -float(entry.get("grade", 0.0)), str(entry.get("evidence_id", ""))
+
+    ordered = sorted(qrels, key=rank)
+    return [entry["evidence_id"] for entry in ordered if entry.get("evidence_id")]
+
+
+def add_oracle_ranked_candidates(job: dict[str, Any], qrels: list[dict[str, Any]]) -> bool:
+    """Store qrel-ordered ranked candidates in the job's adapter answer trace.
+    Returns False (job untouched) when ``corpus.adapter_response.answer`` is not a dict."""
+    adapter_answer = job.get("corpus", {}).get("adapter_response", {}).get("answer")
+    if isinstance(adapter_answer, dict):
+        explain = adapter_answer.setdefault("trace_explainability", {})
+        explain["ranked_candidate_evidence_ids"] = ranked_candidates_from_qrels(qrels)
+        # An existing trace_id is kept; only a missing one gets the oracle default.
+        explain.setdefault("trace_id", f"{job.get('job_id', 'unknown')}-explicit-qrel-oracle")
+    return isinstance(adapter_answer, dict)
+
+
+def materialize_job(source: Path, target: Path, args: argparse.Namespace) -> dict[str, Any]:
+    """Materialize explicit qrels for one fixture JSON file and report the outcome.
+
+    Args:
+        source: Fixture JSON file to read.
+        target: Destination path for the copied or rewritten file.
+        args: Parsed options; reads ``overwrite``, ``exclude_without_positive_qrels``,
+            ``profile`` and ``ranked_candidates_source``.
+
+    Returns:
+        A record whose ``kind`` is one of ``copied_non_job_json``, ``materialized_job``,
+        ``excluded_without_positive_qrels`` or ``copied_without_positive_qrels``.
+    """
+    job = read_json(source)
+    if not isinstance(job, dict) or job.get("schema") != JOB_SCHEMA:
+        shutil.copy2(source, target)
+        return {"kind": "copied_non_job_json"}
+
+    qrels = materialized_qrels(job, overwrite=args.overwrite)
+    if not qrels and args.exclude_without_positive_qrels:
+        return {"kind": "excluded_without_positive_qrels", "job_id": job.get("job_id")}
+    # No positive qrels and no exclusion requested: copy the job file unchanged.
+    if not qrels:
+        shutil.copy2(source, target)
+        return {"kind": "copied_without_positive_qrels", "job_id": job.get("job_id")}
+
+    expected = job.setdefault("expected_answer", {})
+    had_existing_qrels = bool(expected.get("relevance_judgments"))
+    expected["relevance_judgments"] = qrels
+    job["tags"] = stable_unique([*job.get("tags", []), "explicit_qrels_generated"])
+    if args.profile == "generated_public":
+        job.setdefault("corpus", {})["profile"] = "generated_public"
+
+    ranked_candidate_added = False
+    if args.ranked_candidates_source == "oracle":
+        ranked_candidate_added = add_oracle_ranked_candidates(job, qrels)
+
+    write_json(target, job)
+    return {
+        "kind": "materialized_job",
+        "job_id": job.get("job_id"),
+        "judgment_count": len(qrels),
+        "positive_judgment_count": sum(1 for judgment in qrels if judgment["grade"] > 0.0),
+        "zero_grade_judgment_count": sum(1 for judgment in qrels if judgment["grade"] == 0.0),
+        "unjudged_corpus_evidence_count": len(corpus_evidence_ids(job)) - len(qrels),
+        "had_existing_qrels": had_existing_qrels,
+        "ranked_candidate_added": ranked_candidate_added,
+    }
+
+
+def materialize(args: argparse.Namespace) -> dict[str, Any]:
+    """Mirror ``args.fixtures`` into ``args.out_fixtures`` and write the summary JSON.
+
+    The output directory is deleted and recreated, so ``SystemExit`` is raised when
+    the input is not a directory or when the input and output trees overlap."""
+    if not args.fixtures.is_dir():
+        raise SystemExit(f"{args.fixtures} is not a directory")
+    source_root, output_root = args.fixtures.resolve(), args.out_fixtures.resolve()
+    if source_root.is_relative_to(output_root) or output_root.is_relative_to(source_root):
+        raise SystemExit(f"{args.out_fixtures} must not overlap input fixtures {args.fixtures}")
+    if args.out_fixtures.exists():
+        shutil.rmtree(args.out_fixtures)
+    args.out_fixtures.mkdir(parents=True)
+
+    records: list[dict[str, Any]] = []
+    for source in sorted(args.fixtures.rglob("*")):
+        target = args.out_fixtures / source.relative_to(args.fixtures)
+        if source.is_dir():
+            target.mkdir(parents=True, exist_ok=True)
+        elif source.suffix == ".json":
+            records.append(materialize_job(source, target, args))
+        else:
+            target.parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy2(source, target)
+
+    jobs = [record for record in records if record["kind"] == "materialized_job"]
+    excluded = [record for record in records if record["kind"] == "excluded_without_positive_qrels"]
+    summary = {
+        "schema": SCHEMA,
+        "generated_at": datetime.now(UTC).isoformat().replace("+00:00", "Z"),
+        "input_fixture_dir": str(args.fixtures),
+        "output_fixture_dir": str(args.out_fixtures),
+        "ranked_candidates_source": args.ranked_candidates_source,
+        "profile": args.profile,
+        "exclude_without_positive_qrels": args.exclude_without_positive_qrels,
+        "overwrite": args.overwrite,
+        "job_count": len(jobs),
+        "excluded_without_positive_qrels_count": len(excluded),
+        "judgment_count": sum(record["judgment_count"] for record in jobs),
+        "positive_judgment_count": sum(record["positive_judgment_count"] for record in jobs),
+        "zero_grade_judgment_count": sum(record["zero_grade_judgment_count"] for record in jobs),
+        "unjudged_corpus_evidence_count": sum(
+            record["unjudged_corpus_evidence_count"] for record in jobs
+        ),
+        "existing_qrel_job_count": sum(1 for record in jobs if record["had_existing_qrels"]),
+        "ranked_candidate_job_count": sum(1 for record in jobs if record["ranked_candidate_added"]),
+        "excluded_job_ids": [record.get("job_id") for record in excluded],
+        "claim_boundary": (
+            "Derived qrels are deterministic benchmark labels from checked-in evidence links and "
+            "required_evidence. Unmentioned corpus evidence remains unjudged instead of being "
+            "converted into synthetic negative labels. Oracle ranked candidates test metric "
+            "mechanics only; they are not product-runtime retrieval evidence or leaderboard proof."
+        ),
+    }
+    write_json(args.summary_out, summary)
+    return summary
+
+
+def main() -> None:
+    """Run the materialization from CLI arguments and print a one-line recap."""
+    report = materialize(parse_args())
+    recap = [
+        f"{report['job_count']} jobs",
+        f"{report['judgment_count']} judgments",
+        f"{report['ranked_candidate_job_count']} ranked-candidate traces",
+    ]
+    print("materialized explicit qrels: " + ", ".join(recap))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/real-world-docker.sh b/scripts/real-world-docker.sh
index 163c4d1f..8afc80d5 100755
--- a/scripts/real-world-docker.sh
+++ b/scripts/real-world-docker.sh
@@ -45,6 +45,11 @@ memory-live-adapters)
 		docker compose -f docker-compose.baseline.yml --profile graphiti-zep up -d graphiti-falkordb
 	fi
 	docker compose -f docker-compose.baseline.yml run --build --rm \
+		-e ELF_REAL_WORLD_LIVE_REPORT_DIR \
+		-e ELF_REAL_WORLD_LIVE_FIXTURES \
+		-e ELF_REAL_WORLD_OPERATOR_DEBUG_FIXTURES \
+		-e ELF_REAL_WORLD_LIVE_WORK_DIR \
+		-e ELF_REAL_WORLD_QMD_DIR \
 		-e ELF_REAL_WORLD_LIVE_ENABLE_RAGFLOW \
 		-e ELF_REAL_WORLD_LIVE_ENABLE_LIGHTRAG \
 		-e ELF_REAL_WORLD_LIVE_ENABLE_GRAPHRAG \
@@ -123,6 +128,15 @@ memory-live-adapters)
 	fi
 	exit "$status"
 	;;
+memory-live-explicit-qrels) # runs scripts/real-world-live-explicit-qrels.sh in the baseline-runner service
+	docker compose -f docker-compose.baseline.yml run --build --rm \
+		-e ELF_REAL_WORLD_LIVE_EXPLICIT_QRELS_REPORT_DIR \
+		-e ELF_REAL_WORLD_LIVE_EXPLICIT_QRELS_FIXTURES \
+		-e ELF_REAL_WORLD_LIVE_EXPLICIT_QRELS_OPERATOR_DEBUG_FIXTURES \
+		-e ELF_REAL_WORLD_LIVE_EXPLICIT_QRELS_WORK_DIR \
+		-e ELF_REAL_WORLD_QMD_DIR \
+		baseline-runner bash scripts/real-world-live-explicit-qrels.sh # each bare -e NAME forwards the caller's value
+	;;
 *)
 	echo "unknown real-world Docker profile: $profile" >&2
 	exit 2
diff --git a/scripts/real-world-explicit-qrels.sh b/scripts/real-world-explicit-qrels.sh
new file mode 100755
index 00000000..ccd17cf1
--- /dev/null
+++ b/scripts/real-world-explicit-qrels.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" # parent of the directory holding this script
+REPORT_DIR="${ELF_REAL_WORLD_EXPLICIT_QRELS_REPORT_DIR:-${ROOT_DIR}/tmp/real-world-memory/explicit-qrels}"
+SOURCE_FIXTURE_DIR="${ELF_REAL_WORLD_EXPLICIT_QRELS_FIXTURES:-${ROOT_DIR}/apps/elf-eval/fixtures/real_world_memory}"
+QREL_FIXTURE_DIR="${ELF_REAL_WORLD_EXPLICIT_QRELS_OUT_FIXTURES:-${REPORT_DIR}/fixtures}" # generated fixtures land here
+
+cd "${ROOT_DIR}" # the relative scripts/ path below is resolved from the repository root
+
+python3 scripts/materialize-explicit-qrels.py \
+	--fixtures "${SOURCE_FIXTURE_DIR}" \
+	--out-fixtures "${QREL_FIXTURE_DIR}" \
+	--summary-out "${REPORT_DIR}/materialization-summary.json" \
+	--ranked-candidates-source oracle \
+	--profile generated_public \
+	--exclude-without-positive-qrels # step 1: generate fixtures with explicit qrels and oracle ranked candidates
+
+cargo run -p elf-eval --bin real_world_job_benchmark -- \
+	run \
+	--fixtures "${QREL_FIXTURE_DIR}" \
+	--out "${REPORT_DIR}/report.json" \
+	--run-id real-world-memory-explicit-qrels \
+	--adapter-id fixture_explicit_qrels \
+	--adapter-name "Explicit qrel oracle fixture pack" \
+	--adapter-behavior explicit_qrel_oracle_fixture \
+	--adapter-storage-status pass \
+	--adapter-runtime-status pass \
+	--adapter-notes "Generated by scripts/materialize-explicit-qrels.py from checked-in evidence_links and required_evidence; unmentioned corpus evidence remains unjudged; oracle ranked candidates test metric mechanics only." # step 2: benchmark the generated fixtures into report.json
+
+cargo run -p elf-eval --bin real_world_job_benchmark -- \
+	publish \
+	--report "${REPORT_DIR}/report.json" \
+	--out "${REPORT_DIR}/report.md" # step 3: publish report.json as report.md
+
+echo "Explicit qrel benchmark report:"
+echo "  ${REPORT_DIR}/materialization-summary.json"
+echo "  ${REPORT_DIR}/report.json"
+echo "  ${REPORT_DIR}/report.md"
diff --git a/scripts/real-world-live-explicit-qrels.sh b/scripts/real-world-live-explicit-qrels.sh
new file mode 100755
index 00000000..35212ac1
--- /dev/null
+++ b/scripts/real-world-live-explicit-qrels.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" # parent of the directory holding this script
+REPORT_DIR="${ELF_REAL_WORLD_LIVE_EXPLICIT_QRELS_REPORT_DIR:-${ROOT_DIR}/tmp/real-world-memory/live-explicit-qrels}"
+SOURCE_FIXTURE_DIR="${ELF_REAL_WORLD_LIVE_EXPLICIT_QRELS_FIXTURES:-${ROOT_DIR}/apps/elf-eval/fixtures/real_world_memory}"
+OPERATOR_SOURCE_FIXTURE_DIR="${ELF_REAL_WORLD_LIVE_EXPLICIT_QRELS_OPERATOR_DEBUG_FIXTURES:-${ROOT_DIR}/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux}"
+QREL_FIXTURE_DIR="${REPORT_DIR}/explicit-qrel-fixtures" # generated from SOURCE_FIXTURE_DIR
+QREL_OPERATOR_FIXTURE_DIR="${REPORT_DIR}/explicit-qrel-operator-debug-fixtures" # generated from OPERATOR_SOURCE_FIXTURE_DIR
+LIVE_REPORT_DIR="${REPORT_DIR}/live-adapters" # exported to the live adapter run as ELF_REAL_WORLD_LIVE_REPORT_DIR
+LIVE_WORK_DIR="${ELF_REAL_WORLD_LIVE_EXPLICIT_QRELS_WORK_DIR:-/bench/real-world-live-explicit-qrels}"
+
+if [[ ! -f "/.dockerenv" && "${ELF_REAL_WORLD_LIVE_ALLOW_HOST:-0}" != "1" ]]; then # host runs need ELF_REAL_WORLD_LIVE_ALLOW_HOST=1
+  echo "Refusing to run live explicit-qrel adapters outside Docker. Use cargo make real-world-memory-live-explicit-qrels." >&2
+  exit 1
+fi
+
+for cmd in bash jq python3; do # fail early when a tool this script calls is missing
+  if ! command -v "${cmd}" >/dev/null 2>&1; then
+    echo "Missing ${cmd} in live explicit-qrel runner." >&2
+    exit 1
+  fi
+done
+
+cd "${ROOT_DIR}"
+
+rm -rf "${REPORT_DIR}" # every run starts from an empty report directory
+mkdir -p "${REPORT_DIR}"
+
+python3 scripts/materialize-explicit-qrels.py \
+  --fixtures "${SOURCE_FIXTURE_DIR}" \
+  --out-fixtures "${QREL_FIXTURE_DIR}" \
+  --summary-out "${REPORT_DIR}/memory-materialization-summary.json" \
+  --ranked-candidates-source none \
+  --profile generated_public \
+  --exclude-without-positive-qrels # memory fixtures: explicit qrels only, no oracle ranked candidates
+
+python3 scripts/materialize-explicit-qrels.py \
+  --fixtures "${OPERATOR_SOURCE_FIXTURE_DIR}" \
+  --out-fixtures "${QREL_OPERATOR_FIXTURE_DIR}" \
+  --summary-out "${REPORT_DIR}/operator-debug-materialization-summary.json" \
+  --ranked-candidates-source none \
+  --profile generated_public \
+  --exclude-without-positive-qrels # operator-debugging fixtures, same options
+
+ELF_REAL_WORLD_LIVE_REPORT_DIR="${LIVE_REPORT_DIR}" \
+  ELF_REAL_WORLD_LIVE_FIXTURES="${QREL_FIXTURE_DIR}" \
+  ELF_REAL_WORLD_OPERATOR_DEBUG_FIXTURES="${QREL_OPERATOR_FIXTURE_DIR}" \
+  ELF_REAL_WORLD_LIVE_WORK_DIR="${LIVE_WORK_DIR}" \
+  ELF_REAL_WORLD_LIVE_ELF_RUN_ID="real-world-memory-live-explicit-qrels-elf" \
+  ELF_REAL_WORLD_LIVE_QMD_RUN_ID="real-world-memory-live-explicit-qrels-qmd" \
+  ELF_REAL_WORLD_LIVE_COMBINED_RUN_ID="real-world-memory-live-elf-qmd-explicit-qrels-quantitative" \
+  bash scripts/real-world-live-adapters.sh # the assignments above apply to this one invocation only
+
+jq -n \
+  --slurpfile memory_summary "${REPORT_DIR}/memory-materialization-summary.json" \
+  --slurpfile operator_summary "${REPORT_DIR}/operator-debug-materialization-summary.json" \
+  --slurpfile live_summary "${LIVE_REPORT_DIR}/summary.json" \
+  '(env.ELF_REAL_WORLD_LIVE_EXPLICIT_QRELS_REPORT_DIR // "tmp/real-world-memory/live-explicit-qrels") as $artifact_dir | {
+    schema: "elf.real_world_live_explicit_qrels_sweep/v1",
+    generated_at: (now | todateiso8601),
+    artifact_dir: $artifact_dir,
+    live_report_dir: ($artifact_dir + "/live-adapters"),
+    materialization: {
+      memory: $memory_summary[0],
+      operator_debugging_ux: $operator_summary[0]
+    },
+    live_summary: $live_summary[0],
+    boundary: "Input fixtures have deterministic explicit qrels, but ranked candidates are product-runtime traces from the live adapters. This improves qrel-source evidence only; leaderboard claims still require pass rows, full ranked coverage, held-out/leakage audit evidence, and paired significance."
+  }' >"${REPORT_DIR}/summary.json" # live_report_dir is derived from artifact_dir so a report-dir override stays consistent
+
+echo "Live explicit-qrel adapter reports:" # printed list of artifact paths under REPORT_DIR and LIVE_REPORT_DIR
+echo "  ${REPORT_DIR}/memory-materialization-summary.json"
+echo "  ${REPORT_DIR}/operator-debug-materialization-summary.json"
+echo "  ${LIVE_REPORT_DIR}/elf-report.json"
+echo "  ${LIVE_REPORT_DIR}/qmd-report.json"
+echo "  ${LIVE_REPORT_DIR}/qmd-quantitative-product-manifest.json"
+echo "  ${LIVE_REPORT_DIR}/elf-qmd-quantitative-report.json"
+echo "  ${LIVE_REPORT_DIR}/elf-qmd-quantitative-report.md"
+echo "  ${REPORT_DIR}/summary.json"