From 8c0d2332a7375903d8185a998258ebfeea78242f Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Fri, 19 Jun 2026 14:52:49 +0800 Subject: [PATCH 1/2] feat(inference): show vendor-aware precision in legend, line labels, and tooltip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Legend entries now display precision after a · separator (e.g. "B300 (vLLM) · NVFP4", "MI355X (vLLM) · FP8"). FP4 is mapped to NVFP4 (NVIDIA) / MXFP4 (AMD) based on the GPU's vendor in HW_REGISTRY; FP8 stays generic since the DB doesn't distinguish E4M3 vs MXFP8. Multi-precision SKUs join with / (e.g. "· NVFP4 / FP8"). - Add getPrecisionDisplayLabel(precision, hwKey) to lib/constants.ts - Update official + overlay legend labels in ScatterGraph.tsx - Route line labels (lineLabelText, overlayLabelText) through vendor format - Add precision line to ChartTooltip - Add unit tests for the new helper --- .../components/inference/ui/ChartTooltip.tsx | 5 + .../components/inference/ui/ScatterGraph.tsx | 102 ++++++++++++------ packages/app/src/lib/constants.test.ts | 44 ++++++++ packages/app/src/lib/constants.ts | 20 ++++ 4 files changed, 141 insertions(+), 30 deletions(-) diff --git a/packages/app/src/components/inference/ui/ChartTooltip.tsx b/packages/app/src/components/inference/ui/ChartTooltip.tsx index 11843f51..65c28521 100644 --- a/packages/app/src/components/inference/ui/ChartTooltip.tsx +++ b/packages/app/src/components/inference/ui/ChartTooltip.tsx @@ -1,12 +1,14 @@ 'use client'; import { useInference } from '@/components/inference/InferenceContext'; +import { getPrecisionDisplayLabel } from '@/lib/constants'; interface TooltipContentProps { active?: boolean; payload?: { payload?: { hwKey?: string | number; + precision?: string; tp?: number; conc?: number; x?: TValue; @@ -25,6 +27,9 @@ export default function ChartTooltip({ active, payload }: TooltipContentProps

{`GPU: ${hardwareConfig[pointPayload.hwKey as keyof typeof hardwareConfig].gpu}`}

+ {pointPayload.precision && ( +

{`Precision: ${getPrecisionDisplayLabel(pointPayload.precision, String(pointPayload.hwKey ?? ''))}`}

+ )}

{`Total GPUs: ${pointPayload.tp}`}

{(pointPayload.ep !== null && pointPayload.ep !== undefined) || (pointPayload.prefill_ep !== null && pointPayload.prefill_ep !== undefined) ? ( diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index 68e9cc14..3b993d89 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -10,8 +10,8 @@ import { pointNearestX } from '@/components/inference/ui/line-label-anchor'; import ChartLegend from '@/components/ui/chart-legend'; import { useUnofficialRun } from '@/components/unofficial-run-provider'; import { computeToggle } from '@/hooks/useTogglableSet'; -import { getHardwareConfig, getModelSortIndex } from '@/lib/constants'; -import { getChartWatermark, getPrecisionLabel, type Precision } from '@/lib/data-mappings'; +import { getHardwareConfig, getModelSortIndex, getPrecisionDisplayLabel } from '@/lib/constants'; +import { getChartWatermark, PRECISION_OPTIONS } from '@/lib/data-mappings'; import { matchKnownConfigIssues, pointMatchesIssue } from '@/lib/known-issues'; import { formatNumber, getDisplayLabel, updateRepoUrl } from '@/lib/utils'; import { D3Chart } from '@/lib/d3-chart/D3Chart'; @@ -110,7 +110,7 @@ const parseHwKeyToLabel = (hwKey: string, model?: string): { name: string; label }; // Line-label text for a curve. When more than one precision is shown, each curve -// is its own line, so append the precision (e.g. "B200 (vLLM) FP8") to keep the +// is its own line, so append the precision (e.g. "B200 (vLLM) NVFP4") to keep the // FP4 and FP8 curves of the same hardware distinguishable. const lineLabelText = ( hwKey: string, @@ -119,7 +119,7 @@ const lineLabelText = ( model?: string, ): string => { const base = parseHwKeyToLabel(hwKey, model).label; - return includePrecision ? `${base} ${getPrecisionLabel(precision as Precision)}` : base; + return includePrecision ? `${base} ${getPrecisionDisplayLabel(precision, hwKey)}` : base; }; const ScatterGraph = React.memo( @@ -357,6 +357,20 @@ const ScatterGraph = React.memo( [groupedData, selectedPrecisions, effectiveActiveHwTypes], ); + const precisionsPerHw = useMemo(() => { + const map = new Map>(); + for (const p of filteredData) { + const hw = p.hwKey as string; + let set = map.get(hw); + if (!set) { + set = new Set(); + map.set(hw, set); + } + set.add(p.precision); + } + return map; + }, [filteredData]); + const processedOverlayData = useMemo(() => { if (!overlayData?.data) return []; return overlayData.data.filter((p) => selectedPrecisions.includes(p.precision)); @@ -1109,7 +1123,7 @@ const ScatterGraph = React.memo( ? `✕ ${info.branch || `run ${info.id}`}` : parseHwKeyToLabel(hwKey, modelLabel).label; return multiPrecision - ? `${base} ${getPrecisionLabel(precision as Precision)}` + ? `${base} ${getPrecisionDisplayLabel(precision, hwKey)}` : base; }; const sortedOverlay = Object.entries(overlayRooflines) @@ -1166,7 +1180,7 @@ const ScatterGraph = React.memo( ? `✕ ${info.branch || `run ${info.id}`}` : parseHwKeyToLabel(group.hwKey, modelLabel).label; const labelText = multiPrecision - ? `${branchOrHw} ${getPrecisionLabel((group.points[0]?.precision ?? '') as Precision)}` + ? `${branchOrHw} ${getPrecisionDisplayLabel(group.points[0]?.precision ?? '', group.hwKey)}` : branchOrHw; const labelKey = `overlay-${ovKey}`; const pt = group.points.at(-1)!; @@ -2062,16 +2076,30 @@ const ScatterGraph = React.memo( ...(overlayData && unofficialRunInfos.length > 0 ? unofficialRunInfos .map((info, idx) => { - const hasPoints = overlayData.data.some( - (d) => + const runPrecs = new Set(); + let sampleHwKey = ''; + for (const d of overlayData.data) { + if ( overlayRunIndex(d.run_url ?? null, runIndexByUrl) === idx && - selectedPrecisions.includes(d.precision), - ); - if (!hasPoints) return null; + selectedPrecisions.includes(d.precision) + ) { + runPrecs.add(d.precision); + if (!sampleHwKey && d.hwKey) sampleHwKey = d.hwKey as string; + } + } + if (runPrecs.size === 0) return null; const branch = info.branch || `run ${info.id}`; + const precSuffix = ` · ${[...runPrecs] + .toSorted( + (a, b) => + (PRECISION_OPTIONS as readonly string[]).indexOf(a) - + (PRECISION_OPTIONS as readonly string[]).indexOf(b), + ) + .map((p) => getPrecisionDisplayLabel(p, sampleHwKey)) + .join(' / ')}`; return { name: `✕ unofficial-run-${info.id}`, - label: `✕ ${branch}`, + label: `✕ ${branch}${precSuffix}`, color: overlayRunColor(idx), title: `UNOFFICIAL: ${branch}`, isHighlighted: true, @@ -2105,24 +2133,38 @@ const ScatterGraph = React.memo( .toSorted( ([a], [b]) => getModelSortIndex(a) - getModelSortIndex(b) || a.localeCompare(b), ) - .map(([key, hwConfig]: [string, any]) => ({ - name: hwConfig.name, - label: getDisplayLabel(hwConfig), - color: resolveColor(key), - title: hwConfig.gpu, - isHighlighted: highlightConfigSuffixes.has(key.replaceAll('_', '-')), - hw: key, - isActive: showAllHardwareTypes ? true : effectiveOfficialHwTypes.has(key), - onClick: showAllHardwareTypes - ? () => {} - : () => { - handleToggleHwType(key); - track('latency_hw_type_toggled', { hw: key }); - }, - tooltip: changelog - ? formatChangelogDescription(changelog.entries[0].description) - : null, - })), + .map(([key, hwConfig]: [string, any]) => { + const baseLabel = getDisplayLabel(hwConfig); + const precs = precisionsPerHw.get(key); + const precSuffix = precs + ? ` · ${[...precs] + .toSorted( + (a, b) => + (PRECISION_OPTIONS as readonly string[]).indexOf(a) - + (PRECISION_OPTIONS as readonly string[]).indexOf(b), + ) + .map((p) => getPrecisionDisplayLabel(p, key)) + .join(' / ')}` + : ''; + return { + name: hwConfig.name, + label: `${baseLabel}${precSuffix}`, + color: resolveColor(key), + title: hwConfig.gpu, + isHighlighted: highlightConfigSuffixes.has(key.replaceAll('_', '-')), + hw: key, + isActive: showAllHardwareTypes ? true : effectiveOfficialHwTypes.has(key), + onClick: showAllHardwareTypes + ? () => {} + : () => { + handleToggleHwType(key); + track('latency_hw_type_toggled', { hw: key }); + }, + tooltip: changelog + ? formatChangelogDescription(changelog.entries[0].description) + : null, + }; + }), ]} disableActiveSort={false} isLegendExpanded={isLegendExpanded} diff --git a/packages/app/src/lib/constants.test.ts b/packages/app/src/lib/constants.test.ts index dd849319..ae3ad55a 100644 --- a/packages/app/src/lib/constants.test.ts +++ b/packages/app/src/lib/constants.test.ts @@ -8,6 +8,7 @@ import { getGpuSpecs, getHardwareConfig, getModelSortIndex, + getPrecisionDisplayLabel, hardwareKeyMatchesAnyBase, hardwareKeyMatchesBase, isKnownGpu, @@ -259,3 +260,46 @@ describe('getModelSortIndex', () => { expect(getModelSortIndex('')).toBeGreaterThanOrEqual(9); }); }); + +// =========================================================================== +// getPrecisionDisplayLabel +// =========================================================================== +describe('getPrecisionDisplayLabel', () => { + it('returns NVFP4 for fp4 on NVIDIA hardware', () => { + expect(getPrecisionDisplayLabel('fp4', 'b300_vllm')).toBe('NVFP4'); + expect(getPrecisionDisplayLabel('fp4', 'h100')).toBe('NVFP4'); + expect(getPrecisionDisplayLabel('fp4', 'gb200_dynamo-trt_mtp')).toBe('NVFP4'); + }); + + it('returns MXFP4 for fp4 on AMD hardware', () => { + expect(getPrecisionDisplayLabel('fp4', 'mi355x_vllm')).toBe('MXFP4'); + expect(getPrecisionDisplayLabel('fp4', 'mi300x')).toBe('MXFP4'); + expect(getPrecisionDisplayLabel('fp4', 'mi325x_atom')).toBe('MXFP4'); + }); + + it('returns generic FP4 for unknown vendor', () => { + expect(getPrecisionDisplayLabel('fp4', 'unknown_x')).toBe('FP4'); + expect(getPrecisionDisplayLabel('fp4', '')).toBe('FP4'); + }); + + it('returns vendor-prefixed fp4fp8 labels', () => { + expect(getPrecisionDisplayLabel('fp4fp8', 'b200_vllm')).toBe('NVFP4+FP8'); + expect(getPrecisionDisplayLabel('fp4fp8', 'mi355x')).toBe('MXFP4+FP8'); + expect(getPrecisionDisplayLabel('fp4fp8', 'unknown')).toBe('FP4+FP8'); + }); + + it('returns generic FP8 regardless of vendor', () => { + expect(getPrecisionDisplayLabel('fp8', 'b300_vllm')).toBe('FP8'); + expect(getPrecisionDisplayLabel('fp8', 'mi355x_atom')).toBe('FP8'); + expect(getPrecisionDisplayLabel('fp8', 'unknown')).toBe('FP8'); + }); + + it('returns BF16 and INT4 unchanged', () => { + expect(getPrecisionDisplayLabel('bf16', 'h100_vllm')).toBe('BF16'); + expect(getPrecisionDisplayLabel('int4', 'mi300x')).toBe('INT4'); + }); + + it('falls back gracefully for unknown precision strings', () => { + expect(getPrecisionDisplayLabel('fp16', 'h100')).toBe('fp16'); + }); +}); diff --git a/packages/app/src/lib/constants.ts b/packages/app/src/lib/constants.ts index a720077d..ca83bd39 100644 --- a/packages/app/src/lib/constants.ts +++ b/packages/app/src/lib/constants.ts @@ -1,5 +1,7 @@ import { HW_REGISTRY, resolveFrameworkPartLabel } from '@semianalysisai/inferencex-constants'; +import { getPrecisionLabel, type Precision } from './data-mappings'; + /** d3.schemeTableau10 — 10-color categorical palette for tracked configs. */ export const TABLEAU_10 = [ '#4e79a7', @@ -34,6 +36,24 @@ export function getGpuSpecs(hwKey: string): GpuSpecs { return { power: entry.power, costh: entry.costh, costn: entry.costn, costr: entry.costr }; } +// FP4 → NVFP4/MXFP4 by vendor; FP8 stays generic (E4M3 vs MXFP8 is ambiguous in the DB). +export function getPrecisionDisplayLabel(precision: string, hwKey: string): string { + const base = hwKey.split(/[-_]/u)[0]; + const vendor = HW_REGISTRY[base]?.vendor; + const fp4 = vendor === 'NVIDIA' ? 'NVFP4' : vendor === 'AMD' ? 'MXFP4' : 'FP4'; + switch (precision) { + case 'fp4': { + return fp4; + } + case 'fp4fp8': { + return `${fp4}+FP8`; + } + default: { + return getPrecisionLabel(precision as Precision); + } + } +} + /** Build the vendor prefix string for the `gpu` tooltip field. */ function getVendorPrefix(base: string): string { const entry = HW_REGISTRY[base]; From 792177c30248997d15746deb9404ee47afdf8796 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Fri, 19 Jun 2026 14:56:40 +0800 Subject: [PATCH 2/2] fix: show precision suffix on deselected legend entries too precisionsPerHw was derived from filteredData (which excludes inactive hw types), so toggled-off SKUs lost their precision suffix. Derive from all data points filtered only by selectedPrecisions instead. --- packages/app/src/components/inference/ui/ScatterGraph.tsx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index 3b993d89..8d97eadb 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -359,7 +359,8 @@ const ScatterGraph = React.memo( const precisionsPerHw = useMemo(() => { const map = new Map>(); - for (const p of filteredData) { + for (const p of data) { + if (!selectedPrecisions.includes(p.precision)) continue; const hw = p.hwKey as string; let set = map.get(hw); if (!set) { @@ -369,7 +370,7 @@ const ScatterGraph = React.memo( set.add(p.precision); } return map; - }, [filteredData]); + }, [data, selectedPrecisions]); const processedOverlayData = useMemo(() => { if (!overlayData?.data) return [];