From f6285060907c4ff104e54d720f2ac5cb4c6fc1bb Mon Sep 17 00:00:00 2001 From: Markus Neusinger <2921697+MarkusNeusinger@users.noreply.github.com> Date: Wed, 10 Jun 2026 01:51:34 +0200 Subject: [PATCH 1/3] refactor(app): extract useForceGraphSimulation from MapPage - Move spec loading, similarity weights/minSim state, the KNN graph derivation (node cache + cluster seeding), the settling gate (settled/tickProgress), search matching, and the tuned d3-force configuration into src/hooks/useForceGraphSimulation.ts - Hook stays decoupled from ForceGraph2D's imperative ref API: it returns plain state + handlers (markSettled, handleEngineTick, resetWeights) and a static forceConfig; the only inversion is an onRepaint callback fired when thumbnails finish loading - MapPage keeps the canvas element, paint callbacks, UI overlays, and all ref-coupled interaction handlers (camera fit, force wiring, fly-to); behavior, analytics events, aria attributes and keyboard interactions are unchanged - outlierSquashForce + cluster color constants move to the hook module; MapPage.helpers stays a pure math module (header comment updated); hook exported from the src/hooks barrel - New useForceGraphSimulation.test.ts (17 tests) covers graph derivation, weight/minSim handling, the settling gate, search matching, and the relocated outlierSquashForce suite; MapPage.test.tsx keeps the rendering/interaction tests (frontend suite 552 -> 562) Part 10 of the frontend modernization roadmap. 
Co-Authored-By: Claude Fable 5 --- app/src/hooks/index.ts | 6 + app/src/hooks/useForceGraphSimulation.test.ts | 427 ++++++++++++++ app/src/hooks/useForceGraphSimulation.ts | 556 ++++++++++++++++++ app/src/pages/MapPage.helpers.ts | 4 +- app/src/pages/MapPage.test.tsx | 128 +--- app/src/pages/MapPage.tsx | 497 ++-------------- 6 files changed, 1043 insertions(+), 575 deletions(-) create mode 100644 app/src/hooks/useForceGraphSimulation.test.ts create mode 100644 app/src/hooks/useForceGraphSimulation.ts diff --git a/app/src/hooks/index.ts b/app/src/hooks/index.ts index 693fb8ca55..1eb2a4be31 100644 --- a/app/src/hooks/index.ts +++ b/app/src/hooks/index.ts @@ -14,6 +14,12 @@ export type { ThemeContextValue, } from 'src/hooks/useLayoutContext'; export { useThemeMode } from 'src/hooks/useThemeMode'; +export { useForceGraphSimulation } from 'src/hooks/useForceGraphSimulation'; +export type { + ForceGraphSimulation, + MapForceConfig, + MapGraphData, +} from 'src/hooks/useForceGraphSimulation'; export { useLatestRelease } from 'src/hooks/useLatestRelease'; export * from 'src/hooks/useFeaturedSpecs'; export * from 'src/hooks/usePlotOfTheDay'; diff --git a/app/src/hooks/useForceGraphSimulation.test.ts b/app/src/hooks/useForceGraphSimulation.test.ts new file mode 100644 index 0000000000..c8e64de844 --- /dev/null +++ b/app/src/hooks/useForceGraphSimulation.test.ts @@ -0,0 +1,427 @@ +import { act, renderHook, waitFor } from '@testing-library/react'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { + outlierSquashForce, + type SimNode, + useForceGraphSimulation, +} from 'src/hooks/useForceGraphSimulation'; +import { + DEFAULT_CATEGORY_WEIGHT, + type MapNode, + TAG_CATEGORIES, + type TagCategory, +} from 'src/pages/MapPage.helpers'; + +vi.mock('src/hooks/useLayoutContext', () => ({ + useTheme: () => ({ isDark: false }), +})); + +const mockSpecs = [ + { + id: 'scatter-basic', + title: 'Basic Scatter Plot', + preview_url_light: 
'https://example.com/scatter-basic-light.png', + preview_url_dark: 'https://example.com/scatter-basic-dark.png', + quality_score: 90, + tags: { plot_type: ['scatter'], data_type: ['numeric'], features: ['basic'] }, + impl_tags: { dependencies: ['scipy'] }, + }, + { + id: 'scatter-color-mapped', + title: 'Scatter with Color Mapping', + preview_url_light: 'https://example.com/scatter-color-light.png', + preview_url_dark: 'https://example.com/scatter-color-dark.png', + quality_score: 88, + tags: { plot_type: ['scatter'], data_type: ['numeric'], features: ['color-mapped'] }, + impl_tags: { dependencies: ['scipy'] }, + }, + { + id: 'line-basic', + title: 'Basic Line Chart', + preview_url_light: 'https://example.com/line-basic-light.png', + preview_url_dark: 'https://example.com/line-basic-dark.png', + quality_score: 92, + tags: { plot_type: ['line'], data_type: ['numeric'], features: ['basic'] }, + impl_tags: null, + }, +]; + +function mockFetchSuccess() { + vi.stubGlobal( + 'fetch', + vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve(mockSpecs), + }) + ); +} + +// Seeded simulation coordinates live on the node objects but are not part of +// the public MapNode surface (FG2D owns them at runtime). +type SeededNode = MapNode & { x?: number; y?: number }; + +async function renderLoadedHook() { + const view = renderHook(() => useForceGraphSimulation()); + await waitFor(() => expect(view.result.current.specs).not.toBeNull()); + return view; +} + +describe('useForceGraphSimulation', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + // Restore stubbed globals (fetch, …) after every test so they don't leak + // into subsequent suites. 
+ afterEach(() => { + vi.unstubAllGlobals(); + vi.restoreAllMocks(); + }); + + describe('graph data derivation', () => { + it('derives nodes, KNN links and legend buckets from the fetched specs', async () => { + mockFetchSuccess(); + const { result } = await renderLoadedHook(); + + const { graphData } = result.current; + expect(graphData.nodes.map(n => n.id).sort()).toEqual([ + 'line-basic', + 'scatter-basic', + 'scatter-color-mapped', + ]); + // Only the two scatters share enough weighted-IDF tag mass to clear the + // default similarity threshold — exactly one deduplicated KNN edge. + expect(graphData.links).toHaveLength(1); + expect([graphData.links[0].source, graphData.links[0].target].sort()).toEqual([ + 'scatter-basic', + 'scatter-color-mapped', + ]); + expect(graphData.links[0].weight).toBeGreaterThan(0); + // Legend buckets: scatter is the biggest cluster, line the runner-up. + expect(graphData.topTypes[0]).toBe('scatter'); + expect(graphData.typeCounts.get('scatter')).toBe(2); + expect(graphData.typeCounts.get('line')).toBe(1); + // nodeById covers every node for O(1) paint-callback lookups. + expect(result.current.nodeById.get('scatter-basic')?.title).toBe('Basic Scatter Plot'); + // neighbors is symmetric for the single link. + expect(result.current.neighbors.get('scatter-basic')?.has('scatter-color-mapped')).toBe(true); + expect(result.current.neighbors.get('scatter-color-mapped')?.has('scatter-basic')).toBe(true); + }); + + it('seeds initial node positions per cluster (warm start for the simulation)', async () => { + mockFetchSuccess(); + const { result } = await renderLoadedHook(); + + const nodes = result.current.graphData.nodes as SeededNode[]; + // Every node should have a numeric seed position before FG2D ever ticks the simulation — + // without seeding, FG2D's random initialiser would leave x/y undefined here. 
+ for (const n of nodes) { + expect(typeof n.x).toBe('number'); + expect(typeof n.y).toBe('number'); + expect(Number.isFinite(n.x as number)).toBe(true); + expect(Number.isFinite(n.y as number)).toBe(true); + } + // Same plot_type (= colorBucket) should land near the same centroid; nodes from + // different buckets should land further apart on average. Take the two scatters + // (bucketed together) vs. line-basic and compare distances. + const scatterA = nodes.find(n => n.id === 'scatter-basic')!; + const scatterB = nodes.find(n => n.id === 'scatter-color-mapped')!; + const line = nodes.find(n => n.id === 'line-basic')!; + const dist = (a: typeof scatterA, b: typeof scatterA) => + Math.hypot((a.x ?? 0) - (b.x ?? 0), (a.y ?? 0) - (b.y ?? 0)); + expect(dist(scatterA, scatterB)).toBeLessThan(dist(scatterA, line)); + }); + + it('surfaces an HTTP error and keeps the graph empty', async () => { + vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ ok: false, status: 500 })); + const { result } = renderHook(() => useForceGraphSimulation()); + + await waitFor(() => expect(result.current.error).toBe('HTTP 500')); + expect(result.current.specs).toBeNull(); + expect(result.current.graphData.nodes).toHaveLength(0); + expect(result.current.graphData.links).toHaveLength(0); + }); + }); + + describe('similarity weight handling', () => { + it('activeCategory follows the highest weight and falls back to plot_type', async () => { + mockFetchSuccess(); + const { result } = await renderLoadedHook(); + + // Defaults privilege plot_type (2.0). + expect(result.current.activeCategory).toBe('plot_type'); + + // Bump features above plot_type — legend buckets switch category. + act(() => { + result.current.setWeights(w => ({ ...w, features: 3 })); + }); + expect(result.current.activeCategory).toBe('features'); + expect(result.current.graphData.topTypes).toContain('basic'); + + // All-zero weights fall back to plot_type. 
+ const zeroWeights = Object.fromEntries(TAG_CATEGORIES.map(c => [c, 0])) as Record< + TagCategory, + number + >; + act(() => { + result.current.setWeights(zeroWeights); + }); + expect(result.current.activeCategory).toBe('plot_type'); + }); + + it('drops KNN edges when minSim rises above the pair similarity', async () => { + mockFetchSuccess(); + const { result } = await renderLoadedHook(); + + expect(result.current.graphData.links).toHaveLength(1); + act(() => { + result.current.setMinSim(0.6); + }); + expect(result.current.graphData.links).toHaveLength(0); + }); + + it('preserves node positions and image caches across weight changes', async () => { + mockFetchSuccess(); + const { result } = await renderLoadedHook(); + + // Simulate FG2D having ticked the layout (position mutation) and a + // thumbnail having loaded into the per-node image cache. + const before = result.current.graphData.nodes.find( + n => n.id === 'scatter-basic' + )! as SeededNode; + before.x = 1234; + const fakeImg = { src: 'x' } as unknown as HTMLImageElement; + before.imgs.set(400, fakeImg); + + act(() => { + result.current.setWeights(w => ({ ...w, techniques: 1 })); + }); + + const after = result.current.graphData.nodes.find( + n => n.id === 'scatter-basic' + )! as SeededNode; + // Fresh node object per derivation, but warm-started from the cache: + // last position and the loaded HTMLImageElements survive the re-derive + // (otherwise every slider tick would flicker all thumbnails). 
+ expect(after).not.toBe(before); + expect(after.x).toBe(1234); + expect(after.imgs).toBe(before.imgs); + expect(after.imgs.get(400)).toBe(fakeImg); + }); + + it('resetWeights restores the default weights and edge threshold', async () => { + mockFetchSuccess(); + const { result } = await renderLoadedHook(); + const initialMinSim = result.current.minSim; + + act(() => { + result.current.setWeights(w => ({ ...w, domain: 4 })); + result.current.setMinSim(0.4); + }); + expect(result.current.weights.domain).toBe(4); + expect(result.current.minSim).toBe(0.4); + + act(() => { + result.current.resetWeights(); + }); + expect(result.current.weights).toEqual(DEFAULT_CATEGORY_WEIGHT); + expect(result.current.minSim).toBe(initialMinSim); + }); + }); + + describe('settling gate', () => { + it('settles via markSettled and re-arms when the graph re-derives', async () => { + mockFetchSuccess(); + const { result } = await renderLoadedHook(); + + // Still cooling after the initial derive. + expect(result.current.settled).toBe(false); + + act(() => { + result.current.markSettled(); + }); + expect(result.current.settled).toBe(true); + + // A weight change re-derives graphData → the gate must re-arm and the + // progress bar must restart from zero for the new cooling phase. + act(() => { + for (let i = 0; i < 6; i++) result.current.handleEngineTick(); + }); + expect(result.current.tickProgress).toBeGreaterThan(0); + act(() => { + result.current.setWeights(w => ({ ...w, features: 2 })); + }); + expect(result.current.settled).toBe(false); + expect(result.current.tickProgress).toBe(0); + }); + + it('throttles tick progress to every 6th engine tick', async () => { + mockFetchSuccess(); + const { result } = await renderLoadedHook(); + + act(() => { + for (let i = 0; i < 5; i++) result.current.handleEngineTick(); + }); + // No flush before the batch boundary — the page must not re-render at 60 Hz. 
+ expect(result.current.tickProgress).toBe(0); + + act(() => { + result.current.handleEngineTick(); + }); + // 6 of 300 cooldown ticks → 2 %. + expect(result.current.tickProgress).toBeCloseTo(0.02, 5); + }); + }); + + describe('search matching', () => { + it('matches every token against title, id and tags, best score first', async () => { + mockFetchSuccess(); + const { result } = await renderLoadedHook(); + + expect(result.current.searchMatches).toEqual([]); + + act(() => { + result.current.setSearchQuery('scatter'); + }); + // Both scatters title-match (score ties) → alphabetical title order. + expect(result.current.searchMatches.map(s => s.id)).toEqual([ + 'scatter-basic', + 'scatter-color-mapped', + ]); + + // Multi-token queries require every token to match somewhere. + act(() => { + result.current.setSearchQuery('scatter color'); + }); + expect(result.current.searchMatches.map(s => s.id)).toEqual(['scatter-color-mapped']); + + // Tag-only hits still match (dependencies:scipy comes from impl_tags). + act(() => { + result.current.setSearchQuery('scipy'); + }); + expect(result.current.searchMatches.map(s => s.id).sort()).toEqual([ + 'scatter-basic', + 'scatter-color-mapped', + ]); + }); + }); + + describe('force configuration', () => { + it('scales link distance/strength with similarity and caps the pull', async () => { + mockFetchSuccess(); + const { result } = await renderLoadedHook(); + const { forceConfig } = result.current; + + // Higher similarity → shorter link (tighter cluster). + expect(forceConfig.linkDistance({ source: 'a', target: 'b', weight: 1 })).toBeLessThan( + forceConfig.linkDistance({ source: 'a', target: 'b', weight: 0.1 }) + ); + // Strength is capped at 0.4 and floored at 0.02. + expect(forceConfig.linkStrength({ source: 'a', target: 'b', weight: 5 })).toBe(0.4); + expect(forceConfig.linkStrength({ source: 'a', target: 'b', weight: 0 })).toBe(0.02); + // Engine props are wired through as positive tuned values. 
+ expect(forceConfig.cooldownTicks).toBeGreaterThan(0); + expect(forceConfig.alphaDecay).toBeGreaterThan(0); + expect(forceConfig.alphaMin).toBeGreaterThan(0); + expect(forceConfig.velocityDecay).toBeGreaterThan(0); + }); + }); + + describe('outlierSquashForce', () => { + // Pure unit tests that exercise the force math directly. We bypass the + // d3-force harness because the force's contract is "modify vx/vy of + // outlier nodes in place"; the harness adds nothing beyond invoking + // force(alpha) and force.initialize(nodes). + type Sim = SimNode & { x: number; y: number; vx: number; vy: number }; + const makeNode = (x: number, y: number): Sim => ({ x, y, vx: 0, vy: 0 }); + + it('is a no-op when there are no nodes', () => { + const force = outlierSquashForce(0.95, 200, 0.18); + // initialize with empty array; force(alpha) must not throw. + (force as unknown as { initialize: (n: SimNode[]) => void }).initialize([]); + expect(() => force(1)).not.toThrow(); + }); + + it('is a no-op for graphs of fewer than 2 nodes', () => { + const force = outlierSquashForce(0.95, 200, 0.18); + const nodes: Sim[] = [makeNode(1000, 0)]; + (force as unknown as { initialize: (n: SimNode[]) => void }).initialize(nodes); + force(1); + expect(nodes[0].vx).toBe(0); + expect(nodes[0].vy).toBe(0); + }); + + it('leaves nodes inside the threshold untouched (inner geometry preserved)', () => { + // 99 inner nodes co-located at the origin + 1 far outlier. All inner + // distances to the centroid are exactly equal, so the percentile + // cutoff R lands exactly at the inner radius and the early + // `r <= R → continue` short-circuit fires for every inner node. + // The outlier is the only node above R. 
+ const force = outlierSquashForce(0.95, 200, 0.18); + const inner: Sim[] = Array.from({ length: 99 }, () => makeNode(0, 0)); + const outlier = makeNode(5000, 0); + const nodes: Sim[] = [...inner, outlier]; + (force as unknown as { initialize: (n: SimNode[]) => void }).initialize(nodes); + force(1); + for (const n of inner) { + expect(n.vx).toBe(0); + expect(n.vy).toBe(0); + } + // Outlier was pulled inward — vx is opposite-sign to its position. + expect(outlier.vx).toBeLessThan(0); + expect(outlier.vy).toBe(0); + }); + + it('still squashes outliers in small graphs (off-by-one regression guard)', () => { + // Naive `floor(length * p)` would pick index 19 (the max) on n = 20 + // and never trigger the squash. The (n - 1) * p indexing must keep + // at least the most-outlying node above R. + const force = outlierSquashForce(0.95, 200, 0.18); + const nodes: Sim[] = Array.from({ length: 20 }, (_, i) => makeNode(i, 0)); + // Push the last node much further so it's the unambiguous outlier. + nodes[19] = makeNode(10_000, 0); + (force as unknown as { initialize: (n: SimNode[]) => void }).initialize(nodes); + force(1); + // The outlier must have a non-zero inward correction. + expect(nodes[19].vx).not.toBe(0); + expect(nodes[19].vx).toBeLessThan(0); + }); + + it('keeps the velocity correction finite even for distant outliers', () => { + // The compression map r' = R + (r - R)/(1 + (r - R)/k) has an + // asymptote at R + k, so even a node at distance 1e6 produces a + // bounded velocity correction. This is the property that prevents + // a single rogue node from blowing up the simulation. 
+ const force = outlierSquashForce(0.95, 200, 0.18); + const inner: Sim[] = Array.from({ length: 99 }, () => makeNode(0, 0)); + const far = makeNode(1_000_000, 0); + const nodes: Sim[] = [...inner, far]; + (force as unknown as { initialize: (n: SimNode[]) => void }).initialize(nodes); + force(1); + expect(far.vx).toBeLessThan(0); + expect(Number.isFinite(far.vx)).toBe(true); + // |vx| upper bound: |position - 0| * strength * alpha = 1e6 * 0.18. + // The actual value is much smaller because (targetR - r) / r is + // close to -1 once r >> R, so the correction approaches -position. + expect(Math.abs(far.vx)).toBeLessThan(1_000_000); + }); + + it('scales the velocity correction with alpha', () => { + const force = outlierSquashForce(0.95, 200, 0.18); + const inner: Sim[] = Array.from({ length: 99 }, () => makeNode(0, 0)); + const hot = makeNode(5_000, 0); + const cool = makeNode(5_000, 0); + // First simulation — alpha = 1 (hot). + (force as unknown as { initialize: (n: SimNode[]) => void }).initialize([...inner, hot]); + force(1); + // Second simulation — alpha = 0.1 (cooling). + const force2 = outlierSquashForce(0.95, 200, 0.18); + const inner2: Sim[] = Array.from({ length: 99 }, () => makeNode(0, 0)); + (force2 as unknown as { initialize: (n: SimNode[]) => void }).initialize([...inner2, cool]); + force2(0.1); + // Cooler alpha → ~10× smaller velocity correction. 
+ expect(Math.abs(hot.vx)).toBeGreaterThan(Math.abs(cool.vx) * 5); + }); + }); +}); diff --git a/app/src/hooks/useForceGraphSimulation.ts b/app/src/hooks/useForceGraphSimulation.ts new file mode 100644 index 0000000000..2ad634707b --- /dev/null +++ b/app/src/hooks/useForceGraphSimulation.ts @@ -0,0 +1,556 @@ +/** + * useForceGraphSimulation — data + simulation-state orchestration for the + * /map page's force-directed graph: spec loading, per-category similarity + * weights, the KNN graph derivation (nodes / links / legend buckets), the + * settling gate that swallows pointer input while the layout cools, search + * matching over the loaded specs, and the tuned d3-force configuration. + * + * Deliberately NOT coupled to ForceGraph2D's imperative ref API: the view + * (MapPage) keeps the canvas element, the paint callbacks, and every + * handler that needs the graph ref (camera fit, force wiring, refresh). + * The hook returns plain state + setters/handlers for the view to wire up; + * the one inversion of control is `onRepaint`, a callback fired whenever a + * thumbnail finishes loading so the view can trigger a canvas repaint. 
+ */ + +import { type Dispatch, type SetStateAction, useEffect, useMemo, useRef, useState } from 'react'; + +import { type Force, forceCollide } from 'd3-force-3d'; + +import { useTheme } from 'src/hooks/useLayoutContext'; +import { ApiError, apiGet, endpoints } from 'src/lib/api'; +import { + buildKNNLinks, + categoryValueCounts, + computeIDF, + DEFAULT_CATEGORY_WEIGHT, + flattenTags, + type MapLink, + type MapNode, + preloadImages, + primaryCategoryValue, + selectMapThumbUrl, + type SpecMapItem, + TAG_CATEGORIES, + type TagCategory, + topCategoryValues, +} from 'src/pages/MapPage.helpers'; + +export const NODE_SIZE = 60; // graph-space size of a node — large enough to read the thumbnail without hovering +const COOLDOWN_TICKS = 300; // simulation lifetime in ticks; the engine cap and alpha-decay below both derive from this so they stop together +// Stop the engine as soon as motion is no longer perceptible. With d3-force's default +// alphaMin (0.001), alpha keeps decaying for ~150 more ticks after movement +// drops below the visible threshold (alpha ≈ 0.01) — that tail is dead time +// for the user. We bump alphaMin to 0.01 so engine-stop coincides with where +// the layout already looks frozen. +const COOLDOWN_ALPHA_MIN = 0.01; +// Couple alpha decay to COOLDOWN_TICKS so the engine stops exactly when the +// progress bar (denominated in COOLDOWN_TICKS) reaches 100%. Without this, +// alpha hits alphaMin before the bar is full and the "map.simulate()" +// overlay fades out with the bar still partway across. +// alpha(n) = (1 - decay)^n → solve (1 - decay)^COOLDOWN_TICKS = alphaMin. 
+const COOLDOWN_ALPHA_DECAY = 1 - Math.pow(COOLDOWN_ALPHA_MIN, 1 / COOLDOWN_TICKS); +const CLUSTER_SEED_RADIUS = 600; // distance from origin where each colorBucket cluster's centroid is initially placed +const CLUSTER_SEED_JITTER = 150; // per-node random offset around the cluster centroid — small enough to keep clusters identifiable, large enough that collision can settle them +const KNN_K = 8; // edges per node in the sparse KNN graph +// Default threshold tuned for the plot_type-dominant default. Bumped up +// from 0.05 because once secondary categories (features, techniques, …) +// have non-zero weight, common tags like `features:basic` create weak +// cross-cluster bridges in the 0.05–0.12 range that collapse the graph +// into one blob. At 0.15 those bridges drop out and clusters stay distinct. +// Exposed as a live slider in the weights panel for power users. +const DEFAULT_MIN_SIM = 0.15; +export const MIN_SIM_BOUNDS = { min: 0.05, max: 0.4, step: 0.01 } as const; +// Forces: tuned so KNN edges + collision shape the layout while many-body +// repulsion stays GENTLE — collision already enforces minimum spacing, and +// strong repulsion would just blow the graph wide enough that zoomToFit +// zooms out too far for thumbnails to be readable. Goal: graph extent stays +// small enough that zoomToFit displays nodes at a generous CSS-pixel size. 
+const REPULSION = -50; // forceManyBody strength +const LINK_DISTANCE_MIN = NODE_SIZE * 1.1; // shortest link (highest sim) +const LINK_DISTANCE_MAX = NODE_SIZE * 3.5; // longest link (lowest sim above threshold) +const LINK_STRENGTH_CAP = 0.4; // max pull from a single link +const COLLIDE_PADDING = 6; // px padding on top of the bounding-box radius — visible breathing room between thumbnails +const CENTER_GRAVITY = 0.04; // gentle pull toward the viewport center; ~25× weaker than d3-force-3d's default to corral outliers without flattening clusters +// Outlier-squash: a custom radial force that activates only beyond a +// distance percentile of the centroid. Inside the threshold, geometry +// is untouched — the inner cluster keeps its exact shape. Outside, each +// outlier's distance is compressed via a sigmoid-like map +// r' = R + (r - R) / (1 + (r - R) / k) +// so far-flung points stay visibly *separate* (their order is preserved) +// but bounded — the asymptote is R + k. This corrects the "everything +// collapses to a dot because of one runaway outlier" zoomToFit problem +// without needing stronger global gravity (which would crush clusters). +const OUTLIER_THRESHOLD_PERCENTILE = 0.95; // distance percentile beyond which compression starts +const OUTLIER_SQUASH_K = 120; // graph-units of extra room outliers can use beyond R; smaller = harder squash +const OUTLIER_SQUASH_STRENGTH = 0.18; // velocity-correction factor; tuned so outliers settle within COOLDOWN_TICKS + +// Top-N most frequent plot_types each get a distinct imprint border color +// so the catalog's biggest categories (line, scatter, bar, …) stand out at +// a glance. Specs that don't fall into the top-N keep a neutral border. +// 8 categorical hues in imprint's hybrid-v3 sort order. 
+export const CLUSTER_COLORS = [ + '#009E73', // slot 0 — brand green + '#C475FD', // slot 1 — lavender + '#4467A3', // slot 2 — blue + '#BD8233', // slot 3 — ochre + '#AE3030', // slot 4 — matte red + '#2ABCCD', // slot 5 — cyan + '#954477', // slot 6 — rose + '#99B314', // slot 7 — lime +] as const; + +export function colorFor(bucket: string | null, topTypes: string[]): string | null { + if (!bucket) return null; + const idx = topTypes.indexOf(bucket); + if (idx < 0) return null; + return CLUSTER_COLORS[idx % CLUSTER_COLORS.length]; +} + +// Custom d3-force that compresses extreme outliers radially toward the +// cluster centroid while leaving inner geometry untouched. See the block +// comment on OUTLIER_THRESHOLD_PERCENTILE for the math; this is the +// implementation. The simulation calls force(alpha) every tick, alpha +// decays from 1 → 0, so the velocity correction tapers off as the layout +// cools — the force is *active* during the same window the gate covers, +// then becomes a no-op once outliers are at their compressed targets. +// Exported for unit tests — the simulation only ever calls this through +// d3-force's `force(alpha)` interface, so the public surface is internal. +export type SimNode = { x?: number; y?: number; vx?: number; vy?: number }; +export function outlierSquashForce(percentile: number, k: number, strength: number) { + let nodes: SimNode[] = []; + function force(alpha: number) { + if (nodes.length === 0) return; + let cx = 0, + cy = 0, + n = 0; + for (const node of nodes) { + if (node.x == null || node.y == null) continue; + cx += node.x; + cy += node.y; + n++; + } + if (n === 0) return; + cx /= n; + cy /= n; + // One pass to compute distances; second pass to apply velocity + // adjustment to outliers. Allocating a fresh array per tick is fine + // at ~300 nodes (~3 µs); we'd only avoid it at 10k+. 
+ const dists: number[] = new Array(nodes.length); + for (let i = 0; i < nodes.length; i++) { + const node = nodes[i]; + if (node.x == null || node.y == null) { + dists[i] = 0; + continue; + } + dists[i] = Math.hypot(node.x - cx, node.y - cy); + } + const sorted = dists.slice().sort((a, b) => a - b); + // Use the (length - 1) * p index (numpy "linear" / "lower" interpolation + // for a discrete percentile). The naive `length * p` rounds up to + // `length - 1` for any n ≤ 1/(1-p) — i.e. with p = 0.95 and n ≤ 20 the + // cutoff would be the *max* distance and the squash force would silently + // disable itself. Filtered subsets of the catalog can easily land in + // that range, so we never want the cutoff to coincide with the maximum. + if (sorted.length < 2) return; + const idx = Math.floor((sorted.length - 1) * percentile); + const R = sorted[idx]; + if (!(R > 0)) return; + for (let i = 0; i < nodes.length; i++) { + const r = dists[i]; + if (r <= R) continue; + const node = nodes[i]; + if (node.x == null || node.y == null) continue; + const excess = r - R; + const compressed = excess / (1 + excess / k); + const targetR = R + compressed; + const factor = (targetR - r) / r; // negative — pulls toward the centroid + const dx = node.x - cx; + const dy = node.y - cy; + node.vx = (node.vx ?? 0) + dx * factor * strength * alpha; + node.vy = (node.vy ?? 0) + dy * factor * strength * alpha; + } + } + force.initialize = (n: SimNode[]) => { + nodes = n; + }; + return force; +} + +/** Everything the graph derivation produces in one pass. */ +export interface MapGraphData { + nodes: MapNode[]; + links: MapLink[]; + topTypes: string[]; + typeCounts: Map; + idf: Map; +} + +/** + * The tuned force-simulation parameters MapPage forwards to ForceGraph2D + * (engine props + the d3 forces it wires up in onRenderFramePre). Plain + * values and factories — nothing here touches the imperative graph ref. 
+ */ +export interface MapForceConfig { + cooldownTicks: number; + alphaDecay: number; + alphaMin: number; + velocityDecay: number; + chargeStrength: number; + linkDistance: (l: MapLink) => number; + linkStrength: (l: MapLink) => number; + centerGravity: number; + createCollideForce: () => Force; + createOutlierSquashForce: () => ReturnType; +} + +// Static by construction — a single module-level object keeps the identity stable +// across renders so consumers can safely list it in dependency arrays. +const FORCE_CONFIG: MapForceConfig = { + cooldownTicks: COOLDOWN_TICKS, + alphaDecay: COOLDOWN_ALPHA_DECAY, + alphaMin: COOLDOWN_ALPHA_MIN, + velocityDecay: 0.35, + // Stronger many-body repulsion than the default ~-30. + chargeStrength: REPULSION, + // Link distance/strength scale with weighted-Jaccard similarity: + // tighter clusters for highly related specs, looser otherwise. + linkDistance: (l: MapLink) => { + const w = l.weight ?? 0.3; + return LINK_DISTANCE_MIN + (1 - Math.min(1, w)) * (LINK_DISTANCE_MAX - LINK_DISTANCE_MIN); + }, + linkStrength: (l: MapLink) => + Math.max(0.02, Math.min(LINK_STRENGTH_CAP, (l.weight ?? 0.3) * 0.4)), + // Mild centering force so disconnected outliers (no KNN edges because all + // sims < threshold) drift back toward the cluster mass instead of + // vanishing to the corners. Strength is well below the default 1.0 so + // cluster shapes stay intact. + centerGravity: CENTER_GRAVITY, + // Per-node collision: prevents thumbnail overlap. Radius = half the longer + // side of the bounding box plus a small padding. + createCollideForce: () => + forceCollide(() => NODE_SIZE / 2 + COLLIDE_PADDING).iterations(2), + createOutlierSquashForce: () => + outlierSquashForce(OUTLIER_THRESHOLD_PERCENTILE, OUTLIER_SQUASH_K, OUTLIER_SQUASH_STRENGTH), +}; + +export interface UseForceGraphSimulationOptions { + /** + * Fired whenever a node thumbnail finishes loading so the view can repaint + * the canvas (MapPage passes `() => fgRef.current?.refresh?.()`). 
Held in a + * ref internally, so an inline arrow is fine — it never re-runs effects. + */ + onRepaint?: () => void; +} + +export interface ForceGraphSimulation { + specs: SpecMapItem[] | null; + error: string | null; + weights: Record; + setWeights: Dispatch>>; + minSim: number; + setMinSim: Dispatch>; + /** Reset weights + edge threshold back to the tuned defaults. */ + resetWeights: () => void; + activeCategory: TagCategory; + graphData: MapGraphData; + nodeById: Map; + neighbors: Map>; + settled: boolean; + /** Flip the settling gate once the engine reports it stopped. */ + markSettled: () => void; + tickProgress: number; + /** Forward ForceGraph2D's onEngineTick here to drive the progress bar. */ + handleEngineTick: () => void; + searchQuery: string; + setSearchQuery: Dispatch>; + searchMatches: SpecMapItem[]; + forceConfig: MapForceConfig; +} + +export function useForceGraphSimulation({ + onRepaint, +}: UseForceGraphSimulationOptions = {}): ForceGraphSimulation { + const { isDark } = useTheme(); + + // data state + const [specs, setSpecs] = useState(null); + const [error, setError] = useState(null); + // Per-category weight overrides for the similarity calculation. Bound to + // the weights panel sliders. Live-updates KNN edges + simulation on change. + const [weights, setWeights] = useState>(DEFAULT_CATEGORY_WEIGHT); + const [minSim, setMinSim] = useState(DEFAULT_MIN_SIM); + // settled = true once the force simulation has finished cooling. Until + // then, the canvas is overlaid by a subtle gate that swallows pointer + // input — a click on a still-moving node would otherwise pin the wrong + // spec by the time the simulation settles around it. Resets to false + // whenever graphData re-derives (filter / weight / category change), so + // the gate also covers subsequent re-layouts. + const [settled, setSettled] = useState(false); + // Throttled tick counter for the "computing" overlay's progress bar. 
+ // We update React state at most every PROGRESS_TICK_BATCH simulation + // ticks to avoid re-rendering the page at ~60 Hz while the layout cools. + // tickCountRef holds the un-throttled count so we know when to flush. + const tickCountRef = useRef(0); + const [tickProgress, setTickProgress] = useState(0); + const [searchQuery, setSearchQuery] = useState(''); + + // Keep the latest onRepaint without making it an effect dependency — the + // preload effect must re-run on graphData changes only, and callers pass + // inline arrows. Render-time ref writes are permitted by the project's + // eslint config (`react-hooks/refs: off`): idempotent, pre-commit. + const onRepaintRef = useRef(onRepaint); + onRepaintRef.current = onRepaint; + + // 1. fetch the map payload once on mount + useEffect(() => { + const ctrl = new AbortController(); + apiGet(endpoints.specsMap, { signal: ctrl.signal }) + .then(setSpecs) + .catch((err: unknown) => { + if (err instanceof Error && err.name === 'AbortError') return; + // Keep the pre-migration user-visible message ("HTTP ") rather + // than surfacing the longer ApiError format in the error banner. + if (err instanceof ApiError) setError(`HTTP ${err.status}`); + else setError(err instanceof Error ? err.message : 'Failed to load map data'); + }); + return () => ctrl.abort(); + }, []); + + // The category that drives the legend + node border colors: whichever + // currently has the highest weight (plot_type wins on ties because it's + // the first entry of TAG_CATEGORIES and we use strictly-greater compare). + // Falls back to plot_type when all weights are 0. + const activeCategory: TagCategory = useMemo(() => { + let maxWeight = -Infinity; + let active: TagCategory = 'plot_type'; + for (const c of TAG_CATEGORIES) { + if (weights[c] > maxWeight) { + maxWeight = weights[c]; + active = c; + } + } + return maxWeight > 0 ? 
active : 'plot_type'; + }, [weights]); + + // graphData rebuilds whenever weights/minSim/activeCategory change + // (because links + colorBucket depend on them). Without this cache, every + // slider-drag tick would recreate every MapNode with empty imgs/pendingTiers + // Maps, dropping the loaded HTMLImageElements — the canvas would then paint + // fallback rects until each re-fires onload, producing a visible + // flicker across all 327 thumbnails on every onChange tick. We keep a + // stable id → MapNode cache here and reuse imgs/pendingTiers as long as + // thumbUrl is unchanged (theme toggle invalidates). + const nodeCacheRef = useRef>(new Map()); + + // 2. derive graph data from specs/theme (pure — no setState in effect) + const graphData = useMemo(() => { + if (!specs) { + return { nodes: [], links: [], topTypes: [], typeCounts: new Map(), idf: new Map() }; + } + const idf = computeIDF(specs); + const topTypes = topCategoryValues(specs, activeCategory, CLUSTER_COLORS.length); + const typeCounts = categoryValueCounts(specs, activeCategory); + const cache = nodeCacheRef.current; + const nextCache = new Map(); + // Pre-compute one centroid per colorBucket on a circle around the origin. + // Seeding each node near its cluster centroid (instead of the FG2D + // default of random positions everywhere) gives the simulation a warm + // start: clusters don't have to first separate from a uniform soup, and + // the same number of cooldown ticks now produces visibly cleaner + // separation. Null-bucket nodes sit at the origin and let the link force + // pull them toward whatever clusters they connect to. 
+ const clusterCentroids = new Map(); + topTypes.forEach((t, i) => { + const angle = (i / topTypes.length) * Math.PI * 2; + clusterCentroids.set(t, { + x: Math.cos(angle) * CLUSTER_SEED_RADIUS, + y: Math.sin(angle) * CLUSTER_SEED_RADIUS, + }); + }); + // Hash-based jitter so seed positions are stable across re-renders for + // the same spec id — avoids reshuffling on filter changes. + const jitter = (id: string, salt: number) => { + let h = salt; + for (let i = 0; i < id.length; i++) h = (h * 31 + id.charCodeAt(i)) | 0; + return ((h & 0xffff) / 0xffff - 0.5) * 2 * CLUSTER_SEED_JITTER; + }; + const nodes: (MapNode & { x?: number; y?: number; vx?: number; vy?: number })[] = specs.map( + s => { + const v = primaryCategoryValue(s, activeCategory); + const colorBucket = topTypes.includes(v) ? v : null; + const thumbUrl = selectMapThumbUrl(s, isDark); + const cached = cache.get(s.id) as + | (MapNode & { x?: number; y?: number; vx?: number; vy?: number }) + | undefined; + const reuse = cached && cached.thumbUrl === thumbUrl; + // Warm-start preference: keep the simulation's last x/y if we have it + // (filter / weight tweaks reuse positions and refine in place). Cold + // start: seed from the cluster centroid + stable per-id jitter. + const seedCenter = colorBucket ? clusterCentroids.get(colorBucket) : null; + const x = cached?.x ?? (seedCenter ? seedCenter.x + jitter(s.id, 1) : jitter(s.id, 3)); + const y = cached?.y ?? (seedCenter ? seedCenter.y + jitter(s.id, 2) : jitter(s.id, 5)); + const node: MapNode & { x: number; y: number; vx: number; vy: number } = { + id: s.id, + title: s.title, + tags: flattenTags(s), + colorBucket, + thumbUrl, + imgs: reuse ? cached!.imgs : new Map(), + pendingTiers: reuse ? cached!.pendingTiers : new Set(), + x, + y, + vx: cached?.vx ?? 0, + vy: cached?.vy ?? 
0, + }; + nextCache.set(s.id, node); + return node; + } + ); + nodeCacheRef.current = nextCache; + const links = buildKNNLinks(specs, idf, KNN_K, minSim, weights); + return { nodes, links, topTypes, typeCounts, idf }; + }, [specs, isDark, weights, minSim, activeCategory]); + + // Re-arm the settling gate whenever graphData re-derives — FG2D reheats + // the simulation in response, and we want the gate to cover the new + // cooling phase the same way it covers the initial one. No-op on the + // very first render (settled is already false) and while specs are + // still loading. + // + // Implemented via the "store previous prop in state" pattern (see + // https://react.dev/reference/react/useState#storing-information-from-previous-renders) + // instead of useEffect: React supports calling setState during render of + // the *same* component, batches the updates, and re-renders once before + // commit — no infinite loop, and the rule that bans setState in effects + // doesn't apply to setState during render. + const [prevGraphData, setPrevGraphData] = useState(graphData); + if (graphData !== prevGraphData) { + setPrevGraphData(graphData); + if (graphData.nodes.length > 0) { + setSettled(false); + setTickProgress(0); + tickCountRef.current = 0; + } + } + + // Eager-load the 400-tier thumbnails so something paints fast. Higher tiers + // are fetched lazily from nodeCanvasObject when the user zooms in. + useEffect(() => { + if (graphData.nodes.length === 0) return; + const nodeById = new Map(graphData.nodes.map(n => [n.id, n])); + let cancelled = false; + preloadImages( + graphData.nodes.map(n => ({ id: n.id, thumbUrl: n.thumbUrl })), + (id, tier, img) => { + if (cancelled) return; + const n = nodeById.get(id); + if (n) n.imgs.set(tier, img); + onRepaintRef.current?.(); + } + ); + return () => { + cancelled = true; + }; + }, [graphData]); + + // 3. neighbor lookup for hover highlight (built once per links change) + // Precomputed id → node lookup. 
linkColor/linkWidth fire once per link + // per frame (~1k links), and a graphData.nodes.find() inside each call + // would be O(N²) total per frame; the Map keeps it O(1). + const nodeById = useMemo(() => { + const map = new Map(); + for (const n of graphData.nodes) map.set(n.id, n); + return map; + }, [graphData.nodes]); + + const neighbors = useMemo(() => { + const map = new Map>(); + for (const l of graphData.links) { + if (!map.has(l.source)) map.set(l.source, new Set()); + if (!map.has(l.target)) map.set(l.target, new Set()); + map.get(l.source)!.add(l.target); + map.get(l.target)!.add(l.source); + } + return map; + }, [graphData.links]); + + // 4. Precompute lowercased searchable fields per spec so each keystroke + // only does .includes() checks, not a fresh tag-flatten + lowercase. + const searchHaystacks = useMemo(() => { + if (!specs) return []; + return specs.map(s => ({ + spec: s, + titleL: s.title.toLowerCase(), + idL: s.id.toLowerCase(), + tagsL: flattenTags(s).map(t => t.toLowerCase()), + })); + }, [specs]); + + // 5. Match the search query: every whitespace-separated token must appear + // somewhere (title / id / tag), score weighted by where it hit. Top 8. + const searchMatches = useMemo(() => { + const q = searchQuery.trim().toLowerCase(); + if (!q) return []; + const tokens = q.split(/\s+/).filter(Boolean); + const scored: { spec: SpecMapItem; score: number }[] = []; + for (const h of searchHaystacks) { + let score = 0; + let allMatch = true; + for (const tok of tokens) { + const inTitle = h.titleL.includes(tok); + const inId = h.idL.includes(tok); + const inTags = h.tagsL.some(t => t.includes(tok)); + if (!(inTitle || inId || inTags)) { + allMatch = false; + break; + } + score += inTitle ? 3 : inId ? 
2 : 1; + } + if (allMatch) scored.push({ spec: h.spec, score }); + } + scored.sort((a, b) => b.score - a.score || a.spec.title.localeCompare(b.spec.title)); + return scored.slice(0, 8).map(x => x.spec); + }, [searchQuery, searchHaystacks]); + + const markSettled = () => setSettled(true); + + // Drive the loading-overlay progress bar. Each simulation tick bumps the + // un-throttled ref; we flush to React state every PROGRESS_TICK_BATCH + // ticks (~5×/s at 60 Hz) so the bar advances smoothly without + // re-rendering the page on every tick. + const handleEngineTick = () => { + tickCountRef.current += 1; + const PROGRESS_TICK_BATCH = 6; + if (tickCountRef.current % PROGRESS_TICK_BATCH === 0) { + setTickProgress(Math.min(1, tickCountRef.current / COOLDOWN_TICKS)); + } + }; + + const resetWeights = () => { + setWeights(DEFAULT_CATEGORY_WEIGHT); + setMinSim(DEFAULT_MIN_SIM); + }; + + return { + specs, + error, + weights, + setWeights, + minSim, + setMinSim, + resetWeights, + activeCategory, + graphData, + nodeById, + neighbors, + settled, + markSettled, + tickProgress, + handleEngineTick, + searchQuery, + setSearchQuery, + searchMatches, + forceConfig: FORCE_CONFIG, + }; +} diff --git a/app/src/pages/MapPage.helpers.ts b/app/src/pages/MapPage.helpers.ts index 34e31b397e..1959e85df3 100644 --- a/app/src/pages/MapPage.helpers.ts +++ b/app/src/pages/MapPage.helpers.ts @@ -1,7 +1,9 @@ /** * Helpers for the /map page: tag flattening, IDF weighting, weighted * Jaccard similarity, KNN edge construction, plus thumbnail-tier - * selection and image preloading. + * selection and image preloading. Consumed by useForceGraphSimulation + * (graph derivation + preloading) and by MapPage's canvas paint callbacks + * (tier selection + geometry). * * Most helpers are pure (math + selection logic) so they can be unit * tested in MapPage.helpers.test.ts. 
The two exceptions — preloadImages diff --git a/app/src/pages/MapPage.test.tsx b/app/src/pages/MapPage.test.tsx index 6df3a054d2..c334995de3 100644 --- a/app/src/pages/MapPage.test.tsx +++ b/app/src/pages/MapPage.test.tsx @@ -3,7 +3,7 @@ import { forwardRef, useImperativeHandle } from 'react'; import { fireEvent } from '@testing-library/react'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { MapPage, outlierSquashForce, type SimNode } from 'src/pages/MapPage'; +import { MapPage } from 'src/pages/MapPage'; import { act, render, screen, waitFor } from 'src/test-utils'; vi.mock('react-helmet-async', () => ({ @@ -366,35 +366,6 @@ describe('MapPage', () => { expect(small).toBeGreaterThan(0); }); - it('seeds initial node positions per cluster (warm start for the simulation)', async () => { - mockFetchSuccess(); - render(); - await waitFor(() => expect(lastFgProps.current).not.toBeNull()); - - const nodes = ( - lastFgProps.current!.graphData as { - nodes: Array<{ id: string; x?: number; y?: number; vx?: number; vy?: number }>; - } - ).nodes; - // Every node should have a numeric seed position before FG2D ever ticks the simulation — - // without seeding, FG2D's random initialiser would leave x/y undefined here. - for (const n of nodes) { - expect(typeof n.x).toBe('number'); - expect(typeof n.y).toBe('number'); - expect(Number.isFinite(n.x as number)).toBe(true); - expect(Number.isFinite(n.y as number)).toBe(true); - } - // Same plot_type (= colorBucket) should land near the same centroid; nodes from - // different buckets should land further apart on average. Take the two scatters - // (bucketed together) vs. line-basic and compare distances. - const scatterA = nodes.find(n => n.id === 'scatter-basic')!; - const scatterB = nodes.find(n => n.id === 'scatter-color-mapped')!; - const line = nodes.find(n => n.id === 'line-basic')!; - const dist = (a: typeof scatterA, b: typeof scatterA) => - Math.hypot((a.x ?? 0) - (b.x ?? 0), (a.y ?? 
0) - (b.y ?? 0)); - expect(dist(scatterA, scatterB)).toBeLessThan(dist(scatterA, line)); - }); - it('shows the settling overlay until the simulation cools, then hides it', async () => { mockFetchSuccess(); render(); @@ -453,103 +424,6 @@ describe('MapPage', () => { expect(mockTrackEvent).toHaveBeenCalledWith('map_node_click', { spec: 'scatter-basic' }); }); - describe('outlierSquashForce', () => { - // Pure unit tests that exercise the force math directly. We bypass the - // d3-force harness because the force's contract is "modify vx/vy of - // outlier nodes in place"; the harness adds nothing beyond invoking - // force(alpha) and force.initialize(nodes). - type Sim = SimNode & { x: number; y: number; vx: number; vy: number }; - const makeNode = (x: number, y: number): Sim => ({ x, y, vx: 0, vy: 0 }); - - it('is a no-op when there are no nodes', () => { - const force = outlierSquashForce(0.95, 200, 0.18); - // initialize with empty array; force(alpha) must not throw. - (force as unknown as { initialize: (n: SimNode[]) => void }).initialize([]); - expect(() => force(1)).not.toThrow(); - }); - - it('is a no-op for graphs of fewer than 2 nodes', () => { - const force = outlierSquashForce(0.95, 200, 0.18); - const nodes: Sim[] = [makeNode(1000, 0)]; - (force as unknown as { initialize: (n: SimNode[]) => void }).initialize(nodes); - force(1); - expect(nodes[0].vx).toBe(0); - expect(nodes[0].vy).toBe(0); - }); - - it('leaves nodes inside the threshold untouched (inner geometry preserved)', () => { - // 99 inner nodes co-located at the origin + 1 far outlier. All inner - // distances to the centroid are exactly equal, so the percentile - // cutoff R lands exactly at the inner radius and the early - // `r <= R → continue` short-circuit fires for every inner node. - // The outlier is the only node above R. 
- const force = outlierSquashForce(0.95, 200, 0.18); - const inner: Sim[] = Array.from({ length: 99 }, () => makeNode(0, 0)); - const outlier = makeNode(5000, 0); - const nodes: Sim[] = [...inner, outlier]; - (force as unknown as { initialize: (n: SimNode[]) => void }).initialize(nodes); - force(1); - for (const n of inner) { - expect(n.vx).toBe(0); - expect(n.vy).toBe(0); - } - // Outlier was pulled inward — vx is opposite-sign to its position. - expect(outlier.vx).toBeLessThan(0); - expect(outlier.vy).toBe(0); - }); - - it('still squashes outliers in small graphs (off-by-one regression guard)', () => { - // Naive `floor(length * p)` would pick index 19 (the max) on n = 20 - // and never trigger the squash. The (n - 1) * p indexing must keep - // at least the most-outlying node above R. - const force = outlierSquashForce(0.95, 200, 0.18); - const nodes: Sim[] = Array.from({ length: 20 }, (_, i) => makeNode(i, 0)); - // Push the last node much further so it's the unambiguous outlier. - nodes[19] = makeNode(10_000, 0); - (force as unknown as { initialize: (n: SimNode[]) => void }).initialize(nodes); - force(1); - // The outlier must have a non-zero inward correction. - expect(nodes[19].vx).not.toBe(0); - expect(nodes[19].vx).toBeLessThan(0); - }); - - it('keeps the velocity correction finite even for distant outliers', () => { - // The compression map r' = R + (r - R)/(1 + (r - R)/k) has an - // asymptote at R + k, so even a node at distance 1e6 produces a - // bounded velocity correction. This is the property that prevents - // a single rogue node from blowing up the simulation. 
- const force = outlierSquashForce(0.95, 200, 0.18); - const inner: Sim[] = Array.from({ length: 99 }, () => makeNode(0, 0)); - const far = makeNode(1_000_000, 0); - const nodes: Sim[] = [...inner, far]; - (force as unknown as { initialize: (n: SimNode[]) => void }).initialize(nodes); - force(1); - expect(far.vx).toBeLessThan(0); - expect(Number.isFinite(far.vx)).toBe(true); - // |vx| upper bound: |position - 0| * strength * alpha = 1e6 * 0.18. - // The actual value is much smaller because (targetR - r) / r is - // close to -1 once r >> R, so the correction approaches -position. - expect(Math.abs(far.vx)).toBeLessThan(1_000_000); - }); - - it('scales the velocity correction with alpha', () => { - const force = outlierSquashForce(0.95, 200, 0.18); - const inner: Sim[] = Array.from({ length: 99 }, () => makeNode(0, 0)); - const hot = makeNode(5_000, 0); - const cool = makeNode(5_000, 0); - // First simulation — alpha = 1 (hot). - (force as unknown as { initialize: (n: SimNode[]) => void }).initialize([...inner, hot]); - force(1); - // Second simulation — alpha = 0.1 (cooling). - const force2 = outlierSquashForce(0.95, 200, 0.18); - const inner2: Sim[] = Array.from({ length: 99 }, () => makeNode(0, 0)); - (force2 as unknown as { initialize: (n: SimNode[]) => void }).initialize([...inner2, cool]); - force2(0.1); - // Cooler alpha → ~10× smaller velocity correction. 
- expect(Math.abs(hot.vx)).toBeGreaterThan(Math.abs(cool.vx) * 5); - }); - }); - it('registers an outlier-squash force on the simulation via onRenderFramePre', async () => { mockFetchSuccess(); render(); diff --git a/app/src/pages/MapPage.tsx b/app/src/pages/MapPage.tsx index af1360791a..c9b3e103cc 100644 --- a/app/src/pages/MapPage.tsx +++ b/app/src/pages/MapPage.tsx @@ -1,6 +1,5 @@ import { useEffect, useMemo, useRef, useState } from 'react'; -import { forceCollide } from 'd3-force-3d'; import ForceGraph2D from 'react-force-graph-2d'; import { Helmet } from 'react-helmet-async'; import { Link, useNavigate } from 'react-router-dom'; @@ -12,83 +11,30 @@ import Typography from '@mui/material/Typography'; import useMediaQuery from '@mui/material/useMediaQuery'; import { useAnalytics } from 'src/hooks'; +import { + CLUSTER_COLORS, + colorFor, + MIN_SIM_BOUNDS, + NODE_SIZE, + useForceGraphSimulation, +} from 'src/hooks/useForceGraphSimulation'; import { useTheme } from 'src/hooks/useLayoutContext'; -import { ApiError, apiGet, endpoints } from 'src/lib/api'; import { - buildKNNLinks, buildVariantUrl, - categoryValueCounts, - computeIDF, - DEFAULT_CATEGORY_WEIGHT, ensureNodeTier, fitToBox, - flattenTags, type MapLink, type MapNode, nodeAspectRatio, pickBestLoadedTier, pickTier, - preloadImages, - primaryCategoryValue, type ResolutionTier, - selectMapThumbUrl, type SpecMapItem, TAG_CATEGORIES, - type TagCategory, - topCategoryValues, } from 'src/pages/MapPage.helpers'; import { specPath } from 'src/routes/paths'; import { colors, fontSize, typography } from 'src/theme'; -const NODE_SIZE = 60; // graph-space size of a node — large enough to read the thumbnail without hovering -const COOLDOWN_TICKS = 300; // simulation lifetime in ticks; the engine cap and alpha-decay below both derive from this so they stop together -// Stop the engine while motion is still perceptible. 
With d3-force's default -// alphaMin (0.001), alpha keeps decaying for ~150 more ticks after movement -// drops below the visible threshold (alpha ≈ 0.01) — that tail is dead time -// for the user. We bump alphaMin to 0.01 so engine-stop coincides with where -// the layout already looks frozen. -const COOLDOWN_ALPHA_MIN = 0.01; -// Couple alpha decay to COOLDOWN_TICKS so the engine stops exactly when the -// progress bar (denominated in COOLDOWN_TICKS) reaches 100%. Without this, -// alpha hits alphaMin before the bar is full and the "map.simulate()" -// overlay fades out with the bar still partway across. -// alpha(n) = (1 - decay)^n → solve (1 - decay)^COOLDOWN_TICKS = alphaMin. -const COOLDOWN_ALPHA_DECAY = 1 - Math.pow(COOLDOWN_ALPHA_MIN, 1 / COOLDOWN_TICKS); -const CLUSTER_SEED_RADIUS = 600; // distance from origin where each colorBucket cluster's centroid is initially placed -const CLUSTER_SEED_JITTER = 150; // per-node random offset around the cluster centroid — small enough to keep clusters identifiable, large enough that collision can settle them -const KNN_K = 8; // edges per node in the sparse KNN graph -// Default threshold tuned for the plot_type-dominant default. Bumped up -// from 0.05 because once secondary categories (features, techniques, …) -// have non-zero weight, common tags like `features:basic` create weak -// cross-cluster bridges in the 0.05–0.12 range that collapse the graph -// into one blob. At 0.15 those bridges drop out and clusters stay distinct. -// Exposed as a live slider in the weights panel for power users. -const DEFAULT_MIN_SIM = 0.15; -const MIN_SIM_BOUNDS = { min: 0.05, max: 0.4, step: 0.01 } as const; -// Forces: tuned so KNN edges + collision shape the layout while many-body -// repulsion stays GENTLE — collision already enforces minimum spacing, and -// strong repulsion would just blow the graph wide enough that zoomToFit -// zooms out too far for thumbnails to be readable. 
Goal: graph extent stays -// small enough that zoomToFit displays nodes at a generous CSS-pixel size. -const REPULSION = -50; // forceManyBody strength -const LINK_DISTANCE_MIN = NODE_SIZE * 1.1; // shortest link (highest sim) -const LINK_DISTANCE_MAX = NODE_SIZE * 3.5; // longest link (lowest sim above threshold) -const LINK_STRENGTH_CAP = 0.4; // max pull from a single link -const COLLIDE_PADDING = 6; // px padding on top of the bounding-box radius — visible breathing room between thumbnails -const CENTER_GRAVITY = 0.04; // gentle pull toward the viewport center; ~25× weaker than d3-force-3d's default to corral outliers without flattening clusters -// Outlier-squash: a custom radial force that activates only beyond a -// distance percentile of the centroid. Inside the threshold, geometry -// is untouched — the inner cluster keeps its exact shape. Outside, each -// outlier's distance is compressed via a sigmoid-like map -// r' = R + (r - R) / (1 + (r - R) / k) -// so far-flung points stay visibly *separate* (their order is preserved) -// but bounded — the asymptote is R + k. This corrects the "everything -// collapses to a dot because of one runaway outlier" zoomToFit problem -// without needing stronger global gravity (which would crush clusters). -const OUTLIER_THRESHOLD_PERCENTILE = 0.95; // distance percentile beyond which compression starts -const OUTLIER_SQUASH_K = 120; // graph-units of extra room outliers can use beyond R; smaller = harder squash -const OUTLIER_SQUASH_STRENGTH = 0.18; // velocity-correction factor; tuned so outliers settle within COOLDOWN_TICKS - // visually-hidden style — keeps the spec list readable for screen readers // even though the canvas is the primary interface. const visuallyHiddenSx = { @@ -103,28 +49,6 @@ const visuallyHiddenSx = { border: 0, }; -// Top-N most frequent plot_types each get a distinct imprint border color -// so the catalog's biggest categories (line, scatter, bar, …) stand out at -// a glance. 
Specs that don't fall into the top-N keep a neutral border. -// 8 categorical hues in imprint's hybrid-v3 sort order. -const CLUSTER_COLORS = [ - '#009E73', // slot 0 — brand green - '#C475FD', // slot 1 — lavender - '#4467A3', // slot 2 — blue - '#BD8233', // slot 3 — ochre - '#AE3030', // slot 4 — matte red - '#2ABCCD', // slot 5 — cyan - '#954477', // slot 6 — rose - '#99B314', // slot 7 — lime -] as const; - -function colorFor(bucket: string | null, topTypes: string[]): string | null { - if (!bucket) return null; - const idx = topTypes.indexOf(bucket); - if (idx < 0) return null; - return CLUSTER_COLORS[idx % CLUSTER_COLORS.length]; -} - // Read a link endpoint's spec id regardless of whether ForceGraph2D has // already mutated the field from a string into the resolved node object // (it does so after the first simulation tick). All link-side comparisons @@ -134,76 +58,6 @@ function linkEndId(end: MapLink['source']): string | undefined { return (end as { id?: string })?.id; } -// Custom d3-force that compresses extreme outliers radially toward the -// cluster centroid while leaving inner geometry untouched. See the block -// comment on OUTLIER_THRESHOLD_PERCENTILE for the math; this is the -// implementation. The simulation calls force(alpha) every tick, alpha -// decays from 1 → 0, so the velocity correction tapers off as the layout -// cools — the force is *active* during the same window the gate covers, -// then becomes a no-op once outliers are at their compressed targets. -// Exported for unit tests — the simulation only ever calls this through -// d3-force's `force(alpha)` interface, so the public surface is internal. 
-export type SimNode = { x?: number; y?: number; vx?: number; vy?: number }; -export function outlierSquashForce(percentile: number, k: number, strength: number) { - let nodes: SimNode[] = []; - function force(alpha: number) { - if (nodes.length === 0) return; - let cx = 0, - cy = 0, - n = 0; - for (const node of nodes) { - if (node.x == null || node.y == null) continue; - cx += node.x; - cy += node.y; - n++; - } - if (n === 0) return; - cx /= n; - cy /= n; - // One pass to compute distances; second pass to apply velocity - // adjustment to outliers. Allocating a fresh array per tick is fine - // at ~300 nodes (~3 µs); we'd only avoid it at 10k+. - const dists: number[] = new Array(nodes.length); - for (let i = 0; i < nodes.length; i++) { - const node = nodes[i]; - if (node.x == null || node.y == null) { - dists[i] = 0; - continue; - } - dists[i] = Math.hypot(node.x - cx, node.y - cy); - } - const sorted = dists.slice().sort((a, b) => a - b); - // Use the (length - 1) * p index (numpy "linear" / "lower" interpolation - // for a discrete percentile). The naive `length * p` rounds up to - // `length - 1` for any n ≤ 1/(1-p) — i.e. with p = 0.95 and n ≤ 20 the - // cutoff would be the *max* distance and the squash force would silently - // disable itself. Filtered subsets of the catalog can easily land in - // that range, so we never want the cutoff to coincide with the maximum. - if (sorted.length < 2) return; - const idx = Math.floor((sorted.length - 1) * percentile); - const R = sorted[idx]; - if (!(R > 0)) return; - for (let i = 0; i < nodes.length; i++) { - const r = dists[i]; - if (r <= R) continue; - const node = nodes[i]; - if (node.x == null || node.y == null) continue; - const excess = r - R; - const compressed = excess / (1 + excess / k); - const targetR = R + compressed; - const factor = (targetR - r) / r; // negative — pulls toward the centroid - const dx = node.x - cx; - const dy = node.y - cy; - node.vx = (node.vx ?? 
0) + dx * factor * strength * alpha; - node.vy = (node.vy ?? 0) + dy * factor * strength * alpha; - } - } - force.initialize = (n: SimNode[]) => { - nodes = n; - }; - return force; -} - // Hairline border around a thumbnail node, theme-aware. Top-N plot types // paint with a brand color; the rest fall back to a neutral hairline. // On hover we keep the cluster color (or fall back to brand primary for @@ -227,9 +81,32 @@ export function MapPage() { const fgRef = useRef(null); const containerRef = useRef(null); - // data state - const [specs, setSpecs] = useState(null); - const [error, setError] = useState(null); + // Simulation/data orchestration: spec loading, weights, the KNN graph + // derivation, settling gate, search matching, and force tuning all live + // in the hook. This component keeps the canvas, the paint callbacks, and + // every handler that needs the imperative graph ref. + const { + specs, + error, + weights, + setWeights, + minSim, + setMinSim, + resetWeights, + activeCategory, + graphData, + nodeById, + neighbors, + settled, + markSettled, + tickProgress, + handleEngineTick, + searchQuery, + setSearchQuery, + searchMatches, + forceConfig, + } = useForceGraphSimulation({ onRepaint: () => fgRef.current?.refresh?.() }); + const [size, setSize] = useState<{ w: number; h: number }>({ w: 0, h: 0 }); const [hoverId, setHoverId] = useState(null); // panelNodeId trails hoverId on mouse-out so the corner preview can fade @@ -256,55 +133,23 @@ export function MapPage() { // hoverType = a plot_type the user is hovering in the legend; everything // not in that cluster dims so the cluster shape is obvious. const [hoverType, setHoverType] = useState(null); - // Per-category weight overrides for the similarity calculation. Bound to - // the weights panel sliders. Live-updates KNN edges + simulation on change. 
- const [weights, setWeights] = useState>(DEFAULT_CATEGORY_WEIGHT); - const [minSim, setMinSim] = useState(DEFAULT_MIN_SIM); const [weightsOpen, setWeightsOpen] = useState(false); // Mobile-only: legend collapses behind a `legend ▸` toggle to leave // canvas room. Tablet/desktop renders the legend list always-visible. const [legendOpen, setLegendOpen] = useState(false); - // settled = true once the force simulation has finished cooling. Until - // then, the canvas is overlaid by a subtle gate that swallows pointer - // input — a click on a still-moving node would otherwise pin the wrong - // spec by the time the simulation settles around it. Resets to false - // whenever graphData re-derives (filter / weight / category change), so - // the gate also covers subsequent re-layouts. - const [settled, setSettled] = useState(false); - // Throttled tick counter for the "computing" overlay's progress bar. - // We update React state at most every PROGRESS_TICK_BATCH simulation - // ticks to avoid re-rendering MapPage at ~60 Hz while the layout cools. - // tickCountRef holds the un-throttled count so we know when to flush. - const tickCountRef = useRef(0); - const [tickProgress, setTickProgress] = useState(0); // Search-pill state. searchOpen controls dropdown visibility (separate // from focus so we can keep showing matches briefly while a click is in // flight via the input's onBlur grace period). - const [searchQuery, setSearchQuery] = useState(''); const [searchOpen, setSearchOpen] = useState(false); const [searchIdx, setSearchIdx] = useState(0); const searchInputRef = useRef(null); - // 1. fetch + page view + // 1. 
page view useEffect(() => { trackPageview('/map'); }, [trackPageview]); - useEffect(() => { - const ctrl = new AbortController(); - apiGet(endpoints.specsMap, { signal: ctrl.signal }) - .then(setSpecs) - .catch((err: unknown) => { - if (err instanceof Error && err.name === 'AbortError') return; - // Keep the pre-migration user-visible message ("HTTP ") rather - // than surfacing the longer ApiError format in the error banner. - if (err instanceof ApiError) setError(`HTTP ${err.status}`); - else setError(err instanceof Error ? err.message : 'Failed to load map data'); - }); - return () => ctrl.abort(); - }, []); - // 2. resize observer useEffect(() => { const el = containerRef.current; @@ -341,170 +186,6 @@ export function MapPage() { }; }, []); - // The category that drives the legend + node border colors: whichever - // currently has the highest weight (plot_type wins on ties because it's - // the first entry of TAG_CATEGORIES and we use strictly-greater compare). - // Falls back to plot_type when all weights are 0. - const activeCategory: TagCategory = useMemo(() => { - let maxWeight = -Infinity; - let active: TagCategory = 'plot_type'; - for (const c of TAG_CATEGORIES) { - if (weights[c] > maxWeight) { - maxWeight = weights[c]; - active = c; - } - } - return maxWeight > 0 ? active : 'plot_type'; - }, [weights]); - - // graphData rebuilds whenever weights/minSim/activeCategory change - // (because links + colorBucket depend on them). Without this cache, every - // slider-drag tick would recreate every MapNode with empty imgs/pendingTiers - // Maps, dropping the loaded HTMLImageElements — the canvas would then paint - // fallback rects until each re-fires onload, producing a visible - // flicker across all 327 thumbnails on every onChange tick. We keep a - // stable id → MapNode cache here and reuse imgs/pendingTiers as long as - // thumbUrl is unchanged (theme toggle invalidates). - const nodeCacheRef = useRef>(new Map()); - - // 3. 
derive graph data from specs/theme (pure — no setState in effect) - const graphData = useMemo<{ - nodes: MapNode[]; - links: MapLink[]; - topTypes: string[]; - typeCounts: Map; - idf: Map; - }>(() => { - if (!specs) { - return { nodes: [], links: [], topTypes: [], typeCounts: new Map(), idf: new Map() }; - } - const idf = computeIDF(specs); - const topTypes = topCategoryValues(specs, activeCategory, CLUSTER_COLORS.length); - const typeCounts = categoryValueCounts(specs, activeCategory); - const cache = nodeCacheRef.current; - const nextCache = new Map(); - // Pre-compute one centroid per colorBucket on a circle around the origin. - // Seeding each node near its cluster centroid (instead of the FG2D - // default of random positions everywhere) gives the simulation a warm - // start: clusters don't have to first separate from a uniform soup, and - // the same number of cooldown ticks now produces visibly cleaner - // separation. Null-bucket nodes sit at the origin and let the link force - // pull them toward whatever clusters they connect to. - const clusterCentroids = new Map(); - topTypes.forEach((t, i) => { - const angle = (i / topTypes.length) * Math.PI * 2; - clusterCentroids.set(t, { - x: Math.cos(angle) * CLUSTER_SEED_RADIUS, - y: Math.sin(angle) * CLUSTER_SEED_RADIUS, - }); - }); - // Hash-based jitter so seed positions are stable across re-renders for - // the same spec id — avoids reshuffling on filter changes. - const jitter = (id: string, salt: number) => { - let h = salt; - for (let i = 0; i < id.length; i++) h = (h * 31 + id.charCodeAt(i)) | 0; - return ((h & 0xffff) / 0xffff - 0.5) * 2 * CLUSTER_SEED_JITTER; - }; - const nodes: (MapNode & { x?: number; y?: number; vx?: number; vy?: number })[] = specs.map( - s => { - const v = primaryCategoryValue(s, activeCategory); - const colorBucket = topTypes.includes(v) ? 
v : null; - const thumbUrl = selectMapThumbUrl(s, isDark); - const cached = cache.get(s.id) as - | (MapNode & { x?: number; y?: number; vx?: number; vy?: number }) - | undefined; - const reuse = cached && cached.thumbUrl === thumbUrl; - // Warm-start preference: keep the simulation's last x/y if we have it - // (filter / weight tweaks reuse positions and refine in place). Cold - // start: seed from the cluster centroid + stable per-id jitter. - const seedCenter = colorBucket ? clusterCentroids.get(colorBucket) : null; - const x = cached?.x ?? (seedCenter ? seedCenter.x + jitter(s.id, 1) : jitter(s.id, 3)); - const y = cached?.y ?? (seedCenter ? seedCenter.y + jitter(s.id, 2) : jitter(s.id, 5)); - const node: MapNode & { x: number; y: number; vx: number; vy: number } = { - id: s.id, - title: s.title, - tags: flattenTags(s), - colorBucket, - thumbUrl, - imgs: reuse ? cached!.imgs : new Map(), - pendingTiers: reuse ? cached!.pendingTiers : new Set(), - x, - y, - vx: cached?.vx ?? 0, - vy: cached?.vy ?? 0, - }; - nextCache.set(s.id, node); - return node; - } - ); - nodeCacheRef.current = nextCache; - const links = buildKNNLinks(specs, idf, KNN_K, minSim, weights); - return { nodes, links, topTypes, typeCounts, idf }; - }, [specs, isDark, weights, minSim, activeCategory]); - - // Re-arm the settling gate whenever graphData re-derives — FG2D reheats - // the simulation in response, and we want the gate to cover the new - // cooling phase the same way it covers the initial one. No-op on the - // very first render (settled is already false) and while specs are - // still loading. 
- // - // Implemented via the "store previous prop in state" pattern (see - // https://react.dev/reference/react/useState#storing-information-from-previous-renders) - // instead of useEffect: React supports calling setState during render of - // the *same* component, batches the updates, and re-renders once before - // commit — no infinite loop, and the rule that bans setState in effects - // doesn't apply to setState during render. - const [prevGraphData, setPrevGraphData] = useState(graphData); - if (graphData !== prevGraphData) { - setPrevGraphData(graphData); - if (graphData.nodes.length > 0) { - setSettled(false); - setTickProgress(0); - tickCountRef.current = 0; - } - } - - // Eager-load the 400-tier thumbnails so something paints fast. Higher tiers - // are fetched lazily from nodeCanvasObject when the user zooms in. - useEffect(() => { - if (graphData.nodes.length === 0) return; - const nodeById = new Map(graphData.nodes.map(n => [n.id, n])); - let cancelled = false; - preloadImages( - graphData.nodes.map(n => ({ id: n.id, thumbUrl: n.thumbUrl })), - (id, tier, img) => { - if (cancelled) return; - const n = nodeById.get(id); - if (n) n.imgs.set(tier, img); - fgRef.current?.refresh?.(); - } - ); - return () => { - cancelled = true; - }; - }, [graphData]); - - // 4. neighbor lookup for hover highlight (built once per links change) - // Precomputed id → node lookup. linkColor/linkWidth fire once per link - // per frame (~1k links), and a graphData.nodes.find() inside each call - // would be O(N²) total per frame; the Map keeps it O(1). 
- const nodeById = useMemo(() => { - const map = new Map(); - for (const n of graphData.nodes) map.set(n.id, n); - return map; - }, [graphData.nodes]); - - const neighbors = useMemo(() => { - const map = new Map>(); - for (const l of graphData.links) { - if (!map.has(l.source)) map.set(l.source, new Set()); - if (!map.has(l.target)) map.set(l.target, new Set()); - map.get(l.source)!.add(l.target); - map.get(l.target)!.add(l.source); - } - return map; - }, [graphData.links]); - // Track the pinned node's on-screen rect so the DOM-overlay pulse marker // stays glued to it while the user pans/zooms. Cheap (one RAF tick = a // graph→screen coord transform + a setState that no-ops on sub-pixel @@ -549,7 +230,7 @@ export function MapPage() { return () => cancelAnimationFrame(raf); }, [pinnedId, graphData, nodeById]); - // 5. derive everything the corner hover-panel needs from the (lagged) + // 3. derive everything the corner hover-panel needs from the (lagged) // panelNodeId, so the panel can fade out without losing its content. const panelData = useMemo(() => { if (!panelNodeId) return null; @@ -590,44 +271,6 @@ export function MapPage() { return colorFor(node.colorBucket, graphData.topTypes) ?? colors.primary; }, [pinnedId, nodeById, graphData.topTypes]); - // 6. Precompute lowercased searchable fields per spec so each keystroke - // only does .includes() checks, not a fresh tag-flatten + lowercase. - const searchHaystacks = useMemo(() => { - if (!specs) return []; - return specs.map(s => ({ - spec: s, - titleL: s.title.toLowerCase(), - idL: s.id.toLowerCase(), - tagsL: flattenTags(s).map(t => t.toLowerCase()), - })); - }, [specs]); - - // 7. Match the search query: every whitespace-separated token must appear - // somewhere (title / id / tag), score weighted by where it hit. Top 8. 
- const searchMatches = useMemo(() => { - const q = searchQuery.trim().toLowerCase(); - if (!q) return []; - const tokens = q.split(/\s+/).filter(Boolean); - const scored: { spec: SpecMapItem; score: number }[] = []; - for (const h of searchHaystacks) { - let score = 0; - let allMatch = true; - for (const tok of tokens) { - const inTitle = h.titleL.includes(tok); - const inId = h.idL.includes(tok); - const inTags = h.tagsL.some(t => t.includes(tok)); - if (!(inTitle || inId || inTags)) { - allMatch = false; - break; - } - score += inTitle ? 3 : inId ? 2 : 1; - } - if (allMatch) scored.push({ spec: h.spec, score }); - } - scored.sort((a, b) => b.score - a.score || a.spec.title.localeCompare(b.spec.title)); - return scored.slice(0, 8).map(x => x.spec); - }, [searchQuery, searchHaystacks]); - // (searchIdx is reset inline in the input's onChange handler — moved out // of useEffect to avoid set-state-in-effect: it's a one-line followup // to a user event, not external sync.) @@ -646,7 +289,7 @@ export function MapPage() { return () => window.removeEventListener('keydown', onKey); }, []); - // 5. ForceGraph2D callbacks. Types for ctx come from the wrapper's prop signature + // 4. ForceGraph2D callbacks. Types for ctx come from the wrapper's prop signature // when these are passed inline below — extracting them out would force us to spell // CanvasRenderingContext2D explicitly, which our eslint config doesn't recognize. type WithCoords = MapNode & { x?: number; y?: number }; @@ -1136,10 +779,7 @@ export function MapPage() { { - setWeights(DEFAULT_CATEGORY_WEIGHT); - setMinSim(DEFAULT_MIN_SIM); - }} + onClick={resetWeights} sx={{ all: 'unset', cursor: 'pointer', @@ -1343,10 +983,9 @@ export function MapPage() { height={size.h} backgroundColor={'transparent'} nodeLabel={(n: MapNode) => n.title} - // Boost global repulsion so nodes aren't crammed into a blob. 
- d3VelocityDecay={0.35} - d3AlphaDecay={COOLDOWN_ALPHA_DECAY} - d3AlphaMin={COOLDOWN_ALPHA_MIN} + d3VelocityDecay={forceConfig.velocityDecay} + d3AlphaDecay={forceConfig.alphaDecay} + d3AlphaMin={forceConfig.alphaMin} nodeCanvasObject={(node, ctx, globalScale) => { const n = node as WithCoords; if (n.x == null || n.y == null) return; @@ -1471,7 +1110,7 @@ export function MapPage() { setHoverId(null); } }} - cooldownTicks={COOLDOWN_TICKS} + cooldownTicks={forceConfig.cooldownTicks} // Frame the dense cluster to ~80% of the viewport — instantly // (0 ms), so the camera move happens behind the still-active // gate overlay and the user just sees the final framing when @@ -1515,71 +1154,35 @@ export function MapPage() { fg.zoom?.(fitZoom, 0); } } - setSettled(true); + markSettled(); }} // Wire up the custom forces once the imperative ref is available. + // The tuned values/factories come from the hook's forceConfig. // onRenderFramePre fires every frame; the __forcesWired guard makes // it idempotent and the cost on subsequent frames is one property read. onRenderFramePre={() => { const fg = fgRef.current; if (!fg || fg.__forcesWired) return; - // Stronger many-body repulsion than the default ~-30. - fg.d3Force('charge')?.strength(REPULSION); - // Link distance/strength scale with weighted-Jaccard similarity: - // tighter clusters for highly related specs, looser otherwise. + fg.d3Force('charge')?.strength(forceConfig.chargeStrength); // The d3-force-3d ambient types are minimal; cast for the chained calls. // eslint-disable-next-line @typescript-eslint/no-explicit-any const linkForce = fg.d3Force('link') as any; if (linkForce) { - linkForce.distance((l: MapLink) => { - const w = l.weight ?? 0.3; - return ( - LINK_DISTANCE_MIN + - (1 - Math.min(1, w)) * (LINK_DISTANCE_MAX - LINK_DISTANCE_MIN) - ); - }); - linkForce.strength((l: MapLink) => - Math.max(0.02, Math.min(LINK_STRENGTH_CAP, (l.weight ?? 
0.3) * 0.4)) - ); + linkForce.distance(forceConfig.linkDistance); + linkForce.strength(forceConfig.linkStrength); } - // Per-node collision: prevents thumbnail overlap. Radius = half - // the longer side of the bounding box plus a small padding. - fg.d3Force( - 'collide', - forceCollide(() => NODE_SIZE / 2 + COLLIDE_PADDING).iterations(2) - ); - // Mild centering force so disconnected outliers (no KNN edges - // because all sims < threshold) drift back toward the cluster - // mass instead of vanishing to the corners. Strength is well - // below the default 1.0 so cluster shapes stay intact. - fg.d3Force('center')?.strength?.(CENTER_GRAVITY); + fg.d3Force('collide', forceConfig.createCollideForce()); + fg.d3Force('center')?.strength?.(forceConfig.centerGravity); // Outlier-squash: register the custom radial-compression force // AFTER the standard ones so its velocity correction is the // last word per tick. Inner geometry is untouched (force is a // no-op below the percentile threshold), so this stacks on the // existing layout instead of fighting it. - fg.d3Force( - 'outlier-squash', - outlierSquashForce( - OUTLIER_THRESHOLD_PERCENTILE, - OUTLIER_SQUASH_K, - OUTLIER_SQUASH_STRENGTH - ) - ); + fg.d3Force('outlier-squash', forceConfig.createOutlierSquashForce()); fg.__forcesWired = true; fg.d3ReheatSimulation?.(); }} - // Drive the loading-overlay progress bar. Each simulation tick - // bumps the un-throttled ref; we flush to React state every - // PROGRESS_TICK_BATCH ticks (~5×/s at 60 Hz) so the bar advances - // smoothly without re-rendering MapPage on every tick. 
- onEngineTick={() => { - tickCountRef.current += 1; - const PROGRESS_TICK_BATCH = 6; - if (tickCountRef.current % PROGRESS_TICK_BATCH === 0) { - setTickProgress(Math.min(1, tickCountRef.current / COOLDOWN_TICKS)); - } - }} + onEngineTick={handleEngineTick} /> )} From b7a952b7434b80ab3cb4316f8912c5a4d95b5877 Mon Sep 17 00:00:00 2001 From: Markus Neusinger <2921697+MarkusNeusinger@users.noreply.github.com> Date: Wed, 10 Jun 2026 05:09:37 +0200 Subject: [PATCH 2/3] fix(app): freeze FORCE_CONFIG and couple tick progress to it Copilot review findings: the 'frozen' comment is now literally true via Object.freeze, and handleEngineTick derives the progress denominator from FORCE_CONFIG.cooldownTicks instead of the raw constant so the progress bar can't drift from the configured simulation lifetime. Co-Authored-By: Claude Fable 5 --- app/src/hooks/useForceGraphSimulation.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/src/hooks/useForceGraphSimulation.ts b/app/src/hooks/useForceGraphSimulation.ts index 2ad634707b..4589e672d2 100644 --- a/app/src/hooks/useForceGraphSimulation.ts +++ b/app/src/hooks/useForceGraphSimulation.ts @@ -206,7 +206,7 @@ export interface MapForceConfig { // Static by construction — a single frozen object keeps the identity stable // across renders so consumers can safely list it in dependency arrays. 
-const FORCE_CONFIG: MapForceConfig = { +const FORCE_CONFIG: MapForceConfig = Object.freeze({ cooldownTicks: COOLDOWN_TICKS, alphaDecay: COOLDOWN_ALPHA_DECAY, alphaMin: COOLDOWN_ALPHA_MIN, @@ -232,7 +232,7 @@ const FORCE_CONFIG: MapForceConfig = { forceCollide(() => NODE_SIZE / 2 + COLLIDE_PADDING).iterations(2), createOutlierSquashForce: () => outlierSquashForce(OUTLIER_THRESHOLD_PERCENTILE, OUTLIER_SQUASH_K, OUTLIER_SQUASH_STRENGTH), -}; +}); export interface UseForceGraphSimulationOptions { /** @@ -523,7 +523,7 @@ export function useForceGraphSimulation({ tickCountRef.current += 1; const PROGRESS_TICK_BATCH = 6; if (tickCountRef.current % PROGRESS_TICK_BATCH === 0) { - setTickProgress(Math.min(1, tickCountRef.current / COOLDOWN_TICKS)); + setTickProgress(Math.min(1, tickCountRef.current / FORCE_CONFIG.cooldownTicks)); } }; From 16368d46fcb66aec74fa0cd80b7a426fa5b115c2 Mon Sep 17 00:00:00 2001 From: Markus Neusinger <2921697+MarkusNeusinger@users.noreply.github.com> Date: Wed, 10 Jun 2026 05:35:21 +0200 Subject: [PATCH 3/3] ci: retrigger required checks (no changes)