From 0882c281c00a4b147b50af77fba7aead600b40d2 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 13 Mar 2026 15:03:27 -0600 Subject: [PATCH 1/5] refactor: extract graph model subsystem with algorithms, builders, and classifiers Introduce src/graph/ with a unified in-memory CodeGraph model that replaces ad-hoc graphology usage scattered across consumers. Graph construction (from DB), algorithms (BFS, Tarjan, Louvain, shortest-path), and classification (roles, risk) are now centralized. Refactors communities, cycles, structure, triage, and viewer to use the new subsystem, removing ~300 lines of duplicated graph-building logic. --- src/communities.js | 102 ++--------- src/cycles.js | 115 ++++--------- src/graph/algorithms/bfs.js | 49 ++++++ src/graph/algorithms/centrality.js | 16 ++ src/graph/algorithms/index.js | 5 + src/graph/algorithms/louvain.js | 26 +++ src/graph/algorithms/shortest-path.js | 41 +++++ src/graph/algorithms/tarjan.js | 49 ++++++ src/graph/builders/dependency.js | 91 ++++++++++ src/graph/builders/index.js | 3 + src/graph/builders/structure.js | 40 +++++ src/graph/builders/temporal.js | 33 ++++ src/graph/classifiers/index.js | 2 + src/graph/classifiers/risk.js | 77 +++++++++ src/graph/classifiers/roles.js | 64 +++++++ src/graph/index.js | 13 ++ src/graph/model.js | 236 ++++++++++++++++++++++++++ src/structure.js | 55 ++---- src/triage.js | 92 +++------- src/viewer.js | 41 ++--- 20 files changed, 847 insertions(+), 303 deletions(-) create mode 100644 src/graph/algorithms/bfs.js create mode 100644 src/graph/algorithms/centrality.js create mode 100644 src/graph/algorithms/index.js create mode 100644 src/graph/algorithms/louvain.js create mode 100644 src/graph/algorithms/shortest-path.js create mode 100644 src/graph/algorithms/tarjan.js create mode 100644 src/graph/builders/dependency.js create mode 100644 src/graph/builders/index.js create mode 100644 src/graph/builders/structure.js create mode 100644 src/graph/builders/temporal.js create mode 100644 src/graph/classifiers/index.js create mode 100644 src/graph/classifiers/risk.js create mode 100644 src/graph/classifiers/roles.js create mode 100644 src/graph/index.js create mode 100644 src/graph/model.js diff --git a/src/communities.js b/src/communities.js index 90456ea..e0d51ae 100644 --- a/src/communities.js +++ b/src/communities.js @@ -1,79 +1,9 @@ import path from 'node:path'; -import Graph from 'graphology'; -import louvain from 'graphology-communities-louvain'; -import { - getCallableNodes, - getCallEdges, - getFileNodesAll, - getImportEdges, - openReadonlyOrFail, -} from './db.js'; -import { isTestFile } from './infrastructure/test-filter.js'; +import { openReadonlyOrFail } from './db.js'; +import { louvainCommunities } from './graph/algorithms/louvain.js'; +import { buildDependencyGraph } from './graph/builders/dependency.js'; import { paginateResult } from './paginate.js'; -// ─── Graph Construction ─────────────────────────────────────────────── - -/** - * Build a graphology graph from the codegraph SQLite database. - * - * @param {object} db - open better-sqlite3 database (readonly) - * @param {object} opts - * @param {boolean} [opts.functions] - Function-level instead of file-level - * @param {boolean} [opts.noTests] - Exclude test files - * @returns {Graph} - */ -function buildGraphologyGraph(db, opts = {}) { - const graph = new Graph({ type: 'undirected' }); - - if (opts.functions) { - // Function-level: nodes = function/method/class symbols, edges = calls - let nodes = getCallableNodes(db); - if (opts.noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); - - const nodeIds = new Set(); - for (const n of nodes) { - const key = String(n.id); - graph.addNode(key, { label: n.name, file: n.file, kind: n.kind }); - nodeIds.add(n.id); - } - - const edges = getCallEdges(db); - for (const e of edges) { - if (!nodeIds.has(e.source_id) || !nodeIds.has(e.target_id)) continue; - const src = String(e.source_id); - const tgt = String(e.target_id); - if (src === tgt) continue; - if (!graph.hasEdge(src, tgt)) { - graph.addEdge(src, tgt); - } - } - } else { - // File-level: nodes = files, edges = imports + imports-type (deduplicated, cross-file) - let nodes = getFileNodesAll(db); - if (opts.noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); - - const nodeIds = new Set(); - for (const n of nodes) { - const key = String(n.id); - graph.addNode(key, { label: n.file, file: n.file }); - nodeIds.add(n.id); - } - - const edges = getImportEdges(db); - for (const e of edges) { - if (!nodeIds.has(e.source_id) || !nodeIds.has(e.target_id)) continue; - const src = String(e.source_id); - const tgt = String(e.target_id); - if (src === tgt) continue; - if (!graph.hasEdge(src, tgt)) { - graph.addEdge(src, tgt); - } - } - } - - return graph; -} - // ─── Directory Helpers ──────────────────────────────────────────────── function getDirectory(filePath) { @@ -97,11 +27,10 @@ function getDirectory(filePath) { */ export function communitiesData(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); - const resolution = opts.resolution ?? 1.0; let graph; try { - graph = buildGraphologyGraph(db, { - functions: opts.functions, + graph = buildDependencyGraph(db, { + fileLevel: !opts.functions, noTests: opts.noTests, }); } finally { @@ -109,27 +38,27 @@ export function communitiesData(customDbPath, opts = {}) { } // Handle empty or trivial graphs - if (graph.order === 0 || graph.size === 0) { + if (graph.nodeCount === 0 || graph.edgeCount === 0) { return { communities: [], modularity: 0, drift: { splitCandidates: [], mergeCandidates: [] }, - summary: { communityCount: 0, modularity: 0, nodeCount: graph.order, driftScore: 0 }, + summary: { communityCount: 0, modularity: 0, nodeCount: graph.nodeCount, driftScore: 0 }, }; } // Run Louvain - const details = louvain.detailed(graph, { resolution }); - const assignments = details.communities; // node → community id - const modularity = details.modularity; + const resolution = opts.resolution ?? 1.0; + const { assignments, modularity } = louvainCommunities(graph, { resolution }); // Group nodes by community const communityMap = new Map(); // community id → node keys[] - graph.forEachNode((key) => { - const cid = assignments[key]; + for (const [key] of graph.nodes()) { + const cid = assignments.get(key); + if (cid == null) continue; if (!communityMap.has(cid)) communityMap.set(cid, []); communityMap.get(cid).push(key); - }); + } // Build community objects const communities = []; @@ -139,7 +68,7 @@ export function communitiesData(customDbPath, opts = {}) { const dirCounts = {}; const memberData = []; for (const key of members) { - const attrs = graph.getNodeAttributes(key); + const attrs = graph.getNodeAttrs(key); const dir = getDirectory(attrs.file); dirCounts[dir] = (dirCounts[dir] || 0) + 1; memberData.push({ @@ -196,7 +125,6 @@ export function communitiesData(customDbPath, opts = {}) { mergeCandidates.sort((a, b) => b.directoryCount - a.directoryCount); // Drift score: 0-100 based on how much directory structure diverges from communities - // Higher = more drift (directories don't match communities) const totalDirs = dirToCommunities.size; const splitDirs = splitCandidates.length; const splitRatio = totalDirs > 0 ? splitDirs / totalDirs : 0; @@ -214,7 +142,7 @@ export function communitiesData(customDbPath, opts = {}) { summary: { communityCount: communities.length, modularity: +modularity.toFixed(4), - nodeCount: graph.order, + nodeCount: graph.nodeCount, driftScore, }, }; diff --git a/src/cycles.js b/src/cycles.js index 6accf7a..a66b6ff 100644 --- a/src/cycles.js +++ b/src/cycles.js @@ -1,4 +1,6 @@ -import { isTestFile } from './infrastructure/test-filter.js'; +import { tarjan } from './graph/algorithms/tarjan.js'; +import { buildDependencyGraph } from './graph/builders/dependency.js'; +import { CodeGraph } from './graph/model.js'; import { loadNative } from './native.js'; /** @@ -12,107 +14,50 @@ export function findCycles(db, opts = {}) { const fileLevel = opts.fileLevel !== false; const noTests = opts.noTests || false; - // Build adjacency list from SQLite (stays in JS — only the algorithm can move to Rust) - let edges; - if (fileLevel) { - edges = db - .prepare(` - SELECT DISTINCT n1.file AS source, n2.file AS target - FROM edges e - JOIN nodes n1 ON e.source_id = n1.id - JOIN nodes n2 ON e.target_id = n2.id - WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type') - `) - .all(); - if (noTests) { - edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target)); - } - } else { - edges = db - .prepare(` - SELECT DISTINCT - (n1.name || '|' || n1.file) AS source, - (n2.name || '|' || n2.file) AS target - FROM edges e - JOIN nodes n1 ON e.source_id = n1.id - JOIN nodes n2 ON e.target_id = n2.id - WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') - AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') - AND e.kind = 'calls' - AND n1.id != n2.id - `) - .all(); - if (noTests) { - edges = edges.filter((e) => { - const sourceFile = e.source.split('|').pop(); - const targetFile = e.target.split('|').pop(); - return !isTestFile(sourceFile) && !isTestFile(targetFile); - }); + const graph = buildDependencyGraph(db, { fileLevel, noTests }); + + // Build a label map: DB string ID → human-readable key + // File-level: file path; Function-level: name|file composite (for native Rust compat) + const idToLabel = new Map(); + for (const [id, attrs] of graph.nodes()) { + if (fileLevel) { + idToLabel.set(id, attrs.file); + } else { + idToLabel.set(id, `${attrs.label}|${attrs.file}`); } } + // Build edge array with human-readable keys (for native engine) + const edges = graph.toEdgeArray().map((e) => ({ + source: idToLabel.get(e.source), + target: idToLabel.get(e.target), + })); + // Try native Rust implementation const native = loadNative(); if (native) { return native.detectCycles(edges); } - // Fallback: JS Tarjan - return findCyclesJS(edges); + // Fallback: JS Tarjan via graph subsystem + // Re-key graph with human-readable labels for consistent output + const labelGraph = new CodeGraph(); + for (const { source, target } of edges) { + labelGraph.addEdge(source, target); + } + return tarjan(labelGraph); } /** * Pure-JS Tarjan's SCC implementation. + * Kept for backward compatibility — accepts raw {source, target}[] edges. */ export function findCyclesJS(edges) { - const graph = new Map(); + const graph = new CodeGraph(); for (const { source, target } of edges) { - if (!graph.has(source)) graph.set(source, []); - graph.get(source).push(target); - if (!graph.has(target)) graph.set(target, []); + graph.addEdge(source, target); } - - // Tarjan's strongly connected components algorithm - let index = 0; - const stack = []; - const onStack = new Set(); - const indices = new Map(); - const lowlinks = new Map(); - const sccs = []; - - function strongconnect(v) { - indices.set(v, index); - lowlinks.set(v, index); - index++; - stack.push(v); - onStack.add(v); - - for (const w of graph.get(v) || []) { - if (!indices.has(w)) { - strongconnect(w); - lowlinks.set(v, Math.min(lowlinks.get(v), lowlinks.get(w))); - } else if (onStack.has(w)) { - lowlinks.set(v, Math.min(lowlinks.get(v), indices.get(w))); - } - } - - if (lowlinks.get(v) === indices.get(v)) { - const scc = []; - let w; - do { - w = stack.pop(); - onStack.delete(w); - scc.push(w); - } while (w !== v); - if (scc.length > 1) sccs.push(scc); - } - } - - for (const node of graph.keys()) { - if (!indices.has(node)) strongconnect(node); - } - - return sccs; + return tarjan(graph); } /** diff --git a/src/graph/algorithms/bfs.js b/src/graph/algorithms/bfs.js new file mode 100644 index 0000000..9ecb25d --- /dev/null +++ b/src/graph/algorithms/bfs.js @@ -0,0 +1,49 @@ +/** + * Breadth-first traversal on a CodeGraph. + * + * @param {import('../model.js').CodeGraph} graph + * @param {string|string[]} startIds - One or more starting node IDs + * @param {{ maxDepth?: number, direction?: 'forward'|'backward'|'both' }} [opts] + * @returns {Map} nodeId → depth from nearest start node + */ +export function bfs(graph, startIds, opts = {}) { + const maxDepth = opts.maxDepth ?? Infinity; + const direction = opts.direction ?? 'forward'; + const starts = Array.isArray(startIds) ? startIds : [startIds]; + + const depths = new Map(); + const queue = []; + + for (const id of starts) { + const key = String(id); + if (graph.hasNode(key)) { + depths.set(key, 0); + queue.push(key); + } + } + + let head = 0; + while (head < queue.length) { + const current = queue[head++]; + const depth = depths.get(current); + if (depth >= maxDepth) continue; + + let neighbors; + if (direction === 'forward') { + neighbors = graph.successors(current); + } else if (direction === 'backward') { + neighbors = graph.predecessors(current); + } else { + neighbors = graph.neighbors(current); + } + + for (const n of neighbors) { + if (!depths.has(n)) { + depths.set(n, depth + 1); + queue.push(n); + } + } + } + + return depths; +} diff --git a/src/graph/algorithms/centrality.js b/src/graph/algorithms/centrality.js new file mode 100644 index 0000000..c7d7c91 --- /dev/null +++ b/src/graph/algorithms/centrality.js @@ -0,0 +1,16 @@ +/** + * Fan-in / fan-out centrality for all nodes in a CodeGraph. + * + * @param {import('../model.js').CodeGraph} graph + * @returns {Map} + */ +export function fanInOut(graph) { + const result = new Map(); + for (const id of graph.nodeIds()) { + result.set(id, { + fanIn: graph.inDegree(id), + fanOut: graph.outDegree(id), + }); + } + return result; +} diff --git a/src/graph/algorithms/index.js b/src/graph/algorithms/index.js new file mode 100644 index 0000000..3949b94 --- /dev/null +++ b/src/graph/algorithms/index.js @@ -0,0 +1,5 @@ +export { bfs } from './bfs.js'; +export { fanInOut } from './centrality.js'; +export { louvainCommunities } from './louvain.js'; +export { shortestPath } from './shortest-path.js'; +export { tarjan } from './tarjan.js'; diff --git a/src/graph/algorithms/louvain.js b/src/graph/algorithms/louvain.js new file mode 100644 index 0000000..2a7f3a6 --- /dev/null +++ b/src/graph/algorithms/louvain.js @@ -0,0 +1,26 @@ +/** + * Louvain community detection via graphology. + * + * @param {import('../model.js').CodeGraph} graph + * @param {{ resolution?: number }} [opts] + * @returns {{ assignments: Map, modularity: number }} + */ +import graphologyLouvain from 'graphology-communities-louvain'; + +export function louvainCommunities(graph, opts = {}) { + const gy = graph.toGraphology({ type: 'undirected' }); + + if (gy.order === 0 || gy.size === 0) { + return { assignments: new Map(), modularity: 0 }; + } + + const resolution = opts.resolution ?? 1.0; + const details = graphologyLouvain.detailed(gy, { resolution }); + + const assignments = new Map(); + for (const [nodeId, communityId] of Object.entries(details.communities)) { + assignments.set(nodeId, communityId); + } + + return { assignments, modularity: details.modularity }; +} diff --git a/src/graph/algorithms/shortest-path.js b/src/graph/algorithms/shortest-path.js new file mode 100644 index 0000000..c594559 --- /dev/null +++ b/src/graph/algorithms/shortest-path.js @@ -0,0 +1,41 @@ +/** + * BFS-based shortest path on a CodeGraph. + * + * @param {import('../model.js').CodeGraph} graph + * @param {string} fromId + * @param {string} toId + * @returns {string[]|null} Path from fromId to toId (inclusive), or null if unreachable + */ +export function shortestPath(graph, fromId, toId) { + const from = String(fromId); + const to = String(toId); + + if (!graph.hasNode(from) || !graph.hasNode(to)) return null; + if (from === to) return [from]; + + const parent = new Map(); + parent.set(from, null); + const queue = [from]; + let head = 0; + + while (head < queue.length) { + const current = queue[head++]; + for (const neighbor of graph.successors(current)) { + if (parent.has(neighbor)) continue; + parent.set(neighbor, current); + if (neighbor === to) { + // Reconstruct path + const path = []; + let node = to; + while (node !== null) { + path.push(node); + node = parent.get(node); + } + return path.reverse(); + } + queue.push(neighbor); + } + } + + return null; +} diff --git a/src/graph/algorithms/tarjan.js b/src/graph/algorithms/tarjan.js new file mode 100644 index 0000000..958d5f3 --- /dev/null +++ b/src/graph/algorithms/tarjan.js @@ -0,0 +1,49 @@ +/** + * Tarjan's strongly connected components algorithm. + * Operates on a CodeGraph instance. + * + * @param {import('../model.js').CodeGraph} graph + * @returns {string[][]} SCCs with length > 1 (cycles) + */ +export function tarjan(graph) { + let index = 0; + const stack = []; + const onStack = new Set(); + const indices = new Map(); + const lowlinks = new Map(); + const sccs = []; + + function strongconnect(v) { + indices.set(v, index); + lowlinks.set(v, index); + index++; + stack.push(v); + onStack.add(v); + + for (const w of graph.successors(v)) { + if (!indices.has(w)) { + strongconnect(w); + lowlinks.set(v, Math.min(lowlinks.get(v), lowlinks.get(w))); + } else if (onStack.has(w)) { + lowlinks.set(v, Math.min(lowlinks.get(v), indices.get(w))); + } + } + + if (lowlinks.get(v) === indices.get(v)) { + const scc = []; + let w; + do { + w = stack.pop(); + onStack.delete(w); + scc.push(w); + } while (w !== v); + if (scc.length > 1) sccs.push(scc); + } + } + + for (const id of graph.nodeIds()) { + if (!indices.has(id)) strongconnect(id); + } + + return sccs; +} diff --git a/src/graph/builders/dependency.js b/src/graph/builders/dependency.js new file mode 100644 index 0000000..b0e24f7 --- /dev/null +++ b/src/graph/builders/dependency.js @@ -0,0 +1,91 @@ +/** + * Build a CodeGraph from the SQLite database. + * Replaces inline graph construction in cycles.js, communities.js, viewer.js, export.js. + */ + +import { getCallableNodes, getCallEdges, getFileNodesAll, getImportEdges } from '../../db.js'; +import { isTestFile } from '../../infrastructure/test-filter.js'; +import { CodeGraph } from '../model.js'; + +/** + * @param {object} db - Open better-sqlite3 database (readonly) + * @param {object} [opts] + * @param {boolean} [opts.fileLevel=true] - File-level (imports) or function-level (calls) + * @param {boolean} [opts.noTests=false] - Exclude test files + * @param {number} [opts.minConfidence] - Minimum edge confidence (function-level only) + * @returns {CodeGraph} + */ +export function buildDependencyGraph(db, opts = {}) { + const fileLevel = opts.fileLevel !== false; + const noTests = opts.noTests || false; + + if (fileLevel) { + return buildFileLevelGraph(db, noTests); + } + return buildFunctionLevelGraph(db, noTests, opts.minConfidence); +} + +function buildFileLevelGraph(db, noTests) { + const graph = new CodeGraph(); + + let nodes = getFileNodesAll(db); + if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); + + const nodeIds = new Set(); + for (const n of nodes) { + graph.addNode(String(n.id), { label: n.file, file: n.file, dbId: n.id }); + nodeIds.add(n.id); + } + + const edges = getImportEdges(db); + for (const e of edges) { + if (!nodeIds.has(e.source_id) || !nodeIds.has(e.target_id)) continue; + const src = String(e.source_id); + const tgt = String(e.target_id); + if (src === tgt) continue; + if (!graph.hasEdge(src, tgt)) { + graph.addEdge(src, tgt, { kind: 'imports' }); + } + } + + return graph; +} + +function buildFunctionLevelGraph(db, noTests, minConfidence) { + const graph = new CodeGraph(); + + let nodes = getCallableNodes(db); + if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); + + const nodeIds = new Set(); + for (const n of nodes) { + graph.addNode(String(n.id), { + label: n.name, + file: n.file, + kind: n.kind, + dbId: n.id, + }); + nodeIds.add(n.id); + } + + let edges; + if (minConfidence != null) { + edges = db + .prepare("SELECT source_id, target_id FROM edges WHERE kind = 'calls' AND confidence >= ?") + .all(minConfidence); + } else { + edges = getCallEdges(db); + } + + for (const e of edges) { + if (!nodeIds.has(e.source_id) || !nodeIds.has(e.target_id)) continue; + const src = String(e.source_id); + const tgt = String(e.target_id); + if (src === tgt) continue; + if (!graph.hasEdge(src, tgt)) { + graph.addEdge(src, tgt, { kind: 'calls' }); + } + } + + return graph; +} diff --git a/src/graph/builders/index.js b/src/graph/builders/index.js new file mode 100644 index 0000000..18bbad6 --- /dev/null +++ b/src/graph/builders/index.js @@ -0,0 +1,3 @@ +export { buildDependencyGraph } from './dependency.js'; +export { buildStructureGraph } from './structure.js'; +export { buildTemporalGraph } from './temporal.js'; diff --git a/src/graph/builders/structure.js b/src/graph/builders/structure.js new file mode 100644 index 0000000..10efb11 --- /dev/null +++ b/src/graph/builders/structure.js @@ -0,0 +1,40 @@ +/** + * Build a containment graph (directory → file) from the SQLite database. + */ + +import { CodeGraph } from '../model.js'; + +/** + * @param {object} db - Open better-sqlite3 database (readonly) + * @returns {CodeGraph} Directed graph with directory→file containment edges + */ +export function buildStructureGraph(db) { + const graph = new CodeGraph(); + + const dirs = db.prepare("SELECT id, name FROM nodes WHERE kind = 'directory'").all(); + + for (const d of dirs) { + graph.addNode(String(d.id), { label: d.name, kind: 'directory' }); + } + + const files = db.prepare("SELECT id, name, file FROM nodes WHERE kind = 'file'").all(); + + for (const f of files) { + graph.addNode(String(f.id), { label: f.name, kind: 'file', file: f.file }); + } + + const containsEdges = db + .prepare(` + SELECT e.source_id, e.target_id + FROM edges e + JOIN nodes n ON e.source_id = n.id + WHERE e.kind = 'contains' AND n.kind = 'directory' + `) + .all(); + + for (const e of containsEdges) { + graph.addEdge(String(e.source_id), String(e.target_id), { kind: 'contains' }); + } + + return graph; +} diff --git a/src/graph/builders/temporal.js b/src/graph/builders/temporal.js new file mode 100644 index 0000000..c694d47 --- /dev/null +++ b/src/graph/builders/temporal.js @@ -0,0 +1,33 @@ +/** + * Build a co-change (temporal) graph weighted by Jaccard similarity. + */ + +import { CodeGraph } from '../model.js'; + +/** + * @param {object} db - Open better-sqlite3 database (readonly) + * @param {{ minJaccard?: number }} [opts] + * @returns {CodeGraph} Undirected graph weighted by Jaccard similarity + */ +export function buildTemporalGraph(db, opts = {}) { + const minJaccard = opts.minJaccard ?? 0.0; + const graph = new CodeGraph({ directed: false }); + + // Check if co_changes table exists + const tableCheck = db + .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='co_changes'") + .get(); + if (!tableCheck) return graph; + + const rows = db + .prepare('SELECT file_a, file_b, jaccard FROM co_changes WHERE jaccard >= ?') + .all(minJaccard); + + for (const r of rows) { + if (!graph.hasNode(r.file_a)) graph.addNode(r.file_a, { label: r.file_a }); + if (!graph.hasNode(r.file_b)) graph.addNode(r.file_b, { label: r.file_b }); + graph.addEdge(r.file_a, r.file_b, { jaccard: r.jaccard }); + } + + return graph; +} diff --git a/src/graph/classifiers/index.js b/src/graph/classifiers/index.js new file mode 100644 index 0000000..36f5435 --- /dev/null +++ b/src/graph/classifiers/index.js @@ -0,0 +1,2 @@ +export { DEFAULT_WEIGHTS, minMaxNormalize, ROLE_WEIGHTS, scoreRisk } from './risk.js'; +export { classifyRoles, FRAMEWORK_ENTRY_PREFIXES } from './roles.js'; diff --git a/src/graph/classifiers/risk.js b/src/graph/classifiers/risk.js new file mode 100644 index 0000000..f1aafe6 --- /dev/null +++ b/src/graph/classifiers/risk.js @@ -0,0 +1,77 @@ +/** + * Risk scoring — pure logic, no DB. + */ + +export const DEFAULT_WEIGHTS = { + fanIn: 0.25, + complexity: 0.3, + churn: 0.2, + role: 0.15, + mi: 0.1, +}; + +export const ROLE_WEIGHTS = { + core: 1.0, + utility: 0.9, + entry: 0.8, + adapter: 0.5, + leaf: 0.2, + dead: 0.1, +}; + +const DEFAULT_ROLE_WEIGHT = 0.5; + +/** Min-max normalize an array of numbers. All-equal → all zeros. */ +export function minMaxNormalize(values) { + const min = Math.min(...values); + const max = Math.max(...values); + if (max === min) return values.map(() => 0); + const range = max - min; + return values.map((v) => (v - min) / range); +} + +function round4(n) { + return Math.round(n * 10000) / 10000; +} + +/** + * Score risk for a list of items. + * + * @param {{ fan_in: number, cognitive: number, churn: number, mi: number, role: string|null }[]} items + * @param {object} [weights] - Override DEFAULT_WEIGHTS + * @returns {{ normFanIn: number, normComplexity: number, normChurn: number, normMI: number, roleWeight: number, riskScore: number }[]} + * Parallel array with risk metrics for each input item. + */ +export function scoreRisk(items, weights = {}) { + const w = { ...DEFAULT_WEIGHTS, ...weights }; + + const fanIns = items.map((r) => r.fan_in); + const cognitives = items.map((r) => r.cognitive); + const churns = items.map((r) => r.churn); + const mis = items.map((r) => r.mi); + + const normFanIns = minMaxNormalize(fanIns); + const normCognitives = minMaxNormalize(cognitives); + const normChurns = minMaxNormalize(churns); + const normMIsRaw = minMaxNormalize(mis); + const normMIs = normMIsRaw.map((v) => round4(1 - v)); + + return items.map((r, i) => { + const roleWeight = ROLE_WEIGHTS[r.role] ?? DEFAULT_ROLE_WEIGHT; + const riskScore = + w.fanIn * normFanIns[i] + + w.complexity * normCognitives[i] + + w.churn * normChurns[i] + + w.role * roleWeight + + w.mi * normMIs[i]; + + return { + normFanIn: round4(normFanIns[i]), + normComplexity: round4(normCognitives[i]), + normChurn: round4(normChurns[i]), + normMI: round4(normMIs[i]), + roleWeight, + riskScore: round4(riskScore), + }; + }); +} diff --git a/src/graph/classifiers/roles.js b/src/graph/classifiers/roles.js new file mode 100644 index 0000000..394197d --- /dev/null +++ b/src/graph/classifiers/roles.js @@ -0,0 +1,64 @@ +/** + * Node role classification — pure logic, no DB. + * + * Roles: entry, core, utility, adapter, leaf, dead + */ + +export const FRAMEWORK_ENTRY_PREFIXES = ['route:', 'event:', 'command:']; + +function median(sorted) { + if (sorted.length === 0) return 0; + const mid = Math.floor(sorted.length / 2); + return sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid]; +} + +/** + * Classify nodes into architectural roles based on fan-in/fan-out metrics. + * + * @param {{ id: string, name: string, fanIn: number, fanOut: number, isExported: boolean }[]} nodes + * @returns {Map} nodeId → role + */ +export function classifyRoles(nodes) { + if (nodes.length === 0) return new Map(); + + const nonZeroFanIn = nodes + .filter((n) => n.fanIn > 0) + .map((n) => n.fanIn) + .sort((a, b) => a - b); + const nonZeroFanOut = nodes + .filter((n) => n.fanOut > 0) + .map((n) => n.fanOut) + .sort((a, b) => a - b); + + const medFanIn = median(nonZeroFanIn); + const medFanOut = median(nonZeroFanOut); + + const result = new Map(); + + for (const node of nodes) { + const highIn = node.fanIn >= medFanIn && node.fanIn > 0; + const highOut = node.fanOut >= medFanOut && node.fanOut > 0; + + let role; + const isFrameworkEntry = FRAMEWORK_ENTRY_PREFIXES.some((p) => node.name.startsWith(p)); + if (isFrameworkEntry) { + role = 'entry'; + } else if (node.fanIn === 0 && !node.isExported) { + role = 'dead'; + } else if (node.fanIn === 0 && node.isExported) { + role = 'entry'; + } else if (highIn && !highOut) { + role = 'core'; + } else if (highIn && highOut) { + role = 'utility'; + } else if (!highIn && highOut) { + role = 'adapter'; + } else { + role = 'leaf'; + } + + result.set(node.id, role); + } + + return result; +} diff --git a/src/graph/index.js b/src/graph/index.js new file mode 100644 index 0000000..e9ac5b4 --- /dev/null +++ b/src/graph/index.js @@ -0,0 +1,13 @@ +// Graph subsystem barrel export + +export { bfs, fanInOut, louvainCommunities, shortestPath, tarjan } from './algorithms/index.js'; +export { buildDependencyGraph, buildStructureGraph, buildTemporalGraph } from './builders/index.js'; +export { + classifyRoles, + DEFAULT_WEIGHTS, + FRAMEWORK_ENTRY_PREFIXES, + minMaxNormalize, + ROLE_WEIGHTS, + scoreRisk, +} from './classifiers/index.js'; +export { CodeGraph } from './model.js'; diff --git a/src/graph/model.js b/src/graph/model.js new file mode 100644 index 0000000..062a487 --- /dev/null +++ b/src/graph/model.js @@ -0,0 +1,236 @@ +/** + * Unified in-memory graph model. + * + * Stores directed (default) or undirected graphs with node/edge attributes. + * Node IDs are always strings. DB integer IDs should be stringified before use. + */ + +import Graph from 'graphology'; + +export class CodeGraph { + /** + * @param {{ directed?: boolean }} [opts] + */ + constructor(opts = {}) { + this._directed = opts.directed !== false; + /** @type {Map} */ + this._nodes = new Map(); + /** @type {Map>} node → (target → edgeAttrs) */ + this._successors = new Map(); + /** @type {Map>} node → (source → edgeAttrs) */ + this._predecessors = new Map(); + } + + get directed() { + return this._directed; + } + + get nodeCount() { + return this._nodes.size; + } + + get edgeCount() { + let count = 0; + for (const targets of this._successors.values()) count += targets.size; + return count; + } + + // ─── Node operations ──────────────────────────────────────────── + + addNode(id, attrs = {}) { + const key = String(id); + this._nodes.set(key, attrs); + if (!this._successors.has(key)) this._successors.set(key, new Map()); + if (!this._predecessors.has(key)) this._predecessors.set(key, new Map()); + return this; + } + + hasNode(id) { + return this._nodes.has(String(id)); + } + + getNodeAttrs(id) { + return this._nodes.get(String(id)); + } + + /** @returns {IterableIterator<[string, object]>} */ + nodes() { + return this._nodes.entries(); + } + + /** @returns {string[]} */ + nodeIds() { + return [...this._nodes.keys()]; + } + + // ─── Edge operations ──────────────────────────────────────────── + + addEdge(source, target, attrs = {}) { + const src = String(source); + const tgt = String(target); + // Auto-add nodes if missing + if (!this._nodes.has(src)) this.addNode(src); + if (!this._nodes.has(tgt)) this.addNode(tgt); + + this._successors.get(src).set(tgt, attrs); + this._predecessors.get(tgt).set(src, attrs); + + if (!this._directed) { + this._successors.get(tgt).set(src, attrs); + this._predecessors.get(src).set(tgt, attrs); + } + return this; + } + + hasEdge(source, target) { + const src = String(source); + const tgt = String(target); + return this._successors.has(src) && this._successors.get(src).has(tgt); + } + + getEdgeAttrs(source, target) { + const src = String(source); + const tgt = String(target); + return this._successors.get(src)?.get(tgt); + } + + /** @yields {[string, string, object]} source, target, attrs */ + *edges() { + const seen = this._directed ? null : new Set(); + for (const [src, targets] of this._successors) { + for (const [tgt, attrs] of targets) { + if (!this._directed) { + const key = src < tgt ? `${src}\0${tgt}` : `${tgt}\0${src}`; + if (seen.has(key)) continue; + seen.add(key); + } + yield [src, tgt, attrs]; + } + } + } + + // ─── Adjacency ────────────────────────────────────────────────── + + /** Direct successors of a node (outgoing edges). */ + successors(id) { + const key = String(id); + const map = this._successors.get(key); + return map ? [...map.keys()] : []; + } + + /** Direct predecessors of a node (incoming edges). */ + predecessors(id) { + const key = String(id); + const map = this._predecessors.get(key); + return map ? [...map.keys()] : []; + } + + /** All neighbors (union of successors + predecessors). */ + neighbors(id) { + const key = String(id); + const set = new Set(); + const succ = this._successors.get(key); + if (succ) for (const k of succ.keys()) set.add(k); + const pred = this._predecessors.get(key); + if (pred) for (const k of pred.keys()) set.add(k); + return [...set]; + } + + outDegree(id) { + const map = this._successors.get(String(id)); + return map ? map.size : 0; + } + + inDegree(id) { + const map = this._predecessors.get(String(id)); + return map ? map.size : 0; + } + + // ─── Filtering ────────────────────────────────────────────────── + + /** Return a new graph containing only nodes matching the predicate. */ + subgraph(predicate) { + const g = new CodeGraph({ directed: this._directed }); + for (const [id, attrs] of this._nodes) { + if (predicate(id, attrs)) g.addNode(id, { ...attrs }); + } + for (const [src, tgt, attrs] of this.edges()) { + if (g.hasNode(src) && g.hasNode(tgt)) { + g.addEdge(src, tgt, { ...attrs }); + } + } + return g; + } + + /** Return a new graph containing only edges matching the predicate. */ + filterEdges(predicate) { + const g = new CodeGraph({ directed: this._directed }); + for (const [id, attrs] of this._nodes) { + g.addNode(id, { ...attrs }); + } + for (const [src, tgt, attrs] of this.edges()) { + if (predicate(src, tgt, attrs)) { + g.addEdge(src, tgt, { ...attrs }); + } + } + return g; + } + + // ─── Conversion ───────────────────────────────────────────────── + + /** Convert to flat edge array for native Rust interop. */ + toEdgeArray() { + const result = []; + for (const [source, target] of this.edges()) { + result.push({ source, target }); + } + return result; + } + + /** Convert to graphology instance (for Louvain etc). */ + toGraphology(opts = {}) { + const type = opts.type || (this._directed ? 'directed' : 'undirected'); + const g = new Graph({ type }); + for (const [id] of this._nodes) { + g.addNode(id); + } + + if (type === 'undirected') { + // Deduplicate: only add each unordered pair once + for (const [src, tgt] of this.edges()) { + if (src === tgt) continue; + if (!g.hasEdge(src, tgt)) g.addEdge(src, tgt); + } + } else { + for (const [src, tgt] of this.edges()) { + if (src === tgt) continue; + if (!g.hasEdge(src, tgt)) g.addEdge(src, tgt); + } + } + return g; + } + + // ─── Utilities ────────────────────────────────────────────────── + + clone() { + const g = new CodeGraph({ directed: this._directed }); + for (const [id, attrs] of this._nodes) { + g.addNode(id, { ...attrs }); + } + for (const [src, tgt, attrs] of this.edges()) { + g.addEdge(src, tgt, { ...attrs }); + } + return g; + } + + /** Merge another graph into this one. Nodes/edges from other override on conflict. */ + merge(other) { + for (const [id, attrs] of other.nodes()) { + this.addNode(id, attrs); + } + for (const [src, tgt, attrs] of other.edges()) { + this.addEdge(src, tgt, attrs); + } + return this; + } +} diff --git a/src/structure.js b/src/structure.js index d97a093..8a65125 100644 --- a/src/structure.js +++ b/src/structure.js @@ -312,13 +312,10 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director // ─── Node role classification ───────────────────────────────────────── -export const FRAMEWORK_ENTRY_PREFIXES = ['route:', 'event:', 'command:']; +// Re-export from classifier for backward compatibility +export { FRAMEWORK_ENTRY_PREFIXES } from './graph/classifiers/roles.js'; -function median(sorted) { - if (sorted.length === 0) return 0; - const mid = Math.floor(sorted.length / 2); - return sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid]; -} +import { classifyRoles } from './graph/classifiers/roles.js'; export function classifyNodeRoles(db) { const rows = db @@ -354,44 +351,22 @@ export function classifyNodeRoles(db) { .map((r) => r.target_id), ); - const nonZeroFanIn = rows - .filter((r) => r.fan_in > 0) - .map((r) => r.fan_in) - .sort((a, b) => a - b); - const nonZeroFanOut = rows - .filter((r) => r.fan_out > 0) - .map((r) => r.fan_out) - .sort((a, b) => a - b); + // Delegate classification to the pure-logic classifier + const classifierInput = rows.map((r) => ({ + id: String(r.id), + name: r.name, + fanIn: r.fan_in, + fanOut: r.fan_out, + isExported: exportedIds.has(r.id), + })); - const medFanIn = median(nonZeroFanIn); - const medFanOut = median(nonZeroFanOut); + const roleMap = classifyRoles(classifierInput); - const updates = []; + // Build summary and updates const summary = { entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0 }; - + const updates = []; for (const row of rows) { - const highIn = row.fan_in >= medFanIn && row.fan_in > 0; - const highOut = row.fan_out >= medFanOut && row.fan_out > 0; - const isExported = exportedIds.has(row.id); - - let role; - const isFrameworkEntry = FRAMEWORK_ENTRY_PREFIXES.some((p) => row.name.startsWith(p)); - if (isFrameworkEntry) { - role = 'entry'; - } else if (row.fan_in === 0 && !isExported) { - role = 'dead'; - } else if (row.fan_in === 0 && isExported) { - role = 'entry'; - } else if (highIn && !highOut) { - role = 'core'; - } else if (highIn && highOut) { - role = 'utility'; - } else if (!highIn && highOut) { - role = 'adapter'; - } else { - role = 'leaf'; - } - + const role = roleMap.get(String(row.id)) || 'leaf'; updates.push({ id: row.id, role }); summary[role]++; } diff --git a/src/triage.js b/src/triage.js index c5d8840..5f02aa9 100644 --- a/src/triage.js +++ b/src/triage.js @@ -1,40 +1,9 @@ import { findNodesForTriage, openReadonlyOrFail } from './db.js'; +import { DEFAULT_WEIGHTS, scoreRisk } from './graph/classifiers/risk.js'; import { isTestFile } from './infrastructure/test-filter.js'; import { warn } from './logger.js'; import { paginateResult } from './paginate.js'; -// ─── Constants ──────────────────────────────────────────────────────── - -const DEFAULT_WEIGHTS = { - fanIn: 0.25, - complexity: 0.3, - churn: 0.2, - role: 0.15, - mi: 0.1, -}; - -const ROLE_WEIGHTS = { - core: 1.0, - utility: 0.9, - entry: 0.8, - adapter: 0.5, - leaf: 0.2, - dead: 0.1, -}; - -const DEFAULT_ROLE_WEIGHT = 0.5; - -// ─── Helpers ────────────────────────────────────────────────────────── - -/** Min-max normalize an array of numbers. All-equal → all zeros. */ -function minMaxNormalize(values) { - const min = Math.min(...values); - const max = Math.max(...values); - if (max === min) return values.map(() => 0); - const range = max - min; - return values.map((v) => (v - min) / range); -} - // ─── Data Function ──────────────────────────────────────────────────── /** @@ -81,48 +50,27 @@ export function triageData(customDbPath, opts = {}) { }; } - // Extract raw signal arrays - const fanIns = filtered.map((r) => r.fan_in); - const cognitives = filtered.map((r) => r.cognitive); - const churns = filtered.map((r) => r.churn); - const mis = filtered.map((r) => r.mi); - - // Min-max normalize - const normFanIns = minMaxNormalize(fanIns); - const normCognitives = minMaxNormalize(cognitives); - const normChurns = minMaxNormalize(churns); - // MI: higher is better, so invert: 1 - norm(mi) - const normMIsRaw = minMaxNormalize(mis); - const normMIs = normMIsRaw.map((v) => round4(1 - v)); + // Delegate scoring to classifier + const riskMetrics = scoreRisk(filtered, weights); // Compute risk scores - const items = filtered.map((r, i) => { - const roleWeight = ROLE_WEIGHTS[r.role] ?? DEFAULT_ROLE_WEIGHT; - const riskScore = - weights.fanIn * normFanIns[i] + - weights.complexity * normCognitives[i] + - weights.churn * normChurns[i] + - weights.role * roleWeight + - weights.mi * normMIs[i]; - - return { - name: r.name, - kind: r.kind, - file: r.file, - line: r.line, - role: r.role || null, - fanIn: r.fan_in, - cognitive: r.cognitive, - churn: r.churn, - maintainabilityIndex: r.mi, - normFanIn: round4(normFanIns[i]), - normComplexity: round4(normCognitives[i]), - normChurn: round4(normChurns[i]), - normMI: round4(normMIs[i]), - roleWeight, - riskScore: round4(riskScore), - }; - }); + const items = filtered.map((r, i) => ({ + name: r.name, + kind: r.kind, + file: r.file, + line: r.line, + role: r.role || null, + fanIn: r.fan_in, + cognitive: r.cognitive, + churn: r.churn, + maintainabilityIndex: r.mi, + normFanIn: riskMetrics[i].normFanIn, + normComplexity: riskMetrics[i].normComplexity, + normChurn: riskMetrics[i].normChurn, + normMI: riskMetrics[i].normMI, + roleWeight: riskMetrics[i].roleWeight, + riskScore: riskMetrics[i].riskScore, + })); // Apply minScore filter const scored = minScore != null ? items.filter((it) => it.riskScore >= minScore) : items; diff --git a/src/viewer.js b/src/viewer.js index c4a06b7..6bd94d3 100644 --- a/src/viewer.js +++ b/src/viewer.js @@ -1,7 +1,7 @@ import fs from 'node:fs'; import path from 'node:path'; -import Graph from 'graphology'; -import louvain from 'graphology-communities-louvain'; +import { louvainCommunities } from './graph/algorithms/louvain.js'; +import { CodeGraph } from './graph/model.js'; import { isTestFile } from './infrastructure/test-filter.js'; const DEFAULT_MIN_CONFIDENCE = 0.5; @@ -208,7 +208,16 @@ function prepareFunctionLevelData(db, noTests, minConf, cfg) { // table may not exist in old DBs } - // Fan-in / fan-out + // Fan-in / fan-out via graph subsystem + const fnGraph = new CodeGraph(); + for (const [id] of nodeMap) fnGraph.addNode(String(id)); + for (const e of edges) { + const src = String(e.source_id); + const tgt = String(e.target_id); + if (src !== tgt && !fnGraph.hasEdge(src, tgt)) fnGraph.addEdge(src, tgt); + } + + // Use DB-level fan-in/fan-out (counts ALL call edges, not just visible) const fanInMap = new Map(); const fanOutMap = new Map(); const fanInRows = db @@ -225,19 +234,12 @@ function prepareFunctionLevelData(db, noTests, minConf, cfg) { .all(); for (const r of fanOutRows) fanOutMap.set(r.node_id, r.fan_out); - // Communities (Louvain) + // Communities (Louvain) via graph subsystem const communityMap = new Map(); if (nodeMap.size > 0) { try { - const graph = new Graph({ type: 'undirected' }); - for (const [id] of nodeMap) graph.addNode(String(id)); - for (const e of edges) { - const src = String(e.source_id); - const tgt = String(e.target_id); - if (src !== tgt && !graph.hasEdge(src, tgt)) graph.addEdge(src, tgt); - } - const communities = louvain(graph); - for (const [nid, cid] of Object.entries(communities)) communityMap.set(Number(nid), cid); + const { assignments } = louvainCommunities(fnGraph); + for (const [nid, cid] of assignments) communityMap.set(Number(nid), cid); } catch { // louvain can fail on disconnected graphs } @@ -335,17 +337,18 @@ function prepareFileLevelData(db, noTests, minConf, cfg) { fanInCount.set(target, (fanInCount.get(target) || 0) + 1); } - // Communities + // Communities via graph subsystem const communityMap = new Map(); if (files.size > 0) { try { - const graph = new Graph({ type: 'undirected' }); - for (const f of files) graph.addNode(f); + const fileGraph = new CodeGraph(); + for (const f of files) fileGraph.addNode(f); for (const { source, target } of edges) { - if (source !== target && !graph.hasEdge(source, target)) graph.addEdge(source, target); + if (source !== target && !fileGraph.hasEdge(source, target)) + fileGraph.addEdge(source, target); } - const communities = louvain(graph); - for (const [file, cid] of Object.entries(communities)) communityMap.set(file, cid); + const { assignments } = louvainCommunities(fileGraph); + for (const [file, cid] of assignments) communityMap.set(file, cid); } catch { // ignore } From ca868baf3400da24b527295725d61562beb95f26 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 13 Mar 2026 15:20:37 -0600 Subject: [PATCH 2/5] =?UTF-8?q?fix:=20address=20review=20=E2=80=94=20colla?= =?UTF-8?q?pse=20dead=20toGraphology=20conditional,=20deduplicate=20round4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Collapse identical if/else branches in CodeGraph.toGraphology() into a single loop (the undirected dedup is handled by graphology's hasEdge guard) - Export round4 from risk.js and import in triage.js instead of redeclaring --- src/graph/classifiers/index.js | 2 +- src/graph/classifiers/risk.js | 2 +- src/graph/model.js | 14 +++----------- src/triage.js | 7 +------ 4 files changed, 6 insertions(+), 19 deletions(-) diff --git a/src/graph/classifiers/index.js b/src/graph/classifiers/index.js index 36f5435..0db0d06 100644 --- a/src/graph/classifiers/index.js +++ b/src/graph/classifiers/index.js @@ -1,2 +1,2 @@ -export { DEFAULT_WEIGHTS, minMaxNormalize, ROLE_WEIGHTS, scoreRisk } from './risk.js'; +export { DEFAULT_WEIGHTS, minMaxNormalize, ROLE_WEIGHTS, round4, scoreRisk } from './risk.js'; export { classifyRoles, FRAMEWORK_ENTRY_PREFIXES } from './roles.js'; diff --git a/src/graph/classifiers/risk.js b/src/graph/classifiers/risk.js index f1aafe6..642ee8a 100644 --- a/src/graph/classifiers/risk.js +++ b/src/graph/classifiers/risk.js @@ -30,7 +30,7 @@ export function minMaxNormalize(values) { return values.map((v) => (v - min) / range); } -function round4(n) { +export function round4(n) { return Math.round(n * 10000) / 10000; } diff --git a/src/graph/model.js b/src/graph/model.js index 062a487..7bd3cb6 100644 --- a/src/graph/model.js +++ b/src/graph/model.js @@ -195,17 +195,9 @@ export class CodeGraph { g.addNode(id); } - if (type === 'undirected') { - // Deduplicate: only add each unordered pair once - for (const [src, tgt] of this.edges()) { - if (src === tgt) continue; - if (!g.hasEdge(src, tgt)) g.addEdge(src, tgt); - } - } else { - for (const [src, tgt] of this.edges()) { - if (src === tgt) continue; - if (!g.hasEdge(src, tgt)) g.addEdge(src, tgt); - } + for (const [src, tgt] of this.edges()) { + if (src === tgt) continue; + if (!g.hasEdge(src, tgt)) g.addEdge(src, tgt); } return g; } diff --git a/src/triage.js b/src/triage.js index 5f02aa9..3624f4d 100644 --- a/src/triage.js +++ b/src/triage.js @@ -1,5 +1,5 @@ import { findNodesForTriage, openReadonlyOrFail } from './db.js'; -import { DEFAULT_WEIGHTS, scoreRisk } from './graph/classifiers/risk.js'; +import { DEFAULT_WEIGHTS, round4, scoreRisk } from './graph/classifiers/risk.js'; import { isTestFile } from './infrastructure/test-filter.js'; import { warn } from './logger.js'; import { paginateResult } from './paginate.js'; @@ -119,8 +119,3 @@ export function triageData(customDbPath, opts = {}) { } } -// ─── Utilities ──────────────────────────────────────────────────────── - -function round4(n) { - return Math.round(n * 10000) / 10000; -} From ce5b6b8c545c30ecf2f4c73ee3c8c3038044769f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 13 Mar 2026 15:30:27 -0600 Subject: [PATCH 3/5] =?UTF-8?q?fix:=20resolve=20lint=20errors=20=E2=80=94?= =?UTF-8?q?=20unused=20param=20and=20trailing=20blank=20line?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/builder/incremental.js | 2 +- src/triage.js | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/builder/incremental.js b/src/builder/incremental.js index 8081b42..d30c89d 100644 --- a/src/builder/incremental.js +++ b/src/builder/incremental.js @@ -25,7 +25,7 @@ import { BUILTIN_RECEIVERS, readFileSafe } from './helpers.js'; * @param {Function} [options.diffSymbols] - Symbol diff function * @returns {Promise} Update result or null on failure */ -export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, options = {}) { +export async function rebuildFile(_db, rootDir, filePath, stmts, engineOpts, cache, options = {}) { const { diffSymbols } = options; const relPath = normalizePath(path.relative(rootDir, filePath)); const oldNodes = stmts.countNodes.get(relPath)?.c || 0; diff --git a/src/triage.js b/src/triage.js index 3624f4d..3495074 100644 --- a/src/triage.js +++ b/src/triage.js @@ -118,4 +118,3 @@ export function triageData(customDbPath, opts = {}) { db.close(); } } - From e1ad61b272250fdd8d9ff2aedbce6859a503da6e Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 13 Mar 2026 16:29:17 -0600 Subject: [PATCH 4/5] fix: add clarifying comments for edge dedup separator and risk weight rationale Impact: 2 functions changed, 0 affected --- src/graph/classifiers/risk.js | 4 ++++ src/graph/model.js | 1 + 2 files changed, 5 insertions(+) diff --git a/src/graph/classifiers/risk.js b/src/graph/classifiers/risk.js index f1aafe6..b31fc41 100644 --- a/src/graph/classifiers/risk.js +++ b/src/graph/classifiers/risk.js @@ -2,6 +2,10 @@ * Risk scoring — pure logic, no DB. */ +// Weights sum to 1.0. Complexity gets the highest weight because cognitive load +// is the strongest predictor of defect density. Fan-in and churn are next as +// they reflect coupling and volatility. Role adds architectural context, and MI +// (maintainability index) is a weaker composite signal, so it gets the least. export const DEFAULT_WEIGHTS = { fanIn: 0.25, complexity: 0.3, diff --git a/src/graph/model.js b/src/graph/model.js index 9f85b8f..733be68 100644 --- a/src/graph/model.js +++ b/src/graph/model.js @@ -101,6 +101,7 @@ export class CodeGraph { for (const [src, targets] of this._successors) { for (const [tgt, attrs] of targets) { if (!this._directed) { + // \0 is safe as separator — node IDs are file paths/symbols, never contain null bytes const key = src < tgt ? `${src}\0${tgt}` : `${tgt}\0${src}`; if (seen.has(key)) continue; seen.add(key); From 92bc2f29a2ecb706dd378cbe51ca7b02abf5868b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 13 Mar 2026 16:47:23 -0600 Subject: [PATCH 5/5] docs: add rationale comments to risk scoring weights Address PR #436 review feedback: document why each risk dimension and role category is weighted the way it is. --- src/graph/classifiers/risk.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/graph/classifiers/risk.js b/src/graph/classifiers/risk.js index b31fc41..d427efb 100644 --- a/src/graph/classifiers/risk.js +++ b/src/graph/classifiers/risk.js @@ -14,6 +14,10 @@ export const DEFAULT_WEIGHTS = { mi: 0.1, }; +// Role weights reflect structural importance: core modules are central to the +// dependency graph, utilities are widely imported, entry points are API +// surfaces. Adapters bridge subsystems but are replaceable. Leaves and dead +// code have minimal downstream impact. export const ROLE_WEIGHTS = { core: 1.0, utility: 0.9,