Dev Intel Pipeline v2 — multi-language semantic graph extractor

Phase 1: extract.js — tree-sitter AST parser (TS/JS/Python/Go/Java/Bash) + config parsers (YAML/HCL)
Phase 2: graph.js — in-memory directed graph store with build/query/diff CLI
Phase 3: namespace.js — cross-repo namespace registry with 3-tier resolution
Phase 4: semantic-diff.js — categorized diffs with impact scoring (0-100)
Phase 5: pipeline.js — batch extraction, incremental diffing, benchmarking

Benchmark: 4,325 files, 21,646 nodes, 133,979 edges in 67s (15ms/file)
BMad SPA reviews: all phases GO
This commit is contained in:
Jarvis Prime
2026-03-09 05:29:29 +00:00
commit efb12d003b
19 changed files with 4106 additions and 0 deletions

328
semantic-diff.js Normal file
View File

@@ -0,0 +1,328 @@
const fs = require('fs');
const path = require('path');
const GraphStore = require('./graph.js');
/**
* Developer Intelligence Pipeline v2 - Semantic Diff Engine
* Compares two graph snapshots and produces categorized, scored diffs.
* No external dependencies.
*/
/**
 * Severity bands for an impact score, as [low, high, label] triples listed
 * in ascending order.
 */
const SEVERITY = [
  [0, 20, 'trivial'],
  [21, 40, 'low'],
  [41, 60, 'moderate'],
  [61, 80, 'high'],
  [81, 100, 'critical'],
];

/**
 * Map an impact score to its severity label.
 *
 * Bands are matched by their upper bound only, so a fractional score that
 * lands between two integer bands (e.g. 20.5) resolves to the next band up
 * instead of falling through to 'unknown'.
 *
 * @param {number} score - Impact score, expected to lie within 0-100.
 * @returns {string} The band label, or 'unknown' when the score is NaN or
 *   lies outside the overall range covered by SEVERITY.
 */
function severityLabel(score) {
  const lowest = SEVERITY[0][0];
  // Negated comparison so that NaN/undefined are rejected along with
  // scores below the first band.
  if (!(score >= lowest)) return 'unknown';
  for (const [, high, label] of SEVERITY) {
    if (score <= high) return label;
  }
  return 'unknown';
}
/**
 * Classify a single entity change into an impact category.
 *
 * - A removed public entity is 'breaking'.
 * - An added public entity is 'significant'.
 * - A modification whose only differences are in `_file` / `line_range`
 *   is 'cosmetic'; otherwise it is 'significant' when either version is
 *   public and 'internal' when neither is.
 * - Everything else is 'internal'.
 *
 * @param {string} changeType - 'added', 'removed' or 'modified'.
 * @param {object|null} entity - The added/removed entity, or the new
 *   version of a modified entity.
 * @param {object|null} oldEntity - The previous version (modifications only).
 * @returns {string} 'breaking', 'significant', 'internal' or 'cosmetic'.
 */
function categorizeEntityChange(changeType, entity, oldEntity) {
  const exposed = (node) => Boolean(node) && node.visibility === 'public';

  switch (changeType) {
    case 'removed':
      return exposed(entity) ? 'breaking' : 'internal';
    case 'added':
      return exposed(entity) ? 'significant' : 'internal';
    case 'modified':
      break;
    default:
      return 'internal';
  }

  if (oldEntity && entity) {
    // Keys that describe where the entity lives rather than what it is.
    const positional = ['_file', 'line_range'];
    const semanticKeys = (obj) =>
      Object.keys(obj).filter((key) => !positional.includes(key));

    const before = semanticKeys(oldEntity);
    const after = semanticKeys(entity);
    const unchanged =
      before.length === after.length &&
      before.every(
        (key) => JSON.stringify(oldEntity[key]) === JSON.stringify(entity[key])
      );
    if (unchanged) return 'cosmetic';
  }

  return exposed(entity) || exposed(oldEntity) ? 'significant' : 'internal';
}
/**
 * Classify a relationship change into an impact category.
 *
 * A relationship "involves public" when its source or target node, looked
 * up in the supplied graph, has visibility 'public'. Removing such a
 * relationship is 'breaking'; any other change to one is 'significant';
 * relationships between non-public (or unknown) nodes are 'internal'.
 *
 * @param {string} changeType - Only 'removed' is treated specially.
 * @param {{source: *, target: *}} rel - The relationship that changed.
 * @param {{nodes: Map}|null} graph - Graph used to resolve the endpoints;
 *   when falsy, neither endpoint is considered public.
 * @returns {string} 'breaking', 'significant' or 'internal'.
 */
function categorizeRelChange(changeType, rel, graph) {
  const resolve = (id) => (graph ? graph.nodes.get(id) : null);
  const endpoints = [resolve(rel.source), resolve(rel.target)];
  const touchesPublic = endpoints.some(
    (node) => node && node.visibility === 'public'
  );

  if (!touchesPublic) return 'internal';
  return changeType === 'removed' ? 'breaking' : 'significant';
}
/**
 * Build the semantic diff of two graph snapshots.
 *
 * Takes the raw diff from GraphStore.diffSnapshots, sorts every entity and
 * relationship change into an impact category, and attaches the impact
 * score, its severity label, summary statistics and the caller/dependent
 * impact analysis.
 *
 * @param {object} oldGraph - Snapshot before the change.
 * @param {object} newGraph - Snapshot after the change.
 * @returns {{categorized: object, score: number, severity: string,
 *   stats: object, impactAnalysis: object, rawDiff: object}}
 */
function semanticDiff(oldGraph, newGraph) {
  const rawDiff = GraphStore.diffSnapshots(oldGraph, newGraph);

  const categorized = { breaking: [], significant: [], internal: [], cosmetic: [] };
  const record = (category, entry) => {
    categorized[category].push(entry);
  };

  // Entity changes, in the order: added, removed, modified.
  for (const entity of rawDiff.entities.added) {
    record(categorizeEntityChange('added', entity, null), { change: 'added', entity });
  }
  for (const entity of rawDiff.entities.removed) {
    record(categorizeEntityChange('removed', entity, null), { change: 'removed', entity });
  }
  for (const pair of rawDiff.entities.modified) {
    const { old: previous, new: current } = pair;
    record(categorizeEntityChange('modified', current, previous), {
      change: 'modified',
      old: previous,
      new: current,
    });
  }

  // Relationship changes: additions are resolved against the new graph,
  // removals against the old one (where the endpoints still exist).
  for (const rel of rawDiff.relationships.added) {
    record(categorizeRelChange('added', rel, newGraph), { change: 'rel-added', rel });
  }
  for (const rel of rawDiff.relationships.removed) {
    record(categorizeRelChange('removed', rel, oldGraph), { change: 'rel-removed', rel });
  }

  const score = computeScore(categorized);

  // Distinct files touched by any entity-level change.
  const touchedFiles = new Set();
  const noteFile = (entity) => {
    if (entity._file) touchedFiles.add(entity._file);
  };
  for (const entity of [...rawDiff.entities.added, ...rawDiff.entities.removed]) {
    noteFile(entity);
  }
  for (const { old: previous, new: current } of rawDiff.entities.modified) {
    noteFile(previous);
    noteFile(current);
  }

  const stats = {
    filesChanged: touchedFiles.size,
    entitiesAdded: rawDiff.entities.added.length,
    entitiesRemoved: rawDiff.entities.removed.length,
    entitiesModified: rawDiff.entities.modified.length,
    relationshipsAdded: rawDiff.relationships.added.length,
    relationshipsRemoved: rawDiff.relationships.removed.length,
  };

  // Who calls / imports the things that changed?
  const impactAnalysis = computeImpactAnalysis(categorized, oldGraph, newGraph);

  return {
    categorized,
    score,
    severity: severityLabel(score),
    stats,
    impactAnalysis,
    rawDiff,
  };
}
/**
 * Compute impact analysis: who calls, or imports the module of, the
 * entities touched by breaking/significant changes?
 *
 * All lookups are made against the OLD graph, because that is where the
 * existing callers and importers of a changed entity are recorded.
 *
 * The edge list is indexed in a single pass and the importer list is cached
 * per file, so the cost no longer grows with (changed ids x edges x file size).
 *
 * @param {{breaking: object[], significant: object[]}} categorized -
 *   Categorized changes; entries may carry `entity`, `old` and/or `new`.
 * @param {{edges: object[], nodes: Map, fileIndex: Map}} oldGraph - Snapshot
 *   before the change. `fileIndex` maps a file path to an iterable of the
 *   entity ids in that file.
 * @param {object} newGraph - Unused; kept for interface compatibility.
 * @returns {{callers: Array<{entityId: *, calledBy: Array}>,
 *   dependents: Array<{entityId: *, importedBy: Array}>}}
 */
function computeImpactAnalysis(categorized, oldGraph, newGraph) {
  const impacted = { callers: [], dependents: [] };

  // Ids of every entity involved in a breaking or significant change.
  const changedIds = new Set();
  for (const item of [...categorized.breaking, ...categorized.significant]) {
    if (item.entity) changedIds.add(item.entity.id);
    if (item.old) changedIds.add(item.old.id);
    if (item.new) changedIds.add(item.new.id);
  }
  if (changedIds.size === 0) return impacted;

  // One pass over the edges: group CALLS sources by (changed) target and
  // collect IMPORTS edges with the 'dep:' marker already stripped.
  const callersByTarget = new Map();
  const importEdges = [];
  for (const edge of oldGraph.edges) {
    if (edge.type === 'CALLS') {
      if (!changedIds.has(edge.target)) continue;
      let sources = callersByTarget.get(edge.target);
      if (!sources) {
        sources = new Set();
        callersByTarget.set(edge.target, sources);
      }
      sources.add(edge.source);
    } else if (edge.type === 'IMPORTS' && typeof edge.target === 'string') {
      // Non-string targets cannot match an entity id; skipping them avoids
      // a TypeError from calling .replace on them.
      importEdges.push({ source: edge.source, target: edge.target.replace('dep:', '') });
    }
  }

  // Importers of a file = sources of IMPORTS edges whose stripped target is
  // one of the entity ids indexed under that file. Cached per file because
  // several changed entities usually share a file.
  const importersByFile = new Map();
  const importersOf = (filePath) => {
    if (!importersByFile.has(filePath)) {
      const members = oldGraph.fileIndex.get(filePath);
      let importers = [];
      if (members) {
        const memberIds = new Set(members);
        const sources = importEdges
          .filter((edge) => memberIds.has(edge.target))
          .map((edge) => edge.source);
        importers = [...new Set(sources)];
      }
      importersByFile.set(filePath, importers);
    }
    return importersByFile.get(filePath);
  };

  for (const id of changedIds) {
    const calledBy = callersByTarget.get(id);
    if (calledBy) {
      impacted.callers.push({ entityId: id, calledBy: [...calledBy] });
    }

    const entity = oldGraph.nodes.get(id);
    if (entity && entity._file) {
      const importedBy = importersOf(entity._file);
      if (importedBy.length > 0) {
        // Copy so that entries never share the cached array.
        impacted.dependents.push({ entityId: id, importedBy: [...importedBy] });
      }
    }
  }
  return impacted;
}
/**
 * Compute the impact score of a categorized diff.
 *
 * Every change adds its category weight (breaking 40, significant 30,
 * internal 20, cosmetic 10) to the total, which is then capped at 100.
 *
 * @param {{breaking: Array, significant: Array, internal: Array, cosmetic: Array}} categorized
 * @returns {number} Score in the range 0-100.
 */
function computeScore(categorized) {
  const weights = [
    ['breaking', 40],
    ['significant', 30],
    ['internal', 20],
    ['cosmetic', 10],
  ];
  let total = 0;
  for (const [category, weight] of weights) {
    total += categorized[category].length * weight;
  }
  return Math.min(100, total);
}
/**
 * File-scoped semantic diff.
 *
 * Each snapshot is first narrowed to a fresh GraphStore holding only the
 * entities indexed under `filePath`, plus every edge that has at least one
 * endpoint among those entities; the two narrowed graphs are then passed
 * to semanticDiff.
 *
 * @param {object} oldGraph - Snapshot before the change.
 * @param {object} newGraph - Snapshot after the change.
 * @param {string} filePath - Key into each graph's fileIndex.
 * @returns {object} The result of semanticDiff on the narrowed graphs.
 */
function diffFiles(oldGraph, newGraph, filePath) {
  const restrictToFile = (graph) => {
    const subset = new GraphStore();
    const members = graph.fileIndex.get(filePath);
    // File unknown to this snapshot: diff against an empty graph.
    if (!members) return subset;

    for (const id of members) {
      const node = graph.nodes.get(id);
      if (node) subset.nodes.set(id, node);
    }

    for (const edge of graph.edges) {
      const touchesFile = members.has(edge.source) || members.has(edge.target);
      if (touchesFile) subset.edges.push(edge);
    }

    // Copy the id set so the subset does not share it with the source graph.
    subset.fileIndex.set(filePath, new Set(members));
    return subset;
  };

  const before = restrictToFile(oldGraph);
  const after = restrictToFile(newGraph);
  return semanticDiff(before, after);
}
/**
 * Render a semantic diff as a human-readable, newline-joined report.
 *
 * The report starts with the score and statistics, followed by one section
 * per non-empty category (breaking, significant, internal, cosmetic) and,
 * when present and non-empty, the impact analysis.
 *
 * @param {object} diff - A result shaped like the return value of semanticDiff.
 * @returns {string} The formatted report.
 */
function formatSummary(diff) {
  const { stats } = diff;
  const out = [
    `=== Semantic Diff Summary ===`,
    `Impact Score: ${diff.score}/100 (${diff.severity})`,
    `Files Changed: ${stats.filesChanged}`,
    `Entities: +${stats.entitiesAdded} -${stats.entitiesRemoved} ~${stats.entitiesModified}`,
    `Relationships: +${stats.relationshipsAdded} -${stats.relationshipsRemoved}`,
    '',
  ];

  // Line builders shared by the category sections.
  const relLine = (item) =>
    ` ${item.change}: ${item.rel.type} ${item.rel.source} -> ${item.rel.target}`;
  const entityLine = (item, withType) =>
    withType
      ? ` ${item.change}: ${item.entity.id} (${item.entity.type})`
      : ` ${item.change}: ${item.entity.id}`;
  const modifiedLine = (item, withType) =>
    withType
      ? ` modified: ${item.new.id} (${item.new.type})`
      : ` modified: ${item.new.id}`;

  // Emit a heading, one or more lines per item, and a blank separator --
  // but only when the category has entries.
  const section = (items, heading, describe) => {
    if (items.length > 0) {
      out.push(`${heading} (${items.length}):`);
      for (const item of items) {
        for (const line of describe(item)) out.push(line);
      }
      out.push('');
    }
  };

  section(diff.categorized.breaking, '⛔ BREAKING CHANGES', (item) => {
    const described = [];
    if (item.entity) described.push(entityLine(item, true));
    if (item.rel) described.push(relLine(item));
    return described;
  });

  section(diff.categorized.significant, '⚠️ SIGNIFICANT CHANGES', (item) => {
    const described = [];
    if (item.entity) described.push(entityLine(item, true));
    if (item.old && item.new) described.push(modifiedLine(item, true));
    if (item.rel) described.push(relLine(item));
    return described;
  });

  section(diff.categorized.internal, ' INTERNAL CHANGES', (item) => {
    const described = [];
    if (item.entity) described.push(entityLine(item, false));
    if (item.old && item.new) described.push(modifiedLine(item, false));
    if (item.rel) described.push(relLine(item));
    return described;
  });

  section(diff.categorized.cosmetic, '💅 COSMETIC CHANGES', (item) =>
    item.old && item.new
      ? [` moved: ${item.new.id} (lines ${item.old.line_range} -> ${item.new.line_range})`]
      : []
  );

  // Impact analysis
  if (diff.impactAnalysis) {
    const { callers, dependents } = diff.impactAnalysis;
    if (callers.length > 0 || dependents.length > 0) {
      out.push(`🔍 IMPACT ANALYSIS:`);
      callers.forEach((entry) => {
        out.push(` ${entry.entityId} is called by: ${entry.calledBy.join(', ')}`);
      });
      dependents.forEach((entry) => {
        out.push(` ${entry.entityId} is imported by: ${entry.importedBy.join(', ')}`);
      });
      out.push('');
    }
  }

  return out.join('\n');
}
// --- CLI ---
// Runs only when this file is executed directly (not when required).
//
//   node semantic-diff.js diff <old.json> <new.json> [--file <path>]
//   node semantic-diff.js score <old.json> <new.json>
if (require.main === module) {
  const [command, ...rest] = process.argv.slice(2);

  // Report a CLI error on stderr and terminate with a non-zero status.
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };

  // Load the old and new snapshots named on the command line.
  const loadPair = (oldPath, newPath) => [
    GraphStore.loadSnapshot(oldPath),
    GraphStore.loadSnapshot(newPath),
  ];

  if (command === 'diff') {
    const usage = 'Usage: node semantic-diff.js diff <old.json> <new.json> [--file <path>]';

    // Pull `--file <path>` out of the argument list so it may appear before,
    // between or after the two snapshot paths.
    const flagIdx = rest.indexOf('--file');
    let filePath = null;
    let positional = rest;
    if (flagIdx >= 0) {
      filePath = rest[flagIdx + 1];
      // A dangling `--file` used to be ignored silently, producing a
      // whole-graph diff the user did not ask for.
      if (!filePath) fail(usage);
      positional = rest.filter((_, idx) => idx !== flagIdx && idx !== flagIdx + 1);
    }

    const [oldPath, newPath] = positional;
    if (!oldPath || !newPath) fail(usage);

    const [oldGraph, newGraph] = loadPair(oldPath, newPath);
    const diff = filePath
      ? diffFiles(oldGraph, newGraph, filePath)
      : semanticDiff(oldGraph, newGraph);

    console.log(formatSummary(diff));
    console.log('--- Raw JSON ---');
    // rawDiff and impactAnalysis are left out of the JSON dump.
    console.log(JSON.stringify({
      categorized: diff.categorized,
      score: diff.score,
      severity: diff.severity,
      stats: diff.stats,
    }, null, 2));
  } else if (command === 'score') {
    const [oldPath, newPath] = rest;
    if (!oldPath || !newPath) {
      fail('Usage: node semantic-diff.js score <old.json> <new.json>');
    }
    const [oldGraph, newGraph] = loadPair(oldPath, newPath);
    const diff = semanticDiff(oldGraph, newGraph);
    console.log(`${diff.score} (${diff.severity})`);
  } else {
    fail('Unknown command. Available: diff, score');
  }
}
module.exports = { semanticDiff, diffFiles, formatSummary, computeScore };