Files
dev-intel-v2/semantic-diff.js
Jarvis Prime efb12d003b Dev Intel Pipeline v2 — multi-language semantic graph extractor
Phase 1: extract.js — tree-sitter AST parser (TS/JS/Python/Go/Java/Bash) + config parsers (YAML/HCL)
Phase 2: graph.js — in-memory directed graph store with build/query/diff CLI
Phase 3: namespace.js — cross-repo namespace registry with 3-tier resolution
Phase 4: semantic-diff.js — categorized diffs with impact scoring (0-100)
Phase 5: pipeline.js — batch extraction, incremental diffing, benchmarking

Benchmark: 4,325 files, 21,646 nodes, 133,979 edges in 67s (15ms/file)
BMad SPA reviews: all phases GO
2026-03-09 05:29:29 +00:00

329 lines
11 KiB
JavaScript
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const fs = require('fs');
const path = require('path');
const GraphStore = require('./graph.js');
/**
* Developer Intelligence Pipeline v2 - Semantic Diff Engine
* Compares two graph snapshots and produces categorized, scored diffs.
* No external dependencies.
*/
/**
 * Severity bands as [lowest score, highest score, label], in ascending order.
 */
const SEVERITY = [
  [0, 20, 'trivial'],
  [21, 40, 'low'],
  [41, 60, 'moderate'],
  [61, 80, 'high'],
  [81, 100, 'critical'],
];

/**
 * Map an impact score to its severity label.
 *
 * Only the table's overall minimum and each band's upper bound are consulted,
 * so a fractional score that falls between two integer bands (e.g. 20.5)
 * resolves to the next band up instead of falling through to 'unknown'.
 *
 * @param {number} score - Impact score, expected to lie in [0, 100].
 * @returns {string} The band label, or 'unknown' when the score lies outside
 *   the table's range or is not comparable (NaN, undefined).
 */
function severityLabel(score) {
  const lowest = SEVERITY[0][0];
  // Negated >= (rather than <) so NaN and undefined are rejected here too.
  if (!(score >= lowest)) return 'unknown';
  for (const [, upper, label] of SEVERITY) {
    if (score <= upper) return label;
  }
  return 'unknown';
}
/**
 * Categorize a single entity change by impact level.
 *
 * Rules, in order:
 *  - 'modified' where every property other than `_file` and `line_range` is
 *    unchanged (same key set, same JSON-serialized values) -> 'cosmetic'
 *  - 'modified' where either version is public -> 'significant'
 *  - 'removed' of a public entity -> 'breaking'
 *  - 'added' of a public entity -> 'significant'
 *  - anything else -> 'internal'
 *
 * @param {string} changeType - 'added', 'removed' or 'modified'.
 * @param {object|null} entity - The entity (the new version for 'modified').
 * @param {object|null} oldEntity - The previous version; only used for 'modified'.
 * @returns {'breaking'|'significant'|'internal'|'cosmetic'} Impact category.
 */
function categorizeEntityChange(changeType, entity, oldEntity) {
  const isPublic = (e) => Boolean(e) && e.visibility === 'public';

  if (changeType === 'modified') {
    if (oldEntity && entity) {
      // Location-only properties do not affect semantics.
      const ignored = new Set(['_file', 'line_range']);
      const semanticKeys = (e) => Object.keys(e).filter((k) => !ignored.has(k));
      const oldKeys = semanticKeys(oldEntity);
      const newKeys = new Set(semanticKeys(entity));
      // Require the key to exist on both sides: comparing values alone treats
      // a renamed key as unchanged when the old value serializes to undefined.
      const sameSemantics = oldKeys.length === newKeys.size &&
        oldKeys.every((k) => newKeys.has(k) &&
          JSON.stringify(oldEntity[k]) === JSON.stringify(entity[k]));
      if (sameSemantics) return 'cosmetic';
    }
    return isPublic(entity) || isPublic(oldEntity) ? 'significant' : 'internal';
  }

  if (changeType === 'removed' && isPublic(entity)) return 'breaking';
  if (changeType === 'added' && isPublic(entity)) return 'significant';
  return 'internal';
}
/**
 * Classify a relationship (edge) change by whether it touches a public entity.
 *
 * Both endpoints are looked up in `graph.nodes`; when no graph is supplied,
 * neither endpoint counts as public.
 *
 * @param {string} changeType - 'added' or 'removed'.
 * @param {{source: *, target: *}} rel - The relationship that changed.
 * @param {{nodes: Map}|null} graph - Graph used to resolve the endpoints.
 * @returns {'breaking'|'significant'|'internal'} 'breaking' for a removed
 *   relationship touching a public entity, 'significant' for any other change
 *   touching a public entity, otherwise 'internal'.
 */
function categorizeRelChange(changeType, rel, graph) {
  const lookup = (id) => (graph ? graph.nodes.get(id) : null);
  const from = lookup(rel.source);
  const to = lookup(rel.target);
  const touchesPublic = [from, to].some((node) => node && node.visibility === 'public');

  if (!touchesPublic) return 'internal';
  return changeType === 'removed' ? 'breaking' : 'significant';
}
/**
 * Compute the semantic diff between two graph snapshots.
 *
 * The raw diff from `GraphStore.diffSnapshots` is read as
 * `{ entities: { added, removed, modified }, relationships: { added, removed } }`,
 * where each `modified` item is `{ old, new }`. Every change is placed into one
 * of four buckets, then scored and analysed for impact.
 *
 * @param {object} oldGraph - Snapshot before the change.
 * @param {object} newGraph - Snapshot after the change.
 * @returns {{categorized: object, score: number, severity: string,
 *   stats: object, impactAnalysis: object, rawDiff: object}}
 */
function semanticDiff(oldGraph, newGraph) {
  const rawDiff = GraphStore.diffSnapshots(oldGraph, newGraph);
  const { entities, relationships } = rawDiff;

  const categorized = { breaking: [], significant: [], internal: [], cosmetic: [] };
  const record = (bucket, entry) => {
    categorized[bucket].push(entry);
  };

  // Entity changes: added, then removed, then modified.
  for (const added of entities.added) {
    record(categorizeEntityChange('added', added, null), { change: 'added', entity: added });
  }
  for (const removed of entities.removed) {
    record(categorizeEntityChange('removed', removed, null), { change: 'removed', entity: removed });
  }
  for (const { old: before, new: after } of entities.modified) {
    record(categorizeEntityChange('modified', after, before), { change: 'modified', old: before, new: after });
  }

  // Relationship changes: additions resolve against the new graph,
  // removals against the old one.
  for (const addedRel of relationships.added) {
    record(categorizeRelChange('added', addedRel, newGraph), { change: 'rel-added', rel: addedRel });
  }
  for (const removedRel of relationships.removed) {
    record(categorizeRelChange('removed', removedRel, oldGraph), { change: 'rel-removed', rel: removedRel });
  }

  const score = computeScore(categorized);

  // Distinct files mentioned by any changed entity (either side of a modification).
  const touchedFiles = new Set();
  const noteFile = (e) => {
    if (e._file) touchedFiles.add(e._file);
  };
  for (const e of entities.added) noteFile(e);
  for (const e of entities.removed) noteFile(e);
  for (const { old: before, new: after } of entities.modified) {
    noteFile(before);
    noteFile(after);
  }

  const stats = {
    filesChanged: touchedFiles.size,
    entitiesAdded: entities.added.length,
    entitiesRemoved: entities.removed.length,
    entitiesModified: entities.modified.length,
    relationshipsAdded: relationships.added.length,
    relationshipsRemoved: relationships.removed.length,
  };

  // Who calls / imports the entities behind breaking and significant changes.
  const impactAnalysis = computeImpactAnalysis(categorized, oldGraph, newGraph);

  return {
    categorized,
    score,
    severity: severityLabel(score),
    stats,
    impactAnalysis,
    rawDiff,
  };
}
/**
 * Compute impact analysis: who calls, or imports, the things that changed?
 *
 * Considers the entity ids behind every breaking and significant change and
 * looks them up in the OLD graph:
 *  - callers: sources of 'CALLS' edges whose target is the changed id
 *  - dependents: sources of 'IMPORTS' edges whose target (with the first
 *    'dep:' occurrence stripped) is one of the ids that `oldGraph.fileIndex`
 *    lists for the changed entity's file
 *
 * Edges are indexed once and the importer list is cached per file, so the cost
 * is linear in the number of edges per distinct file instead of growing with
 * (changed ids x edges x ids per file).
 *
 * @param {{breaking: object[], significant: object[]}} categorized - Bucketed changes.
 * @param {{edges: object[], nodes: Map, fileIndex: Map}} oldGraph - Snapshot before the change.
 * @param {object} newGraph - Unused; kept so existing callers keep working.
 * @returns {{callers: {entityId: *, calledBy: Array}[],
 *   dependents: {entityId: *, importedBy: Array}[]}}
 */
function computeImpactAnalysis(categorized, oldGraph, newGraph) {
  const impacted = { callers: [], dependents: [] };

  const changedIds = new Set();
  for (const item of [...categorized.breaking, ...categorized.significant]) {
    if (item.entity) changedIds.add(item.entity.id);
    if (item.old) changedIds.add(item.old.id);
    if (item.new) changedIds.add(item.new.id);
  }
  // Nothing to analyse: return before touching the graph at all.
  if (changedIds.size === 0) return impacted;

  // One pass over the edges: changed target id -> distinct callers, in edge order.
  const callersByTarget = new Map();
  for (const edge of oldGraph.edges) {
    if (edge.type !== 'CALLS' || !changedIds.has(edge.target)) continue;
    if (!callersByTarget.has(edge.target)) callersByTarget.set(edge.target, new Set());
    callersByTarget.get(edge.target).add(edge.source);
  }

  // Distinct importers per file, computed at most once per file.
  // null means the file has no entry in the file index.
  const importersByFile = new Map();
  const importersOf = (file) => {
    if (!importersByFile.has(file)) {
      const fileEntityIds = oldGraph.fileIndex.get(file);
      let importers = null;
      if (fileEntityIds) {
        const idSet = fileEntityIds instanceof Set ? fileEntityIds : new Set(fileEntityIds);
        const sources = new Set();
        for (const edge of oldGraph.edges) {
          if (edge.type === 'IMPORTS' && idSet.has(edge.target.replace('dep:', ''))) {
            sources.add(edge.source);
          }
        }
        importers = [...sources];
      }
      importersByFile.set(file, importers);
    }
    return importersByFile.get(file);
  };

  for (const id of changedIds) {
    const callers = callersByTarget.get(id);
    if (callers) {
      impacted.callers.push({ entityId: id, calledBy: [...callers] });
    }

    const entity = oldGraph.nodes.get(id);
    if (entity && entity._file) {
      const importers = importersOf(entity._file);
      if (importers && importers.length > 0) {
        // Copy so entries for the same file do not share one array.
        impacted.dependents.push({ entityId: id, importedBy: [...importers] });
      }
    }
  }
  return impacted;
}
/**
 * Compute the impact score (0-100).
 *
 * Each change adds its bucket's weight (breaking 40, significant 30,
 * internal 20, cosmetic 10); the sum is capped at 100.
 *
 * @param {{breaking: Array, significant: Array, internal: Array, cosmetic: Array}} categorized
 * @returns {number} Weighted total, at most 100.
 */
function computeScore(categorized) {
  const weights = [
    ['breaking', 40],
    ['significant', 30],
    ['internal', 20],
    ['cosmetic', 10],
  ];
  const total = weights.reduce(
    (sum, [bucket, weight]) => sum + categorized[bucket].length * weight,
    0,
  );
  return Math.min(100, total);
}
/**
 * File-scoped diff: only entities belonging to a specific file.
 *
 * Each snapshot is reduced to a fresh GraphStore holding the nodes that
 * `fileIndex` lists for `filePath`, every edge with at least one endpoint among
 * those ids, and a single file-index entry. The two reduced graphs are then
 * passed to `semanticDiff`.
 *
 * @param {object} oldGraph - Snapshot before the change.
 * @param {object} newGraph - Snapshot after the change.
 * @param {string} filePath - File whose entities should be compared.
 * @returns {object} The result of `semanticDiff` on the two reduced graphs.
 */
function diffFiles(oldGraph, newGraph, filePath) {
  const restrictToFile = (graph) => {
    const subgraph = new GraphStore();
    const ownedIds = graph.fileIndex.get(filePath);

    // File unknown to this snapshot: the reduced graph stays empty.
    if (ownedIds) {
      for (const id of ownedIds) {
        const node = graph.nodes.get(id);
        if (node) subgraph.nodes.set(id, node);
      }
      for (const edge of graph.edges) {
        const touchesFile = ownedIds.has(edge.source) || ownedIds.has(edge.target);
        if (!touchesFile) continue;
        subgraph.edges.push(edge);
      }
      subgraph.fileIndex.set(filePath, new Set(ownedIds));
    }
    return subgraph;
  };

  const scopedOld = restrictToFile(oldGraph);
  const scopedNew = restrictToFile(newGraph);
  return semanticDiff(scopedOld, scopedNew);
}
/**
 * Generate a human-readable summary of a semantic diff.
 *
 * Layout: a fixed header (score, files, entity and relationship counts),
 * followed by one section per non-empty bucket (breaking, significant,
 * internal, cosmetic) and, when any callers or dependents were found, an
 * impact-analysis section. Each section ends with a blank line.
 *
 * @param {object} diff - Result shaped like the return value of `semanticDiff`.
 * @returns {string} Lines joined with '\n'.
 */
function formatSummary(diff) {
  const { stats, categorized } = diff;
  const out = [
    `=== Semantic Diff Summary ===`,
    `Impact Score: ${diff.score}/100 (${diff.severity})`,
    `Files Changed: ${stats.filesChanged}`,
    `Entities: +${stats.entitiesAdded} -${stats.entitiesRemoved} ~${stats.entitiesModified}`,
    `Relationships: +${stats.relationshipsAdded} -${stats.relationshipsRemoved}`,
    '',
  ];

  // Lines describing one change: entity line, then modified line, then relationship line.
  const describe = (item, { typed, modified }) => {
    const described = [];
    if (item.entity) {
      described.push(typed
        ? ` ${item.change}: ${item.entity.id} (${item.entity.type})`
        : ` ${item.change}: ${item.entity.id}`);
    }
    if (modified && item.old && item.new) {
      described.push(typed
        ? ` modified: ${item.new.id} (${item.new.type})`
        : ` modified: ${item.new.id}`);
    }
    if (item.rel) {
      described.push(` ${item.change}: ${item.rel.type} ${item.rel.source} -> ${item.rel.target}`);
    }
    return described;
  };

  // Emit heading, one group of lines per item, and a trailing blank line.
  const section = (heading, items, toLines) => {
    out.push(heading);
    for (const item of items) {
      for (const text of toLines(item)) out.push(text);
    }
    out.push('');
  };

  const { breaking, significant, internal, cosmetic } = categorized;

  if (breaking.length > 0) {
    // The breaking section never prints a "modified" line.
    section(`⛔ BREAKING CHANGES (${breaking.length}):`, breaking,
      (item) => describe(item, { typed: true, modified: false }));
  }
  if (significant.length > 0) {
    section(`⚠️ SIGNIFICANT CHANGES (${significant.length}):`, significant,
      (item) => describe(item, { typed: true, modified: true }));
  }
  if (internal.length > 0) {
    section(` INTERNAL CHANGES (${internal.length}):`, internal,
      (item) => describe(item, { typed: false, modified: true }));
  }
  if (cosmetic.length > 0) {
    section(`💅 COSMETIC CHANGES (${cosmetic.length}):`, cosmetic,
      (item) => (item.old && item.new
        ? [` moved: ${item.new.id} (lines ${item.old.line_range} -> ${item.new.line_range})`]
        : []));
  }

  // Impact analysis
  if (diff.impactAnalysis) {
    const { callers, dependents } = diff.impactAnalysis;
    const hasImpact = callers.length > 0 || dependents.length > 0;
    if (hasImpact) {
      out.push(`🔍 IMPACT ANALYSIS:`);
      for (const caller of callers) {
        out.push(` ${caller.entityId} is called by: ${caller.calledBy.join(', ')}`);
      }
      for (const dependent of dependents) {
        out.push(` ${dependent.entityId} is imported by: ${dependent.importedBy.join(', ')}`);
      }
      out.push('');
    }
  }

  return out.join('\n');
}
// --- CLI ---
// Runs only when this file is executed directly:
//   node semantic-diff.js diff <old.json> <new.json> [--file <path>]
//   node semantic-diff.js score <old.json> <new.json>
if (require.main === module) {
  const cliArgs = process.argv.slice(2);
  const [command, oldPath, newPath] = cliArgs;

  // Print the usage text and exit with status 1 unless both snapshot paths were given.
  const ensurePaths = (usage) => {
    if (oldPath && newPath) return;
    console.error(usage);
    process.exit(1);
  };

  switch (command) {
    case 'diff': {
      const flagAt = cliArgs.indexOf('--file');
      const filePath = flagAt >= 0 ? cliArgs[flagAt + 1] : null;
      ensurePaths('Usage: node semantic-diff.js diff <old.json> <new.json> [--file <path>]');

      const oldGraph = GraphStore.loadSnapshot(oldPath);
      const newGraph = GraphStore.loadSnapshot(newPath);
      // With --file <path> the diff is restricted to that file's entities.
      const diff = filePath
        ? diffFiles(oldGraph, newGraph, filePath)
        : semanticDiff(oldGraph, newGraph);

      console.log(formatSummary(diff));
      console.log('--- Raw JSON ---');
      const { categorized, score, severity, stats } = diff;
      console.log(JSON.stringify({ categorized, score, severity, stats }, null, 2));
      break;
    }
    case 'score': {
      ensurePaths('Usage: node semantic-diff.js score <old.json> <new.json>');

      const oldGraph = GraphStore.loadSnapshot(oldPath);
      const newGraph = GraphStore.loadSnapshot(newPath);
      const { score, severity } = semanticDiff(oldGraph, newGraph);
      console.log(`${score} (${severity})`);
      break;
    }
    default:
      console.error('Unknown command. Available: diff, score');
      process.exit(1);
  }
}

module.exports = { semanticDiff, diffFiles, formatSummary, computeScore };