Dev Intel Pipeline v2 — multi-language semantic graph extractor
Phase 1: extract.js — tree-sitter AST parser (TS/JS/Python/Go/Java/Bash) + config parsers (YAML/HCL)
Phase 2: graph.js — in-memory directed graph store with build/query/diff CLI
Phase 3: namespace.js — cross-repo namespace registry with 3-tier resolution
Phase 4: semantic-diff.js — categorized diffs with impact scoring (0-100)
Phase 5: pipeline.js — batch extraction, incremental diffing, benchmarking
Benchmark: 4,325 files, 21,646 nodes, 133,979 edges in 67s (15ms/file)
BMad SPA reviews: all phases GO
This commit is contained in:
328
semantic-diff.js
Normal file
328
semantic-diff.js
Normal file
@@ -0,0 +1,328 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const GraphStore = require('./graph.js');
|
||||
|
||||
/**
|
||||
* Developer Intelligence Pipeline v2 - Semantic Diff Engine
|
||||
* Compares two graph snapshots and produces categorized, scored diffs.
|
||||
* No third-party dependencies; requires only Node.js built-ins and ./graph.js.
|
||||
*/
|
||||
|
||||
/**
 * Severity bands for impact scores.
 * Each entry is an inclusive [min, max, label] triple; bands are checked in order.
 */
const SEVERITY = [
  [0, 20, 'trivial'],
  [21, 40, 'low'],
  [41, 60, 'moderate'],
  [61, 80, 'high'],
  [81, 100, 'critical'],
];

/**
 * Translate an impact score into its severity label.
 *
 * @param {number} score - impact score to classify
 * @returns {string} label of the first band whose inclusive range contains
 *   the score, or 'unknown' when no band matches (e.g. out of range, NaN,
 *   or a fractional value that falls between two bands)
 */
function severityLabel(score) {
  const band = SEVERITY.find(([min, max]) => min <= score && score <= max);
  if (band === undefined) {
    return 'unknown';
  }
  return band[2];
}
|
||||
|
||||
/**
 * Categorize a single entity change by impact level.
 *
 * Rules, in order:
 *  - 'removed' of a public entity  -> 'breaking'
 *  - 'added' of a public entity    -> 'significant'
 *  - 'modified' where every field other than `_file` and `line_range` is
 *    unchanged                     -> 'cosmetic'
 *  - 'modified' where the old or new entity is public -> 'significant'
 *  - anything else (including unrecognized change types) -> 'internal'
 *
 * @param {string} changeType - 'added', 'removed' or 'modified'
 * @param {object|null} entity - the added/removed entity, or the new version
 *   of a modified entity
 * @param {object|null} oldEntity - previous version for 'modified', else null
 * @returns {'breaking'|'significant'|'internal'|'cosmetic'}
 */
function categorizeEntityChange(changeType, entity, oldEntity) {
  const isPublic = (e) => Boolean(e) && e.visibility === 'public';

  if (changeType === 'removed') return isPublic(entity) ? 'breaking' : 'internal';
  if (changeType === 'added') return isPublic(entity) ? 'significant' : 'internal';
  if (changeType !== 'modified') return 'internal';

  if (oldEntity && entity) {
    // Location-only fields do not count towards semantic equality.
    const semanticKeys = (e) =>
      Object.keys(e).filter((k) => k !== '_file' && k !== 'line_range');
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    const oldKeys = semanticKeys(oldEntity);
    const newKeys = semanticKeys(entity);

    // Equal key counts plus "every old key exists on the new entity" means the
    // key sets are identical. Without the hasOwn check, an old key holding
    // `undefined` that was replaced by a different new key compared equal
    // (undefined === undefined) and the change was wrongly called cosmetic.
    const sameSemantics =
      oldKeys.length === newKeys.length &&
      oldKeys.every(
        (k) =>
          hasOwn(entity, k) &&
          JSON.stringify(oldEntity[k]) === JSON.stringify(entity[k])
      );
    if (sameSemantics) return 'cosmetic';
  }

  return isPublic(entity) || isPublic(oldEntity) ? 'significant' : 'internal';
}
|
||||
|
||||
/**
 * Categorize a relationship (edge) change.
 *
 * An edge "involves public" when its source or target resolves, via
 * `graph.nodes.get`, to a node whose `visibility` is 'public'. When no graph
 * is supplied nothing can be resolved and the change is 'internal'.
 *
 * @param {string} changeType - kind of change; only 'removed' is special-cased
 * @param {{source: *, target: *}} rel - the edge that changed
 * @param {{nodes: Map}|null|undefined} graph - graph used to resolve endpoints
 * @returns {'breaking'|'significant'|'internal'} 'breaking' for a removed edge
 *   involving a public node, 'significant' for any other change involving a
 *   public node, otherwise 'internal'
 */
function categorizeRelChange(changeType, rel, graph) {
  const endpoints = [rel.source, rel.target].map((nodeId) =>
    graph ? graph.nodes.get(nodeId) : null
  );
  const touchesPublic = endpoints.some(
    (node) => Boolean(node) && node.visibility === 'public'
  );

  if (!touchesPublic) {
    return 'internal';
  }
  return changeType === 'removed' ? 'breaking' : 'significant';
}
|
||||
|
||||
/**
 * Compute semantic diff between two graph snapshots.
 *
 * Takes the raw diff from GraphStore.diffSnapshots, sorts every entity and
 * relationship change into one of four buckets (breaking, significant,
 * internal, cosmetic), then derives an impact score, a severity label,
 * summary statistics and a caller/importer impact analysis.
 *
 * @param {object} oldGraph - snapshot before the change
 * @param {object} newGraph - snapshot after the change
 * @returns {{categorized: object, score: number, severity: string,
 *   stats: object, impactAnalysis: object, rawDiff: object}}
 */
function semanticDiff(oldGraph, newGraph) {
  const rawDiff = GraphStore.diffSnapshots(oldGraph, newGraph);
  const { entities, relationships } = rawDiff;

  const categorized = { breaking: [], significant: [], internal: [], cosmetic: [] };

  // Entity changes: added first, then removed, then modified.
  const oneSided = [
    ['added', entities.added],
    ['removed', entities.removed],
  ];
  for (const [change, list] of oneSided) {
    for (const entity of list) {
      const bucket = categorizeEntityChange(change, entity, null);
      categorized[bucket].push({ change, entity });
    }
  }
  for (const pair of entities.modified) {
    const bucket = categorizeEntityChange('modified', pair.new, pair.old);
    categorized[bucket].push({ change: 'modified', old: pair.old, new: pair.new });
  }

  // Relationship changes: added edges are resolved against the new graph,
  // removed edges against the old graph (where their endpoints still exist).
  const relChanges = [
    ['added', 'rel-added', relationships.added, newGraph],
    ['removed', 'rel-removed', relationships.removed, oldGraph],
  ];
  for (const [changeType, label, list, graph] of relChanges) {
    for (const rel of list) {
      const bucket = categorizeRelChange(changeType, rel, graph);
      categorized[bucket].push({ change: label, rel });
    }
  }

  const score = computeScore(categorized);

  // Distinct files touched by any entity change (relationship changes are
  // not counted here).
  const touchedFiles = new Set();
  const noteFile = (entity) => {
    if (entity._file) touchedFiles.add(entity._file);
  };
  for (const entity of entities.added) noteFile(entity);
  for (const entity of entities.removed) noteFile(entity);
  for (const pair of entities.modified) {
    noteFile(pair.old);
    noteFile(pair.new);
  }

  const stats = {
    filesChanged: touchedFiles.size,
    entitiesAdded: entities.added.length,
    entitiesRemoved: entities.removed.length,
    entitiesModified: entities.modified.length,
    relationshipsAdded: relationships.added.length,
    relationshipsRemoved: relationships.removed.length,
  };

  // Who calls / imports the things that changed?
  const impactAnalysis = computeImpactAnalysis(categorized, oldGraph, newGraph);

  const severity = severityLabel(score);
  return { categorized, score, severity, stats, impactAnalysis, rawDiff };
}
|
||||
|
||||
/**
 * Compute impact analysis: who calls the things that changed?
 *
 * Collects the ids of every entity that appears in a breaking or significant
 * change (relationship-only items carry no entity and are skipped), then looks
 * them up in the OLD graph:
 *  - callers:    sources of CALLS edges whose target is the changed id
 *  - dependents: sources of IMPORTS edges whose target, with a 'dep:' prefix
 *                stripped, is one of the ids that `fileIndex` lists for the
 *                changed entity's file
 * NOTE(review): the IMPORTS match compares import targets against entity ids
 * of the file — confirm this matches how import targets are named in the graph.
 *
 * The edge list is scanned once and importer lists are cached per file, so the
 * cost no longer grows with (changed ids x edges x ids per file).
 *
 * @param {{breaking: object[], significant: object[]}} categorized
 * @param {{nodes: Map, edges: object[], fileIndex: Map}} oldGraph
 * @param {object} newGraph - currently unused; kept for interface stability
 * @returns {{callers: {entityId: *, calledBy: Array}[],
 *   dependents: {entityId: *, importedBy: Array}[]}}
 */
function computeImpactAnalysis(categorized, oldGraph, newGraph) {
  const impacted = { callers: [], dependents: [] };

  const changedIds = new Set();
  for (const item of [...categorized.breaking, ...categorized.significant]) {
    if (item.entity) changedIds.add(item.entity.id);
    if (item.old) changedIds.add(item.old.id);
    if (item.new) changedIds.add(item.new.id);
  }
  if (changedIds.size === 0) return impacted;

  // Single pass over the edges: bucket relevant CALLS sources by target and
  // set the IMPORTS edges aside for the per-file lookups below.
  const callersByTarget = new Map();
  const importEdges = [];
  for (const edge of oldGraph.edges) {
    if (edge.type === 'CALLS') {
      if (!changedIds.has(edge.target)) continue;
      let sources = callersByTarget.get(edge.target);
      if (!sources) {
        sources = new Set();
        callersByTarget.set(edge.target, sources);
      }
      sources.add(edge.source);
    } else if (edge.type === 'IMPORTS') {
      importEdges.push(edge);
    }
  }

  // Every entity of a file shares the same importer list, so compute it once.
  const importersByFile = new Map();
  const importersOf = (file) => {
    if (importersByFile.has(file)) return importersByFile.get(file);
    const importers = new Set();
    const fileEntityIds = oldGraph.fileIndex.get(file);
    if (fileEntityIds) {
      const idSet = new Set(fileEntityIds);
      for (const edge of importEdges) {
        if (idSet.has(edge.target.replace('dep:', ''))) importers.add(edge.source);
      }
    }
    const result = [...importers];
    importersByFile.set(file, result);
    return result;
  };

  for (const id of changedIds) {
    const calledBy = callersByTarget.get(id);
    if (calledBy) {
      impacted.callers.push({ entityId: id, calledBy: [...calledBy] });
    }

    const entity = oldGraph.nodes.get(id);
    if (!entity || !entity._file) continue;

    const importedBy = importersOf(entity._file);
    if (importedBy.length > 0) {
      // Copy so that entries never share one mutable array.
      impacted.dependents.push({ entityId: id, importedBy: [...importedBy] });
    }
  }

  return impacted;
}
|
||||
|
||||
/**
 * Compute impact score (0-100).
 *
 * Every change adds its bucket's weight (breaking 40, significant 30,
 * internal 20, cosmetic 10); the sum is capped at 100.
 *
 * @param {{breaking: Array, significant: Array, internal: Array, cosmetic: Array}} categorized
 * @returns {number} weighted sum of the bucket sizes, at most 100
 */
function computeScore(categorized) {
  const weights = [
    ['breaking', 40],
    ['significant', 30],
    ['internal', 20],
    ['cosmetic', 10],
  ];

  let total = 0;
  for (const [bucket, weight] of weights) {
    total += categorized[bucket].length * weight;
  }
  return total > 100 ? 100 : Math.min(100, total);
}
|
||||
|
||||
/**
 * File-scoped diff: only entities belonging to a specific file.
 *
 * Each snapshot is reduced to a fresh GraphStore holding the nodes that
 * `fileIndex` lists for `filePath`, plus every edge that has one of those
 * nodes as source or target; the two reduced graphs are then passed to
 * semanticDiff. A snapshot that does not know the file contributes an empty
 * graph.
 *
 * @param {object} oldGraph - snapshot before the change
 * @param {object} newGraph - snapshot after the change
 * @param {string} filePath - key to look up in each graph's fileIndex
 * @returns {object} the semanticDiff result for the two reduced graphs
 */
function diffFiles(oldGraph, newGraph, filePath) {
  const restrictToFile = (graph) => {
    const scoped = new GraphStore();
    const entityIds = graph.fileIndex.get(filePath);

    if (entityIds) {
      for (const id of entityIds) {
        const node = graph.nodes.get(id);
        if (node) scoped.nodes.set(id, node);
      }

      for (const edge of graph.edges) {
        const touchesFile = entityIds.has(edge.source) || entityIds.has(edge.target);
        if (!touchesFile) continue;
        scoped.edges.push(edge);
      }

      // Copy the id set so the scoped graph does not alias the source index.
      scoped.fileIndex.set(filePath, new Set(entityIds));
    }

    return scoped;
  };

  const oldScoped = restrictToFile(oldGraph);
  const newScoped = restrictToFile(newGraph);
  return semanticDiff(oldScoped, newScoped);
}
|
||||
|
||||
/**
 * Generate human-readable summary.
 *
 * Output layout: a fixed five-line header (score, files, entity and
 * relationship counts) followed by a blank line, then one section per
 * non-empty category (breaking, significant, internal, cosmetic) and, when
 * present and non-empty, an impact-analysis section. Every section ends with
 * a blank line.
 *
 * @param {object} diff - object shaped like the return value of semanticDiff
 *   (reads score, severity, stats, categorized and optional impactAnalysis)
 * @returns {string} the lines joined with '\n'
 */
function formatSummary(diff) {
  const { stats, categorized } = diff;

  // Conditions deciding which line(s) an item produces.
  const hasEntity = (item) => item.entity;
  const hasOldAndNew = (item) => item.old && item.new;
  const hasRel = (item) => item.rel;

  // Line renderers.
  const entityWithType = (item) => ` ${item.change}: ${item.entity.id} (${item.entity.type})`;
  const entityPlain = (item) => ` ${item.change}: ${item.entity.id}`;
  const modifiedWithType = (item) => ` modified: ${item.new.id} (${item.new.type})`;
  const modifiedPlain = (item) => ` modified: ${item.new.id}`;
  const relationship = (item) =>
    ` ${item.change}: ${item.rel.type} ${item.rel.source} -> ${item.rel.target}`;
  const moved = (item) =>
    ` moved: ${item.new.id} (lines ${item.old.line_range} -> ${item.new.line_range})`;

  // Sections in output order; each item is run through its section's
  // [condition, renderer] pairs in order and may emit several lines.
  const sections = [
    {
      heading: '⛔ BREAKING CHANGES',
      items: categorized.breaking,
      renderers: [[hasEntity, entityWithType], [hasRel, relationship]],
    },
    {
      heading: '⚠️ SIGNIFICANT CHANGES',
      items: categorized.significant,
      renderers: [
        [hasEntity, entityWithType],
        [hasOldAndNew, modifiedWithType],
        [hasRel, relationship],
      ],
    },
    {
      heading: 'ℹ️ INTERNAL CHANGES',
      items: categorized.internal,
      renderers: [
        [hasEntity, entityPlain],
        [hasOldAndNew, modifiedPlain],
        [hasRel, relationship],
      ],
    },
    {
      heading: '💅 COSMETIC CHANGES',
      items: categorized.cosmetic,
      renderers: [[hasOldAndNew, moved]],
    },
  ];

  const out = [
    `=== Semantic Diff Summary ===`,
    `Impact Score: ${diff.score}/100 (${diff.severity})`,
    `Files Changed: ${stats.filesChanged}`,
    `Entities: +${stats.entitiesAdded} -${stats.entitiesRemoved} ~${stats.entitiesModified}`,
    `Relationships: +${stats.relationshipsAdded} -${stats.relationshipsRemoved}`,
    '',
  ];

  for (const { heading, items, renderers } of sections) {
    if (!(items.length > 0)) continue;
    out.push(`${heading} (${items.length}):`);
    for (const item of items) {
      for (const [applies, render] of renderers) {
        if (applies(item)) out.push(render(item));
      }
    }
    out.push('');
  }

  // Impact analysis
  const analysis = diff.impactAnalysis;
  if (analysis) {
    const { callers, dependents } = analysis;
    if (callers.length > 0 || dependents.length > 0) {
      out.push(`🔍 IMPACT ANALYSIS:`);
      callers.forEach((c) => {
        out.push(` ${c.entityId} is called by: ${c.calledBy.join(', ')}`);
      });
      dependents.forEach((d) => {
        out.push(` ${d.entityId} is imported by: ${d.importedBy.join(', ')}`);
      });
      out.push('');
    }
  }

  return out.join('\n');
}
|
||||
|
||||
// --- CLI ---
// Runs only when this file is executed directly (not when required).
//
//   node semantic-diff.js diff  <old.json> <new.json> [--file <path>]
//   node semantic-diff.js score <old.json> <new.json>
//
// `diff` prints the human-readable summary followed by the raw JSON result;
// `score` prints "<score> (<severity>)". Any usage error exits with status 1.
if (require.main === module) {
  const args = process.argv.slice(2);
  const [command, oldPath, newPath] = args;

  const USAGE = {
    diff: 'Usage: node semantic-diff.js diff <old.json> <new.json> [--file <path>]',
    score: 'Usage: node semantic-diff.js score <old.json> <new.json>',
  };

  // Print every message to stderr and terminate with a failure status.
  const fail = (...messages) => {
    for (const message of messages) console.error(message);
    process.exit(1);
  };

  if (command !== 'diff' && command !== 'score') {
    fail('Unknown command. Available: diff, score');
  }
  if (!oldPath || !newPath) {
    fail(USAGE[command]);
  }

  // `--file` restricts the diff to one file. A flag without a value used to be
  // ignored silently, producing a whole-graph diff the user did not ask for.
  let filePath = null;
  if (command === 'diff') {
    const fileIdx = args.indexOf('--file');
    if (fileIdx >= 0) {
      filePath = args[fileIdx + 1];
      if (!filePath) {
        fail('Error: --file requires a <path> argument', USAGE.diff);
      }
    }
  }

  const oldGraph = GraphStore.loadSnapshot(oldPath);
  const newGraph = GraphStore.loadSnapshot(newPath);

  if (command === 'score') {
    const diff = semanticDiff(oldGraph, newGraph);
    console.log(`${diff.score} (${diff.severity})`);
  } else {
    const diff = filePath
      ? diffFiles(oldGraph, newGraph, filePath)
      : semanticDiff(oldGraph, newGraph);

    console.log(formatSummary(diff));
    console.log('--- Raw JSON ---');
    console.log(JSON.stringify({ categorized: diff.categorized, score: diff.score, severity: diff.severity, stats: diff.stats }, null, 2));
  }
}
|
||||
|
||||
// Public API: only these four functions are exported. severityLabel, the
// categorize* helpers and computeImpactAnalysis remain private to this module.
module.exports = { semanticDiff, diffFiles, formatSummary, computeScore };
|
||||
Reference in New Issue
Block a user