Dev Intel Pipeline v2 — multi-language semantic graph extractor

Phase 1: extract.js — tree-sitter AST parser (TS/JS/Python/Go/Java/Bash) + config parsers (YAML/HCL)
Phase 2: graph.js — in-memory directed graph store with build/query/diff CLI
Phase 3: namespace.js — cross-repo namespace registry with 3-tier resolution
Phase 4: semantic-diff.js — categorized diffs with impact scoring (0-100)
Phase 5: pipeline.js — batch extraction, incremental diffing, benchmarking

Benchmark: 4,325 files, 21,646 nodes, 133,979 edges in 67s (15ms/file)
BMad SPA reviews: all phases GO
This commit is contained in:
Jarvis Prime
2026-03-09 05:29:29 +00:00
commit efb12d003b
19 changed files with 4106 additions and 0 deletions

82
validate-ground-truth.js Normal file
View File

@@ -0,0 +1,82 @@
const fs = require('fs');
const path = require('path');
const { execSync, execFileSync } = require('child_process');
// Path to the ground-truth JSON document, taken from the first CLI argument.
const groundTruthPath = process.argv[2];
if (!groundTruthPath) {
  console.error("Usage: node validate-ground-truth.js <ground-truth-json>");
  process.exit(1);
}

// Load the ground truth. A missing file or malformed JSON is reported as a
// one-line message (exit code 1) instead of an uncaught-exception stack trace.
let gt;
try {
  gt = JSON.parse(fs.readFileSync(groundTruthPath, 'utf8'));
} catch (err) {
  console.error(`Cannot load ground truth "${groundTruthPath}": ${err.message}`);
  process.exit(1);
}

// The rest of the script reads gt.file as a string and iterates gt.entities
// and gt.relationships, so reject a document without them here rather than
// failing later with an opaque TypeError.
const hasExpectedShape =
  gt !== null &&
  typeof gt === 'object' &&
  typeof gt.file === 'string' &&
  Array.isArray(gt.entities) &&
  Array.isArray(gt.relationships);
if (!hasExpectedShape) {
  console.error(
    `Invalid ground truth "${groundTruthPath}": expected an object with a string "file" and arrays "entities" and "relationships"`
  );
  process.exit(1);
}

// Path of the source file the ground truth describes.
const filePath = gt.file;
// Infer the repo root from the ground truth: the Module/Config entity's ID is
// the file's path relative to the repo root, so stripping that suffix from
// filePath leaves the root. Falls back to '/app/src' when there is no such
// entity or its ID is not a whole-component suffix of filePath.
//   filePath = /tmp/test_service.py,  id = test_service.py → repoRoot = /tmp
//   filePath = /app/src/cli/route.ts, id = cli/route.ts    → repoRoot = /app/src
const moduleEntity = gt.entities.find(e => e.type === 'Module' || e.type === 'Config');
let repoRoot = '/app/src';
if (moduleEntity && typeof moduleEntity.id === 'string') {
  const expectedRelPath = moduleEntity.id;
  if (filePath.endsWith(expectedRelPath)) {
    const prefix = filePath.slice(0, filePath.length - expectedRelPath.length);
    // Accept the suffix only on a path-component boundary; otherwise
    // /tmp/mytest.py with id test.py would produce the bogus root "/tmp/my".
    const onBoundary =
      prefix === '' || prefix.endsWith('/') || expectedRelPath.startsWith('/');
    if (onBoundary) {
      // Drop one trailing separator, but keep "/" itself for a file that
      // sits directly in the filesystem root.
      repoRoot = prefix.length > 1 && prefix.endsWith('/') ? prefix.slice(0, -1) : prefix;
    }
  }
}
// Run the sibling extract.js on the same file and parse the JSON it prints to
// stdout. The arguments are passed as an array with no shell in between, so a
// script directory or file path containing spaces, quotes, `$` or backticks
// (filePath comes from the ground-truth JSON) reaches the extractor unchanged.
const scriptDir = __dirname;
const out = execFileSync(
  process.execPath, // the Node binary that is running this script
  [path.join(scriptDir, 'extract.js'), filePath, repoRoot],
  // Raise the stdout cap above Node's default so large extraction output
  // is not cut off mid-JSON.
  { encoding: 'utf8', maxBuffer: 64 * 1024 * 1024 }
);
const actual = JSON.parse(out);
// --- Entity Matching (by ID) ---
// Pairs each ground-truth entity with one extracted entity of the same id.
// An extracted entity can be paired at most once, so duplicate ids on either
// side cannot push precision or recall above 1.
let correctEntities = 0;
const matchedActualEntities = new Set(); // indices into actual.entities already paired

// id -> indices of the extracted entities carrying that id, in extraction order.
const actualEntityIndexesById = new Map();
actual.entities.forEach((ae, i) => {
  const indexes = actualEntityIndexesById.get(ae.id);
  if (indexes) {
    indexes.push(i);
  } else {
    actualEntityIndexesById.set(ae.id, [i]);
  }
});

for (const ge of gt.entities) {
  const candidates = actualEntityIndexesById.get(ge.id);
  if (candidates && candidates.length > 0) {
    correctEntities++;
    matchedActualEntities.add(candidates.shift());
  } else {
    console.log(`Missing entity: ${ge.id}`);
  }
}

// Anything the extractor produced that was not paired above is an extra.
const extraEntities = actual.entities.filter((_, i) => !matchedActualEntities.has(i));
for (const e of extraEntities) {
  console.log(`Extra entity: ${e.id}`);
}

// Two empty sets agree perfectly; without this case the `|| 1` guards below
// would score that situation as 0.
const entitiesBothEmpty = gt.entities.length === 0 && actual.entities.length === 0;
const entityPrecision = entitiesBothEmpty ? 1 : correctEntities / (actual.entities.length || 1);
const entityRecall = entitiesBothEmpty ? 1 : correctEntities / (gt.entities.length || 1);
// `|| 1` avoids 0/0 when precision and recall are both 0.
const entityF1 = (2 * entityPrecision * entityRecall) / (entityPrecision + entityRecall || 1);
// --- Relationship Matching ---
// A ground-truth relationship matches an extracted one when type, source and
// target are all strictly equal. Each extracted relationship can be paired at
// most once, so duplicated ground-truth rows are not all credited to the same
// extracted row (which would let precision exceed 1).
let correctRelationships = 0;
const matchedActualRels = new Set(); // indices into actual.relationships already paired
for (const gr of gt.relationships) {
  const idx = actual.relationships.findIndex(
    (ar, i) =>
      !matchedActualRels.has(i) &&
      ar.type === gr.type &&
      ar.source === gr.source &&
      ar.target === gr.target
  );
  if (idx >= 0) {
    correctRelationships++;
    matchedActualRels.add(idx);
  } else {
    console.log(`Missing relationship: ${gr.type} ${gr.source} -> ${gr.target}`);
  }
}

// Anything the extractor produced that was not paired above is an extra.
const extraRels = actual.relationships.filter((_, i) => !matchedActualRels.has(i));
for (const r of extraRels) {
  console.log(`Extra relationship: ${r.type} ${r.source} -> ${r.target}`);
}

// A file with no relationships on either side is a perfect match; without this
// case the `|| 1` guards below would score it as 0 and fail the whole run.
const relsBothEmpty = gt.relationships.length === 0 && actual.relationships.length === 0;
const relPrecision = relsBothEmpty ? 1 : correctRelationships / (actual.relationships.length || 1);
const relRecall = relsBothEmpty ? 1 : correctRelationships / (gt.relationships.length || 1);
// `|| 1` avoids 0/0 when precision and recall are both 0.
const relF1 = (2 * relPrecision * relRecall) / (relPrecision + relRecall || 1);
// Print one score line per category, then the verdict. The run passes only
// when entity F1 is at least 0.90 and relationship F1 is at least 0.85; the
// process exit code mirrors the verdict (0 = PASS, 1 = FAIL).
const formatScores = (precision, recall, f1) =>
  `P=${precision.toFixed(2)}, R=${recall.toFixed(2)}, F1=${f1.toFixed(2)}`;

console.log(`Entities: ${formatScores(entityPrecision, entityRecall, entityF1)}`);
console.log(`Relationships: ${formatScores(relPrecision, relRecall, relF1)}`);

const passed = entityF1 >= 0.90 && relF1 >= 0.85;
console.log(passed ? "PASS" : "FAIL");
process.exit(passed ? 0 : 1);