Dev Intel Pipeline v2 — multi-language semantic graph extractor

Phase 1: extract.js — tree-sitter AST parser (TS/JS/Python/Go/Java/Bash) + config parsers (YAML/HCL)
Phase 2: graph.js — in-memory directed graph store with build/query/diff CLI
Phase 3: namespace.js — cross-repo namespace registry with 3-tier resolution
Phase 4: semantic-diff.js — categorized diffs with impact scoring (0-100)
Phase 5: pipeline.js — batch extraction, incremental diffing, benchmarking

Benchmark: 4,325 files, 21,646 nodes, 133,979 edges in 67s (15ms/file)
BMad SPA reviews: all phases GO
This commit is contained in:
Jarvis Prime
2026-03-09 05:29:29 +00:00
commit efb12d003b
19 changed files with 4106 additions and 0 deletions

256
pipeline.js Normal file
View File

@@ -0,0 +1,256 @@
const fs = require('fs');
const path = require('path');
const GraphStore = require('./graph.js');
const { extract } = require('./extract.js');
const { semanticDiff, formatSummary } = require('./semantic-diff.js');
/**
* Developer Intelligence Pipeline v2 - Pipeline Orchestrator
* Batch extraction, incremental diffing, and benchmarking.
* No external dependencies.
*/
// File extensions that discovery keeps; files with any other extension are skipped.
const SUPPORTED_EXTS = new Set([
  // TypeScript / JavaScript
  '.ts',
  '.tsx',
  '.js',
  '.jsx',
  // Python, Java, Go
  '.py',
  '.java',
  '.go',
  // Shell
  '.sh',
  '.bash',
  // Config: YAML and HCL
  '.yaml',
  '.yml',
  '.tf',
  '.hcl',
]);

// Entry names that discovery skips without descending into them.
const IGNORE_DIRS = new Set([
  'node_modules',
  '.git',
  'dist',
  'build',
  '__pycache__',
  '.next',
  '.turbo',
  'coverage',
  '.nyc_output',
  'vendor',
]);
// Directory that contains this script.
const SCRIPT_DIR = __dirname;
// Path to the extractor script that sits next to this file.
// NOTE(review): not referenced anywhere else in the visible code (extraction
// runs in-process via extract()) — confirm whether this constant is still needed.
const EXTRACT_JS = path.join(SCRIPT_DIR, 'extract.js');
/**
 * Recursively discover supported files under a directory.
 *
 * Entries whose name is in IGNORE_DIRS are skipped. Regular files are kept when
 * their extension is in SUPPORTED_EXTS. A directory that cannot be read is
 * skipped (best effort) so one unreadable folder does not abort the whole scan.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths of supported files, each built with path.join()
 *   starting from `dir`, in depth-first directory-listing order.
 */
function discoverFiles(dir) {
  const results = [];

  // All levels append to one shared array. Spreading a child's result into
  // push(...) passes every path as a call argument, which throws RangeError on
  // very large subtrees and re-copies the results at each directory level.
  const walk = (current) => {
    let entries;
    try {
      entries = fs.readdirSync(current, { withFileTypes: true });
    } catch {
      // Unreadable or missing directory: contribute nothing and keep scanning.
      return;
    }
    for (const entry of entries) {
      if (IGNORE_DIRS.has(entry.name)) continue;
      const fullPath = path.join(current, entry.name);
      if (entry.isDirectory()) {
        walk(fullPath);
      } else if (entry.isFile() && SUPPORTED_EXTS.has(path.extname(entry.name))) {
        results.push(fullPath);
      }
    }
  };

  walk(dir);
  return results;
}
/**
 * Extract a single file using the in-process extract(), no subprocess.
 *
 * Best effort: a throwing extractor must not abort a whole run, so the failure
 * is reported on stderr and signalled to the caller with `null`.
 *
 * @param {string} filePath - File to extract.
 * @param {string} repoRoot - Repository root, passed through to extract().
 * @returns {*} Whatever extract() returns, or `null` if extract() threw.
 */
function extractFile(filePath, repoRoot) {
  try {
    return extract(filePath, repoRoot);
  } catch (err) {
    // Callers only count failures; without this line the cause would be lost.
    console.warn(`extract failed for ${filePath}: ${err?.message ?? err}`);
    return null;
  }
}
/**
 * Batch extract all files, build graph, save snapshot.
 *
 * Discovers every supported file under `repoRoot`, extracts each one, builds a
 * graph from the successful results and writes `snapshot.json` and `stats.json`
 * into `outputDir` (created if missing). A result counts as failed when it is
 * falsy or carries an `error` property.
 *
 * @param {string} repoRoot - Root directory to scan.
 * @param {string} outputDir - Directory that receives snapshot.json and stats.json.
 * @returns {{graph: object, snapshotPath: string, stats: object}} The built
 *   graph, the path of the saved snapshot and the statistics written to disk.
 */
function batchExtract(repoRoot, outputDir) {
  const files = discoverFiles(repoRoot);
  console.log(`Discovered ${files.length} supported files in ${repoRoot}`);
  fs.mkdirSync(outputDir, { recursive: true });

  const results = [];
  let errors = 0;
  const startTime = Date.now();
  files.forEach((file, index) => {
    const result = extractFile(file, repoRoot);
    if (result && !result.error) {
      results.push(result);
    } else {
      errors++;
    }
    // Progress line every 100 files.
    const done = index + 1;
    if (done % 100 === 0) {
      console.log(` Extracted ${done}/${files.length}...`);
    }
  });
  const extractTime = Date.now() - startTime;
  console.log(`Extraction complete: ${results.length} succeeded, ${errors} failed (${extractTime}ms)`);

  const graph = GraphStore.buildGraph(results);
  const snapshotPath = path.join(outputDir, 'snapshot.json');
  GraphStore.saveSnapshot(graph, snapshotPath);
  console.log(`Graph: ${graph.nodes.size} nodes, ${graph.edges.length} edges. Saved to ${snapshotPath}`);

  // Save stats
  const stats = {
    repoRoot,
    filesDiscovered: files.length,
    filesExtracted: results.length,
    errors,
    nodes: graph.nodes.size,
    edges: graph.edges.length,
    extractionTimeMs: extractTime,
    // With no files, 0/0 is NaN, which JSON.stringify writes as null.
    avgTimePerFileMs: files.length > 0 ? Math.round(extractTime / files.length) : 0,
    timestamp: new Date().toISOString(),
  };
  fs.writeFileSync(path.join(outputDir, 'stats.json'), JSON.stringify(stats, null, 2));
  console.log(`Stats saved. Avg ${stats.avgTimePerFileMs}ms/file`);
  return { graph, snapshotPath, stats };
}
/**
 * Incremental run: extract files, diff against previous snapshot.
 *
 * Extracts `files` (or every discovered file when `files` is falsy), builds a
 * new graph and saves it as `<outputDir>/snapshot.json`. When `prevSnapshotPath`
 * points at an existing file, that snapshot is diffed against the new graph,
 * the summary is printed and `diff.json` is written to `outputDir`.
 *
 * @param {string} repoRoot - Repository root passed to the extractor.
 * @param {string[]|null} files - Files to extract; falsy means discover all.
 * @param {string|null} prevSnapshotPath - Previous snapshot to diff against, if any.
 * @param {string} outputDir - Directory that receives snapshot.json / diff.json.
 * @returns {{newSnapshotPath: string}} Path of the snapshot that was written.
 */
function incrementalRun(repoRoot, files, prevSnapshotPath, outputDir) {
  fs.mkdirSync(outputDir, { recursive: true });
  const filesToExtract = files || discoverFiles(repoRoot);
  console.log(`Extracting ${filesToExtract.length} files...`);

  const results = [];
  let errors = 0;
  for (const f of filesToExtract) {
    const result = extractFile(f, repoRoot);
    if (result && !result.error) {
      results.push(result);
    } else {
      errors++;
    }
  }
  console.log(`Extraction complete: ${results.length} succeeded, ${errors} failed`);

  const newGraph = GraphStore.buildGraph(results);
  const newSnapshotPath = path.join(outputDir, 'snapshot.json');

  // Load the baseline BEFORE saving: when prevSnapshotPath is the same file as
  // newSnapshotPath (re-running into the same output dir), saving first would
  // overwrite the baseline and the diff would compare the new graph with itself.
  const hasPrevious = Boolean(prevSnapshotPath) && fs.existsSync(prevSnapshotPath);
  let oldGraph = null;
  try {
    if (hasPrevious) {
      oldGraph = GraphStore.loadSnapshot(prevSnapshotPath);
    }
  } finally {
    // Still persist the new snapshot if the baseline could not be loaded.
    GraphStore.saveSnapshot(newGraph, newSnapshotPath);
  }
  console.log(`New graph: ${newGraph.nodes.size} nodes, ${newGraph.edges.length} edges`);

  if (hasPrevious) {
    const diff = semanticDiff(oldGraph, newGraph);
    console.log(formatSummary(diff));
    fs.writeFileSync(path.join(outputDir, 'diff.json'), JSON.stringify({
      score: diff.score,
      severity: diff.severity,
      stats: diff.stats,
      categorized: diff.categorized,
    }, null, 2));
  }
  return { newSnapshotPath };
}
/**
 * Benchmark: extract N random files, report timing.
 *
 * Picks up to `sampleCount` files at random from the supported files under
 * `repoRoot`, times extractFile() on each, prints a report and returns the
 * aggregated numbers.
 *
 * @param {string} repoRoot - Root directory to scan.
 * @param {number} sampleCount - Maximum number of files to sample; negative or
 *   NaN values sample nothing.
 * @returns {{totalFiles: number, sampled: number, errors: number,
 *   totalEntities: number, totalRelationships: number, totalTime: number,
 *   avgTime: number, p50: number, p95: number}} Aggregated benchmark numbers
 *   (times in milliseconds).
 */
function benchmark(repoRoot, sampleCount) {
  const allFiles = discoverFiles(repoRoot);
  console.log(`Total supported files: ${allFiles.length}`);

  // Fisher–Yates shuffle on a copy: sorting with a random comparator is biased
  // (the comparator is inconsistent) and would reorder the discovered list.
  const shuffled = [...allFiles];
  for (let i = shuffled.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
  }
  // Clamp at 0 so a negative count cannot turn slice() into "all but the last
  // k"; `|| 0` maps a NaN count to an empty sample.
  const sampleSize = Math.max(0, Math.min(sampleCount, allFiles.length)) || 0;
  const samples = shuffled.slice(0, sampleSize);
  console.log(`Benchmarking ${samples.length} files...\n`);

  const timings = [];
  let totalEntities = 0;
  let totalRelationships = 0;
  let errors = 0;
  for (const file of samples) {
    const start = Date.now();
    const result = extractFile(file, repoRoot);
    const elapsed = Date.now() - start;
    const relFile = path.relative(repoRoot, file);
    if (result && !result.error) {
      const entities = result.entities.length;
      const relationships = result.relationships.length;
      timings.push({ file: relFile, timeMs: elapsed, entities, relationships });
      totalEntities += entities;
      totalRelationships += relationships;
    } else {
      errors++;
      timings.push({ file: relFile, timeMs: elapsed, entities: 0, relationships: 0, error: true });
    }
  }

  // Sort by time descending (slowest first); the percentile lookups rely on it.
  timings.sort((a, b) => b.timeMs - a.timeMs);
  const totalTime = timings.reduce((s, t) => s + t.timeMs, 0);
  // Avoid 0/0 = NaN when nothing was sampled.
  const avgTime = timings.length > 0 ? Math.round(totalTime / timings.length) : 0;
  const p50 = timings[Math.floor(timings.length * 0.5)]?.timeMs || 0;
  // Slowest-first order means the entry 5% from the top is the 95th percentile.
  const p95 = timings[Math.floor(timings.length * 0.05)]?.timeMs || 0;

  console.log('=== V2 Pipeline Benchmark ===');
  console.log(`Repo: ${repoRoot}`);
  console.log(`Files sampled: ${samples.length} / ${allFiles.length}`);
  console.log(`Errors: ${errors}`);
  console.log(`Total entities: ${totalEntities}`);
  console.log(`Total relationships: ${totalRelationships}`);
  console.log(`Total time: ${totalTime}ms`);
  console.log(`Avg time/file: ${avgTime}ms`);
  console.log(`P50: ${p50}ms | P95: ${p95}ms`);
  console.log('');
  console.log('Top 5 slowest:');
  for (const t of timings.slice(0, 5)) {
    console.log(` ${t.timeMs}ms ${t.file} (${t.entities}E/${t.relationships}R)${t.error ? ' ERROR' : ''}`);
  }
  return { totalFiles: allFiles.length, sampled: samples.length, errors, totalEntities, totalRelationships, totalTime, avgTime, p50, p95 };
}
// --- CLI ---
// Runs only when this file is executed directly (not when it is require()d).
// Commands: batch, run, benchmark. Usage errors print a message and exit 1.
if (require.main === module) {
  const args = process.argv.slice(2);
  const [command, repoRoot] = args;
  const DEFAULT_OUTPUT_DIR = '/tmp/pipeline-output';

  // Print a message to stderr and terminate with a failure status.
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };

  // Value that follows `flag`, or `fallback` when the flag is absent. A flag
  // given without a value is a usage error: passing `undefined` on would only
  // fail later (fs.mkdirSync) or silently skip work (the snapshot diff).
  const readFlag = (flag, fallback, usage) => {
    const idx = args.indexOf(flag);
    if (idx < 0) return fallback;
    const value = args[idx + 1];
    if (value === undefined || value.startsWith('--')) {
      fail(`Missing value for ${flag}\n${usage}`);
    }
    return value;
  };

  if (command === 'batch') {
    const usage = 'Usage: node pipeline.js batch <repo-root> --output <dir>';
    if (!repoRoot) fail(usage);
    const outputDir = readFlag('--output', DEFAULT_OUTPUT_DIR, usage);
    batchExtract(repoRoot, outputDir);
  } else if (command === 'run') {
    const usage = 'Usage: node pipeline.js run <repo-root> [--snapshot <prev.json>] [--output <dir>]';
    if (!repoRoot) fail(usage);
    const prevSnapshot = readFlag('--snapshot', null, usage);
    const outputDir = readFlag('--output', DEFAULT_OUTPUT_DIR, usage);
    incrementalRun(repoRoot, null, prevSnapshot, outputDir);
  } else if (command === 'benchmark') {
    const usage = 'Usage: node pipeline.js benchmark <repo-root> --samples <N>';
    if (!repoRoot) fail(usage);
    const sampleCount = Number.parseInt(readFlag('--samples', '10', usage), 10);
    // A non-numeric value used to become NaN and silently benchmark 0 files.
    if (!Number.isInteger(sampleCount) || sampleCount < 0) {
      fail(`--samples expects a non-negative integer\n${usage}`);
    }
    benchmark(repoRoot, sampleCount);
  } else {
    fail('Unknown command. Available: batch, run, benchmark');
  }
}
// Public API: the pipeline steps, exported so they can be called without the CLI.
module.exports = { discoverFiles, extractFile, batchExtract, incrementalRun, benchmark };