Dev Intel Pipeline v2 — multi-language semantic graph extractor
Phase 1: extract.js — tree-sitter AST parser (TS/JS/Python/Go/Java/Bash) + config parsers (YAML/HCL)
Phase 2: graph.js — in-memory directed graph store with build/query/diff CLI
Phase 3: namespace.js — cross-repo namespace registry with 3-tier resolution
Phase 4: semantic-diff.js — categorized diffs with impact scoring (0-100)
Phase 5: pipeline.js — batch extraction, incremental diffing, benchmarking
Benchmark: 4,325 files, 21,646 nodes, 133,979 edges in 67s (15ms/file)
BMad SPA reviews: all phases GO
This commit is contained in:
256
pipeline.js
Normal file
256
pipeline.js
Normal file
@@ -0,0 +1,256 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const GraphStore = require('./graph.js');
|
||||
const { extract } = require('./extract.js');
|
||||
const { semanticDiff, formatSummary } = require('./semantic-diff.js');
|
||||
|
||||
/**
|
||||
* Developer Intelligence Pipeline v2 - Pipeline Orchestrator
|
||||
* Batch extraction, incremental diffing, and benchmarking.
|
||||
* No external dependencies.
|
||||
*/
|
||||
|
||||
/**
 * File extensions eligible for extraction. Membership is tested against
 * the result of path.extname(), so every entry carries its leading dot
 * and matching is case-sensitive.
 */
const SUPPORTED_EXTS = new Set([
  // TypeScript / JavaScript
  '.ts',
  '.tsx',
  '.js',
  '.jsx',
  // Other source languages and shell scripts
  '.py',
  '.java',
  '.go',
  '.sh',
  '.bash',
  // Configuration formats
  '.yaml',
  '.yml',
  '.tf',
  '.hcl',
]);
|
||||
|
||||
/**
 * Entry names that file discovery skips outright (dependency folders,
 * VCS metadata, build output, caches, coverage reports).
 */
const IGNORE_DIRS = new Set([
  'node_modules',
  '.git',
  'dist',
  'build',
  '__pycache__',
  '.next',
  '.turbo',
  'coverage',
  '.nyc_output',
  'vendor',
]);
|
||||
|
||||
// Directory containing this module, and the path of the extract.js script
// that sits beside it.
// NOTE(review): neither constant is referenced elsewhere in this file —
// confirm whether they are still needed.
const SCRIPT_DIR = __dirname;
const EXTRACT_JS = path.join(__dirname, 'extract.js');
|
||||
|
||||
/**
 * Recursively discover supported files.
 *
 * Walks `dir` depth-first. Any entry whose name is in IGNORE_DIRS is
 * skipped, directories are descended into, and regular files are kept when
 * their extension is in SUPPORTED_EXTS. Entries that are neither directories
 * nor regular files are ignored.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths of matching files (each is `dir` joined with the
 *   relative location), in traversal order. Empty when `dir` is unreadable.
 */
function discoverFiles(dir) {
  const results = [];

  // One shared accumulator instead of `results.push(...discoverFiles(sub))`:
  // spreading a whole subtree's results as call arguments throws RangeError
  // on very large trees and re-copies every path once per directory level.
  const walk = (currentDir) => {
    let entries;
    try {
      entries = fs.readdirSync(currentDir, { withFileTypes: true });
    } catch {
      // Unreadable or missing directory: best-effort, contribute nothing.
      return;
    }

    for (const entry of entries) {
      if (IGNORE_DIRS.has(entry.name)) continue;

      const fullPath = path.join(currentDir, entry.name);
      if (entry.isDirectory()) {
        walk(fullPath);
      } else if (entry.isFile() && SUPPORTED_EXTS.has(path.extname(entry.name))) {
        results.push(fullPath);
      }
    }
  };

  walk(dir);
  return results;
}
|
||||
|
||||
/**
 * Run the in-process extractor (extract(), no subprocess) on one file.
 *
 * @param {string} filePath - File to extract.
 * @param {string} repoRoot - Repository root, forwarded to extract().
 * @returns {*} Whatever extract() returns, or null if extract() throws.
 */
function extractFile(filePath, repoRoot) {
  let extraction = null;
  try {
    extraction = extract(filePath, repoRoot);
  } catch {
    // Best-effort: the thrown error is discarded and reported as null.
    extraction = null;
  }
  return extraction;
}
|
||||
|
||||
/**
 * Batch extract all files, build graph, save snapshot.
 *
 * Discovers every supported file under `repoRoot`, extracts each one, builds
 * a graph from the successful results, and writes `snapshot.json` and
 * `stats.json` into `outputDir` (created if missing). Progress is logged
 * every 100 files.
 *
 * @param {string} repoRoot - Repository root to scan.
 * @param {string} outputDir - Directory receiving snapshot.json and stats.json.
 * @returns {{graph: object, snapshotPath: string, stats: object}} The built
 *   graph, the snapshot location, and the stats object that was written.
 */
function batchExtract(repoRoot, outputDir) {
  const files = discoverFiles(repoRoot);
  console.log(`Discovered ${files.length} supported files in ${repoRoot}`);

  fs.mkdirSync(outputDir, { recursive: true });

  const results = [];
  let errors = 0;
  const startTime = Date.now();

  files.forEach((file, index) => {
    const result = extractFile(file, repoRoot);
    // A null result or one carrying an `error` field counts as a failure.
    if (result && !result.error) {
      results.push(result);
    } else {
      errors += 1;
    }

    const processed = index + 1;
    if (processed % 100 === 0) {
      console.log(` Extracted ${processed}/${files.length}...`);
    }
  });

  const extractTime = Date.now() - startTime;
  console.log(`Extraction complete: ${results.length} succeeded, ${errors} failed (${extractTime}ms)`);

  const graph = GraphStore.buildGraph(results);
  const snapshotPath = path.join(outputDir, 'snapshot.json');
  GraphStore.saveSnapshot(graph, snapshotPath);
  console.log(`Graph: ${graph.nodes.size} nodes, ${graph.edges.length} edges. Saved to ${snapshotPath}`);

  // Save stats
  const stats = {
    repoRoot,
    filesDiscovered: files.length,
    filesExtracted: results.length,
    errors,
    nodes: graph.nodes.size,
    edges: graph.edges.length,
    extractionTimeMs: extractTime,
    // Guard the empty repo: 0 / 0 is NaN, which logs as "NaNms/file" and
    // serialises to null in stats.json.
    avgTimePerFileMs: files.length > 0 ? Math.round(extractTime / files.length) : 0,
    timestamp: new Date().toISOString(),
  };
  fs.writeFileSync(path.join(outputDir, 'stats.json'), JSON.stringify(stats, null, 2));
  console.log(`Stats saved. Avg ${stats.avgTimePerFileMs}ms/file`);

  return { graph, snapshotPath, stats };
}
|
||||
|
||||
/**
 * Incremental run: extract files, diff against previous snapshot.
 *
 * Extracts `files` (or every discovered file when `files` is falsy), builds a
 * new graph, and writes it to `<outputDir>/snapshot.json`. When
 * `prevSnapshotPath` names an existing file, that snapshot is loaded as the
 * baseline, a semantic diff is logged, and its score, severity, stats and
 * categorized changes are written to `<outputDir>/diff.json`.
 *
 * @param {string} repoRoot - Repository root passed to the extractor.
 * @param {string[]|null} files - Files to extract; falsy means "discover all".
 * @param {string|null} prevSnapshotPath - Baseline snapshot, if any.
 * @param {string} outputDir - Directory receiving snapshot.json / diff.json.
 * @returns {{newSnapshotPath: string, errors: number, diff: (object|null)}}
 *   Path of the new snapshot, the number of files that failed extraction,
 *   and the diff (null when there was no baseline).
 */
function incrementalRun(repoRoot, files, prevSnapshotPath, outputDir) {
  fs.mkdirSync(outputDir, { recursive: true });

  const filesToExtract = files || discoverFiles(repoRoot);
  console.log(`Extracting ${filesToExtract.length} files...`);

  const results = [];
  let errors = 0;
  for (const file of filesToExtract) {
    const result = extractFile(file, repoRoot);
    if (result && !result.error) {
      results.push(result);
    } else {
      errors += 1;
    }
  }
  if (errors > 0) {
    console.log(`Extraction failures: ${errors}`);
  }

  // Load the baseline BEFORE writing the new snapshot. If prevSnapshotPath
  // is <outputDir>/snapshot.json (e.g. re-running with the same output
  // directory), saving first would overwrite the baseline and the diff
  // would compare the new graph against itself.
  const hasBaseline = Boolean(prevSnapshotPath) && fs.existsSync(prevSnapshotPath);
  const oldGraph = hasBaseline ? GraphStore.loadSnapshot(prevSnapshotPath) : null;

  const newGraph = GraphStore.buildGraph(results);
  const newSnapshotPath = path.join(outputDir, 'snapshot.json');
  GraphStore.saveSnapshot(newGraph, newSnapshotPath);
  console.log(`New graph: ${newGraph.nodes.size} nodes, ${newGraph.edges.length} edges`);

  let diff = null;
  if (hasBaseline) {
    diff = semanticDiff(oldGraph, newGraph);
    console.log(formatSummary(diff));

    const { score, severity, stats, categorized } = diff;
    fs.writeFileSync(
      path.join(outputDir, 'diff.json'),
      JSON.stringify({ score, severity, stats, categorized }, null, 2),
    );
  }

  return { newSnapshotPath, errors, diff };
}
|
||||
|
||||
/**
 * Benchmark: extract N random files, report timing.
 *
 * Picks up to `sampleCount` files uniformly at random from the supported
 * files under `repoRoot`, times extractFile() on each with Date.now(), and
 * prints a summary plus the five slowest files.
 *
 * @param {string} repoRoot - Repository root to sample from.
 * @param {number} sampleCount - Requested number of files. Values that are
 *   NaN or negative sample nothing; values above the file count sample all.
 * @returns {{totalFiles: number, sampled: number, errors: number,
 *   totalEntities: number, totalRelationships: number, totalTime: number,
 *   avgTime: number, p50: number, p95: number}} Aggregate results; times are
 *   in milliseconds.
 */
function benchmark(repoRoot, sampleCount) {
  const allFiles = discoverFiles(repoRoot);
  console.log(`Total supported files: ${allFiles.length}`);

  // Fisher-Yates shuffle on a copy. `sort(() => Math.random() - 0.5)` uses an
  // inconsistent comparator, so the resulting order is engine-dependent and
  // not uniformly random.
  const shuffled = [...allFiles];
  for (let i = shuffled.length - 1; i > 0; i -= 1) {
    const j = Math.floor(Math.random() * (i + 1));
    [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
  }

  // Clamp to [0, file count]. A negative end index would make slice() count
  // from the end and sample almost every file.
  const requested = Number(sampleCount);
  const take = Number.isNaN(requested)
    ? 0
    : Math.max(0, Math.min(Math.floor(requested), shuffled.length));
  const samples = shuffled.slice(0, take);
  console.log(`Benchmarking ${samples.length} files...\n`);

  const timings = [];
  let totalEntities = 0;
  let totalRelationships = 0;
  let errors = 0;

  for (const file of samples) {
    const start = Date.now();
    const result = extractFile(file, repoRoot);
    const elapsed = Date.now() - start;
    const relFile = path.relative(repoRoot, file);

    if (result && !result.error) {
      const entities = result.entities.length;
      const relationships = result.relationships.length;
      timings.push({ file: relFile, timeMs: elapsed, entities, relationships });
      totalEntities += entities;
      totalRelationships += relationships;
    } else {
      errors += 1;
      timings.push({ file: relFile, timeMs: elapsed, entities: 0, relationships: 0, error: true });
    }
  }

  // Sort by time descending
  timings.sort((a, b) => b.timeMs - a.timeMs);

  const totalTime = timings.reduce((sum, t) => sum + t.timeMs, 0);
  // No samples means no average; report 0 rather than NaN.
  const avgTime = timings.length > 0 ? Math.round(totalTime / timings.length) : 0;
  // timings is slowest-first, so the entry 5% of the way in is the P95.
  const p50 = timings[Math.floor(timings.length * 0.5)]?.timeMs || 0;
  const p95 = timings[Math.floor(timings.length * 0.05)]?.timeMs || 0;

  console.log('=== V2 Pipeline Benchmark ===');
  console.log(`Repo: ${repoRoot}`);
  console.log(`Files sampled: ${samples.length} / ${allFiles.length}`);
  console.log(`Errors: ${errors}`);
  console.log(`Total entities: ${totalEntities}`);
  console.log(`Total relationships: ${totalRelationships}`);
  console.log(`Total time: ${totalTime}ms`);
  console.log(`Avg time/file: ${avgTime}ms`);
  console.log(`P50: ${p50}ms | P95: ${p95}ms`);
  console.log('');
  console.log('Top 5 slowest:');
  for (const t of timings.slice(0, 5)) {
    console.log(` ${t.timeMs}ms ${t.file} (${t.entities}E/${t.relationships}R)${t.error ? ' ERROR' : ''}`);
  }

  return { totalFiles: allFiles.length, sampled: samples.length, errors, totalEntities, totalRelationships, totalTime, avgTime, p50, p95 };
}
|
||||
|
||||
// --- CLI ---
// Runs only when this file is executed directly (not when it is required).
//   node pipeline.js batch <repo-root> --output <dir>
//   node pipeline.js run <repo-root> [--snapshot <prev.json>] [--output <dir>]
//   node pipeline.js benchmark <repo-root> --samples <N>
if (require.main === module) {
  const DEFAULT_OUTPUT_DIR = '/tmp/pipeline-output';
  const USAGE = {
    batch: 'Usage: node pipeline.js batch <repo-root> --output <dir>',
    run: 'Usage: node pipeline.js run <repo-root> [--snapshot <prev.json>] [--output <dir>]',
    benchmark: 'Usage: node pipeline.js benchmark <repo-root> --samples <N>',
  };

  const args = process.argv.slice(2);
  const command = args[0];
  // A flag in the <repo-root> position means the positional was omitted.
  const repoRoot = args[1] !== undefined && !args[1].startsWith('--') ? args[1] : undefined;

  // Print a message to stderr and terminate with a failing exit code.
  const usageError = (message) => {
    console.error(message);
    process.exit(1);
  };

  // Value following `flag`, or `fallback` when the flag is absent. A flag
  // given without a value is a usage error; previously it passed `undefined`
  // downstream (e.g. to fs.mkdirSync) or was silently ignored.
  const readOption = (flag, fallback, usage) => {
    const flagIdx = args.indexOf(flag);
    if (flagIdx < 0) return fallback;
    const value = args[flagIdx + 1];
    if (value === undefined || value.startsWith('--')) {
      usageError(`Missing value for ${flag}\n${usage}`);
    }
    return value;
  };

  if (command === 'batch') {
    if (!repoRoot) usageError(USAGE.batch);
    const outputDir = readOption('--output', DEFAULT_OUTPUT_DIR, USAGE.batch);
    batchExtract(repoRoot, outputDir);
  } else if (command === 'run') {
    if (!repoRoot) usageError(USAGE.run);
    const prevSnapshot = readOption('--snapshot', null, USAGE.run);
    const outputDir = readOption('--output', DEFAULT_OUTPUT_DIR, USAGE.run);
    incrementalRun(repoRoot, null, prevSnapshot, outputDir);
  } else if (command === 'benchmark') {
    if (!repoRoot) usageError(USAGE.benchmark);
    const sampleCount = Number.parseInt(readOption('--samples', '10', USAGE.benchmark), 10);
    if (Number.isNaN(sampleCount) || sampleCount < 0) {
      usageError(`--samples must be a non-negative integer\n${USAGE.benchmark}`);
    }
    benchmark(repoRoot, sampleCount);
  } else {
    usageError('Unknown command. Available: batch, run, benchmark');
  }
}
|
||||
|
||||
// Programmatic API: the same functions the CLI above dispatches to, plus the
// discovery and single-file extraction helpers they are built on.
module.exports = { discoverFiles, extractFile, batchExtract, incrementalRun, benchmark };
|
||||
Reference in New Issue
Block a user