2026-03-09 05:29:29 +00:00
|
|
|
const fs = require('fs');
|
|
|
|
|
const path = require('path');
|
|
|
|
|
const GraphStore = require('./graph.js');
|
|
|
|
|
const { extract } = require('./extract.js');
|
|
|
|
|
const { semanticDiff, formatSummary } = require('./semantic-diff.js');
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Developer Intelligence Pipeline v2 - Pipeline Orchestrator
|
|
|
|
|
* Batch extraction, incremental diffing, and benchmarking.
|
|
|
|
|
* No external dependencies.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// File extensions (leading dot included) that are eligible for extraction.
const SUPPORTED_EXTS = new Set([
  // JavaScript / TypeScript sources
  '.ts', '.tsx', '.js', '.jsx',
  // Other languages and shell scripts
  '.py', '.java', '.go', '.sh', '.bash',
  // Configuration and infrastructure files
  '.yaml', '.yml', '.tf', '.hcl',
]);
|
|
|
|
|
|
|
|
|
|
// Entry names that are never scanned: dependency folders, build output,
// VCS/tooling metadata, agent workspaces and documentation trees.
const IGNORE_DIRS = new Set([
  'node_modules', '.git', 'dist', 'build', '__pycache__', '.next',
  '.turbo', 'coverage', '.nyc_output', 'vendor', 'venv',
  '.codex', '.claude', '.cursor', '.gemini', '.kiro', '.agents',
  '_bmad', '_bmad-output', '.terraform', 'skills', '.ci', 'docs', '.workdir',
]);
|
|
|
|
|
|
|
|
|
|
// Directory that contains this module.
const SCRIPT_DIR = __dirname;

// Path of the extract.js script located next to this module.
const EXTRACT_JS = path.join(SCRIPT_DIR, 'extract.js');
|
|
|
|
|
|
|
|
|
|
/**
 * Recursively discover supported files.
 *
 * Walks `dir` depth-first in the order returned by `fs.readdirSync`. Any entry
 * whose name is in IGNORE_DIRS is skipped; regular files are kept when their
 * extension is in SUPPORTED_EXTS. A directory that cannot be read contributes
 * nothing (best-effort scan, no exception is raised).
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths of the matching files, each formed by joining
 *   entry names onto `dir`.
 */
function discoverFiles(dir) {
  const results = [];

  // Every level appends into the same array. Merging child results with
  // `results.push(...child)` would pass each path as a separate call argument
  // and can throw a RangeError once a subtree holds a very large number of files.
  const walk = (current) => {
    let entries;
    try {
      entries = fs.readdirSync(current, { withFileTypes: true });
    } catch {
      return; // missing or unreadable directory: skip it
    }

    for (const entry of entries) {
      if (IGNORE_DIRS.has(entry.name)) continue;

      const fullPath = path.join(current, entry.name);
      if (entry.isDirectory()) {
        walk(fullPath);
      } else if (entry.isFile() && SUPPORTED_EXTS.has(path.extname(entry.name))) {
        results.push(fullPath);
      }
    }
  };

  walk(dir);
  return results;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Run the in-process extractor on a single file (no subprocess is spawned).
 *
 * @param {string} filePath - File passed to `extract()`.
 * @param {string} repoRoot - Repository root passed to `extract()`.
 * @returns {*} Whatever `extract()` returns, or `null` when it throws.
 */
function extractFile(filePath, repoRoot) {
  let extraction = null;
  try {
    extraction = extract(filePath, repoRoot);
  } catch {
    // Best-effort: a failing file is reported to the caller as null.
  }
  return extraction;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Batch extract all files, build graph, save snapshot.
 *
 * Discovers the supported files under `repoRoot`, extracts each one, builds a
 * graph from the successful results and writes `snapshot.json` and
 * `stats.json` into `outputDir` (created if missing). Progress and a summary
 * are logged to stdout.
 *
 * @param {string} repoRoot - Root directory to scan.
 * @param {string} outputDir - Directory that receives the snapshot and stats.
 * @returns {{graph: object, snapshotPath: string, stats: object}} The built
 *   graph, the snapshot path, and the stats object written to `stats.json`.
 */
function batchExtract(repoRoot, outputDir) {
  const files = discoverFiles(repoRoot);
  console.log(`Discovered ${files.length} supported files in ${repoRoot}`);

  fs.mkdirSync(outputDir, { recursive: true });

  const results = [];
  let errors = 0;
  const startTime = Date.now();

  for (let i = 0; i < files.length; i++) {
    const result = extractFile(files[i], repoRoot);
    // A missing result or one carrying an `error` field counts as a failure.
    if (result && !result.error) {
      results.push(result);
    } else {
      errors++;
    }
    if ((i + 1) % 100 === 0) {
      console.log(`  Extracted ${i + 1}/${files.length}...`);
    }
  }

  const extractTime = Date.now() - startTime;
  console.log(`Extraction complete: ${results.length} succeeded, ${errors} failed (${extractTime}ms)`);

  const graph = GraphStore.buildGraph(results);
  const snapshotPath = path.join(outputDir, 'snapshot.json');
  GraphStore.saveSnapshot(graph, snapshotPath);
  console.log(`Graph: ${graph.nodes.size} nodes, ${graph.edges.length} edges. Saved to ${snapshotPath}`);

  // Save stats
  const stats = {
    repoRoot,
    filesDiscovered: files.length,
    filesExtracted: results.length,
    errors,
    nodes: graph.nodes.size,
    edges: graph.edges.length,
    extractionTimeMs: extractTime,
    // Guard the empty-repo case: 0 / 0 is NaN, which JSON.stringify writes as null.
    avgTimePerFileMs: files.length > 0 ? Math.round(extractTime / files.length) : 0,
    timestamp: new Date().toISOString(),
  };
  fs.writeFileSync(path.join(outputDir, 'stats.json'), JSON.stringify(stats, null, 2));
  console.log(`Stats saved. Avg ${stats.avgTimePerFileMs}ms/file`);

  return { graph, snapshotPath, stats };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Incremental run: extract files, diff against previous snapshot.
 *
 * Extracts `files` (or every discovered file when `files` is falsy), builds a
 * new graph and writes it to `<outputDir>/snapshot.json`. When
 * `prevSnapshotPath` points at an existing file, the previous graph is loaded,
 * a semantic diff summary is logged and `<outputDir>/diff.json` is written.
 *
 * @param {string} repoRoot - Repository root passed to the extractor.
 * @param {string[]|null} files - Files to extract, or null/undefined to
 *   discover them under `repoRoot`.
 * @param {string|null} prevSnapshotPath - Previous snapshot to diff against.
 * @param {string} outputDir - Directory for the new snapshot and the diff
 *   (created if missing).
 * @returns {{newSnapshotPath: string}} Path of the snapshot just written.
 */
function incrementalRun(repoRoot, files, prevSnapshotPath, outputDir) {
  fs.mkdirSync(outputDir, { recursive: true });

  const filesToExtract = files || discoverFiles(repoRoot);
  console.log(`Extracting ${filesToExtract.length} files...`);

  const results = [];
  let errors = 0;
  for (const file of filesToExtract) {
    const result = extractFile(file, repoRoot);
    // A missing result or one carrying an `error` field counts as a failure.
    if (result && !result.error) {
      results.push(result);
    } else {
      errors++;
    }
  }
  if (errors > 0) {
    console.log(`  ${errors} file(s) failed to extract`);
  }

  const newGraph = GraphStore.buildGraph(results);

  // Load the previous snapshot BEFORE writing the new one. When the caller
  // reuses the same output directory, prevSnapshotPath is the very file that
  // is about to be overwritten, and loading it afterwards would diff the new
  // graph against itself.
  const hasPrevious = Boolean(prevSnapshotPath) && fs.existsSync(prevSnapshotPath);
  const oldGraph = hasPrevious ? GraphStore.loadSnapshot(prevSnapshotPath) : null;

  const newSnapshotPath = path.join(outputDir, 'snapshot.json');
  GraphStore.saveSnapshot(newGraph, newSnapshotPath);
  console.log(`New graph: ${newGraph.nodes.size} nodes, ${newGraph.edges.length} edges`);

  if (hasPrevious) {
    const diff = semanticDiff(oldGraph, newGraph);
    console.log(formatSummary(diff));

    fs.writeFileSync(path.join(outputDir, 'diff.json'), JSON.stringify({
      score: diff.score,
      severity: diff.severity,
      stats: diff.stats,
      categorized: diff.categorized,
    }, null, 2));
  }

  return { newSnapshotPath };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Benchmark: extract N random files, report timing.
 *
 * Picks up to `sampleCount` files uniformly at random from the supported files
 * under `repoRoot`, times the extraction of each one and logs a report
 * (totals, average, P50/P95 and the five slowest files).
 *
 * @param {string} repoRoot - Root directory to scan.
 * @param {number} sampleCount - Number of files to sample. Values below zero
 *   or not a number sample nothing; values above the file count sample all.
 * @returns {{totalFiles: number, sampled: number, errors: number,
 *   totalEntities: number, totalRelationships: number, totalTime: number,
 *   avgTime: number, p50: number, p95: number}} Aggregate results; times are
 *   in milliseconds.
 */
function benchmark(repoRoot, sampleCount) {
  const allFiles = discoverFiles(repoRoot);
  console.log(`Total supported files: ${allFiles.length}`);

  // Shuffle and pick N. Fisher-Yates on a copy gives an unbiased permutation;
  // sorting with a random comparator does not, and its result depends on the
  // engine's sort algorithm.
  const shuffled = [...allFiles];
  for (let i = shuffled.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
  }

  // Clamp to [0, file count]: a negative count handed to slice() would
  // otherwise select "all but the last k" files.
  const requested = Number(sampleCount);
  const sampleSize = Number.isNaN(requested)
    ? 0
    : Math.max(0, Math.min(Math.trunc(requested), shuffled.length));
  const samples = shuffled.slice(0, sampleSize);
  console.log(`Benchmarking ${samples.length} files...\n`);

  const timings = [];
  let totalEntities = 0;
  let totalRelationships = 0;
  let errors = 0;

  for (const file of samples) {
    const start = Date.now();
    const result = extractFile(file, repoRoot);
    const elapsed = Date.now() - start;
    const relPath = path.relative(repoRoot, file);

    if (result && !result.error) {
      const entities = result.entities.length;
      const relationships = result.relationships.length;
      timings.push({ file: relPath, timeMs: elapsed, entities, relationships });
      totalEntities += entities;
      totalRelationships += relationships;
    } else {
      errors++;
      timings.push({ file: relPath, timeMs: elapsed, entities: 0, relationships: 0, error: true });
    }
  }

  // Sort by time descending
  timings.sort((a, b) => b.timeMs - a.timeMs);

  const totalTime = timings.reduce((sum, t) => sum + t.timeMs, 0);
  // With zero samples the average would be 0 / 0 = NaN.
  const avgTime = timings.length > 0 ? Math.round(totalTime / timings.length) : 0;
  // The list is slowest-first, so the 95th percentile sits 5% into it.
  const p50 = timings[Math.floor(timings.length * 0.5)]?.timeMs || 0;
  const p95 = timings[Math.floor(timings.length * 0.05)]?.timeMs || 0;

  console.log('=== V2 Pipeline Benchmark ===');
  console.log(`Repo: ${repoRoot}`);
  console.log(`Files sampled: ${samples.length} / ${allFiles.length}`);
  console.log(`Errors: ${errors}`);
  console.log(`Total entities: ${totalEntities}`);
  console.log(`Total relationships: ${totalRelationships}`);
  console.log(`Total time: ${totalTime}ms`);
  console.log(`Avg time/file: ${avgTime}ms`);
  console.log(`P50: ${p50}ms | P95: ${p95}ms`);
  console.log('');
  console.log('Top 5 slowest:');
  for (const t of timings.slice(0, 5)) {
    console.log(`  ${t.timeMs}ms  ${t.file} (${t.entities}E/${t.relationships}R)${t.error ? ' ERROR' : ''}`);
  }

  return { totalFiles: allFiles.length, sampled: samples.length, errors, totalEntities, totalRelationships, totalTime, avgTime, p50, p95 };
}
|
|
|
|
|
|
|
|
|
|
// --- CLI ---
// Runs only when this file is executed directly (not when it is required).
// Commands: batch, run, benchmark. Every failure prints a message to stderr
// and exits with status 1.
if (require.main === module) {
  const DEFAULT_OUTPUT_DIR = '/tmp/pipeline-output';
  const USAGE = {
    batch: 'Usage: node pipeline.js batch <repo-root> --output <dir>',
    run: 'Usage: node pipeline.js run <repo-root> [--snapshot <prev.json>] [--output <dir>]',
    benchmark: 'Usage: node pipeline.js benchmark <repo-root> --samples <N>',
  };

  const args = process.argv.slice(2);
  const command = args[0];
  const repoRoot = args[1];

  // Print a message and terminate with a failure status.
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };

  // Value following `flag`, or `fallback` when the flag is absent. A flag with
  // no value after it is reported here instead of surfacing later as an
  // obscure TypeError from fs or path.
  const optionValue = (flag, fallback) => {
    const idx = args.indexOf(flag);
    if (idx < 0) return fallback;
    const value = args[idx + 1];
    if (value === undefined || value.startsWith('--')) {
      fail(`Missing value for ${flag}\n${USAGE[command]}`);
    }
    return value;
  };

  // The repo root is positional; a flag in its place means it was omitted.
  const requireRepoRoot = () => {
    if (!repoRoot || repoRoot.startsWith('--')) {
      fail(USAGE[command]);
    }
  };

  if (command === 'batch') {
    requireRepoRoot();
    batchExtract(repoRoot, optionValue('--output', DEFAULT_OUTPUT_DIR));
  } else if (command === 'run') {
    requireRepoRoot();
    const prevSnapshot = optionValue('--snapshot', null);
    const outputDir = optionValue('--output', DEFAULT_OUTPUT_DIR);
    incrementalRun(repoRoot, null, prevSnapshot, outputDir);
  } else if (command === 'benchmark') {
    requireRepoRoot();
    const rawSamples = optionValue('--samples', '10');
    const sampleCount = parseInt(rawSamples, 10);
    if (!Number.isInteger(sampleCount) || sampleCount < 0) {
      fail(`Invalid --samples value: ${rawSamples}\n${USAGE.benchmark}`);
    }
    benchmark(repoRoot, sampleCount);
  } else {
    fail('Unknown command. Available: batch, run, benchmark');
  }
}

module.exports = { discoverFiles, extractFile, batchExtract, incrementalRun, benchmark };
|