const fs = require('fs'); const path = require('path'); const GraphStore = require('./graph.js'); const { extract } = require('./extract.js'); const { semanticDiff, formatSummary } = require('./semantic-diff.js'); /** * Developer Intelligence Pipeline v2 - Pipeline Orchestrator * Batch extraction, incremental diffing, and benchmarking. * No external dependencies. */ const SUPPORTED_EXTS = new Set([ '.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.go', '.sh', '.bash', '.yaml', '.yml', '.tf', '.hcl', ]); const IGNORE_DIRS = new Set([ 'node_modules', '.git', 'dist', 'build', '__pycache__', '.next', '.turbo', 'coverage', '.nyc_output', 'vendor', ]); const SCRIPT_DIR = __dirname; const EXTRACT_JS = path.join(SCRIPT_DIR, 'extract.js'); /** * Recursively discover supported files. */ function discoverFiles(dir) { const results = []; let entries; try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return results; } for (const entry of entries) { if (IGNORE_DIRS.has(entry.name)) continue; const fullPath = path.join(dir, entry.name); if (entry.isDirectory()) { results.push(...discoverFiles(fullPath)); } else if (entry.isFile() && SUPPORTED_EXTS.has(path.extname(entry.name))) { results.push(fullPath); } } return results; } /** * Extract a single file using in-process extract(), no subprocess. */ function extractFile(filePath, repoRoot) { try { return extract(filePath, repoRoot); } catch (err) { return null; } } /** * Batch extract all files, build graph, save snapshot. */ function batchExtract(repoRoot, outputDir) { const files = discoverFiles(repoRoot); console.log(`Discovered ${files.length} supported files in ${repoRoot}`); fs.mkdirSync(outputDir, { recursive: true }); const results = []; let errors = 0; const startTime = Date.now(); for (let i = 0; i < files.length; i++) { const result = extractFile(files[i], repoRoot); if (result && !result.error) { results.push(result); } else { errors++; } if ((i + 1) % 100 === 0) { console.log(` Extracted ${i + 1}/${files.length}...`); } } const extractTime = Date.now() - startTime; console.log(`Extraction complete: ${results.length} succeeded, ${errors} failed (${extractTime}ms)`); const graph = GraphStore.buildGraph(results); const snapshotPath = path.join(outputDir, 'snapshot.json'); GraphStore.saveSnapshot(graph, snapshotPath); console.log(`Graph: ${graph.nodes.size} nodes, ${graph.edges.length} edges. Saved to ${snapshotPath}`); // Save stats const stats = { repoRoot, filesDiscovered: files.length, filesExtracted: results.length, errors, nodes: graph.nodes.size, edges: graph.edges.length, extractionTimeMs: extractTime, avgTimePerFileMs: Math.round(extractTime / files.length), timestamp: new Date().toISOString(), }; fs.writeFileSync(path.join(outputDir, 'stats.json'), JSON.stringify(stats, null, 2)); console.log(`Stats saved. Avg ${stats.avgTimePerFileMs}ms/file`); return { graph, snapshotPath, stats }; } /** * Incremental run: extract files, diff against previous snapshot. */ function incrementalRun(repoRoot, files, prevSnapshotPath, outputDir) { fs.mkdirSync(outputDir, { recursive: true }); const filesToExtract = files || discoverFiles(repoRoot); console.log(`Extracting ${filesToExtract.length} files...`); const results = []; let errors = 0; for (const f of filesToExtract) { const result = extractFile(f, repoRoot); if (result && !result.error) { results.push(result); } else { errors++; } } const newGraph = GraphStore.buildGraph(results); const newSnapshotPath = path.join(outputDir, 'snapshot.json'); GraphStore.saveSnapshot(newGraph, newSnapshotPath); console.log(`New graph: ${newGraph.nodes.size} nodes, ${newGraph.edges.length} edges`); if (prevSnapshotPath && fs.existsSync(prevSnapshotPath)) { const oldGraph = GraphStore.loadSnapshot(prevSnapshotPath); const diff = semanticDiff(oldGraph, newGraph); console.log(formatSummary(diff)); fs.writeFileSync(path.join(outputDir, 'diff.json'), JSON.stringify({ score: diff.score, severity: diff.severity, stats: diff.stats, categorized: diff.categorized, }, null, 2)); } return { newSnapshotPath }; } /** * Benchmark: extract N random files, report timing. */ function benchmark(repoRoot, sampleCount) { const allFiles = discoverFiles(repoRoot); console.log(`Total supported files: ${allFiles.length}`); // Shuffle and pick N const shuffled = allFiles.sort(() => Math.random() - 0.5); const samples = shuffled.slice(0, Math.min(sampleCount, allFiles.length)); console.log(`Benchmarking ${samples.length} files...\n`); const timings = []; let totalEntities = 0; let totalRelationships = 0; let errors = 0; for (const file of samples) { const start = Date.now(); const result = extractFile(file, repoRoot); const elapsed = Date.now() - start; if (result && !result.error) { timings.push({ file: path.relative(repoRoot, file), timeMs: elapsed, entities: result.entities.length, relationships: result.relationships.length }); totalEntities += result.entities.length; totalRelationships += result.relationships.length; } else { errors++; timings.push({ file: path.relative(repoRoot, file), timeMs: elapsed, entities: 0, relationships: 0, error: true }); } } // Sort by time descending timings.sort((a, b) => b.timeMs - a.timeMs); const totalTime = timings.reduce((s, t) => s + t.timeMs, 0); const avgTime = Math.round(totalTime / timings.length); const p50 = timings[Math.floor(timings.length * 0.5)]?.timeMs || 0; const p95 = timings[Math.floor(timings.length * 0.05)]?.timeMs || 0; console.log('=== V2 Pipeline Benchmark ==='); console.log(`Repo: ${repoRoot}`); console.log(`Files sampled: ${samples.length} / ${allFiles.length}`); console.log(`Errors: ${errors}`); console.log(`Total entities: ${totalEntities}`); console.log(`Total relationships: ${totalRelationships}`); console.log(`Total time: ${totalTime}ms`); console.log(`Avg time/file: ${avgTime}ms`); console.log(`P50: ${p50}ms | P95: ${p95}ms`); console.log(''); console.log('Top 5 slowest:'); for (const t of timings.slice(0, 5)) { console.log(` ${t.timeMs}ms ${t.file} (${t.entities}E/${t.relationships}R)${t.error ? ' ERROR' : ''}`); } return { totalFiles: allFiles.length, sampled: samples.length, errors, totalEntities, totalRelationships, totalTime, avgTime, p50, p95 }; } // --- CLI --- if (require.main === module) { const args = process.argv.slice(2); const command = args[0]; if (command === 'batch') { const repoRoot = args[1]; const outputIdx = args.indexOf('--output'); const outputDir = outputIdx >= 0 ? args[outputIdx + 1] : '/tmp/pipeline-output'; if (!repoRoot) { console.error('Usage: node pipeline.js batch --output '); process.exit(1); } batchExtract(repoRoot, outputDir); } else if (command === 'run') { const repoRoot = args[1]; const snapshotIdx = args.indexOf('--snapshot'); const prevSnapshot = snapshotIdx >= 0 ? args[snapshotIdx + 1] : null; const outputIdx = args.indexOf('--output'); const outputDir = outputIdx >= 0 ? args[outputIdx + 1] : '/tmp/pipeline-output'; if (!repoRoot) { console.error('Usage: node pipeline.js run [--snapshot ] [--output ]'); process.exit(1); } incrementalRun(repoRoot, null, prevSnapshot, outputDir); } else if (command === 'benchmark') { const repoRoot = args[1]; const samplesIdx = args.indexOf('--samples'); const sampleCount = samplesIdx >= 0 ? parseInt(args[samplesIdx + 1], 10) : 10; if (!repoRoot) { console.error('Usage: node pipeline.js benchmark --samples '); process.exit(1); } benchmark(repoRoot, sampleCount); } else { console.error('Unknown command. Available: batch, run, benchmark'); process.exit(1); } } module.exports = { discoverFiles, extractFile, batchExtract, incrementalRun, benchmark };