Dev Intel Pipeline v2 — multi-language semantic graph extractor

Phase 1: extract.js — tree-sitter AST parser (TS/JS/Python/Go/Java/Bash) + config parsers (YAML/HCL)
Phase 2: graph.js — in-memory directed graph store with build/query/diff CLI
Phase 3: namespace.js — cross-repo namespace registry with 3-tier resolution
Phase 4: semantic-diff.js — categorized diffs with impact scoring (0-100)
Phase 5: pipeline.js — batch extraction, incremental diffing, benchmarking

Benchmark: 4,325 files, 21,646 nodes, 133,979 edges in 67s (15ms/file)
BMad SPA reviews: all phases GO
This commit is contained in:
Jarvis Prime
2026-03-09 05:29:29 +00:00
commit efb12d003b
19 changed files with 4106 additions and 0 deletions

278
graph.js Normal file
View File

@@ -0,0 +1,278 @@
const fs = require('fs');
const path = require('path');
/**
 * Developer Intelligence Pipeline v2 - Graph Store
 * In-memory directed graph using a simple adjacency list.
 * No external dependencies.
 *
 * Every operation is a static method that receives the graph(s) it works on,
 * so a GraphStore instance is just a data holder.
 */
class GraphStore {
  constructor() {
    this.nodes = new Map(); // entityId -> entity object
    this.edges = []; // Array of {type, source, target}
    this._edgeSet = new Set(); // Identity keys of edges, for O(1) dedup
    this.fileIndex = new Map(); // filePath -> Set of entityIds
  }

  /**
   * Builds the identity key of an edge.
   * The (type, source, target) triple is JSON-encoded rather than joined with
   * separator characters, so ids that themselves contain ":" or "->" cannot
   * make two different edges produce the same key.
   * @param {{type: *, source: *, target: *}} edge
   * @returns {string}
   */
  static _edgeKey(edge) {
    return JSON.stringify([edge.type, edge.source, edge.target]);
  }

  /**
   * Returns a copy of `value` in which the keys of every object, at every
   * nesting level, are inserted in sorted order, so that JSON.stringify of the
   * result does not depend on the original key order.
   * @param {*} value
   * @param {Set<string>} [ignoredKeys] keys to drop from `value` itself
   *   (not applied to nested objects)
   * @returns {*}
   */
  static _canonicalize(value, ignoredKeys) {
    if (Array.isArray(value)) {
      return value.map((item) => GraphStore._canonicalize(item));
    }
    if (value !== null && typeof value === 'object') {
      // Null-prototype target so a key named "__proto__" is stored as data.
      const sorted = Object.create(null);
      for (const key of Object.keys(value).sort()) {
        if (ignoredKeys && ignoredKeys.has(key)) continue;
        sorted[key] = GraphStore._canonicalize(value[key]);
      }
      return sorted;
    }
    return value;
  }

  /**
   * Builds the graph from an array of extract.js result objects.
   * Results without a `file`, entities without an `id`, and null entries are
   * skipped. Each stored node is a shallow copy of the entity with an added
   * `_file` property; each stored edge keeps only type/source/target.
   * @param {Array<Object>} extractResults
   * @returns {GraphStore}
   */
  static buildGraph(extractResults) {
    const graph = new GraphStore();
    for (const result of extractResults) {
      const filePath = result && result.file;
      if (!filePath) continue;
      if (!graph.fileIndex.has(filePath)) {
        graph.fileIndex.set(filePath, new Set());
      }
      const fileEntities = graph.fileIndex.get(filePath);

      // Add nodes
      if (Array.isArray(result.entities)) {
        for (const entity of result.entities) {
          // An entity without an id cannot be addressed by any query; skip it
          // instead of storing it under the key `undefined`.
          if (!entity || entity.id == null) continue;
          graph.nodes.set(entity.id, { ...entity, _file: filePath });
          fileEntities.add(entity.id);
        }
      }

      // Add edges (deduplicated via Set)
      if (Array.isArray(result.relationships)) {
        for (const rel of result.relationships) {
          if (!rel) continue;
          const key = GraphStore._edgeKey(rel);
          if (graph._edgeSet.has(key)) continue;
          graph._edgeSet.add(key);
          graph.edges.push({
            type: rel.type,
            source: rel.source,
            target: rel.target
          });
        }
      }
    }
    return graph;
  }

  /**
   * Serializes the graph to a JSON file (pretty-printed, 2-space indent).
   * Maps and Sets are converted to plain objects and arrays.
   * @param {GraphStore} graph
   * @param {string} outputPath
   */
  static saveSnapshot(graph, outputPath) {
    const serialized = {
      nodes: Object.fromEntries(graph.nodes),
      edges: graph.edges,
      fileIndex: Object.fromEntries(
        Array.from(graph.fileIndex.entries()).map(([k, v]) => [k, Array.from(v)])
      )
    };
    fs.writeFileSync(outputPath, JSON.stringify(serialized, null, 2), 'utf8');
  }

  /**
   * Deserializes a JSON file to a GraphStore.
   * Missing `nodes`, `edges` or `fileIndex` sections are treated as empty.
   * @param {string} inputPath
   * @returns {GraphStore}
   */
  static loadSnapshot(inputPath) {
    const data = JSON.parse(fs.readFileSync(inputPath, 'utf8'));
    const graph = new GraphStore();
    for (const [id, entity] of Object.entries(data.nodes || {})) {
      graph.nodes.set(id, entity);
    }
    graph.edges = Array.isArray(data.edges) ? data.edges : [];
    // Rebuild the dedup index so it reflects the loaded edges.
    for (const edge of graph.edges) {
      if (edge) graph._edgeSet.add(GraphStore._edgeKey(edge));
    }
    for (const [filePath, entityIds] of Object.entries(data.fileIndex || {})) {
      graph.fileIndex.set(filePath, new Set(entityIds));
    }
    return graph;
  }

  /**
   * Returns an entity and all its incoming/outgoing edges.
   * @param {GraphStore} graph
   * @param {string} entityId
   * @returns {Object} `{entity, incoming, outgoing}`, or null if the id is unknown
   */
  static query(graph, entityId) {
    const entity = graph.nodes.get(entityId);
    if (!entity) return null;
    const incoming = graph.edges.filter(e => e.target === entityId);
    const outgoing = graph.edges.filter(e => e.source === entityId);
    return {
      entity,
      incoming,
      outgoing
    };
  }

  /**
   * Returns all entities that CALL this function.
   * Callers whose source id has no node in the graph are omitted.
   * @param {GraphStore} graph
   * @param {string} functionName (entityId)
   * @returns {Array<Object>}
   */
  static findCallers(graph, functionName) {
    return graph.edges
      .filter(e => e.type === 'CALLS' && e.target === functionName)
      .map(e => graph.nodes.get(e.source))
      .filter(Boolean);
  }

  /**
   * Returns all modules that IMPORT or DEPEND_ON this module.
   * Sources that have no node in the graph are omitted.
   * @param {GraphStore} graph
   * @param {string} moduleId (entityId)
   * @returns {Array<Object>}
   */
  static findDependents(graph, moduleId) {
    return graph.edges
      .filter(e => (e.type === 'IMPORTS' || e.type === 'DEPENDS_ON') && e.target === moduleId)
      .map(e => graph.nodes.get(e.source))
      .filter(Boolean);
  }

  /**
   * Returns all public entities in a file (those with visibility === 'public').
   * @param {GraphStore} graph
   * @param {string} filePath
   * @returns {Array<Object>} empty when the file is not in the index
   */
  static getExports(graph, filePath) {
    const entityIds = graph.fileIndex.get(filePath);
    if (!entityIds) return [];
    return Array.from(entityIds)
      .map(id => graph.nodes.get(id))
      .filter(entity => entity && entity.visibility === 'public');
  }

  /**
   * Returns added/removed/modified entities and relationships between two snapshots.
   * Entities are matched by id and compared by a key-order-independent deep
   * comparison that ignores the top-level `_file` property. Relationships are
   * matched by their (type, source, target) triple.
   * @param {GraphStore} oldGraph
   * @param {GraphStore} newGraph
   * @returns {Object} `{entities: {added, removed, modified}, relationships: {added, removed}}`
   */
  static diffSnapshots(oldGraph, newGraph) {
    const diff = {
      entities: { added: [], removed: [], modified: [] },
      relationships: { added: [], removed: [] }
    };

    // Deterministic deep comparison. The canonical form is built recursively:
    // an array replacer passed to JSON.stringify would act as an allowlist at
    // every nesting level and silently drop nested keys from the comparison.
    const ignoredKeys = new Set(['_file']);
    const fingerprint = (entity) =>
      JSON.stringify(GraphStore._canonicalize(entity, ignoredKeys));

    // Diff Entities
    for (const [id, oldEntity] of oldGraph.nodes.entries()) {
      if (!newGraph.nodes.has(id)) {
        diff.entities.removed.push(oldEntity);
      }
    }
    for (const [id, newEntity] of newGraph.nodes.entries()) {
      const oldEntity = oldGraph.nodes.get(id);
      if (!oldEntity) {
        diff.entities.added.push(newEntity);
      } else if (fingerprint(oldEntity) !== fingerprint(newEntity)) {
        diff.entities.modified.push({ old: oldEntity, new: newEntity });
      }
    }

    // Diff Relationships
    const oldEdges = new Set(oldGraph.edges.map(GraphStore._edgeKey));
    const newEdges = new Set(newGraph.edges.map(GraphStore._edgeKey));
    for (const e of newGraph.edges) {
      if (!oldEdges.has(GraphStore._edgeKey(e))) diff.relationships.added.push(e);
    }
    for (const e of oldGraph.edges) {
      if (!newEdges.has(GraphStore._edgeKey(e))) diff.relationships.removed.push(e);
    }
    return diff;
  }
}
// Command-line entry point: runs only when this file is executed directly,
// not when it is loaded with require().
// Supported commands: build, query, diff.
if (require.main === module) {
  const [command, firstArg, secondArg] = process.argv.slice(2);

  // Reports a message on stderr and terminates with exit code 1.
  const exitWithError = (message) => {
    console.error(message);
    process.exit(1);
  };

  switch (command) {
    case 'build': {
      // build <dir> <out>: merge every *.json extract result in <dir> into one snapshot.
      if (!firstArg || !secondArg) {
        exitWithError('Usage: node graph.js build <dir-of-json-files> <output-snapshot.json>');
      }
      const extractResults = [];
      for (const fileName of fs.readdirSync(firstArg)) {
        if (!fileName.endsWith('.json')) continue;
        const raw = fs.readFileSync(path.join(firstArg, fileName), 'utf8');
        let parsed = null;
        try {
          parsed = JSON.parse(raw);
        } catch (err) {
          // An unparsable file is reported and skipped; the build continues.
          console.error(`Error parsing ${fileName}:`, err.message);
        }
        if (parsed) extractResults.push(parsed);
      }
      const built = GraphStore.buildGraph(extractResults);
      GraphStore.saveSnapshot(built, secondArg);
      console.log(`Built graph with ${built.nodes.size} nodes and ${built.edges.length} edges. Saved to ${secondArg}`);
      break;
    }

    case 'query': {
      // query <snapshot> <entityId>: print the entity with its incoming/outgoing edges.
      if (!firstArg || !secondArg) {
        exitWithError('Usage: node graph.js query <snapshot.json> <entityId>');
      }
      const snapshot = GraphStore.loadSnapshot(firstArg);
      const found = GraphStore.query(snapshot, secondArg);
      if (found) {
        console.log(JSON.stringify(found, null, 2));
      } else {
        console.log(`Entity ${secondArg} not found.`);
      }
      break;
    }

    case 'diff': {
      // diff <old> <new>: print the entity/relationship differences as JSON.
      if (!firstArg || !secondArg) {
        exitWithError('Usage: node graph.js diff <old-snapshot.json> <new-snapshot.json>');
      }
      const before = GraphStore.loadSnapshot(firstArg);
      const after = GraphStore.loadSnapshot(secondArg);
      console.log(JSON.stringify(GraphStore.diffSnapshots(before, after), null, 2));
      break;
    }

    default:
      exitWithError('Unknown command. Available commands: build, query, diff');
  }
}

module.exports = GraphStore;