/**
 * Finds the 1-based line on which `key` is declared as a top-level (column 0)
 * block-mapping key, accepting the bare, double-quoted and single-quoted spellings.
 *
 * @param {string[]} lines - Source split on '\n'.
 * @param {string} key - Key name as reported by the YAML parser.
 * @returns {number} The matching line, or 1 when the key cannot be located
 *   (for example in flow-style documents such as `{a: 1}`).
 */
function findYamlKeyLine(lines, key) {
  const spellings = [key, `"${key}"`, `'${key}'`];
  for (let i = 0; i < lines.length; i++) {
    for (const spelling of spellings) {
      // The key must be followed (after optional spaces) by ':' so that `ab:` never matches key `a`.
      if (lines[i].startsWith(spelling) && /^\s*:/.test(lines[i].slice(spelling.length))) {
        return i + 1;
      }
    }
  }
  return 1;
}

/**
 * Extracts a file-level Config entity plus one `yaml-key` entity per distinct
 * top-level mapping key of a YAML file (all documents of a multi-document stream).
 *
 * A YAML syntax error is logged to stderr and only the file-level entity is returned.
 *
 * @param {string} filePath - YAML file to read.
 * @param {string} repoRoot - Root used to compute the relative path that serves as module id.
 * @returns {{file: string, language: string, entities: Array, relationships: Array}}
 * @throws {Error} Whatever fs.readFileSync throws when the file cannot be read.
 */
function extractYaml(filePath, repoRoot) {
  const sourceCode = fs.readFileSync(filePath, 'utf8');
  const relPath = path.relative(repoRoot, filePath);
  const moduleId = relPath;
  const lines = sourceCode.split('\n');
  const entities = [];
  const relationships = [];

  entities.push({
    id: moduleId,
    type: 'Config',
    name: relPath,
    kind: 'yaml-config',
    visibility: 'public',
    line_range: [1, lines.length]
  });

  // Multi-document streams (e.g. Kubernetes manifests) repeat the same top-level keys in
  // every document; keep one entity/edge per id, as addEntity/addRel do in extract.js.
  const seenIds = new Set([moduleId]);

  try {
    // NOTE(review): with js-yaml < 4, loadAll uses the full (unsafe) schema; confirm the
    // installed major version if untrusted repositories are scanned.
    const docs = jsYaml.loadAll(sourceCode);
    for (const doc of docs) {
      // Only mappings have keys; a top-level sequence would otherwise yield "0", "1", ...
      if (!doc || typeof doc !== 'object' || Array.isArray(doc)) continue;
      for (const key of Object.keys(doc)) {
        const keyId = `${moduleId}:${key}`;
        if (seenIds.has(keyId)) continue;
        seenIds.add(keyId);

        // Still an approximation (no AST): first column-0 occurrence of the key.
        const lineNum = findYamlKeyLine(lines, key);
        entities.push({
          id: keyId,
          type: 'Config',
          name: key,
          kind: 'yaml-key',
          visibility: 'public',
          line_range: [lineNum, lineNum]
        });
        relationships.push({
          type: 'CONTAINS',
          source: moduleId,
          target: keyId
        });
      }
    }
  } catch (e) {
    // Best effort: log the problem and return what was collected so far.
    console.error(`YAML parse error in ${relPath}: ${e.message}`);
  }

  return { file: filePath, language: 'yaml', entities, relationships };
}

/**
 * Extracts a file-level Config entity plus one `hcl-block` entity per distinct labelled
 * top-level HCL block (resource, data, module, variable, output, provider).
 *
 * Blocks are recognised with a line-anchored regular expression, so only blocks that start
 * at column 0 with at least one quoted label are reported; `line_range` is the header line only.
 *
 * @param {string} filePath - Terraform/HCL file to read.
 * @param {string} repoRoot - Root used to compute the relative path that serves as module id.
 * @returns {{file: string, language: string, entities: Array, relationships: Array}}
 * @throws {Error} Whatever fs.readFileSync throws when the file cannot be read.
 */
function extractHcl(filePath, repoRoot) {
  const sourceCode = fs.readFileSync(filePath, 'utf8');
  const relPath = path.relative(repoRoot, filePath);
  const moduleId = relPath;
  const entities = [];
  const relationships = [];
  const lines = sourceCode.split('\n');

  entities.push({
    id: moduleId,
    type: 'Config',
    name: relPath,
    kind: 'terraform',
    visibility: 'public',
    line_range: [1, lines.length]
  });

  // Regex for top-level HCL blocks (e.g., resource "aws_s3_bucket" "my_bucket" {)
  const blockRegex = /^(resource|data|module|variable|output|provider)\s+"([^"]+)"(?:\s+"([^"]+)")?\s*\{/;

  // Repeated headers (e.g. several aliased `provider "aws"` blocks) map to the same id;
  // keep the first occurrence only so ids stay unique within the result.
  const seenIds = new Set([moduleId]);

  for (let i = 0; i < lines.length; i++) {
    const match = lines[i].match(blockRegex);
    if (!match) continue;

    const [, type, name1, name2] = match;
    const fullName = name2 ? `${type}.${name1}.${name2}` : `${type}.${name1}`;
    const blockId = `${moduleId}:${fullName}`;
    if (seenIds.has(blockId)) continue;
    seenIds.add(blockId);

    entities.push({
      id: blockId,
      type: 'Config',
      name: fullName,
      kind: 'hcl-block',
      visibility: 'public',
      line_range: [i + 1, i + 1] // Approximation: header line only
    });

    relationships.push({
      type: 'CONTAINS',
      source: moduleId,
      target: blockId
    });
  }

  return { file: filePath, language: 'hcl', entities, relationships };
}
require('tree-sitter-bash'), +}; + +const { extractYaml, extractHcl } = require('./extract-config.js'); + +const EXT_MAP = { + '.ts': 'typescript', '.tsx': 'tsx', '.js': 'javascript', '.jsx': 'javascript', + '.py': 'python', '.java': 'java', '.go': 'go', + '.sh': 'bash', '.bash': 'bash', + '.yaml': 'yaml', '.yml': 'yaml', + '.tf': 'hcl', '.hcl': 'hcl', + '.kcl': 'yaml', // KCL has no tree-sitter grammar; parse as YAML (structural approximation) +}; + +// --- Language Adapters --- +// Each adapter defines node types for that language's AST +const ADAPTERS = { + typescript: { + classNodes: ['class_declaration'], + functionNodes: ['function_declaration'], + arrowFuncParent: 'lexical_declaration', + methodNodes: ['method_definition'], + fieldNodes: ['public_field_definition'], + importNodes: ['import_statement'], + requireFunc: 'require', + exportWrapper: 'export_statement', + varDecl: ['lexical_declaration', 'variable_declaration'], + callExpr: 'call_expression', + funcField: 'function', + nameField: 'name', + bodyField: 'body', + sourceField: 'source', + valueField: 'value', + arrowTypes: ['arrow_function', 'function'], + accessModifier: 'accessibility_modifier', + heritage: 'class_heritage', + implementsClause: 'implements_clause', + }, + python: { + classNodes: ['class_definition'], + functionNodes: ['function_definition'], + arrowFuncParent: null, + methodNodes: [], // methods are function_definition inside class + fieldNodes: [], + importNodes: ['import_statement', 'import_from_statement'], + requireFunc: null, + exportWrapper: null, + varDecl: ['assignment', 'augmented_assignment'], + callExpr: 'call', + funcField: 'function', + nameField: 'name', + bodyField: 'body', + sourceField: null, + valueField: 'right', + arrowTypes: ['lambda'], + accessModifier: null, + heritage: null, + implementsClause: null, + }, + java: { + classNodes: ['class_declaration', 'interface_declaration', 'enum_declaration'], + functionNodes: ['method_declaration', 
'constructor_declaration'], + arrowFuncParent: null, + methodNodes: ['method_declaration', 'constructor_declaration'], + fieldNodes: ['field_declaration'], + importNodes: ['import_declaration'], + requireFunc: null, + exportWrapper: null, + varDecl: ['local_variable_declaration', 'field_declaration'], + callExpr: 'method_invocation', + funcField: 'name', + nameField: 'name', + bodyField: 'body', + sourceField: null, + valueField: null, + arrowTypes: ['lambda_expression'], + accessModifier: 'modifiers', + heritage: 'superclass', + implementsClause: 'super_interfaces', + }, + go: { + classNodes: ['type_declaration'], // struct types + functionNodes: ['function_declaration', 'method_declaration'], + arrowFuncParent: null, + methodNodes: ['method_declaration'], + fieldNodes: [], + importNodes: ['import_declaration'], + requireFunc: null, + exportWrapper: null, + varDecl: ['var_declaration', 'short_var_declaration', 'const_declaration'], + callExpr: 'call_expression', + funcField: 'function', + nameField: 'name', + bodyField: 'body', + sourceField: 'path', + valueField: null, + arrowTypes: ['func_literal'], + accessModifier: null, + heritage: null, + implementsClause: null, + }, + yaml: { + classNodes: [], + functionNodes: [], + arrowFuncParent: null, + methodNodes: [], + fieldNodes: [], + importNodes: [], + requireFunc: null, + exportWrapper: null, + varDecl: [], + callExpr: null, + funcField: null, + nameField: null, + bodyField: null, + sourceField: null, + valueField: null, + arrowTypes: [], + accessModifier: null, + heritage: null, + implementsClause: null, + }, + hcl: { + classNodes: [], + functionNodes: [], + arrowFuncParent: null, + methodNodes: [], + fieldNodes: [], + importNodes: [], + requireFunc: null, + exportWrapper: null, + varDecl: [], + callExpr: 'function_call', + funcField: null, + nameField: null, + bodyField: 'body', + sourceField: null, + valueField: null, + arrowTypes: [], + accessModifier: null, + heritage: null, + implementsClause: null, + }, 
// --- Core Extractor ---
/**
 * Parses one source file and returns the entities and relationships found in it.
 *
 * YAML and HCL files are delegated to extractYaml/extractHcl. Every other supported
 * extension is parsed with the grammar in GRAMMARS[lang] and walked with ADAPTERS[lang].
 *
 * @param {string} filePath - File to analyse; its extension selects the language through EXT_MAP.
 * @param {string} repoRoot - Root that the relative path (used as module id) is computed against.
 * @returns {{file: string, language: string, entities: Array, relationships: Array, error?: string}}
 *   For parsed languages, `error` is set (with empty entity/relationship arrays) when reading
 *   or parsing the file fails.
 *
 * NOTE(review): an unsupported extension or a missing grammar/adapter calls process.exit(1)
 * rather than returning an error result — confirm that batch callers expect the process to end.
 */
function extract(filePath, repoRoot) {
  const ext = path.extname(filePath);
  const lang = EXT_MAP[ext];
  if (!lang) {
    console.error(`Unsupported extension: ${ext}`);
    process.exit(1);
  }

  // Config formats are handled by the regex/js-yaml based extractors, not by tree-sitter.
  if (lang === 'yaml') return extractYaml(filePath, repoRoot);
  if (lang === 'hcl') return extractHcl(filePath, repoRoot);

  const grammar = GRAMMARS[lang];
  const adapter = ADAPTERS[lang];
  if (!grammar || !adapter) {
    console.error(`No grammar/adapter for: ${lang}`);
    process.exit(1);
  }

  const parser = new Parser();
  parser.setLanguage(grammar);

  let sourceCode;
  try {
    sourceCode = fs.readFileSync(filePath, 'utf8');
  } catch (err) {
    console.error(`Failed to read ${filePath}: ${err.message}`);
    return { file: filePath, language: lang, entities: [], relationships: [], error: err.message };
  }

  let tree;
  try {
    tree = parser.parse(sourceCode);
  } catch (err) {
    console.error(`Failed to parse ${filePath}: ${err.message}`);
    return { file: filePath, language: lang, entities: [], relationships: [], error: err.message };
  }

  // The repo-relative path doubles as the id of the file-level entity; nested ids are
  // built by appending `:${name}` to the parent id.
  const relPath = path.relative(repoRoot, filePath);
  const moduleId = relPath;
  const entities = [];
  const relationships = [];

  // Source text covered by a node.
  function getText(node) {
    return sourceCode.substring(node.startIndex, node.endIndex);
  }

  // 1-based [startLine, endLine] of a node (rows are converted from 0-based).
  function lineRange(node) {
    return [node.startPosition.row + 1, node.endPosition.row + 1];
  }

  // Decides whether a declaration is visible outside the file, per language.
  function isExported(node) {
    if (adapter.exportWrapper) {
      // ES6 export
      if (node.parent && node.parent.type === adapter.exportWrapper) return true;
      // CommonJS: module.exports = { ... } or exports.foo = ...
      // Check if this function/class name appears in a module.exports assignment
      const nameNode = node.childForFieldName('name');
      if (nameNode) {
        const name = getText(nameNode);
        // Scan the root's direct children for module.exports references to this name
        const root = tree.rootNode;
        for (const child of root.children) {
          if (child.type === 'expression_statement') {
            const expr = child.children[0];
            if (expr && expr.type === 'assignment_expression') {
              const left = expr.childForFieldName('left');
              if (left) {
                const leftText = getText(left);
                // module.exports.foo = ... or exports.foo = ...
                if (leftText === `module.exports.${name}` || leftText === `exports.${name}`) return true;
                // module.exports = { foo, bar } or module.exports = foo
                if (leftText === 'module.exports') {
                  const right = expr.childForFieldName('right');
                  if (right) {
                    const rightText = getText(right);
                    // NOTE(review): includes() is a substring test, so a name such as `a`
                    // also matches `module.exports = { bar }` — confirm this is acceptable.
                    if (rightText === name || rightText.includes(name)) return true;
                  }
                }
              }
            }
          }
        }
      }
      return false;
    }
    // Python: no export concept, everything is public
    // Java: check modifiers
    // Go: capitalized name = exported
    if (lang === 'go') {
      const nameNode = node.childForFieldName('name');
      if (nameNode) {
        const name = getText(nameNode);
        // NOTE(review): this comparison is also true for a first character without case
        // (e.g. `_`), since toUpperCase() leaves it unchanged.
        return name[0] === name[0].toUpperCase();
      }
    }
    if (lang === 'java') {
      const mods = node.children.find(c => c.type === 'modifiers');
      if (mods) return getText(mods).includes('public');
      return false;
    }
    return true; // Python: everything is public
  }

  // Adds an entity unless one with the same id was already recorded (linear scan).
  function addEntity(e) {
    if (!entities.find(x => x.id === e.id)) entities.push(e);
  }

  // Adds a relationship unless the same type/source/target triple was already recorded.
  const _relSet = new Set();
  function addRel(r) {
    const key = `${r.type}:${r.source}->${r.target}`;
    if (!_relSet.has(key)) {
      _relSet.add(key);
      relationships.push(r);
    }
  }

  // --- Import Extraction ---
  // Records a Dependency entity (id `dep:<name>`) and an IMPORTS edge from the module when
  // `node` is an import. Returns true when the node was consumed as an import, so the
  // caller does not descend into it.
  function extractImports(node) {
    if (adapter.importNodes.includes(node.type)) {
      if (lang === 'typescript' || lang === 'tsx' || lang === 'javascript') {
        const sourceNode = node.childForFieldName('source');
        if (sourceNode) {
          const depName = getText(sourceNode).replace(/['"]/g, '');
          // Resolve relative imports against file directory
          let resolvedDep = depName;
          if (depName.startsWith('.')) {
            resolvedDep = path.posix.normalize(path.posix.join(path.dirname(relPath), depName));
          }
          const depId = `dep:${resolvedDep}`;
          addEntity({ id: depId, type: 'Dependency', name: resolvedDep, kind: 'import', visibility: 'internal', line_range: lineRange(node) });
          addRel({ type: 'IMPORTS', source: moduleId, target: depId });
        }
        return true;
      }
      if (lang === 'python') {
        // import X or from X import Y
        const modNode = node.childForFieldName('module_name') || node.childForFieldName('name');
        let depName = 'unknown';
        if (modNode) {
          depName = getText(modNode);
        } else {
          // Fallback: grab dotted name from children
          const dotted = node.children.find(c => c.type === 'dotted_name');
          if (dotted) depName = getText(dotted);
        }
        const depId = `dep:${depName}`;
        addEntity({ id: depId, type: 'Dependency', name: depName, kind: 'import', visibility: 'internal', line_range: lineRange(node) });
        addRel({ type: 'IMPORTS', source: moduleId, target: depId });
        return true;
      }
      if (lang === 'java') {
        // import com.foo.Bar;
        const scopedId = node.children.find(c => c.type === 'scoped_identifier');
        if (scopedId) {
          const depName = getText(scopedId);
          const depId = `dep:${depName}`;
          addEntity({ id: depId, type: 'Dependency', name: depName, kind: 'import', visibility: 'internal', line_range: lineRange(node) });
          addRel({ type: 'IMPORTS', source: moduleId, target: depId });
        }
        return true;
      }
      if (lang === 'go') {
        // import "fmt" or import ( "fmt" "os" )
        for (const child of node.namedChildren) {
          if (child.type === 'import_spec' || child.type === 'import_spec_list') {
            const specs = child.type === 'import_spec_list' ? child.namedChildren : [child];
            for (const spec of specs) {
              const pathNode = spec.childForFieldName('path');
              if (pathNode) {
                const depName = getText(pathNode).replace(/"/g, '');
                const depId = `dep:${depName}`;
                addEntity({ id: depId, type: 'Dependency', name: depName, kind: 'import', visibility: 'internal', line_range: lineRange(spec) });
                addRel({ type: 'IMPORTS', source: moduleId, target: depId });
              }
            }
          }
        }
        return true;
      }
    }
    // CommonJS require() for JS/TS
    // Only the first declarator whose value is require('<string>') is recorded, because the
    // function returns as soon as one is found.
    if (adapter.requireFunc && (node.type === 'lexical_declaration' || node.type === 'variable_declaration')) {
      for (const child of node.children) {
        if (child.type === 'variable_declarator') {
          const value = child.childForFieldName('value');
          if (value && value.type === 'call_expression') {
            const func = value.childForFieldName('function');
            if (func && getText(func) === adapter.requireFunc) {
              const args = value.childForFieldName('arguments');
              if (args && args.namedChildCount > 0) {
                const arg = args.namedChildren[0];
                if (arg.type === 'string') {
                  const depName = getText(arg).replace(/['"]/g, '');
                  let resolvedDep = depName;
                  if (depName.startsWith('.')) {
                    resolvedDep = path.posix.normalize(path.posix.join(path.dirname(relPath), depName));
                  }
                  const depId = `dep:${resolvedDep}`;
                  addEntity({ id: depId, type: 'Dependency', name: resolvedDep, kind: 'require', visibility: 'internal', line_range: lineRange(node) });
                  addRel({ type: 'IMPORTS', source: moduleId, target: depId });
                  return true;
                }
              }
            }
          }
        }
      }
    }

    // Bash: source ./utils.sh -> IMPORTS
    // Only the literal command name `source` is checked here.
    if (lang === 'bash' && node.type === 'command') {
      const cmd = node.namedChildren[0];
      if (cmd && getText(cmd) === 'source') {
        const arg = node.namedChildren[1];
        if (arg) {
          const depName = getText(arg);
          const depId = `dep:${depName}`;
          addEntity({ id: depId, type: 'Dependency', name: depName, kind: 'import', visibility: 'internal', line_range: lineRange(node) });
          addRel({ type: 'IMPORTS', source: moduleId, target: depId });
          return true;
        }
      }
    }

    return false;
  }

  // --- Class Extraction ---
  // Records a Class entity (kind class/struct/interface/enum) under `parentId` together with
  // IMPLEMENTS edges for its base types. Returns the new id, or null when the node has no name.
  // Both `extends` and `implements` style relations are emitted as IMPLEMENTS, and their
  // targets are raw source text rather than entity ids.
  function extractClass(node, parentId) {
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;
    const name = getText(nameNode);
    const id = `${parentId}:${name}`;
    const exported = isExported(node);

    let kind = 'class';
    if (lang === 'go') kind = 'struct';
    if (node.type === 'interface_declaration') kind = 'interface';
    if (node.type === 'enum_declaration') kind = 'enum';

    addEntity({ id, type: 'Class', name, kind, visibility: exported ? 'public' : 'internal', line_range: lineRange(node) });
    addRel({ type: 'CONTAINS', source: parentId, target: id });

    // Implements/extends
    if (adapter.heritage) {
      const heritage = node.children.filter(c => c.type === adapter.heritage);
      for (const h of heritage) {
        for (const child of h.namedChildren) {
          if (adapter.implementsClause && child.type === adapter.implementsClause) {
            for (const impl of child.namedChildren) {
              addRel({ type: 'IMPLEMENTS', source: id, target: getText(impl) });
            }
          } else {
            addRel({ type: 'IMPLEMENTS', source: id, target: getText(child) });
          }
        }
      }
    }
    // Java: superclass and super_interfaces
    if (lang === 'java') {
      const superclass = node.childForFieldName('superclass');
      if (superclass) addRel({ type: 'IMPLEMENTS', source: id, target: getText(superclass).replace(/^extends\s+/, '') });
      const superInterfaces = node.childForFieldName('interfaces');
      if (superInterfaces) {
        for (const iface of superInterfaces.namedChildren) {
          addRel({ type: 'IMPLEMENTS', source: id, target: getText(iface) });
        }
      }
    }
    // Python: bases
    if (lang === 'python') {
      const argList = node.childForFieldName('superclasses');
      if (argList) {
        for (const base of argList.namedChildren) {
          addRel({ type: 'IMPLEMENTS', source: id, target: getText(base) });
        }
      }
    }

    return id;
  }

  // --- Method Extraction ---
  // Records a Function entity of kind 'method' under `parentId`. Visibility defaults to
  // 'public' and is narrowed by an access-modifier child or, for Python, by a leading underscore.
  function extractMethod(node, parentId) {
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;
    const name = getText(nameNode);
    const id = `${parentId}:${name}`;

    let visibility = 'public';
    if (adapter.accessModifier) {
      for (const child of node.children) {
        if (child.type === adapter.accessModifier) {
          const modText = getText(child);
          if (modText.includes('private')) visibility = 'private';
          else if (modText.includes('protected')) visibility = 'protected';
          break;
        }
      }
    }
    // Python: _ prefix = private, __ = very private
    // (mapped below to 'protected' for a single underscore and 'private' for a double one)
    if (lang === 'python' && name.startsWith('_')) {
      visibility = name.startsWith('__') ? 'private' : 'protected';
    }

    addEntity({ id, type: 'Function', name, kind: 'method', visibility, line_range: lineRange(node) });
    addRel({ type: 'CONTAINS', source: parentId, target: id });
    return id;
  }

  // --- Function Extraction ---
  // Records a Function entity of kind 'function' for a function declaration or, for JS/TS,
  // for the first declarator of a lexical declaration whose value is an arrow/function
  // expression. Returns the new id, or null when nothing was recorded.
  function extractFunction(node, parentId) {
    const exported = isExported(node);

    if (adapter.functionNodes.includes(node.type)) {
      const nameNode = node.childForFieldName('name');
      if (!nameNode) return null;
      const name = getText(nameNode);
      const id = `${parentId}:${name}`;

      let visibility = exported ? 'public' : 'internal';
      // Go: the second check runs last, so a first character without case ends up 'internal'.
      if (lang === 'go' && name[0] === name[0].toUpperCase()) visibility = 'public';
      if (lang === 'go' && name[0] === name[0].toLowerCase()) visibility = 'internal';

      addEntity({ id, type: 'Function', name, kind: 'function', visibility, line_range: lineRange(node) });
      addRel({ type: 'CONTAINS', source: parentId, target: id });
      return id;
    }

    // JS/TS arrow functions
    if (adapter.arrowFuncParent && node.type === adapter.arrowFuncParent) {
      for (const child of node.children) {
        if (child.type === 'variable_declarator') {
          const value = child.childForFieldName('value');
          if (value && adapter.arrowTypes.includes(value.type)) {
            const nameNode = child.childForFieldName('name');
            if (!nameNode) continue;
            const name = getText(nameNode);
            const id = `${parentId}:${name}`;
            addEntity({ id, type: 'Function', name, kind: 'function', visibility: exported ? 'public' : 'internal', line_range: lineRange(node) });
            addRel({ type: 'CONTAINS', source: parentId, target: id });
            return id;
          }
        }
      }
    }
    return null;
  }

  // --- Class Field (arrow method vs property) ---
  // A class field whose value is an arrow/function expression is recorded as a method;
  // any other field is ignored.
  function extractClassField(node, parentId) {
    if (!adapter.fieldNodes.includes(node.type)) return null;
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;
    const value = node.childForFieldName('value');
    if (value && adapter.arrowTypes.includes(value.type)) {
      return extractMethod(node, parentId);
    }
    return null; // Skip non-function class properties
  }

  // --- Call Extraction ---
  // Emits a CALLS edge from `parentId` when `node` is a call expression. The target is the
  // callee's source text (e.g. `obj.method`), not an entity id. Calls to the adapter's
  // require function are skipped because they are already recorded as IMPORTS.
  function extractCalls(node, parentId) {
    if (!adapter.callExpr) return;
    if (node.type === adapter.callExpr) {
      let funcName;
      if (lang === 'java') {
        const nameNode = node.childForFieldName('name');
        const obj = node.childForFieldName('object');
        funcName = obj ? `${getText(obj)}.${getText(nameNode)}` : (nameNode ? getText(nameNode) : null);
      } else if (lang === 'python') {
        const funcNode = node.childForFieldName('function');
        funcName = funcNode ? getText(funcNode) : null;
      } else if (lang === 'bash') {
        const funcNode = node.namedChildren[0];
        funcName = funcNode ? getText(funcNode) : null;
      } else {
        const funcNode = node.childForFieldName(adapter.funcField);
        funcName = funcNode ? getText(funcNode) : null;
      }
      if (funcName) {
        if (adapter.requireFunc && funcName === adapter.requireFunc) return;
        addRel({ type: 'CALLS', source: parentId, target: funcName });
      }
    }
  }

  // --- YAML/HCL Config Extraction ---
  // NOTE(review): extract() returns early for lang 'yaml' and 'hcl' (see the top of the
  // function), so neither branch below can be entered and this helper always returns false.
  function extractConfig(node) {
    if (lang === 'yaml') {
      addEntity({ id: moduleId, type: 'Config', name: relPath, kind: 'yaml-config', visibility: 'public', line_range: lineRange(node) });
      // Extract top-level keys as config entries
      if (node.type === 'stream') {
        for (const doc of node.namedChildren) {
          if (doc.type === 'document') {
            const block = doc.namedChildren[0];
            if (block && block.type === 'block_node') {
              const mapping = block.namedChildren[0];
              if (mapping && mapping.type === 'block_mapping') {
                for (const pair of mapping.namedChildren) {
                  if (pair.type === 'block_mapping_pair') {
                    const key = pair.childForFieldName('key');
                    if (key) {
                      const keyName = getText(key);
                      const keyId = `${moduleId}:${keyName}`;
                      addEntity({ id: keyId, type: 'Config', name: keyName, kind: 'yaml-key', visibility: 'public', line_range: lineRange(pair) });
                      addRel({ type: 'CONTAINS', source: moduleId, target: keyId });
                    }
                  }
                }
              }
            }
          }
        }
      }
      return true;
    }
    if (lang === 'hcl') {
      addEntity({ id: moduleId, type: 'Config', name: relPath, kind: 'terraform', visibility: 'public', line_range: lineRange(node) });
      // Extract top-level blocks (resource, data, variable, output, module, provider)
      for (const child of node.namedChildren) {
        if (child.type === 'block') {
          const blockType = child.namedChildren[0]; // e.g., "resource"
          const labels = child.namedChildren.filter(c => c.type === 'string_lit' || c.type === 'identifier');
          const blockName = labels.map(l => getText(l).replace(/"/g, '')).join('.');
          const fullName = blockType ? `${getText(blockType)}.${blockName}` : blockName;
          const blockId = `${moduleId}:${fullName}`;
          addEntity({ id: blockId, type: 'Config', name: fullName, kind: 'hcl-block', visibility: 'public', line_range: lineRange(child) });
          addRel({ type: 'CONTAINS', source: moduleId, target: blockId });
        }
      }
      return true;
    }
    return false;
  }

  // --- Main Walker ---
  // Recursive descent over the syntax tree. `parentId` is the id of the enclosing entity.
  // The branch order matters: each branch that handles a node returns before the generic
  // fallthrough at the bottom, which records calls and descends into all children.
  function walk(node, parentId) {
    if (node.type === 'program' || node.type === 'source_file' || node.type === 'stream' || node.type === 'compilation_unit' || node.type === 'module') {
      // Config files (YAML/HCL)
      if (extractConfig(node)) return;

      // Code files
      addEntity({ id: moduleId, type: 'Module', name: relPath, kind: 'module', visibility: 'public', line_range: lineRange(node) });
      for (const child of node.children) {
        walk(child, moduleId);
      }
      return;
    }

    // Export wrapper (JS/TS)
    // Walk the wrapped declaration with the same parent, skipping the keyword tokens.
    if (adapter.exportWrapper && node.type === adapter.exportWrapper) {
      for (const child of node.children) {
        if (child.type !== 'export' && child.type !== 'default') {
          walk(child, parentId);
        }
      }
      return;
    }

    // Imports
    if (extractImports(node)) return;

    // Classes
    if (adapter.classNodes.includes(node.type)) {
      const classId = extractClass(node, parentId);
      if (classId) {
        const body = node.childForFieldName('body');
        if (body) {
          for (const child of body.namedChildren || body.children) {
            walk(child, classId);
          }
        }
        // Go type_declaration: walk type_spec children
        if (lang === 'go') {
          for (const child of node.namedChildren) {
            if (child.type === 'type_spec') {
              const structBody = child.childForFieldName('type');
              if (structBody) {
                for (const field of structBody.namedChildren) {
                  walk(field, classId);
                }
              }
            }
          }
        }
      }
      return;
    }

    // Methods (inside class body)
    if (adapter.methodNodes.includes(node.type)) {
      const methodId = extractMethod(node, parentId);
      if (methodId) {
        const body = node.childForFieldName('body');
        if (body) walkBody(body, methodId);
      }
      return;
    }

    // Class fields (arrow methods vs properties)
    if (adapter.fieldNodes.includes(node.type)) {
      const methodId = extractClassField(node, parentId);
      if (methodId) {
        const value = node.childForFieldName('value');
        if (value) {
          const body = value.childForFieldName('body');
          if (body) walkBody(body, methodId);
        }
      }
      return;
    }

    // Python: function_definition can be top-level or method (inside class)
    if (lang === 'python' && node.type === 'function_definition') {
      // A parent id other than the module id that contains ':' is treated as a class id.
      if (parentId && parentId.includes(':') && parentId !== moduleId) {
        // Inside a class → method
        const methodId = extractMethod(node, parentId);
        if (methodId) {
          const body = node.childForFieldName('body');
          if (body) walkBody(body, methodId);
        }
      } else {
        // Top-level → function
        const funcId = extractFunction(node, parentId);
        if (funcId) {
          const body = node.childForFieldName('body');
          if (body) walkBody(body, funcId);
        }
      }
      return;
    }

    // Go: method_declaration (receiver-based)
    // The method is attached to `${moduleId}:${receiverType}` when a receiver type is found,
    // otherwise to the current parent.
    if (lang === 'go' && node.type === 'method_declaration') {
      const nameNode = node.childForFieldName('name');
      const receiver = node.childForFieldName('receiver');
      if (nameNode) {
        const name = getText(nameNode);
        if (!name || name.length === 0) return;
        let receiverType = parentId;
        if (receiver) {
          const paramList = receiver.namedChildren;
          for (const p of paramList) {
            const typeNode = p.childForFieldName('type');
            if (typeNode) {
              let raw = getText(typeNode);
              // Strip pointer (*) and generic brackets safely
              let typeName = raw.replace(/^\*+/, '').replace(/\[.*\]$/, '').trim();
              if (typeName.length > 0) {
                receiverType = `${moduleId}:${typeName}`;
              }
            }
          }
        }
        const id = `${receiverType}:${name}`;
        const visibility = name.length > 0 && name[0] === name[0].toUpperCase() ? 'public' : 'internal';
        addEntity({ id, type: 'Function', name, kind: 'method', visibility, line_range: lineRange(node) });
        addRel({ type: 'CONTAINS', source: receiverType, target: id });
        const body = node.childForFieldName('body');
        if (body) walkBody(body, id);
      }
      return;
    }

    // Functions (top-level)
    if (adapter.functionNodes.includes(node.type) || (adapter.arrowFuncParent && node.type === adapter.arrowFuncParent)) {
      const funcId = extractFunction(node, parentId);
      if (funcId) {
        const body = node.type === adapter.arrowFuncParent
          ? node // For lexical_declaration, walk the whole thing
          : node.childForFieldName('body');
        if (body) walkBody(body, funcId);
        return;
      }
      // Module-level variable (JS/TS only)
      if (parentId === moduleId && adapter.arrowFuncParent && node.type === adapter.arrowFuncParent) {
        // Not a function, might be a module-level const
        // Returning here means calls inside the initializer are not recorded.
        return;
      }
    }

    // Java: package_declaration
    if (lang === 'java' && node.type === 'package_declaration') return;

    // Top-level calls
    extractCalls(node, parentId);

    for (const child of node.children) {
      walk(child, parentId);
    }
  }

  // Walk function/method bodies for CALLS only
  // Nested declarations inside a body are therefore not recorded as entities; their calls
  // are attributed to the enclosing function or method.
  function walkBody(node, parentId) {
    if (!node) return;
    extractCalls(node, parentId);
    for (const child of node.children) {
      walkBody(child, parentId);
    }
  }

  // The root call omits parentId; the root branch of walk() substitutes moduleId.
  walk(tree.rootNode);

  return { file: filePath, language: lang, entities, relationships };
}
/**
 * Developer Intelligence Pipeline v2 - Graph Store
 * In-memory directed graph: a node map, a de-duplicated edge list and a per-file index.
 * No external dependencies.
 */
class GraphStore {
  constructor() {
    this.nodes = new Map(); // entityId -> entity object
    this.edges = []; // Array of {type, source, target}
    this._edgeSet = new Set(); // Edge keys, kept in sync with `edges` for O(1) dedup
    this.fileIndex = new Map(); // filePath -> Set of entityIds
  }

  /**
   * Canonical string form of an edge, used for de-duplication and diffing.
   * @param {{type: string, source: string, target: string}} edge
   * @returns {string}
   */
  static edgeKey(edge) {
    return `${edge.type}:${edge.source}->${edge.target}`;
  }

  /**
   * Appends an edge unless an identical type/source/target triple is already stored.
   * Keeps `edges` and `_edgeSet` in sync.
   * @param {{type: string, source: string, target: string}} rel
   * @returns {boolean} true when the edge was added, false when it was a duplicate.
   */
  _addEdge(rel) {
    const key = GraphStore.edgeKey(rel);
    if (this._edgeSet.has(key)) return false;
    this._edgeSet.add(key);
    this.edges.push({ type: rel.type, source: rel.source, target: rel.target });
    return true;
  }

  /**
   * Builds the graph from an array of extract.js result objects.
   * Results without a `file`, and entities without an `id`, are skipped.
   * When several results contain the same entity id, the last one wins.
   * @param {Array} extractResults
   * @returns {GraphStore}
   */
  static buildGraph(extractResults) {
    const graph = new GraphStore();

    for (const result of extractResults) {
      // Tolerate holes in the input (e.g. a result file that failed to parse upstream).
      if (!result || !result.file) continue;
      const filePath = result.file;

      if (!graph.fileIndex.has(filePath)) {
        graph.fileIndex.set(filePath, new Set());
      }
      const fileEntities = graph.fileIndex.get(filePath);

      // Add nodes
      if (Array.isArray(result.entities)) {
        for (const entity of result.entities) {
          // An entity without an id cannot be addressed; storing it would create an
          // `undefined` key in the node map.
          if (entity == null || entity.id == null) continue;
          graph.nodes.set(entity.id, { ...entity, _file: filePath });
          fileEntities.add(entity.id);
        }
      }

      // Add edges (deduplicated via Set)
      if (Array.isArray(result.relationships)) {
        for (const rel of result.relationships) {
          if (rel) graph._addEdge(rel);
        }
      }
    }

    return graph;
  }

  /**
   * Serializes the graph (nodes, edges, file index) to a JSON file.
   * @param {GraphStore} graph
   * @param {string} outputPath
   */
  static saveSnapshot(graph, outputPath) {
    const serialized = {
      nodes: Object.fromEntries(graph.nodes),
      edges: graph.edges,
      fileIndex: Object.fromEntries(
        Array.from(graph.fileIndex.entries()).map(([k, v]) => [k, Array.from(v)])
      )
    };
    fs.writeFileSync(outputPath, JSON.stringify(serialized, null, 2), 'utf8');
  }

  /**
   * Deserializes a JSON file to a GraphStore.
   * Edges are re-added through the dedup path so that `_edgeSet` matches `edges`
   * and duplicate edges in the snapshot are collapsed.
   * @param {string} inputPath
   * @returns {GraphStore}
   */
  static loadSnapshot(inputPath) {
    const data = JSON.parse(fs.readFileSync(inputPath, 'utf8'));
    const graph = new GraphStore();

    for (const [id, entity] of Object.entries(data.nodes || {})) {
      graph.nodes.set(id, entity);
    }

    if (Array.isArray(data.edges)) {
      for (const edge of data.edges) {
        if (edge) graph._addEdge(edge);
      }
    }

    for (const [filePath, entityIds] of Object.entries(data.fileIndex || {})) {
      graph.fileIndex.set(filePath, new Set(entityIds));
    }

    return graph;
  }

  /**
   * Returns an entity and all its incoming/outgoing edges.
   * @param {GraphStore} graph
   * @param {string} entityId
   * @returns {{entity: Object, incoming: Array, outgoing: Array}|null} null when the id is unknown.
   */
  static query(graph, entityId) {
    const entity = graph.nodes.get(entityId);
    if (!entity) return null;

    const incoming = [];
    const outgoing = [];
    for (const edge of graph.edges) {
      if (edge.target === entityId) incoming.push(edge);
      if (edge.source === entityId) outgoing.push(edge);
    }

    return { entity, incoming, outgoing };
  }

  /**
   * Returns all entities that have a CALLS edge to `functionName`.
   * The value is compared verbatim with the edge target. extract.js stores the callee's
   * source text there (e.g. `foo` or `obj.method`), not an entity id.
   * @param {GraphStore} graph
   * @param {string} functionName - CALLS edge target to look for.
   * @returns {Array} Calling entities; callers that are not nodes of the graph are omitted.
   */
  static findCallers(graph, functionName) {
    return graph.edges
      .filter(e => e.type === 'CALLS' && e.target === functionName)
      .map(e => graph.nodes.get(e.source))
      .filter(Boolean);
  }

  /**
   * Returns all entities that have an IMPORTS or DEPENDS_ON edge to `moduleId`.
   * The value is compared verbatim with the edge target. extract.js uses `dep:<name>`
   * ids as IMPORTS targets.
   * @param {GraphStore} graph
   * @param {string} moduleId - IMPORTS/DEPENDS_ON edge target to look for.
   * @returns {Array}
   */
  static findDependents(graph, moduleId) {
    return graph.edges
      .filter(e => (e.type === 'IMPORTS' || e.type === 'DEPENDS_ON') && e.target === moduleId)
      .map(e => graph.nodes.get(e.source))
      .filter(Boolean);
  }

  /**
   * Returns all public entities in a file.
   * @param {GraphStore} graph
   * @param {string} filePath - Key of the file index (the `file` value of the extract result).
   * @returns {Array}
   */
  static getExports(graph, filePath) {
    const entityIds = graph.fileIndex.get(filePath);
    if (!entityIds) return [];

    return Array.from(entityIds)
      .map(id => graph.nodes.get(id))
      .filter(entity => entity && entity.visibility === 'public');
  }

  /**
   * Returns added/removed/modified entities and added/removed relationships between two snapshots.
   * Entities are compared on all own properties except `_file`.
   * @param {GraphStore} oldGraph
   * @param {GraphStore} newGraph
   * @returns {{entities: {added: Array, removed: Array, modified: Array}, relationships: {added: Array, removed: Array}}}
   */
  static diffSnapshots(oldGraph, newGraph) {
    const diff = {
      entities: { added: [], removed: [], modified: [] },
      relationships: { added: [], removed: [] }
    };

    // Deterministic comparison: the sorted key list is passed as the JSON.stringify
    // replacer, which fixes both the set and the order of serialized properties.
    const canonicalize = (obj) =>
      JSON.stringify(obj, Object.keys(obj).filter(k => k !== '_file').sort());

    // Diff Entities
    for (const [id, oldEntity] of oldGraph.nodes.entries()) {
      if (!newGraph.nodes.has(id)) {
        diff.entities.removed.push(oldEntity);
      }
    }

    for (const [id, newEntity] of newGraph.nodes.entries()) {
      const oldEntity = oldGraph.nodes.get(id);
      if (!oldEntity) {
        diff.entities.added.push(newEntity);
      } else if (canonicalize(oldEntity) !== canonicalize(newEntity)) {
        diff.entities.modified.push({ old: oldEntity, new: newEntity });
      }
    }

    // Diff Relationships
    const oldEdges = new Set(oldGraph.edges.map(GraphStore.edgeKey));
    const newEdges = new Set(newGraph.edges.map(GraphStore.edgeKey));

    for (const e of newGraph.edges) {
      if (!oldEdges.has(GraphStore.edgeKey(e))) diff.relationships.added.push(e);
    }
    for (const e of oldGraph.edges) {
      if (!newEdges.has(GraphStore.edgeKey(e))) diff.relationships.removed.push(e);
    }

    return diff;
  }
}
outputPath = args[2]; + + if (!inputDir || !outputPath) { + console.error('Usage: node graph.js build '); + process.exit(1); + } + + const files = fs.readdirSync(inputDir).filter(f => f.endsWith('.json')); + const extractResults = files.map(f => { + const content = fs.readFileSync(path.join(inputDir, f), 'utf8'); + try { + return JSON.parse(content); + } catch (e) { + console.error(`Error parsing ${f}:`, e.message); + return null; + } + }).filter(Boolean); + + const graph = GraphStore.buildGraph(extractResults); + GraphStore.saveSnapshot(graph, outputPath); + console.log(`Built graph with ${graph.nodes.size} nodes and ${graph.edges.length} edges. Saved to ${outputPath}`); + + } else if (command === 'query') { + const snapshotPath = args[1]; + const entityId = args[2]; + + if (!snapshotPath || !entityId) { + console.error('Usage: node graph.js query '); + process.exit(1); + } + + const graph = GraphStore.loadSnapshot(snapshotPath); + const result = GraphStore.query(graph, entityId); + + if (!result) { + console.log(`Entity ${entityId} not found.`); + } else { + console.log(JSON.stringify(result, null, 2)); + } + + } else if (command === 'diff') { + const oldSnapshotPath = args[1]; + const newSnapshotPath = args[2]; + + if (!oldSnapshotPath || !newSnapshotPath) { + console.error('Usage: node graph.js diff '); + process.exit(1); + } + + const oldGraph = GraphStore.loadSnapshot(oldSnapshotPath); + const newGraph = GraphStore.loadSnapshot(newSnapshotPath); + + const diff = GraphStore.diffSnapshots(oldGraph, newGraph); + console.log(JSON.stringify(diff, null, 2)); + + } else { + console.error('Unknown command. 
Available commands: build, query, diff'); + process.exit(1); + } +} + +module.exports = GraphStore; diff --git a/namespace.js b/namespace.js new file mode 100644 index 0000000..84d9729 --- /dev/null +++ b/namespace.js @@ -0,0 +1,291 @@ +const fs = require('fs'); +const path = require('path'); +const GraphStore = require('./graph.js'); + +/** + * Developer Intelligence Pipeline v2 - Cross-Repo Namespace Registry + * Resolves cross-repo references using 3-tier matching. + * No external dependencies. + */ + +const SCRIPT_DIR = __dirname; + +/** + * Classify an entity into an artifact type for infrastructure-level matching. + * Supports: rest-api, grpc-service, helm-chart, terraform-resource, config, code-module + */ +function classifyArtifact(entity) { + if (entity.type === 'Config') { + if (entity.kind === 'terraform' || entity.kind === 'hcl-block') return 'terraform-resource'; + if (entity.kind === 'yaml-config' || entity.kind === 'yaml-key') return 'config'; + return 'config'; + } + if (entity.type === 'Class' && entity.kind === 'interface') return 'interface'; + if (entity.type === 'Class') return 'class'; + if (entity.type === 'Function') return 'code-module'; + if (entity.type === 'Module') return 'code-module'; + return 'code-module'; +} + +class NamespaceRegistry { + constructor() { + this.byShortName = new Map(); // shortName -> [{repoId, entityId, type, kind}] + this.byEntityId = new Map(); // entityId -> {repoId, shortName} + this.overrides = new Map(); // localName -> {repoId, entityId} + } + + /** + * Build registry from multiple graph snapshots. + * Collects public entities and indexes them for cross-repo resolution. 
+ * @param {Array<{repoId: string, snapshot: GraphStore}>} repos + * @returns {NamespaceRegistry} + */ + static build(repos) { + const reg = new NamespaceRegistry(); + + for (const { repoId, snapshot } of repos) { + for (const [id, entity] of snapshot.nodes.entries()) { + if (entity.visibility !== 'public') continue; + if (entity.type === 'Dependency') continue; + + const shortName = entity.name; + const entry = { + repoId, + entityId: id, + type: entity.type, + kind: entity.kind, + // Artifact classification for infrastructure matching + artifact: classifyArtifact(entity), + }; + + // byShortName + if (!reg.byShortName.has(shortName)) { + reg.byShortName.set(shortName, []); + } + reg.byShortName.get(shortName).push(entry); + + // byEntityId (prefix with repoId for cross-repo uniqueness) + reg.byEntityId.set(`${repoId}:${id}`, { repoId, shortName, artifact: entry.artifact }); + } + } + + return reg; + } + + /** + * Load overrides from a JSON file. + * @param {string} overridePath + */ + loadOverrides(overridePath) { + if (!fs.existsSync(overridePath)) return; + const data = JSON.parse(fs.readFileSync(overridePath, 'utf8')); + for (const [localName, target] of Object.entries(data)) { + const colonIdx = target.indexOf(':'); + if (colonIdx > 0) { + this.overrides.set(localName, { + repoId: target.slice(0, colonIdx), + entityId: target.slice(colonIdx + 1), + }); + } + } + } + + /** + * Resolve a name using 3-tier matching. 
+ * @param {string} name - The unresolved target name + * @param {string} [sourceRepoId] - The repo making the call (excluded from results) + * @returns {{resolvedTo: {repoId, entityId}, tier: number, confidence: number} | null} + */ + resolve(name, sourceRepoId) { + // Override always wins + if (this.overrides.has(name)) { + const target = this.overrides.get(name); + return { resolvedTo: target, tier: 0, confidence: 1.0 }; + } + + // Tier 1: Exact entity ID match + for (const [key, val] of this.byEntityId.entries()) { + const entityId = key.slice(key.indexOf(':') + 1); + if (entityId === name && val.repoId !== sourceRepoId) { + return { resolvedTo: { repoId: val.repoId, entityId }, tier: 1, confidence: 1.0 }; + } + } + + // Tier 2: Normalized match (strip extensions, normalize paths) + const normalized = name.replace(/\.(ts|js|tsx|jsx|py|java|go|sh)$/, '').replace(/\\/g, '/'); + for (const [key, val] of this.byEntityId.entries()) { + const entityId = key.slice(key.indexOf(':') + 1); + const normId = entityId.replace(/\.(ts|js|tsx|jsx|py|java|go|sh)/, '').replace(/\\/g, '/'); + if (normId === normalized && val.repoId !== sourceRepoId) { + return { resolvedTo: { repoId: val.repoId, entityId }, tier: 2, confidence: 0.9 }; + } + } + + // Tier 3: Name-only match + const matches = (this.byShortName.get(name) || []).filter(e => e.repoId !== sourceRepoId); + if (matches.length === 1) { + return { resolvedTo: { repoId: matches[0].repoId, entityId: matches[0].entityId }, tier: 3, confidence: 0.7 }; + } + if (matches.length > 1) { + // Ambiguous — return first match with lower confidence + return { resolvedTo: { repoId: matches[0].repoId, entityId: matches[0].entityId }, tier: 3, confidence: 0.5 }; + } + + return null; + } + + /** + * Resolve all unresolved CALLS edges in a graph. 
+ * @param {GraphStore} graph + * @param {NamespaceRegistry} registry + * @param {string} sourceRepoId + * @returns {Array<{source, target, resolvedTo, tier, confidence}>} + */ + static resolveExternalCalls(graph, registry, sourceRepoId) { + const results = []; + + for (const edge of graph.edges) { + if (edge.type !== 'CALLS') continue; + // If target exists as a node, it's internal — skip + if (graph.nodes.has(edge.target)) continue; + + const resolution = registry.resolve(edge.target, sourceRepoId); + if (resolution) { + results.push({ + source: edge.source, + target: edge.target, + resolvedTo: resolution.resolvedTo, + tier: resolution.tier, + confidence: resolution.confidence, + }); + } + } + + return results; + } + + /** + * Serialize registry to JSON. + */ + toJSON() { + return { + byShortName: Object.fromEntries(this.byShortName), + byEntityId: Object.fromEntries(this.byEntityId), + overrides: Object.fromEntries(this.overrides), + }; + } + + /** + * Deserialize registry from JSON. + */ + static fromJSON(data) { + const reg = new NamespaceRegistry(); + for (const [k, v] of Object.entries(data.byShortName || {})) { + reg.byShortName.set(k, v); + } + for (const [k, v] of Object.entries(data.byEntityId || {})) { + reg.byEntityId.set(k, v); + } + for (const [k, v] of Object.entries(data.overrides || {})) { + reg.overrides.set(k, v); + } + return reg; + } + + /** + * Lookup a name in the registry. 
+ */ + lookup(name) { + const exact = this.byShortName.get(name) || []; + // Also check entity IDs containing the name + const byId = []; + for (const [key, val] of this.byEntityId.entries()) { + const entityId = key.slice(key.indexOf(':') + 1); + if (entityId.includes(name)) { + byId.push({ ...val, entityId }); + } + } + return { byName: exact, byId }; + } +} + +// --- CLI --- +if (require.main === module) { + const args = process.argv.slice(2); + const command = args[0]; + + if (command === 'build') { + const outputIdx = args.indexOf('--output'); + const outputPath = outputIdx >= 0 ? args[outputIdx + 1] : null; + const snapshotPaths = args.slice(1).filter((_, i) => { + const argIdx = i + 1; + return argIdx !== outputIdx && argIdx !== outputIdx + 1; + }); + + if (snapshotPaths.length === 0 || !outputPath) { + console.error('Usage: node namespace.js build [snapshot2.json ...] --output '); + process.exit(1); + } + + const repos = snapshotPaths.map((p, i) => { + const snapshot = GraphStore.loadSnapshot(p); + const repoId = path.basename(p, '.json'); + return { repoId, snapshot }; + }); + + const registry = NamespaceRegistry.build(repos); + + // Load overrides if present + const overridePath = path.join(path.dirname(outputPath), 'namespace-overrides.json'); + registry.loadOverrides(overridePath); + + fs.writeFileSync(outputPath, JSON.stringify(registry.toJSON(), null, 2), 'utf8'); + console.log(`Registry built: ${registry.byShortName.size} names, ${registry.byEntityId.size} entities from ${repos.length} repos. 
Saved to ${outputPath}`); + + } else if (command === 'resolve') { + const graphPath = args[1]; + const registryPath = args[2]; + + if (!graphPath || !registryPath) { + console.error('Usage: node namespace.js resolve '); + process.exit(1); + } + + const graph = GraphStore.loadSnapshot(graphPath); + const regData = JSON.parse(fs.readFileSync(registryPath, 'utf8')); + const registry = NamespaceRegistry.fromJSON(regData); + const sourceRepoId = path.basename(graphPath, '.json'); + + const results = NamespaceRegistry.resolveExternalCalls(graph, registry, sourceRepoId); + + if (results.length === 0) { + console.log('No external calls resolved.'); + } else { + console.log(`Resolved ${results.length} external call(s):`); + for (const r of results) { + console.log(` ${r.source} -> ${r.target} => ${r.resolvedTo.repoId}:${r.resolvedTo.entityId} (tier ${r.tier}, confidence ${r.confidence})`); + } + } + + } else if (command === 'lookup') { + const registryPath = args[1]; + const name = args[2]; + + if (!registryPath || !name) { + console.error('Usage: node namespace.js lookup '); + process.exit(1); + } + + const regData = JSON.parse(fs.readFileSync(registryPath, 'utf8')); + const registry = NamespaceRegistry.fromJSON(regData); + const result = registry.lookup(name); + + console.log(JSON.stringify(result, null, 2)); + + } else { + console.error('Unknown command. 
Available: build, resolve, lookup'); + process.exit(1); + } +} + +module.exports = NamespaceRegistry; diff --git a/package.json b/package.json new file mode 100644 index 0000000..f360b13 --- /dev/null +++ b/package.json @@ -0,0 +1,25 @@ +{ + "name": "dev-intel-v2", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC", + "dependencies": { + "@tree-sitter-grammars/tree-sitter-hcl": "^1.2.0", + "js-yaml": "^4.1.1", + "tree-sitter": "^0.21.1", + "tree-sitter-bash": "^0.21.0", + "tree-sitter-go": "^0.21.2", + "tree-sitter-java": "^0.21.0", + "tree-sitter-javascript": "^0.21.2", + "tree-sitter-python": "^0.21.0", + "tree-sitter-typescript": "^0.21.1", + "tree-sitter-yaml": "^0.5.0", + "web-tree-sitter": "^0.26.6" + } +} diff --git a/pipeline.js b/pipeline.js new file mode 100644 index 0000000..5ae18d9 --- /dev/null +++ b/pipeline.js @@ -0,0 +1,256 @@ +const fs = require('fs'); +const path = require('path'); +const GraphStore = require('./graph.js'); +const { extract } = require('./extract.js'); +const { semanticDiff, formatSummary } = require('./semantic-diff.js'); + +/** + * Developer Intelligence Pipeline v2 - Pipeline Orchestrator + * Batch extraction, incremental diffing, and benchmarking. + * No external dependencies. + */ + +const SUPPORTED_EXTS = new Set([ + '.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.go', '.sh', '.bash', + '.yaml', '.yml', '.tf', '.hcl', +]); + +const IGNORE_DIRS = new Set([ + 'node_modules', '.git', 'dist', 'build', '__pycache__', '.next', + '.turbo', 'coverage', '.nyc_output', 'vendor', +]); + +const SCRIPT_DIR = __dirname; +const EXTRACT_JS = path.join(SCRIPT_DIR, 'extract.js'); + +/** + * Recursively discover supported files. 
/**
 * Recursively discover supported files.
 *
 * Entries whose name is in IGNORE_DIRS are skipped; files are kept when their
 * extension is in SUPPORTED_EXTS. Directories that cannot be read are skipped
 * (best-effort). Results are in depth-first readdir order.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths of supported files, each joined onto `dir`.
 */
function discoverFiles(dir) {
  const results = [];

  // Accumulate into one array instead of `push(...recursiveResult)`, which
  // can exceed the engine's argument-count limit on very large trees.
  const walk = (current) => {
    let entries;
    try {
      entries = fs.readdirSync(current, { withFileTypes: true });
    } catch {
      return;
    }

    for (const entry of entries) {
      if (IGNORE_DIRS.has(entry.name)) continue;
      const fullPath = path.join(current, entry.name);
      if (entry.isDirectory()) {
        walk(fullPath);
      } else if (entry.isFile() && SUPPORTED_EXTS.has(path.extname(entry.name))) {
        results.push(fullPath);
      }
    }
  };

  walk(dir);
  return results;
}

/**
 * Extract a single file using in-process extract(), no subprocess.
 * Failures are reported on stderr and turned into a null result so a single
 * bad file never aborts a batch.
 *
 * @param {string} filePath
 * @param {string} repoRoot
 * @returns {Object|null} The extract() result, or null if extraction threw.
 */
function extractFile(filePath, repoRoot) {
  try {
    return extract(filePath, repoRoot);
  } catch (err) {
    console.error(`Extraction failed for ${filePath}: ${err?.message ?? err}`);
    return null;
  }
}

/**
 * Batch extract all files, build graph, save snapshot.
 * Writes `snapshot.json` and `stats.json` into `outputDir`, creating it if
 * necessary.
 *
 * @param {string} repoRoot
 * @param {string} outputDir
 * @returns {{graph: GraphStore, snapshotPath: string, stats: Object}}
 */
function batchExtract(repoRoot, outputDir) {
  const files = discoverFiles(repoRoot);
  console.log(`Discovered ${files.length} supported files in ${repoRoot}`);

  fs.mkdirSync(outputDir, { recursive: true });

  const results = [];
  let errors = 0;
  const startTime = Date.now();

  files.forEach((file, index) => {
    const result = extractFile(file, repoRoot);
    if (result && !result.error) {
      results.push(result);
    } else {
      errors++;
    }
    if ((index + 1) % 100 === 0) {
      console.log(`  Extracted ${index + 1}/${files.length}...`);
    }
  });

  const extractTime = Date.now() - startTime;
  console.log(`Extraction complete: ${results.length} succeeded, ${errors} failed (${extractTime}ms)`);

  const graph = GraphStore.buildGraph(results);
  const snapshotPath = path.join(outputDir, 'snapshot.json');
  GraphStore.saveSnapshot(graph, snapshotPath);
  console.log(`Graph: ${graph.nodes.size} nodes, ${graph.edges.length} edges. Saved to ${snapshotPath}`);

  // Save stats
  const stats = {
    repoRoot,
    filesDiscovered: files.length,
    filesExtracted: results.length,
    errors,
    nodes: graph.nodes.size,
    edges: graph.edges.length,
    extractionTimeMs: extractTime,
    // Guard the empty-repo case: 0/0 is NaN, which JSON serializes as null.
    avgTimePerFileMs: files.length > 0 ? Math.round(extractTime / files.length) : 0,
    timestamp: new Date().toISOString(),
  };
  fs.writeFileSync(path.join(outputDir, 'stats.json'), JSON.stringify(stats, null, 2));
  console.log(`Stats saved. Avg ${stats.avgTimePerFileMs}ms/file`);

  return { graph, snapshotPath, stats };
}

/**
 * Incremental run: extract files, diff against previous snapshot.
 * Always writes a new `snapshot.json` into `outputDir`. When
 * `prevSnapshotPath` names an existing file, a semantic diff against it is
 * printed and written to `diff.json`.
 *
 * @param {string} repoRoot
 * @param {string[]|null} files - Files to extract; falsy means "discover all".
 * @param {string|null} prevSnapshotPath
 * @param {string} outputDir
 * @returns {{newSnapshotPath: string}}
 */
function incrementalRun(repoRoot, files, prevSnapshotPath, outputDir) {
  fs.mkdirSync(outputDir, { recursive: true });

  const filesToExtract = files || discoverFiles(repoRoot);
  console.log(`Extracting ${filesToExtract.length} files...`);

  const results = [];
  let errors = 0;

  for (const file of filesToExtract) {
    const result = extractFile(file, repoRoot);
    if (result && !result.error) {
      results.push(result);
    } else {
      errors++;
    }
  }
  // Report failures like batchExtract does, so a partial snapshot is visible.
  console.log(`Extraction complete: ${results.length} succeeded, ${errors} failed`);

  const newGraph = GraphStore.buildGraph(results);
  const newSnapshotPath = path.join(outputDir, 'snapshot.json');
  GraphStore.saveSnapshot(newGraph, newSnapshotPath);
  console.log(`New graph: ${newGraph.nodes.size} nodes, ${newGraph.edges.length} edges`);

  if (prevSnapshotPath && fs.existsSync(prevSnapshotPath)) {
    const oldGraph = GraphStore.loadSnapshot(prevSnapshotPath);
    const diff = semanticDiff(oldGraph, newGraph);
    console.log(formatSummary(diff));

    const { score, severity, stats, categorized } = diff;
    fs.writeFileSync(
      path.join(outputDir, 'diff.json'),
      JSON.stringify({ score, severity, stats, categorized }, null, 2)
    );
  }

  return { newSnapshotPath };
}
+ */ +function benchmark(repoRoot, sampleCount) { + const allFiles = discoverFiles(repoRoot); + console.log(`Total supported files: ${allFiles.length}`); + + // Shuffle and pick N + const shuffled = allFiles.sort(() => Math.random() - 0.5); + const samples = shuffled.slice(0, Math.min(sampleCount, allFiles.length)); + console.log(`Benchmarking ${samples.length} files...\n`); + + const timings = []; + let totalEntities = 0; + let totalRelationships = 0; + let errors = 0; + + for (const file of samples) { + const start = Date.now(); + const result = extractFile(file, repoRoot); + const elapsed = Date.now() - start; + + if (result && !result.error) { + timings.push({ file: path.relative(repoRoot, file), timeMs: elapsed, entities: result.entities.length, relationships: result.relationships.length }); + totalEntities += result.entities.length; + totalRelationships += result.relationships.length; + } else { + errors++; + timings.push({ file: path.relative(repoRoot, file), timeMs: elapsed, entities: 0, relationships: 0, error: true }); + } + } + + // Sort by time descending + timings.sort((a, b) => b.timeMs - a.timeMs); + + const totalTime = timings.reduce((s, t) => s + t.timeMs, 0); + const avgTime = Math.round(totalTime / timings.length); + const p50 = timings[Math.floor(timings.length * 0.5)]?.timeMs || 0; + const p95 = timings[Math.floor(timings.length * 0.05)]?.timeMs || 0; + + console.log('=== V2 Pipeline Benchmark ==='); + console.log(`Repo: ${repoRoot}`); + console.log(`Files sampled: ${samples.length} / ${allFiles.length}`); + console.log(`Errors: ${errors}`); + console.log(`Total entities: ${totalEntities}`); + console.log(`Total relationships: ${totalRelationships}`); + console.log(`Total time: ${totalTime}ms`); + console.log(`Avg time/file: ${avgTime}ms`); + console.log(`P50: ${p50}ms | P95: ${p95}ms`); + console.log(''); + console.log('Top 5 slowest:'); + for (const t of timings.slice(0, 5)) { + console.log(` ${t.timeMs}ms ${t.file} 
(${t.entities}E/${t.relationships}R)${t.error ? ' ERROR' : ''}`); + } + + return { totalFiles: allFiles.length, sampled: samples.length, errors, totalEntities, totalRelationships, totalTime, avgTime, p50, p95 }; +} + +// --- CLI --- +if (require.main === module) { + const args = process.argv.slice(2); + const command = args[0]; + + if (command === 'batch') { + const repoRoot = args[1]; + const outputIdx = args.indexOf('--output'); + const outputDir = outputIdx >= 0 ? args[outputIdx + 1] : '/tmp/pipeline-output'; + + if (!repoRoot) { + console.error('Usage: node pipeline.js batch --output '); + process.exit(1); + } + batchExtract(repoRoot, outputDir); + + } else if (command === 'run') { + const repoRoot = args[1]; + const snapshotIdx = args.indexOf('--snapshot'); + const prevSnapshot = snapshotIdx >= 0 ? args[snapshotIdx + 1] : null; + const outputIdx = args.indexOf('--output'); + const outputDir = outputIdx >= 0 ? args[outputIdx + 1] : '/tmp/pipeline-output'; + + if (!repoRoot) { + console.error('Usage: node pipeline.js run [--snapshot ] [--output ]'); + process.exit(1); + } + incrementalRun(repoRoot, null, prevSnapshot, outputDir); + + } else if (command === 'benchmark') { + const repoRoot = args[1]; + const samplesIdx = args.indexOf('--samples'); + const sampleCount = samplesIdx >= 0 ? parseInt(args[samplesIdx + 1], 10) : 10; + + if (!repoRoot) { + console.error('Usage: node pipeline.js benchmark --samples '); + process.exit(1); + } + benchmark(repoRoot, sampleCount); + + } else { + console.error('Unknown command. 
Available: batch, run, benchmark'); + process.exit(1); + } +} + +module.exports = { discoverFiles, extractFile, batchExtract, incrementalRun, benchmark }; diff --git a/semantic-diff.js b/semantic-diff.js new file mode 100644 index 0000000..de827ef --- /dev/null +++ b/semantic-diff.js @@ -0,0 +1,328 @@ +const fs = require('fs'); +const path = require('path'); +const GraphStore = require('./graph.js'); + +/** + * Developer Intelligence Pipeline v2 - Semantic Diff Engine + * Compares two graph snapshots and produces categorized, scored diffs. + * No external dependencies. + */ + +const SEVERITY = [ + [0, 20, 'trivial'], + [21, 40, 'low'], + [41, 60, 'moderate'], + [61, 80, 'high'], + [81, 100, 'critical'], +]; + +function severityLabel(score) { + for (const [lo, hi, label] of SEVERITY) { + if (score >= lo && score <= hi) return label; + } + return 'unknown'; +} + +/** + * Categorize a single entity change by impact level. + */ +function categorizeEntityChange(changeType, entity, oldEntity) { + const isPublic = (e) => e && e.visibility === 'public'; + + if (changeType === 'removed' && isPublic(entity)) return 'breaking'; + if (changeType === 'added' && isPublic(entity)) return 'significant'; + if (changeType === 'modified') { + // Check if only line_range changed (cosmetic) + if (oldEntity && entity) { + const oKeys = Object.keys(oldEntity).filter(k => k !== '_file' && k !== 'line_range'); + const nKeys = Object.keys(entity).filter(k => k !== '_file' && k !== 'line_range'); + const sameSemantics = oKeys.length === nKeys.length && + oKeys.every(k => JSON.stringify(oldEntity[k]) === JSON.stringify(entity[k])); + if (sameSemantics) return 'cosmetic'; + } + if (isPublic(entity) || isPublic(oldEntity)) return 'significant'; + return 'internal'; + } + if (changeType === 'added' || changeType === 'removed') return 'internal'; + return 'internal'; +} + +/** + * Categorize a relationship change. 
/**
 * Categorize a relationship change.
 *
 * @param {string} changeType - 'added' or 'removed'.
 * @param {{source: string, target: string}} rel
 * @param {GraphStore|null} graph - Graph used to look up endpoint visibility.
 * @returns {'breaking'|'significant'|'internal'} 'breaking' for a removed
 *   edge touching a public node, 'significant' for any other change touching
 *   a public node, otherwise 'internal'.
 */
function categorizeRelChange(changeType, rel, graph) {
  const isPublicNode = (id) => {
    const node = graph ? graph.nodes.get(id) : null;
    return Boolean(node && node.visibility === 'public');
  };

  if (!isPublicNode(rel.source) && !isPublicNode(rel.target)) return 'internal';
  return changeType === 'removed' ? 'breaking' : 'significant';
}

/**
 * Compute semantic diff between two graph snapshots.
 *
 * @param {GraphStore} oldGraph
 * @param {GraphStore} newGraph
 * @returns {{categorized: Object, score: number, severity: string,
 *   stats: Object, impactAnalysis: Object, rawDiff: Object}}
 */
function semanticDiff(oldGraph, newGraph) {
  const rawDiff = GraphStore.diffSnapshots(oldGraph, newGraph);
  const { entities, relationships } = rawDiff;

  const categorized = {
    breaking: [],
    significant: [],
    internal: [],
    cosmetic: [],
  };

  // Categorize entity changes
  for (const entity of entities.added) {
    categorized[categorizeEntityChange('added', entity, null)]
      .push({ change: 'added', entity });
  }
  for (const entity of entities.removed) {
    categorized[categorizeEntityChange('removed', entity, null)]
      .push({ change: 'removed', entity });
  }
  for (const { old: oldE, new: newE } of entities.modified) {
    categorized[categorizeEntityChange('modified', newE, oldE)]
      .push({ change: 'modified', old: oldE, new: newE });
  }

  // Categorize relationship changes. Added edges are judged against the new
  // graph, removed edges against the old one, where their endpoints exist.
  for (const rel of relationships.added) {
    categorized[categorizeRelChange('added', rel, newGraph)]
      .push({ change: 'rel-added', rel });
  }
  for (const rel of relationships.removed) {
    categorized[categorizeRelChange('removed', rel, oldGraph)]
      .push({ change: 'rel-removed', rel });
  }

  // Impact score
  const score = computeScore(categorized);

  // Stats
  const filesChanged = new Set();
  const touched = [
    ...entities.added,
    ...entities.removed,
    ...entities.modified.flatMap(({ old: o, new: n }) => [o, n]),
  ];
  for (const entity of touched) {
    if (entity._file) filesChanged.add(entity._file);
  }

  const stats = {
    filesChanged: filesChanged.size,
    entitiesAdded: entities.added.length,
    entitiesRemoved: entities.removed.length,
    entitiesModified: entities.modified.length,
    relationshipsAdded: relationships.added.length,
    relationshipsRemoved: relationships.removed.length,
  };

  // Impact analysis: find callers of removed/modified entities
  const impactAnalysis = computeImpactAnalysis(categorized, oldGraph, newGraph);

  return { categorized, score, severity: severityLabel(score), stats, impactAnalysis, rawDiff };
}

/**
 * Compute impact analysis: who calls the things that changed?
 *
 * Considers the entities behind breaking/significant changes and looks them
 * up in the OLD graph:
 *   - callers: sources of CALLS edges targeting the entity;
 *   - dependents: sources of IMPORTS edges whose target (minus a leading
 *     `dep:`) is the ID of any entity in the same file as the entity.
 *
 * @param {{breaking: Array, significant: Array}} categorized
 * @param {GraphStore} oldGraph
 * @param {GraphStore} newGraph - Currently unused; kept for the call signature.
 * @returns {{callers: Array<{entityId: string, calledBy: string[]}>,
 *   dependents: Array<{entityId: string, importedBy: string[]}>}}
 */
function computeImpactAnalysis(categorized, oldGraph, newGraph) {
  const impacted = { callers: [], dependents: [] };

  const changedIds = new Set();
  for (const item of [...categorized.breaking, ...categorized.significant]) {
    if (item.entity) changedIds.add(item.entity.id);
    if (item.old) changedIds.add(item.old.id);
    if (item.new) changedIds.add(item.new.id);
  }
  if (changedIds.size === 0) return impacted;

  // Index the edge list once instead of rescanning it for every changed ID.
  const callersByTarget = new Map(); // changed entity ID -> Set of caller IDs
  const importEdges = [];            // [{ source, target (dep: prefix removed) }]
  for (const edge of oldGraph.edges) {
    if (edge.type === 'CALLS') {
      if (!changedIds.has(edge.target)) continue;
      let callers = callersByTarget.get(edge.target);
      if (!callers) {
        callers = new Set();
        callersByTarget.set(edge.target, callers);
      }
      callers.add(edge.source);
    } else if (edge.type === 'IMPORTS') {
      // `dep:` is a prefix marker; only strip it from the start of the target.
      const target = edge.target.startsWith('dep:') ? edge.target.slice(4) : edge.target;
      importEdges.push({ source: edge.source, target });
    }
  }

  for (const id of changedIds) {
    const callers = callersByTarget.get(id);
    if (callers) {
      impacted.callers.push({ entityId: id, calledBy: [...callers] });
    }

    // Find dependents (who imports the module this belongs to?)
    const entity = oldGraph.nodes.get(id);
    const fileEntities = entity && entity._file
      ? oldGraph.fileIndex.get(entity._file)
      : undefined;
    if (!fileEntities) continue;

    const siblingIds = fileEntities instanceof Set ? fileEntities : new Set(fileEntities);
    const importers = new Set();
    for (const { source, target } of importEdges) {
      if (siblingIds.has(target)) importers.add(source);
    }
    if (importers.size > 0) {
      impacted.dependents.push({ entityId: id, importedBy: [...importers] });
    }
  }

  return impacted;
}

/**
 * Compute impact score (0-100).
 * Additive weighted score, capped at 100: breaking 40, significant 30,
 * internal 20, cosmetic 10 per change.
 *
 * @param {{breaking: Array, significant: Array, internal: Array, cosmetic: Array}} categorized
 * @returns {number}
 */
function computeScore(categorized) {
  const weights = { breaking: 40, significant: 30, internal: 20, cosmetic: 10 };

  let raw = 0;
  for (const [category, weight] of Object.entries(weights)) {
    raw += categorized[category].length * weight;
  }
  return Math.min(100, raw);
}

/**
 * File-scoped diff: only entities belonging to a specific file.
 * Each graph is reduced to the file's entities plus every edge that has at
 * least one endpoint among them, then the reduced graphs are diffed.
 *
 * @param {GraphStore} oldGraph
 * @param {GraphStore} newGraph
 * @param {string} filePath - Key into each graph's fileIndex.
 * @returns {Object} Same shape as semanticDiff().
 */
function diffFiles(oldGraph, newGraph, filePath) {
  // Build scoped graphs containing only entities from the target file
  const scopeGraph = (graph) => {
    const scoped = new GraphStore();
    const entityIds = graph.fileIndex.get(filePath);
    if (!entityIds) return scoped;

    for (const id of entityIds) {
      const entity = graph.nodes.get(id);
      if (entity) scoped.nodes.set(id, entity);
    }

    const touchesFile = (edge) => entityIds.has(edge.source) || entityIds.has(edge.target);
    for (const edge of graph.edges) {
      if (touchesFile(edge)) scoped.edges.push(edge);
    }

    scoped.fileIndex.set(filePath, new Set(entityIds));
    return scoped;
  };

  return semanticDiff(scopeGraph(oldGraph), scopeGraph(newGraph));
}
+ */ +function formatSummary(diff) { + const lines = []; + lines.push(`=== Semantic Diff Summary ===`); + lines.push(`Impact Score: ${diff.score}/100 (${diff.severity})`); + lines.push(`Files Changed: ${diff.stats.filesChanged}`); + lines.push(`Entities: +${diff.stats.entitiesAdded} -${diff.stats.entitiesRemoved} ~${diff.stats.entitiesModified}`); + lines.push(`Relationships: +${diff.stats.relationshipsAdded} -${diff.stats.relationshipsRemoved}`); + lines.push(''); + + if (diff.categorized.breaking.length > 0) { + lines.push(`⛔ BREAKING CHANGES (${diff.categorized.breaking.length}):`); + for (const item of diff.categorized.breaking) { + if (item.entity) lines.push(` ${item.change}: ${item.entity.id} (${item.entity.type})`); + if (item.rel) lines.push(` ${item.change}: ${item.rel.type} ${item.rel.source} -> ${item.rel.target}`); + } + lines.push(''); + } + + if (diff.categorized.significant.length > 0) { + lines.push(`⚠️ SIGNIFICANT CHANGES (${diff.categorized.significant.length}):`); + for (const item of diff.categorized.significant) { + if (item.entity) lines.push(` ${item.change}: ${item.entity.id} (${item.entity.type})`); + if (item.old && item.new) lines.push(` modified: ${item.new.id} (${item.new.type})`); + if (item.rel) lines.push(` ${item.change}: ${item.rel.type} ${item.rel.source} -> ${item.rel.target}`); + } + lines.push(''); + } + + if (diff.categorized.internal.length > 0) { + lines.push(`ℹ️ INTERNAL CHANGES (${diff.categorized.internal.length}):`); + for (const item of diff.categorized.internal) { + if (item.entity) lines.push(` ${item.change}: ${item.entity.id}`); + if (item.old && item.new) lines.push(` modified: ${item.new.id}`); + if (item.rel) lines.push(` ${item.change}: ${item.rel.type} ${item.rel.source} -> ${item.rel.target}`); + } + lines.push(''); + } + + if (diff.categorized.cosmetic.length > 0) { + lines.push(`💅 COSMETIC CHANGES (${diff.categorized.cosmetic.length}):`); + for (const item of diff.categorized.cosmetic) { + if (item.old && 
// --- CLI ---
// Runs only when this file is executed directly, not when it is require()d:
//   node semantic-diff.js diff  <old-snapshot> <new-snapshot> [--file <path>]
//   node semantic-diff.js score <old-snapshot> <new-snapshot>
if (require.main === module) {
  const [command, ...rest] = process.argv.slice(2);

  // Split the remaining arguments into positionals and the optional
  // `--file <path>` pair, so the flag may appear anywhere after the command
  // without being mistaken for a snapshot path.
  const positionals = [];
  let filePath = null;
  let fileValueMissing = false;
  for (let i = 0; i < rest.length; i++) {
    if (rest[i] !== '--file') {
      positionals.push(rest[i]);
    } else if (i + 1 < rest.length) {
      i += 1;
      filePath = rest[i];
    } else {
      // `--file` was the last argument: report it instead of silently
      // falling back to a whole-graph diff.
      fileValueMissing = true;
    }
  }
  const [oldPath, newPath] = positionals;

  if (command === 'diff') {
    if (!oldPath || !newPath || fileValueMissing) {
      console.error('Usage: node semantic-diff.js diff <old-snapshot> <new-snapshot> [--file <path>]');
      process.exit(1);
    }

    const oldGraph = GraphStore.loadSnapshot(oldPath);
    const newGraph = GraphStore.loadSnapshot(newPath);

    // With --file the diff is restricted to that file; otherwise the whole graph is compared.
    const diff = filePath
      ? diffFiles(oldGraph, newGraph, filePath)
      : semanticDiff(oldGraph, newGraph);

    console.log(formatSummary(diff));
    console.log('--- Raw JSON ---');
    console.log(JSON.stringify({ categorized: diff.categorized, score: diff.score, severity: diff.severity, stats: diff.stats }, null, 2));

  } else if (command === 'score') {
    if (!oldPath || !newPath) {
      console.error('Usage: node semantic-diff.js score <old-snapshot> <new-snapshot>');
      process.exit(1);
    }

    const oldGraph = GraphStore.loadSnapshot(oldPath);
    const newGraph = GraphStore.loadSnapshot(newPath);
    const diff = semanticDiff(oldGraph, newGraph);

    // One-line output: "<score> (<severity>)".
    console.log(`${diff.score} (${diff.severity})`);

  } else {
    console.error('Unknown command. Available: diff, score');
    process.exit(1);
  }
}

module.exports = { semanticDiff, diffFiles, formatSummary, computeScore };
"entities": [ + { + "id": "test_go.go", + "type": "Module", + "name": "test_go.go", + "kind": "module", + "visibility": "public", + "line_range": [ + 1, + 21 + ] + }, + { + "id": "dep:fmt", + "type": "Dependency", + "name": "fmt", + "kind": "import", + "visibility": "internal", + "line_range": [ + 4, + 4 + ] + }, + { + "id": "dep:net/http", + "type": "Dependency", + "name": "net/http", + "kind": "import", + "visibility": "internal", + "line_range": [ + 5, + 5 + ] + }, + { + "id": "test_go.go:Start", + "type": "Function", + "name": "Start", + "kind": "method", + "visibility": "public", + "line_range": [ + 12, + 15 + ] + }, + { + "id": "test_go.go:main", + "type": "Function", + "name": "main", + "kind": "function", + "visibility": "internal", + "line_range": [ + 17, + 20 + ] + } + ], + "relationships": [ + { + "type": "IMPORTS", + "source": "test_go.go", + "target": "dep:fmt" + }, + { + "type": "IMPORTS", + "source": "test_go.go", + "target": "dep:net/http" + }, + { + "type": "CONTAINS", + "source": "test_go.go", + "target": "test_go.go:Start" + }, + { + "type": "CALLS", + "source": "test_go.go:Start", + "target": "fmt.Println" + }, + { + "type": "CALLS", + "source": "test_go.go:Start", + "target": "http.ListenAndServe" + }, + { + "type": "CONTAINS", + "source": "test_go.go", + "target": "test_go.go:main" + }, + { + "type": "CALLS", + "source": "test_go.go:main", + "target": "s.Start" + } + ] +} diff --git a/test/ground-truth/java-service.json b/test/ground-truth/java-service.json new file mode 100644 index 0000000..80626e5 --- /dev/null +++ b/test/ground-truth/java-service.json @@ -0,0 +1,130 @@ +{ + "file": "/tmp/TestJava.java", + "language": "java", + "entities": [ + { + "id": "TestJava.java", + "type": "Module", + "name": "TestJava.java", + "kind": "module", + "visibility": "public", + "line_range": [ + 1, + 22 + ] + }, + { + "id": "dep:java.util.List", + "type": "Dependency", + "name": "java.util.List", + "kind": "import", + "visibility": "internal", + 
"line_range": [ + 3, + 3 + ] + }, + { + "id": "dep:org.springframework.stereotype.Service", + "type": "Dependency", + "name": "org.springframework.stereotype.Service", + "kind": "import", + "visibility": "internal", + "line_range": [ + 4, + 4 + ] + }, + { + "id": "TestJava.java:TenantService", + "type": "Class", + "name": "TenantService", + "kind": "class", + "visibility": "public", + "line_range": [ + 6, + 21 + ] + }, + { + "id": "TestJava.java:TenantService:TenantService", + "type": "Function", + "name": "TenantService", + "kind": "method", + "visibility": "public", + "line_range": [ + 10, + 12 + ] + }, + { + "id": "TestJava.java:TenantService:getTenants", + "type": "Function", + "name": "getTenants", + "kind": "method", + "visibility": "public", + "line_range": [ + 14, + 16 + ] + }, + { + "id": "TestJava.java:TenantService:audit", + "type": "Function", + "name": "audit", + "kind": "method", + "visibility": "private", + "line_range": [ + 18, + 20 + ] + } + ], + "relationships": [ + { + "type": "IMPORTS", + "source": "TestJava.java", + "target": "dep:java.util.List" + }, + { + "type": "IMPORTS", + "source": "TestJava.java", + "target": "dep:org.springframework.stereotype.Service" + }, + { + "type": "CONTAINS", + "source": "TestJava.java", + "target": "TestJava.java:TenantService" + }, + { + "type": "IMPLEMENTS", + "source": "TestJava.java:TenantService", + "target": "BaseService" + }, + { + "type": "CONTAINS", + "source": "TestJava.java:TenantService", + "target": "TestJava.java:TenantService:TenantService" + }, + { + "type": "CONTAINS", + "source": "TestJava.java:TenantService", + "target": "TestJava.java:TenantService:getTenants" + }, + { + "type": "CALLS", + "source": "TestJava.java:TenantService:getTenants", + "target": "this.db.query" + }, + { + "type": "CONTAINS", + "source": "TestJava.java:TenantService", + "target": "TestJava.java:TenantService:audit" + }, + { + "type": "CALLS", + "source": "TestJava.java:TenantService:audit", + "target": "Logger.log" + } 
+ ] +} diff --git a/test/ground-truth/mask-api-key.json b/test/ground-truth/mask-api-key.json new file mode 100644 index 0000000..45f934f --- /dev/null +++ b/test/ground-truth/mask-api-key.json @@ -0,0 +1,45 @@ +{ + "file": "/app/src/utils/mask-api-key.ts", + "language": "typescript", + "entities": [ + { + "id": "utils/mask-api-key.ts", + "type": "Module", + "name": "utils/mask-api-key.ts", + "kind": "module", + "visibility": "public", + "line_range": [ + 1, + 14 + ] + }, + { + "id": "utils/mask-api-key.ts:maskApiKey", + "type": "Function", + "name": "maskApiKey", + "kind": "function", + "visibility": "public", + "line_range": [ + 1, + 13 + ] + } + ], + "relationships": [ + { + "type": "CONTAINS", + "source": "utils/mask-api-key.ts", + "target": "utils/mask-api-key.ts:maskApiKey" + }, + { + "type": "CALLS", + "source": "utils/mask-api-key.ts:maskApiKey", + "target": "value.trim" + }, + { + "type": "CALLS", + "source": "utils/mask-api-key.ts:maskApiKey", + "target": "trimmed.slice" + } + ] +} diff --git a/test/ground-truth/python-service.json b/test/ground-truth/python-service.json new file mode 100644 index 0000000..7ad8ff1 --- /dev/null +++ b/test/ground-truth/python-service.json @@ -0,0 +1,241 @@ +{ + "file": "/tmp/test_service.py", + "language": "python", + "entities": [ + { + "id": "test_service.py", + "type": "Module", + "name": "test_service.py", + "kind": "module", + "visibility": "public", + "line_range": [ + 1, + 34 + ] + }, + { + "id": "dep:os", + "type": "Dependency", + "name": "os", + "kind": "import", + "visibility": "internal", + "line_range": [ + 1, + 1 + ] + }, + { + "id": "dep:typing", + "type": "Dependency", + "name": "typing", + "kind": "import", + "visibility": "internal", + "line_range": [ + 2, + 2 + ] + }, + { + "id": "dep:dataclasses", + "type": "Dependency", + "name": "dataclasses", + "kind": "import", + "visibility": "internal", + "line_range": [ + 3, + 3 + ] + }, + { + "id": "dep:.config", + "type": "Dependency", + "name": ".config", + 
"kind": "import", + "visibility": "internal", + "line_range": [ + 4, + 4 + ] + }, + { + "id": "dep:.database", + "type": "Dependency", + "name": ".database", + "kind": "import", + "visibility": "internal", + "line_range": [ + 5, + 5 + ] + }, + { + "id": "test_service.py:TenantConfig", + "type": "Class", + "name": "TenantConfig", + "kind": "class", + "visibility": "public", + "line_range": [ + 8, + 11 + ] + }, + { + "id": "test_service.py:TenantService", + "type": "Class", + "name": "TenantService", + "kind": "class", + "visibility": "public", + "line_range": [ + 13, + 30 + ] + }, + { + "id": "test_service.py:TenantService:__init__", + "type": "Function", + "name": "__init__", + "kind": "method", + "visibility": "private", + "line_range": [ + 14, + 16 + ] + }, + { + "id": "test_service.py:TenantService:get_tenant", + "type": "Function", + "name": "get_tenant", + "kind": "method", + "visibility": "public", + "line_range": [ + 18, + 22 + ] + }, + { + "id": "test_service.py:TenantService:_enrich", + "type": "Function", + "name": "_enrich", + "kind": "method", + "visibility": "protected", + "line_range": [ + 24, + 26 + ] + }, + { + "id": "test_service.py:TenantService:create_tenant", + "type": "Function", + "name": "create_tenant", + "kind": "method", + "visibility": "public", + "line_range": [ + 28, + 30 + ] + }, + { + "id": "test_service.py:health_check", + "type": "Function", + "name": "health_check", + "kind": "function", + "visibility": "public", + "line_range": [ + 32, + 33 + ] + } + ], + "relationships": [ + { + "type": "IMPORTS", + "source": "test_service.py", + "target": "dep:os" + }, + { + "type": "IMPORTS", + "source": "test_service.py", + "target": "dep:typing" + }, + { + "type": "IMPORTS", + "source": "test_service.py", + "target": "dep:dataclasses" + }, + { + "type": "IMPORTS", + "source": "test_service.py", + "target": "dep:.config" + }, + { + "type": "IMPORTS", + "source": "test_service.py", + "target": "dep:.database" + }, + { + "type": "CONTAINS", + 
"source": "test_service.py", + "target": "test_service.py:TenantConfig" + }, + { + "type": "CONTAINS", + "source": "test_service.py", + "target": "test_service.py:TenantService" + }, + { + "type": "CONTAINS", + "source": "test_service.py:TenantService", + "target": "test_service.py:TenantService:__init__" + }, + { + "type": "CALLS", + "source": "test_service.py:TenantService:__init__", + "target": "load_config" + }, + { + "type": "CONTAINS", + "source": "test_service.py:TenantService", + "target": "test_service.py:TenantService:get_tenant" + }, + { + "type": "CALLS", + "source": "test_service.py:TenantService:get_tenant", + "target": "self.db.query" + }, + { + "type": "CALLS", + "source": "test_service.py:TenantService:get_tenant", + "target": "self._enrich" + }, + { + "type": "CONTAINS", + "source": "test_service.py:TenantService", + "target": "test_service.py:TenantService:_enrich" + }, + { + "type": "CALLS", + "source": "test_service.py:TenantService:_enrich", + "target": "self.config.get" + }, + { + "type": "CONTAINS", + "source": "test_service.py:TenantService", + "target": "test_service.py:TenantService:create_tenant" + }, + { + "type": "CALLS", + "source": "test_service.py:TenantService:create_tenant", + "target": "TenantConfig" + }, + { + "type": "CALLS", + "source": "test_service.py:TenantService:create_tenant", + "target": "self.db.insert" + }, + { + "type": "CONTAINS", + "source": "test_service.py", + "target": "test_service.py:health_check" + } + ] +} diff --git a/test/ground-truth/route.json b/test/ground-truth/route.json new file mode 100644 index 0000000..2d0ff82 --- /dev/null +++ b/test/ground-truth/route.json @@ -0,0 +1,234 @@ +{ + "file": "/app/src/cli/route.ts", + "language": "typescript", + "entities": [ + { + "id": "cli/route.ts", + "type": "Module", + "name": "cli/route.ts", + "kind": "module", + "visibility": "public", + "line_range": [ + 1, + 48 + ] + }, + { + "id": "dep:infra/env.js", + "type": "Dependency", + "name": "infra/env.js", + 
"kind": "import", + "visibility": "internal", + "line_range": [ + 1, + 1 + ] + }, + { + "id": "dep:runtime.js", + "type": "Dependency", + "name": "runtime.js", + "kind": "import", + "visibility": "internal", + "line_range": [ + 2, + 2 + ] + }, + { + "id": "dep:version.js", + "type": "Dependency", + "name": "version.js", + "kind": "import", + "visibility": "internal", + "line_range": [ + 3, + 3 + ] + }, + { + "id": "dep:cli/argv.js", + "type": "Dependency", + "name": "cli/argv.js", + "kind": "import", + "visibility": "internal", + "line_range": [ + 4, + 4 + ] + }, + { + "id": "dep:cli/banner.js", + "type": "Dependency", + "name": "cli/banner.js", + "kind": "import", + "visibility": "internal", + "line_range": [ + 5, + 5 + ] + }, + { + "id": "dep:cli/plugin-registry.js", + "type": "Dependency", + "name": "cli/plugin-registry.js", + "kind": "import", + "visibility": "internal", + "line_range": [ + 6, + 6 + ] + }, + { + "id": "dep:cli/program/config-guard.js", + "type": "Dependency", + "name": "cli/program/config-guard.js", + "kind": "import", + "visibility": "internal", + "line_range": [ + 7, + 7 + ] + }, + { + "id": "dep:cli/program/routes.js", + "type": "Dependency", + "name": "cli/program/routes.js", + "kind": "import", + "visibility": "internal", + "line_range": [ + 8, + 8 + ] + }, + { + "id": "cli/route.ts:prepareRoutedCommand", + "type": "Function", + "name": "prepareRoutedCommand", + "kind": "function", + "visibility": "internal", + "line_range": [ + 10, + 27 + ] + }, + { + "id": "cli/route.ts:tryRouteCli", + "type": "Function", + "name": "tryRouteCli", + "kind": "function", + "visibility": "public", + "line_range": [ + 29, + 47 + ] + } + ], + "relationships": [ + { + "type": "IMPORTS", + "source": "cli/route.ts", + "target": "dep:infra/env.js" + }, + { + "type": "IMPORTS", + "source": "cli/route.ts", + "target": "dep:runtime.js" + }, + { + "type": "IMPORTS", + "source": "cli/route.ts", + "target": "dep:version.js" + }, + { + "type": "IMPORTS", + "source": 
"cli/route.ts", + "target": "dep:cli/argv.js" + }, + { + "type": "IMPORTS", + "source": "cli/route.ts", + "target": "dep:cli/banner.js" + }, + { + "type": "IMPORTS", + "source": "cli/route.ts", + "target": "dep:cli/plugin-registry.js" + }, + { + "type": "IMPORTS", + "source": "cli/route.ts", + "target": "dep:cli/program/config-guard.js" + }, + { + "type": "IMPORTS", + "source": "cli/route.ts", + "target": "dep:cli/program/routes.js" + }, + { + "type": "CONTAINS", + "source": "cli/route.ts", + "target": "cli/route.ts:prepareRoutedCommand" + }, + { + "type": "CALLS", + "source": "cli/route.ts:prepareRoutedCommand", + "target": "hasFlag" + }, + { + "type": "CALLS", + "source": "cli/route.ts:prepareRoutedCommand", + "target": "emitCliBanner" + }, + { + "type": "CALLS", + "source": "cli/route.ts:prepareRoutedCommand", + "target": "ensureConfigReady" + }, + { + "type": "CALLS", + "source": "cli/route.ts:prepareRoutedCommand", + "target": "params.loadPlugins" + }, + { + "type": "CALLS", + "source": "cli/route.ts:prepareRoutedCommand", + "target": "ensurePluginRegistryLoaded" + }, + { + "type": "CONTAINS", + "source": "cli/route.ts", + "target": "cli/route.ts:tryRouteCli" + }, + { + "type": "CALLS", + "source": "cli/route.ts:tryRouteCli", + "target": "isTruthyEnvValue" + }, + { + "type": "CALLS", + "source": "cli/route.ts:tryRouteCli", + "target": "hasHelpOrVersion" + }, + { + "type": "CALLS", + "source": "cli/route.ts:tryRouteCli", + "target": "getCommandPathWithRootOptions" + }, + { + "type": "CALLS", + "source": "cli/route.ts:tryRouteCli", + "target": "findRoutedCommand" + }, + { + "type": "CALLS", + "source": "cli/route.ts:tryRouteCli", + "target": "prepareRoutedCommand" + }, + { + "type": "CALLS", + "source": "cli/route.ts:tryRouteCli", + "target": "route.run" + } + ] +} diff --git a/test/ground-truth/session.json b/test/ground-truth/session.json new file mode 100644 index 0000000..0f4e5e9 --- /dev/null +++ b/test/ground-truth/session.json @@ -0,0 +1,579 @@ +{ + 
"file": "/app/src/wizard/session.ts", + "language": "typescript", + "entities": [ + { + "id": "wizard/session.ts", + "type": "Module", + "name": "wizard/session.ts", + "kind": "module", + "visibility": "public", + "line_range": [ + 1, + 265 + ] + }, + { + "id": "dep:node:crypto", + "type": "Dependency", + "name": "node:crypto", + "kind": "import", + "visibility": "internal", + "line_range": [ + 1, + 1 + ] + }, + { + "id": "dep:wizard/prompts.js", + "type": "Dependency", + "name": "wizard/prompts.js", + "kind": "import", + "visibility": "internal", + "line_range": [ + 2, + 2 + ] + }, + { + "id": "wizard/session.ts:createDeferred", + "type": "Function", + "name": "createDeferred", + "kind": "function", + "visibility": "internal", + "line_range": [ + 37, + 45 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter", + "type": "Class", + "name": "WizardSessionPrompter", + "kind": "class", + "visibility": "internal", + "line_range": [ + 47, + 161 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter:constructor", + "type": "Function", + "name": "constructor", + "kind": "method", + "visibility": "public", + "line_range": [ + 48, + 48 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter:intro", + "type": "Function", + "name": "intro", + "kind": "method", + "visibility": "public", + "line_range": [ + 50, + 57 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter:outro", + "type": "Function", + "name": "outro", + "kind": "method", + "visibility": "public", + "line_range": [ + 59, + 66 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter:note", + "type": "Function", + "name": "note", + "kind": "method", + "visibility": "public", + "line_range": [ + 68, + 70 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter:select", + "type": "Function", + "name": "select", + "kind": "method", + "visibility": "public", + "line_range": [ + 72, + 89 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter:multiselect", + "type": 
"Function", + "name": "multiselect", + "kind": "method", + "visibility": "public", + "line_range": [ + 91, + 108 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter:text", + "type": "Function", + "name": "text", + "kind": "method", + "visibility": "public", + "line_range": [ + 110, + 136 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter:confirm", + "type": "Function", + "name": "confirm", + "kind": "method", + "visibility": "public", + "line_range": [ + 138, + 146 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter:progress", + "type": "Function", + "name": "progress", + "kind": "method", + "visibility": "public", + "line_range": [ + 148, + 153 + ] + }, + { + "id": "wizard/session.ts:WizardSessionPrompter:prompt", + "type": "Function", + "name": "prompt", + "kind": "method", + "visibility": "private", + "line_range": [ + 155, + 160 + ] + }, + { + "id": "wizard/session.ts:WizardSession", + "type": "Class", + "name": "WizardSession", + "kind": "class", + "visibility": "public", + "line_range": [ + 163, + 264 + ] + }, + { + "id": "wizard/session.ts:WizardSession:constructor", + "type": "Function", + "name": "constructor", + "kind": "method", + "visibility": "public", + "line_range": [ + 170, + 173 + ] + }, + { + "id": "wizard/session.ts:WizardSession:next", + "type": "Function", + "name": "next", + "kind": "method", + "visibility": "public", + "line_range": [ + 175, + 190 + ] + }, + { + "id": "wizard/session.ts:WizardSession:answer", + "type": "Function", + "name": "answer", + "kind": "method", + "visibility": "public", + "line_range": [ + 192, + 200 + ] + }, + { + "id": "wizard/session.ts:WizardSession:cancel", + "type": "Function", + "name": "cancel", + "kind": "method", + "visibility": "public", + "line_range": [ + 202, + 214 + ] + }, + { + "id": "wizard/session.ts:WizardSession:pushStep", + "type": "Function", + "name": "pushStep", + "kind": "method", + "visibility": "public", + "line_range": [ + 216, + 219 + ] + }, + { + "id": 
"wizard/session.ts:WizardSession:run", + "type": "Function", + "name": "run", + "kind": "method", + "visibility": "private", + "line_range": [ + 221, + 236 + ] + }, + { + "id": "wizard/session.ts:WizardSession:awaitAnswer", + "type": "Function", + "name": "awaitAnswer", + "kind": "method", + "visibility": "public", + "line_range": [ + 238, + 246 + ] + }, + { + "id": "wizard/session.ts:WizardSession:resolveStep", + "type": "Function", + "name": "resolveStep", + "kind": "method", + "visibility": "private", + "line_range": [ + 248, + 255 + ] + }, + { + "id": "wizard/session.ts:WizardSession:getStatus", + "type": "Function", + "name": "getStatus", + "kind": "method", + "visibility": "public", + "line_range": [ + 257, + 259 + ] + }, + { + "id": "wizard/session.ts:WizardSession:getError", + "type": "Function", + "name": "getError", + "kind": "method", + "visibility": "public", + "line_range": [ + 261, + 263 + ] + } + ], + "relationships": [ + { + "type": "IMPORTS", + "source": "wizard/session.ts", + "target": "dep:node:crypto" + }, + { + "type": "IMPORTS", + "source": "wizard/session.ts", + "target": "dep:wizard/prompts.js" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts", + "target": "wizard/session.ts:createDeferred" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts", + "target": "wizard/session.ts:WizardSessionPrompter" + }, + { + "type": "IMPLEMENTS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": "WizardPrompter" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": "wizard/session.ts:WizardSessionPrompter:constructor" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": "wizard/session.ts:WizardSessionPrompter:intro" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:intro", + "target": "this.prompt" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": 
"wizard/session.ts:WizardSessionPrompter:outro" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:outro", + "target": "this.prompt" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": "wizard/session.ts:WizardSessionPrompter:note" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:note", + "target": "this.prompt" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": "wizard/session.ts:WizardSessionPrompter:select" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:select", + "target": "this.prompt" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:select", + "target": "params.options.map" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": "wizard/session.ts:WizardSessionPrompter:multiselect" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:multiselect", + "target": "this.prompt" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:multiselect", + "target": "params.options.map" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:multiselect", + "target": "Array.isArray" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": "wizard/session.ts:WizardSessionPrompter:text" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:text", + "target": "this.prompt" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:text", + "target": "String" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:text", + "target": "params.validate" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": "wizard/session.ts:WizardSessionPrompter:confirm" + }, + { + "type": "CALLS", + 
"source": "wizard/session.ts:WizardSessionPrompter:confirm", + "target": "this.prompt" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:confirm", + "target": "Boolean" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": "wizard/session.ts:WizardSessionPrompter:progress" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSessionPrompter", + "target": "wizard/session.ts:WizardSessionPrompter:prompt" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:prompt", + "target": "this.session.awaitAnswer" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSessionPrompter:prompt", + "target": "randomUUID" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts", + "target": "wizard/session.ts:WizardSession" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSession", + "target": "wizard/session.ts:WizardSession:constructor" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:constructor", + "target": "this.run" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSession", + "target": "wizard/session.ts:WizardSession:next" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:next", + "target": "createDeferred" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSession", + "target": "wizard/session.ts:WizardSession:answer" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:answer", + "target": "this.answerDeferred.get" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:answer", + "target": "this.answerDeferred.delete" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:answer", + "target": "deferred.resolve" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSession", + "target": "wizard/session.ts:WizardSession:cancel" + }, + { + "type": "CALLS", + "source": 
"wizard/session.ts:WizardSession:cancel", + "target": "deferred.reject" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:cancel", + "target": "this.answerDeferred.clear" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:cancel", + "target": "this.resolveStep" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSession", + "target": "wizard/session.ts:WizardSession:pushStep" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:pushStep", + "target": "this.resolveStep" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSession", + "target": "wizard/session.ts:WizardSession:run" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:run", + "target": "this.runner" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:run", + "target": "String" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:run", + "target": "this.resolveStep" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSession", + "target": "wizard/session.ts:WizardSession:awaitAnswer" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:awaitAnswer", + "target": "this.pushStep" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:awaitAnswer", + "target": "createDeferred" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:awaitAnswer", + "target": "this.answerDeferred.set" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSession", + "target": "wizard/session.ts:WizardSession:resolveStep" + }, + { + "type": "CALLS", + "source": "wizard/session.ts:WizardSession:resolveStep", + "target": "deferred.resolve" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSession", + "target": "wizard/session.ts:WizardSession:getStatus" + }, + { + "type": "CONTAINS", + "source": "wizard/session.ts:WizardSession", + "target": 
"wizard/session.ts:WizardSession:getError" + } + ] +} diff --git a/test/ground-truth/terraform-main.json b/test/ground-truth/terraform-main.json new file mode 100644 index 0000000..f25a4b3 --- /dev/null +++ b/test/ground-truth/terraform-main.json @@ -0,0 +1,83 @@ +{ + "file": "/tmp/main.tf", + "language": "hcl", + "entities": [ + { + "id": "main.tf", + "type": "Config", + "name": "main.tf", + "kind": "terraform", + "visibility": "public", + "line_range": [ + 1, + 18 + ] + }, + { + "id": "main.tf:provider.aws", + "type": "Config", + "name": "provider.aws", + "kind": "hcl-block", + "visibility": "public", + "line_range": [ + 1, + 1 + ] + }, + { + "id": "main.tf:resource.aws_s3_bucket.b", + "type": "Config", + "name": "resource.aws_s3_bucket.b", + "kind": "hcl-block", + "visibility": "public", + "line_range": [ + 5, + 5 + ] + }, + { + "id": "main.tf:module.vpc", + "type": "Config", + "name": "module.vpc", + "kind": "hcl-block", + "visibility": "public", + "line_range": [ + 9, + 9 + ] + }, + { + "id": "main.tf:data.aws_iam_policy_document.assume_role", + "type": "Config", + "name": "data.aws_iam_policy_document.assume_role", + "kind": "hcl-block", + "visibility": "public", + "line_range": [ + 13, + 13 + ] + } + ], + "relationships": [ + { + "type": "CONTAINS", + "source": "main.tf", + "target": "main.tf:provider.aws" + }, + { + "type": "CONTAINS", + "source": "main.tf", + "target": "main.tf:resource.aws_s3_bucket.b" + }, + { + "type": "CONTAINS", + "source": "main.tf", + "target": "main.tf:module.vpc" + }, + { + "type": "CONTAINS", + "source": "main.tf", + "target": "main.tf:data.aws_iam_policy_document.assume_role" + } + ] +} diff --git a/test/ground-truth/yaml-deployment.json b/test/ground-truth/yaml-deployment.json new file mode 100644 index 0000000..91e51c0 --- /dev/null +++ b/test/ground-truth/yaml-deployment.json @@ -0,0 +1,83 @@ +{ + "file": "/tmp/test_deployment.yaml", + "language": "yaml", + "entities": [ + { + "id": "test_deployment.yaml", + "type": "Config", 
#!/bin/bash
# Ground-truth benchmark runner.
#
# Runs validate-ground-truth.js against every fixture in test/ground-truth/
# and prints one PASS/FAIL line per fixture followed by a tally.
# Exit status: 0 when every fixture passes; 1 when any fixture fails, when no
# fixture is found, or when the repository root cannot be entered.

# Work from the repository root so the relative paths below resolve regardless
# of where the script is invoked from.
cd "$(dirname "$0")/.." || exit 1

# Without nullglob an empty fixture directory would make the loop run once
# with the literal pattern as the file name.
shopt -s nullglob

PASS=0
FAIL=0
TOTAL=0

echo "=== Dev Intel v2 — Ground Truth Benchmark Suite ==="
echo ""

for gt in test/ground-truth/*.json; do
  name=$(basename "$gt" .json)
  TOTAL=$((TOTAL + 1))
  # The verdict is taken from the validator's output (a line starting with
  # PASS), not from its exit status.
  result=$(node validate-ground-truth.js "$gt" 2>&1)
  if printf '%s\n' "$result" | grep -q "^PASS"; then
    echo "✅ PASS $name"
    PASS=$((PASS + 1))
  else
    echo "❌ FAIL $name"
    details=$(printf '%s\n' "$result" | grep -E "^(Entities|Relationships|Missing)")
    # If none of the expected summary lines are present (e.g. the validator
    # crashed), show its raw output instead of printing nothing.
    if [ -n "${details:-$result}" ]; then
      printf '%s\n' "${details:-$result}" | sed 's/^/ /'
    fi
    FAIL=$((FAIL + 1))
  fi
done

echo ""
echo "=== Results: $PASS/$TOTAL passed, $FAIL failed ==="

if [ "$TOTAL" -eq 0 ]; then
  echo "No ground-truth fixtures found in test/ground-truth/" >&2
  exit 1
fi

if [ "$FAIL" -gt 0 ]; then
  exit 1
fi
// Running tallies, updated by runTest().
let passed = 0;
let total = 0;

/**
 * Run one synchronous test case and record the outcome.
 *
 * Increments `total`, calls `fn`, and increments `passed` if `fn` returns
 * without throwing. Prints one PASS or FAIL line per test. A thrown value is
 * never re-thrown, so one failing test does not stop the ones after it.
 *
 * @param {string} name - Label printed next to the verdict.
 * @param {Function} fn - Test body; signals failure by throwing.
 */
function runTest(name, fn) {
  total++;
  try {
    fn();
  } catch (err) {
    // `throw null`, `throw undefined` and thrown primitives have no usable
    // `.message`; stringify them so the harness itself cannot crash here.
    const reason = err != null && err.message !== undefined ? err.message : String(err);
    console.log(`❌ FAIL ${name}: ${reason}`);
    return;
  }
  console.log(`✅ PASS ${name}`);
  passed++;
}

// Mock extract results shared by the test cases.
// Each has one Module entity plus one Function entity.
const mockFile1 = {
  file: 'file1.ts',
  language: 'typescript',
  entities: [
    { id: 'file1.ts', type: 'Module', name: 'file1.ts', visibility: 'public' },
    { id: 'func1', type: 'Function', name: 'func1', visibility: 'public' }
  ],
  relationships: [
    { type: 'CONTAINS', source: 'file1.ts', target: 'func1' },
    { type: 'CALLS', source: 'func1', target: 'func2' }
  ]
};

const mockFile2 = {
  file: 'file2.ts',
  language: 'typescript',
  entities: [
    { id: 'file2.ts', type: 'Module', name: 'file2.ts', visibility: 'public' },
    { id: 'func2', type: 'Function', name: 'func2', visibility: 'private' }
  ],
  relationships: [
    { type: 'CONTAINS', source: 'file2.ts', target: 'func2' },
    { type: 'IMPORTS', source: 'file2.ts', target: 'file1.ts' },
    { type: 'CALLS', source: 'func2', target: 'func1' },
    // Duplicate edge from file1 to test deduplication
    { type: 'CALLS', source: 'func1', target: 'func2' }
  ]
};
'file1.ts' }, + { type: 'CALLS', source: 'func2', target: 'func1' }, + // Duplicate edge from file1 to test deduplication + { type: 'CALLS', source: 'func1', target: 'func2' } + ] +}; + +// --- 1. buildGraph() --- + +runTest('buildGraph: Empty input (no results) -> empty graph', () => { + const graph = GraphStore.buildGraph([]); + assert.strictEqual(graph.nodes.size, 0); + assert.strictEqual(graph.edges.length, 0); + assert.strictEqual(graph.fileIndex.size, 0); +}); + +runTest('buildGraph: Single file extraction -> correct node count, edge count', () => { + const graph = GraphStore.buildGraph([mockFile1]); + assert.strictEqual(graph.nodes.size, 2); + assert.strictEqual(graph.edges.length, 2); +}); + +runTest('buildGraph: Multiple file extractions -> merges correctly, no duplicate nodes', () => { + // Pass mockFile1 twice to test node deduplication by ID Map + const graph = GraphStore.buildGraph([mockFile1, mockFile2, mockFile1]); + assert.strictEqual(graph.nodes.size, 4); // file1.ts, func1, file2.ts, func2 +}); + +runTest('buildGraph: Duplicate edges from multiple files are deduplicated', () => { + const graph = GraphStore.buildGraph([mockFile1, mockFile2]); + // Expected edges: + // file1 CONTAINS func1 + // func1 CALLS func2 + // file2 CONTAINS func2 + // file2 IMPORTS file1 + // func2 CALLS func1 + // The second func1 CALLS func2 in mockFile2 should be ignored + assert.strictEqual(graph.edges.length, 5); + const callsFunc2 = graph.edges.filter(e => e.source === 'func1' && e.target === 'func2' && e.type === 'CALLS'); + assert.strictEqual(callsFunc2.length, 1); +}); + +runTest('buildGraph: fileIndex is correctly populated', () => { + const graph = GraphStore.buildGraph([mockFile1, mockFile2]); + assert.strictEqual(graph.fileIndex.size, 2); + assert.ok(graph.fileIndex.get('file1.ts').has('func1')); + assert.ok(graph.fileIndex.get('file2.ts').has('func2')); +}); + +// --- 2. 
// saveSnapshot() / loadSnapshot() ---

// Scratch file written next to this test script by the snapshot tests.
const SNAPSHOT_PATH = path.join(__dirname, 'test-snapshot.json');

/** Deletes the scratch snapshot file if it exists. */
function removeSnapshot() {
  if (fs.existsSync(SNAPSHOT_PATH)) fs.unlinkSync(SNAPSHOT_PATH);
}

runTest('saveSnapshot/loadSnapshot: Round-trip -> verify nodes, edges, fileIndex match exactly', () => {
  const graph = GraphStore.buildGraph([mockFile1, mockFile2]);
  // Cleanup lives in `finally` so a failing assertion cannot leave the
  // snapshot file behind for later runs.
  try {
    GraphStore.saveSnapshot(graph, SNAPSHOT_PATH);

    const loaded = GraphStore.loadSnapshot(SNAPSHOT_PATH);
    assert.strictEqual(loaded.nodes.size, graph.nodes.size);
    assert.strictEqual(loaded.edges.length, graph.edges.length);
    assert.strictEqual(loaded.fileIndex.size, graph.fileIndex.size);

    assert.deepStrictEqual(loaded.nodes.get('func1'), graph.nodes.get('func1'));
    assert.deepStrictEqual(loaded.edges, graph.edges);
    assert.deepStrictEqual(Array.from(loaded.fileIndex.get('file1.ts')), Array.from(graph.fileIndex.get('file1.ts')));
  } finally {
    removeSnapshot();
  }
});

runTest('saveSnapshot/loadSnapshot: Save creates valid JSON file', () => {
  const graph = GraphStore.buildGraph([mockFile1]);
  try {
    GraphStore.saveSnapshot(graph, SNAPSHOT_PATH);

    const content = fs.readFileSync(SNAPSHOT_PATH, 'utf8');
    assert.doesNotThrow(() => JSON.parse(content));
  } finally {
    removeSnapshot();
  }
});

runTest('saveSnapshot/loadSnapshot: Load from non-existent file throws', () => {
  assert.throws(() => {
    GraphStore.loadSnapshot('does-not-exist.json');
  });
});

// --- 3.
// query() ---

runTest('query: Query existing entity -> returns entity + correct incoming/outgoing edges', () => {
  const found = GraphStore.query(GraphStore.buildGraph([mockFile1, mockFile2]), 'func1');

  assert.ok(found);
  const { entity, incoming, outgoing } = found;
  assert.strictEqual(entity.id, 'func1');

  // Two edges point at func1: file1 CONTAINS func1 and func2 CALLS func1.
  assert.strictEqual(incoming.length, 2);

  // One edge leaves func1: func1 CALLS func2.
  assert.strictEqual(outgoing.length, 1);
});

runTest('query: Query non-existent entity -> returns null', () => {
  const missing = GraphStore.query(GraphStore.buildGraph([mockFile1]), 'non-existent');
  assert.strictEqual(missing, null);
});

runTest('query: Entity with no edges -> returns entity with empty incoming/outgoing arrays', () => {
  // A single module with no relationships at all.
  const loneModule = { id: 'iso', type: 'Module' };
  const graph = GraphStore.buildGraph([
    { file: 'iso.ts', entities: [loneModule], relationships: [] }
  ]);
  const found = GraphStore.query(graph, 'iso');

  assert.ok(found);
  for (const direction of ['incoming', 'outgoing']) {
    assert.strictEqual(found[direction].length, 0);
  }
});

// --- 4.
// findCallers() ---

/**
 * Builds a one-file graph holding the entities `target`, `c1` and `c2`
 * connected by the given relationships.
 */
function buildTargetGraph(file, relationships) {
  const entities = [{ id: 'target' }, { id: 'c1' }, { id: 'c2' }];
  return GraphStore.buildGraph([{ file, entities, relationships }]);
}

runTest('findCallers: Function with multiple callers -> returns all', () => {
  const graph = buildTargetGraph('multi.ts', [
    { type: 'CALLS', source: 'c1', target: 'target' },
    { type: 'CALLS', source: 'c2', target: 'target' }
  ]);
  const callers = GraphStore.findCallers(graph, 'target');

  assert.strictEqual(callers.length, 2);
  const callerIds = callers.map(({ id }) => id);
  assert.ok(['c1', 'c2'].every(id => callerIds.includes(id)));
});

runTest('findCallers: Function with no callers -> returns empty array', () => {
  const callers = GraphStore.findCallers(GraphStore.buildGraph([mockFile1]), 'func1');
  assert.strictEqual(callers.length, 0);
});

runTest('findCallers: Only returns CALLS edges, not CONTAINS or IMPORTS', () => {
  // c2 reaches the target only through non-CALLS edges, so it must be left out.
  const graph = buildTargetGraph('mixed.ts', [
    { type: 'CALLS', source: 'c1', target: 'target' },
    { type: 'CONTAINS', source: 'c2', target: 'target' },
    { type: 'IMPORTS', source: 'c2', target: 'target' }
  ]);
  const callers = GraphStore.findCallers(graph, 'target');

  assert.strictEqual(callers.length, 1);
  const [onlyCaller] = callers;
  assert.strictEqual(onlyCaller.id, 'c1');
});

// --- 5.
// findDependents() ---

runTest('findDependents: Module with dependents -> returns all', () => {
  const twoFileGraph = GraphStore.buildGraph([mockFile1, mockFile2]);
  const importers = GraphStore.findDependents(twoFileGraph, 'file1.ts');

  // file2.ts is the only module with an IMPORTS edge to file1.ts.
  assert.strictEqual(importers.length, 1);
  assert.strictEqual(importers[0].id, 'file2.ts');
});

runTest('findDependents: Module with no dependents -> returns empty array', () => {
  const twoFileGraph = GraphStore.buildGraph([mockFile1, mockFile2]);
  const importers = GraphStore.findDependents(twoFileGraph, 'file2.ts');
  assert.strictEqual(importers.length, 0);
});

// --- 6. getExports() ---

runTest('getExports: File with mix of public/private entities -> returns only public', () => {
  const twoFileGraph = GraphStore.buildGraph([mockFile1, mockFile2]);

  // file1.ts: the module entity and func1 are both public.
  const publicInFile1 = GraphStore.getExports(twoFileGraph, 'file1.ts');
  assert.strictEqual(publicInFile1.length, 2);

  // file2.ts: func2 is private, leaving only the module entity.
  const publicInFile2 = GraphStore.getExports(twoFileGraph, 'file2.ts');
  assert.strictEqual(publicInFile2.length, 1);
  const [soleExport] = publicInFile2;
  assert.strictEqual(soleExport.id, 'file2.ts');
});

runTest('getExports: Non-existent file -> returns empty array', () => {
  const publicEntities = GraphStore.getExports(GraphStore.buildGraph([mockFile1]), 'missing.ts');
  assert.strictEqual(publicEntities.length, 0);
});

// --- 7.
// diffSnapshots() ---

/**
 * Builds a "before" and an "after" graph from the given extraction results
 * (in that order) and returns the diff between them.
 */
function diffOf(beforeFiles, afterFiles) {
  const before = GraphStore.buildGraph(beforeFiles);
  const after = GraphStore.buildGraph(afterFiles);
  return GraphStore.diffSnapshots(before, after);
}

runTest('diffSnapshots: Identical graphs -> empty diff', () => {
  const { entities, relationships } = diffOf([mockFile1], [mockFile1]);

  assert.strictEqual(entities.added.length, 0);
  assert.strictEqual(entities.removed.length, 0);
  assert.strictEqual(entities.modified.length, 0);
  assert.strictEqual(relationships.added.length, 0);
  assert.strictEqual(relationships.removed.length, 0);
});

runTest('diffSnapshots: Added entities -> appear in diff.entities.added', () => {
  const { added } = diffOf([mockFile1], [mockFile1, mockFile2]).entities;

  assert.strictEqual(added.length, 2); // file2.ts, func2
  const addedIds = added.map(({ id }) => id);
  assert.ok(['file2.ts', 'func2'].every(id => addedIds.includes(id)));
});

runTest('diffSnapshots: Removed entities -> appear in diff.entities.removed', () => {
  const { removed } = diffOf([mockFile1, mockFile2], [mockFile1]).entities;

  assert.strictEqual(removed.length, 2);
  const removedIds = removed.map(({ id }) => id);
  assert.ok(['file2.ts', 'func2'].every(id => removedIds.includes(id)));
});

runTest('diffSnapshots: Modified entities (e.g. changed line_range) -> appear in diff.entities.modified', () => {
  // Work on a deep copy so the shared mockFile1 fixture stays untouched.
  const editedFile1 = JSON.parse(JSON.stringify(mockFile1));
  editedFile1.entities[1].line_range = [10, 20];

  const { modified } = diffOf([mockFile1], [editedFile1]).entities;

  assert.strictEqual(modified.length, 1);
  assert.strictEqual(modified[0].old.id, 'func1');
});

runTest('diffSnapshots: Added relationships -> appear in diff.relationships.added', () => {
  const { added } = diffOf([mockFile1], [mockFile1, mockFile2]).relationships;

  // file2 relationships will be added (3 of them: CONTAINS, IMPORTS, CALLS)
  assert.strictEqual(added.length, 3);
});

runTest('diffSnapshots: Removed relationships -> appear in diff.relationships.removed', () => {
  const { removed } = diffOf([mockFile1, mockFile2], [mockFile1]).relationships;

  assert.strictEqual(removed.length, 3);
});

// --- 8.
// Integration test ---

runTest('Integration test: extract.js on mask-api-key.ts -> buildGraph -> query', () => {
  // Launch the extractor without a shell: arguments are passed as an array, so
  // a checkout path containing spaces or shell metacharacters cannot break
  // the command line.
  const { execFileSync } = require('child_process');
  const output = execFileSync(
    process.execPath,
    [path.join(__dirname, '../extract.js'), '/app/src/utils/mask-api-key.ts', '/app/src'],
    { encoding: 'utf8' }
  );
  const resultObj = JSON.parse(output);

  const graph = GraphStore.buildGraph([resultObj]);
  const entityId = 'utils/mask-api-key.ts:maskApiKey';

  const queryResult = GraphStore.query(graph, entityId);
  assert.ok(queryResult);
  assert.strictEqual(queryResult.entity.name, 'maskApiKey');
  assert.strictEqual(queryResult.outgoing.length, 2); // CALLS value.trim, CALLS trimmed.slice

  const callTargets = queryResult.outgoing.map(e => e.target);
  assert.ok(callTargets.includes('value.trim'));
  assert.ok(callTargets.includes('trimmed.slice'));
});

console.log(`\n${passed}/${total} tests passed.`);
if (passed !== total) process.exit(1);

// diff --git a/validate-ground-truth.js b/validate-ground-truth.js
// new file mode 100644
// index 0000000..9d90963
// --- /dev/null
// +++ b/validate-ground-truth.js
// @@ -0,0 +1,82 @@

// Validates extract.js against one ground-truth JSON file: runs the extractor
// on the file named in the ground truth, compares entities (by id) and
// relationships (by type/source/target), prints precision/recall/F1, and
// exits 0 on PASS or 1 on FAIL.
const fs = require('fs');
const path = require('path');
const { execFileSync } = require('child_process');

// Minimum F1 scores required for a PASS.
const ENTITY_F1_THRESHOLD = 0.90;
const RELATIONSHIP_F1_THRESHOLD = 0.85;

/**
 * Computes precision, recall and F1 from match counts.
 * A zero denominator is replaced by 1, so empty inputs score 0 rather than NaN.
 *
 * @param {number} correct - Ground-truth items found in the actual output.
 * @param {number} actualCount - Number of items the extractor produced.
 * @param {number} expectedCount - Number of items in the ground truth.
 * @returns {{precision: number, recall: number, f1: number}}
 */
function score(correct, actualCount, expectedCount) {
  const precision = correct / (actualCount || 1);
  const recall = correct / (expectedCount || 1);
  const f1 = (2 * precision * recall) / (precision + recall || 1);
  return { precision, recall, f1 };
}

const groundTruthPath = process.argv[2];
if (!groundTruthPath) {
  console.error('Usage: node validate-ground-truth.js <ground-truth.json>');
  process.exit(1);
}

const gt = JSON.parse(fs.readFileSync(groundTruthPath, 'utf8'));
const filePath = gt.file;

// Infer repo root from the ground truth: the module entity's ID is the relative path
const moduleEntity = gt.entities.find(e => e.type === 'Module' || e.type === 'Config');
let repoRoot = '/app/src';
if (moduleEntity) {
  // filePath = /tmp/test_service.py, moduleEntity.id = test_service.py → repoRoot = /tmp
  // filePath = /app/src/cli/route.ts, moduleEntity.id = cli/route.ts → repoRoot = /app/src
  const expectedRelPath = moduleEntity.id;
  if (filePath.endsWith(expectedRelPath)) {
    repoRoot = filePath.slice(0, filePath.length - expectedRelPath.length);
    if (repoRoot.endsWith('/')) repoRoot = repoRoot.slice(0, -1);
  }
}

// Both paths come from the ground-truth JSON, so they are handed to the child
// process as discrete arguments instead of being spliced into a shell string.
const out = execFileSync(
  process.execPath,
  [path.join(__dirname, 'extract.js'), filePath, repoRoot],
  { encoding: 'utf8' }
);
const actual = JSON.parse(out);

// --- Entity Matching (by ID) ---
let correctEntities = 0;
const matchedActualEntities = new Set();
for (const ge of gt.entities) {
  const match = actual.entities.find(ae => ae.id === ge.id);
  if (match) {
    correctEntities++;
    matchedActualEntities.add(match.id);
  } else {
    console.log(`Missing entity: ${ge.id}`);
  }
}
const extraEntities = actual.entities.filter(ae => !matchedActualEntities.has(ae.id));
for (const e of extraEntities) {
  console.log(`Extra entity: ${e.id}`);
}

const {
  precision: entityPrecision,
  recall: entityRecall,
  f1: entityF1
} = score(correctEntities, actual.entities.length, gt.entities.length);

// --- Relationship Matching ---
let correctRelationships = 0;
// Indices (not ids) are tracked because relationships have no unique key.
const matchedActualRels = new Set();
for (const gr of gt.relationships) {
  const idx = actual.relationships.findIndex(ar => ar.type === gr.type && ar.source === gr.source && ar.target === gr.target);
  if (idx >= 0) {
    correctRelationships++;
    matchedActualRels.add(idx);
  } else {
    console.log(`Missing relationship: ${gr.type} ${gr.source} -> ${gr.target}`);
  }
}
const extraRels = actual.relationships.filter((_, i) => !matchedActualRels.has(i));
for (const r of extraRels) {
  console.log(`Extra relationship: ${r.type} ${r.source} -> ${r.target}`);
}

const {
  precision: relPrecision,
  recall: relRecall,
  f1: relF1
} = score(correctRelationships, actual.relationships.length, gt.relationships.length);

console.log(`Entities: P=${entityPrecision.toFixed(2)}, R=${entityRecall.toFixed(2)}, F1=${entityF1.toFixed(2)}`);
console.log(`Relationships: P=${relPrecision.toFixed(2)}, R=${relRecall.toFixed(2)}, F1=${relF1.toFixed(2)}`);

if (entityF1 >= ENTITY_F1_THRESHOLD && relF1 >= RELATIONSHIP_F1_THRESHOLD) {
  console.log("PASS");
  process.exit(0);
} else {
  console.log("FAIL");
  process.exit(1);
}