/*
 * Phase 1: extract.js — tree-sitter AST parser (TS/JS/Python/Go/Java/Bash) plus config parsers (YAML/HCL)
 * Phase 2: graph.js — in-memory directed graph store with build/query/diff CLI
 * Phase 3: namespace.js — cross-repo namespace registry with 3-tier resolution
 * Phase 4: semantic-diff.js — categorized diffs with impact scoring (0-100)
 * Phase 5: pipeline.js — batch extraction, incremental diffing, benchmarking
 *
 * Benchmark: 4,325 files, 21,646 nodes, 133,979 edges in 67 s (15 ms/file)
 * BMad SPA reviews: all phases GO
 *
 * This file: 807 lines, 28 KiB, JavaScript
 */
const fs = require('fs');
|
|
const path = require('path');
|
|
const Parser = require('tree-sitter');
|
|
const jsYaml = require('js-yaml');
|
|
|
|
// --- Language Grammars (tree-sitter for code only) ---
|
|
// tree-sitter grammars for the source-code languages, keyed by the language
// ids that EXT_MAP produces. YAML and HCL have no entry: `extract` routes
// them to the dedicated config parsers instead.
const GRAMMARS = {
  typescript: require('tree-sitter-typescript').typescript,
  tsx: require('tree-sitter-typescript').tsx,
  javascript: require('tree-sitter-javascript'),
  python: require('tree-sitter-python'),
  java: require('tree-sitter-java'),
  go: require('tree-sitter-go'),
  bash: require('tree-sitter-bash'),
};
|
|
|
|
const { extractYaml, extractHcl } = require('./extract-config.js');
|
|
|
|
// File extension (as returned by path.extname, leading dot included) -> language id.
const EXT_MAP = {
  '.ts': 'typescript',
  '.tsx': 'tsx',
  '.js': 'javascript',
  '.jsx': 'javascript',
  '.py': 'python',
  '.java': 'java',
  '.go': 'go',
  '.sh': 'bash',
  '.bash': 'bash',
  '.yaml': 'yaml',
  '.yml': 'yaml',
  '.tf': 'hcl',
  '.hcl': 'hcl',
  '.kcl': 'yaml', // KCL has no tree-sitter grammar; parse as YAML (structural approximation)
};
|
|
|
|
// --- Language Adapters ---
|
|
// Each adapter defines node types for that language's AST
|
|
// Per-language lookup tables of tree-sitter node-type and field names.
// Every adapter carries the same set of keys; `null` / `[]` mean the language
// has no equivalent construct for that slot.
const ADAPTERS = {
  typescript: {
    classNodes: ['class_declaration'],
    functionNodes: ['function_declaration'],
    arrowFuncParent: 'lexical_declaration',
    methodNodes: ['method_definition'],
    fieldNodes: ['public_field_definition'],
    importNodes: ['import_statement'],
    requireFunc: 'require',
    exportWrapper: 'export_statement',
    varDecl: ['lexical_declaration', 'variable_declaration'],
    callExpr: 'call_expression',
    funcField: 'function',
    nameField: 'name',
    bodyField: 'body',
    sourceField: 'source',
    valueField: 'value',
    // NOTE(review): newer tree-sitter-javascript/typescript releases appear to
    // name function expressions `function_expression` instead of `function`;
    // both are accepted so either grammar version works — confirm against the
    // installed grammar's node-types.json.
    arrowTypes: ['arrow_function', 'function', 'function_expression'],
    accessModifier: 'accessibility_modifier',
    heritage: 'class_heritage',
    implementsClause: 'implements_clause',
  },
  python: {
    classNodes: ['class_definition'],
    functionNodes: ['function_definition'],
    arrowFuncParent: null,
    methodNodes: [], // methods are function_definition inside class
    fieldNodes: [],
    importNodes: ['import_statement', 'import_from_statement'],
    requireFunc: null,
    exportWrapper: null,
    varDecl: ['assignment', 'augmented_assignment'],
    callExpr: 'call',
    funcField: 'function',
    nameField: 'name',
    bodyField: 'body',
    sourceField: null,
    valueField: 'right',
    arrowTypes: ['lambda'],
    accessModifier: null,
    heritage: null,
    implementsClause: null,
  },
  java: {
    classNodes: ['class_declaration', 'interface_declaration', 'enum_declaration'],
    functionNodes: ['method_declaration', 'constructor_declaration'],
    arrowFuncParent: null,
    methodNodes: ['method_declaration', 'constructor_declaration'],
    fieldNodes: ['field_declaration'],
    importNodes: ['import_declaration'],
    requireFunc: null,
    exportWrapper: null,
    varDecl: ['local_variable_declaration', 'field_declaration'],
    callExpr: 'method_invocation',
    funcField: 'name',
    nameField: 'name',
    bodyField: 'body',
    sourceField: null,
    valueField: null,
    arrowTypes: ['lambda_expression'],
    accessModifier: 'modifiers',
    heritage: 'superclass',
    implementsClause: 'super_interfaces',
  },
  go: {
    classNodes: ['type_declaration'], // struct types
    functionNodes: ['function_declaration', 'method_declaration'],
    arrowFuncParent: null,
    methodNodes: ['method_declaration'],
    fieldNodes: [],
    importNodes: ['import_declaration'],
    requireFunc: null,
    exportWrapper: null,
    varDecl: ['var_declaration', 'short_var_declaration', 'const_declaration'],
    callExpr: 'call_expression',
    funcField: 'function',
    nameField: 'name',
    bodyField: 'body',
    sourceField: 'path',
    valueField: null,
    arrowTypes: ['func_literal'],
    accessModifier: null,
    heritage: null,
    implementsClause: null,
  },
  yaml: {
    classNodes: [],
    functionNodes: [],
    arrowFuncParent: null,
    methodNodes: [],
    fieldNodes: [],
    importNodes: [],
    requireFunc: null,
    exportWrapper: null,
    varDecl: [],
    callExpr: null,
    funcField: null,
    nameField: null,
    bodyField: null,
    sourceField: null,
    valueField: null,
    arrowTypes: [],
    accessModifier: null,
    heritage: null,
    implementsClause: null,
  },
  hcl: {
    classNodes: [],
    functionNodes: [],
    arrowFuncParent: null,
    methodNodes: [],
    fieldNodes: [],
    importNodes: [],
    requireFunc: null,
    exportWrapper: null,
    varDecl: [],
    callExpr: 'function_call',
    funcField: null,
    nameField: null,
    bodyField: 'body',
    sourceField: null,
    valueField: null,
    arrowTypes: [],
    accessModifier: null,
    heritage: null,
    implementsClause: null,
  },
  bash: {
    classNodes: [],
    functionNodes: ['function_definition'],
    arrowFuncParent: null,
    methodNodes: [],
    fieldNodes: [],
    importNodes: [],
    requireFunc: null,
    exportWrapper: null,
    varDecl: ['variable_assignment'],
    callExpr: 'command',
    funcField: 'name',
    nameField: 'name',
    bodyField: 'body',
    sourceField: null,
    valueField: null,
    arrowTypes: [],
    accessModifier: null,
    heritage: null,
    implementsClause: null,
  },
};

// Alias adapters: TSX and JavaScript share the TypeScript adapter object
// (same reference, not a copy).
ADAPTERS.tsx = ADAPTERS.typescript;
ADAPTERS.javascript = ADAPTERS.typescript;
|
|
|
|
// --- Core Extractor ---
|
|
function extract(filePath, repoRoot) {
|
|
const ext = path.extname(filePath);
|
|
const lang = EXT_MAP[ext];
|
|
if (!lang) {
|
|
console.error(`Unsupported extension: ${ext}`);
|
|
process.exit(1);
|
|
}
|
|
|
|
if (lang === 'yaml') return extractYaml(filePath, repoRoot);
|
|
if (lang === 'hcl') return extractHcl(filePath, repoRoot);
|
|
|
|
const grammar = GRAMMARS[lang];
|
|
const adapter = ADAPTERS[lang];
|
|
if (!grammar || !adapter) {
|
|
console.error(`No grammar/adapter for: ${lang}`);
|
|
process.exit(1);
|
|
}
|
|
|
|
const parser = new Parser();
|
|
parser.setLanguage(grammar);
|
|
|
|
let sourceCode;
|
|
try {
|
|
sourceCode = fs.readFileSync(filePath, 'utf8');
|
|
} catch (err) {
|
|
console.error(`Failed to read ${filePath}: ${err.message}`);
|
|
return { file: filePath, language: lang, entities: [], relationships: [], error: err.message };
|
|
}
|
|
|
|
let tree;
|
|
try {
|
|
tree = parser.parse(sourceCode);
|
|
} catch (err) {
|
|
console.error(`Failed to parse ${filePath}: ${err.message}`);
|
|
return { file: filePath, language: lang, entities: [], relationships: [], error: err.message };
|
|
}
|
|
|
|
const relPath = path.relative(repoRoot, filePath);
|
|
const moduleId = relPath;
|
|
const entities = [];
|
|
const relationships = [];
|
|
|
|
/**
 * Return the slice of the parsed source covered by a syntax node.
 * @param {{startIndex: number, endIndex: number}} node
 * @returns {string}
 */
function getText(node) {
  return sourceCode.substring(node.startIndex, node.endIndex);
}
|
|
|
|
/**
 * Convert a node's zero-based start/end rows into a 1-based
 * [firstLine, lastLine] pair.
 * @param {{startPosition: {row: number}, endPosition: {row: number}}} node
 * @returns {[number, number]}
 */
function lineRange(node) {
  return [node.startPosition.row + 1, node.endPosition.row + 1];
}
|
|
|
|
/**
 * Decide whether a declaration node is visible outside its module.
 *
 * - JS/TS (adapters with an export wrapper): true when the node sits directly
 *   inside an export statement, or when one of the names it declares is
 *   assigned via `module.exports.<name>` / `exports.<name>` or mentioned on
 *   the right-hand side of a top-level `module.exports = ...`. The last check
 *   is a textual heuristic on whole identifiers, not a scope analysis.
 * - Go: true when the declared name starts with an uppercase letter.
 * - Java: true when the `modifiers` child contains the word `public`.
 * - Everything else (e.g. Python): always true.
 *
 * @param {object} node tree-sitter syntax node
 * @returns {boolean}
 */
function isExported(node) {
  // Whole-identifier match, so `a` is not "found" inside `data`.
  const mentionsIdentifier = (text, name) => {
    const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`(?<![\\w$])${escaped}(?![\\w$])`).test(text);
  };

  if (adapter.exportWrapper) {
    // ES module export
    if (node.parent && node.parent.type === adapter.exportWrapper) return true;

    // Names declared by this node: its own `name` field, or — for
    // `const foo = () => {}` declarations, which have no `name` field —
    // the name of each variable declarator.
    const names = [];
    const ownName = node.childForFieldName('name');
    if (ownName) {
      names.push(getText(ownName));
    } else {
      for (const child of node.children) {
        if (child.type !== 'variable_declarator') continue;
        const declName = child.childForFieldName('name');
        if (declName) names.push(getText(declName));
      }
    }
    if (names.length === 0) return false;

    // CommonJS: scan top-level `module.exports...` / `exports...` assignments.
    for (const stmt of tree.rootNode.children) {
      if (stmt.type !== 'expression_statement') continue;
      const expr = stmt.children[0];
      if (!expr || expr.type !== 'assignment_expression') continue;
      const left = expr.childForFieldName('left');
      if (!left) continue;

      const leftText = getText(left);
      const right = leftText === 'module.exports' ? expr.childForFieldName('right') : null;
      const rightText = right ? getText(right) : null;

      for (const name of names) {
        // module.exports.foo = ... or exports.foo = ...
        if (leftText === `module.exports.${name}` || leftText === `exports.${name}`) return true;
        // module.exports = { foo, bar } or module.exports = foo
        if (rightText !== null && mentionsIdentifier(rightText, name)) return true;
      }
    }
    return false;
  }

  // Go: a name is exported only when it begins with an uppercase letter
  // (so `_helper` is not exported).
  if (lang === 'go') {
    const nameNode = node.childForFieldName('name');
    if (nameNode) return /^\p{Lu}/u.test(getText(nameNode));
  }

  // Java: look for the `public` keyword among the modifiers.
  if (lang === 'java') {
    const mods = node.children.find((c) => c.type === 'modifiers');
    return mods ? /\bpublic\b/.test(getText(mods)) : false;
  }

  return true; // Python: everything is public
}
|
|
|
|
// Ids already pushed to `entities`. Mirrors `_relSet` so duplicate detection
// is a constant-time lookup instead of a scan of the whole array.
const _entityIds = new Set();

/**
 * Append an entity unless one with the same id was already added
 * (the first occurrence wins).
 * @param {{id: string}} e
 */
function addEntity(e) {
  if (_entityIds.has(e.id)) return;
  _entityIds.add(e.id);
  entities.push(e);
}
|
|
|
|
// Keys of relationships already pushed to `relationships`.
const _relSet = new Set();

/**
 * Append a relationship unless an identical (type, source, target) triple
 * was already added.
 * @param {{type: string, source: string, target: string}} r
 */
function addRel(r) {
  // Structured key: a plain `source->target` string is ambiguous when either
  // side itself contains "->".
  const key = JSON.stringify([r.type, r.source, r.target]);
  if (_relSet.has(key)) return;
  _relSet.add(key);
  relationships.push(r);
}
|
|
|
|
// --- Import Extraction ---
|
|
/**
 * Record the dependencies declared by `node`, if it is an import construct.
 *
 * Handles ES `import` statements, Python `import` / `from ... import`,
 * Java and Go import declarations, CommonJS `const x = require('...')`
 * declarations, and Bash `source file` / `. file` commands. Each dependency
 * becomes a `Dependency` entity with id `dep:<name>` plus an `IMPORTS`
 * relationship from the current module.
 *
 * @param {object} node tree-sitter syntax node
 * @returns {boolean} true when the node was consumed as an import, so the
 *   caller should not descend into it
 */
function extractImports(node) {
  // Register one dependency and the module -> dependency edge.
  const addDependency = (depName, kind, atNode) => {
    const depId = `dep:${depName}`;
    addEntity({ id: depId, type: 'Dependency', name: depName, kind, visibility: 'internal', line_range: lineRange(atNode) });
    addRel({ type: 'IMPORTS', source: moduleId, target: depId });
  };

  // Relative JS/TS specifiers are resolved against the importing file's
  // directory. The directory is converted to forward slashes first so the
  // posix join also behaves when relPath uses the platform separator.
  const resolveSpecifier = (spec) => {
    if (!spec.startsWith('.')) return spec;
    const dir = path.dirname(relPath).split(path.sep).join('/');
    return path.posix.normalize(path.posix.join(dir, spec));
  };

  if (adapter.importNodes.includes(node.type)) {
    if (lang === 'typescript' || lang === 'tsx' || lang === 'javascript') {
      const sourceNode = node.childForFieldName('source');
      if (sourceNode) {
        addDependency(resolveSpecifier(getText(sourceNode).replace(/['"]/g, '')), 'import', node);
      }
      return true;
    }

    if (lang === 'python') {
      const names = [];
      const moduleName = node.childForFieldName('module_name');
      if (moduleName) {
        // from X import Y -> depend on X
        names.push(getText(moduleName));
      } else {
        // import a, b.c, d as e -> one dependency per imported module,
        // using the module name rather than the alias text.
        for (const child of node.namedChildren) {
          if (child.type === 'dotted_name') {
            names.push(getText(child));
          } else if (child.type === 'aliased_import') {
            const target = child.childForFieldName('name');
            if (target) names.push(getText(target));
          }
        }
      }
      if (names.length === 0) names.push('unknown');
      for (const depName of names) addDependency(depName, 'import', node);
      return true;
    }

    if (lang === 'java') {
      // import com.foo.Bar;
      const scopedId = node.children.find((c) => c.type === 'scoped_identifier');
      if (scopedId) addDependency(getText(scopedId), 'import', node);
      return true;
    }

    if (lang === 'go') {
      // import "fmt" or import ( "fmt" "os" )
      for (const child of node.namedChildren) {
        if (child.type !== 'import_spec' && child.type !== 'import_spec_list') continue;
        const specs = child.type === 'import_spec_list' ? child.namedChildren : [child];
        for (const spec of specs) {
          const pathNode = spec.childForFieldName('path');
          if (pathNode) addDependency(getText(pathNode).replace(/"/g, ''), 'import', spec);
        }
      }
      return true;
    }
  }

  // CommonJS require() for JS/TS. Every declarator is inspected so that
  // `const a = require('a'), b = require('b')` records both modules.
  if (adapter.requireFunc && (node.type === 'lexical_declaration' || node.type === 'variable_declaration')) {
    let foundRequire = false;
    for (const child of node.children) {
      if (child.type !== 'variable_declarator') continue;
      const value = child.childForFieldName('value');
      if (!value || value.type !== 'call_expression') continue;
      const func = value.childForFieldName('function');
      if (!func || getText(func) !== adapter.requireFunc) continue;
      const args = value.childForFieldName('arguments');
      if (!args || args.namedChildCount === 0) continue;
      const arg = args.namedChildren[0];
      if (arg.type !== 'string') continue; // dynamic require: target unknown
      addDependency(resolveSpecifier(getText(arg).replace(/['"]/g, '')), 'require', node);
      foundRequire = true;
    }
    if (foundRequire) return true;
  }

  // Bash: `source ./utils.sh` or `. ./utils.sh` -> IMPORTS
  if (lang === 'bash' && node.type === 'command') {
    const cmd = node.namedChildren[0];
    const cmdName = cmd ? getText(cmd) : null;
    if (cmdName === 'source' || cmdName === '.') {
      const arg = node.namedChildren[1];
      if (arg) {
        // Drop one pair of surrounding quotes so "./x.sh" and ./x.sh agree.
        addDependency(getText(arg).replace(/^(['"])(.*)\1$/, '$2'), 'import', node);
        return true;
      }
    }
  }

  return false;
}
|
|
|
|
// --- Class Extraction ---
|
|
/**
 * Register a class-like declaration (class, interface, enum, Go type) as a
 * `Class` entity contained in `parentId`, plus `IMPLEMENTS` relationships
 * for its base classes and implemented interfaces.
 *
 * @param {object} node tree-sitter node whose type is in adapter.classNodes
 * @param {string} parentId id of the containing entity
 * @returns {string|null} id of the registered entity (the first one when a
 *   Go declaration groups several types), or null when no name was found
 */
function extractClass(node, parentId) {
  const register = (name, kind, isPublic, atNode) => {
    const id = `${parentId}:${name}`;
    addEntity({ id, type: 'Class', name, kind, visibility: isPublic ? 'public' : 'internal', line_range: lineRange(atNode) });
    addRel({ type: 'CONTAINS', source: parentId, target: id });
    return id;
  };
  const addImplements = (id, target) => addRel({ type: 'IMPLEMENTS', source: id, target });

  const nameNode = node.childForFieldName('name');
  if (!nameNode) {
    if (lang !== 'go') return null;
    // Go: `type_declaration` carries no name itself; each `type_spec` child
    // does (`type ( A struct{}; B interface{} )` yields two specs).
    let firstId = null;
    for (const spec of node.namedChildren) {
      if (spec.type !== 'type_spec') continue;
      const specName = spec.childForFieldName('name');
      if (!specName) continue;
      const name = getText(specName);
      const typeNode = spec.childForFieldName('type');
      const kind = typeNode && typeNode.type === 'interface_type' ? 'interface' : 'struct';
      // Go exports identifiers that start with an uppercase letter.
      const id = register(name, kind, /^\p{Lu}/u.test(name), spec);
      if (firstId === null) firstId = id;
    }
    return firstId;
  }

  const name = getText(nameNode);
  let kind = 'class';
  if (lang === 'go') kind = 'struct';
  if (node.type === 'interface_declaration') kind = 'interface';
  if (node.type === 'enum_declaration') kind = 'enum';
  const id = register(name, kind, isExported(node), node);

  // Implements/extends via the adapter's heritage node
  if (adapter.heritage) {
    for (const heritage of node.children) {
      if (heritage.type !== adapter.heritage) continue;
      for (const clause of heritage.namedChildren) {
        if (adapter.implementsClause && clause.type === adapter.implementsClause) {
          for (const impl of clause.namedChildren) addImplements(id, getText(impl));
        } else if (clause.type === 'extends_clause') {
          // TS wraps the base in an extends_clause whose text starts with the
          // `extends` keyword; record the base expression(s) only.
          for (const base of clause.namedChildren) {
            if (base.type !== 'type_arguments') addImplements(id, getText(base));
          }
        } else {
          addImplements(id, getText(clause));
        }
      }
    }
  }

  // Java: superclass and super_interfaces
  if (lang === 'java') {
    const superclass = node.childForFieldName('superclass');
    if (superclass) addImplements(id, getText(superclass).replace(/^extends\s+/, ''));
    const superInterfaces = node.childForFieldName('interfaces');
    if (superInterfaces) {
      for (const entry of superInterfaces.namedChildren) {
        // The interface names sit inside a type_list; unwrap it so
        // `implements A, B` yields two targets instead of "A, B".
        const ifaces = entry.type === 'type_list' ? entry.namedChildren : [entry];
        for (const iface of ifaces) addImplements(id, getText(iface));
      }
    }
  }

  // Python: bases (keyword arguments such as metaclass=... are not bases)
  if (lang === 'python') {
    const argList = node.childForFieldName('superclasses');
    if (argList) {
      for (const base of argList.namedChildren) {
        if (base.type !== 'keyword_argument') addImplements(id, getText(base));
      }
    }
  }

  return id;
}
|
|
|
|
// --- Method Extraction ---
|
|
/**
 * Register a method as a `Function` entity (kind `method`) contained in
 * `parentId`.
 *
 * Visibility defaults to `public` and is narrowed by, in order: the
 * adapter's access-modifier child (`private` / `protected`), a JS `#name`
 * private member name, and Python's leading-underscore convention.
 *
 * @param {object} node tree-sitter method node
 * @param {string} parentId id of the owning class
 * @returns {string|null} the method's entity id, or null when it has no name
 */
function extractMethod(node, parentId) {
  const nameNode = node.childForFieldName('name');
  if (!nameNode) return null;
  const name = getText(nameNode);
  const id = `${parentId}:${name}`;

  let visibility = 'public';
  if (adapter.accessModifier) {
    // Only the first modifier node is consulted.
    const modifier = node.children.find((c) => c.type === adapter.accessModifier);
    if (modifier) {
      const modText = getText(modifier);
      if (/\bprivate\b/.test(modText)) visibility = 'private';
      else if (/\bprotected\b/.test(modText)) visibility = 'protected';
    }
  }

  // JS/TS: `#name` members are private by language rule.
  if (name.startsWith('#')) visibility = 'private';

  // Python: _ prefix = protected, __ prefix = private. Dunder names such as
  // __init__ or __str__ are protocol methods and stay public.
  if (lang === 'python' && name.startsWith('_')) {
    const isDunder = name.length > 4 && name.startsWith('__') && name.endsWith('__');
    if (!isDunder) visibility = name.startsWith('__') ? 'private' : 'protected';
  }

  addEntity({ id, type: 'Function', name, kind: 'method', visibility, line_range: lineRange(node) });
  addRel({ type: 'CONTAINS', source: parentId, target: id });
  return id;
}
|
|
|
|
// --- Function Extraction ---
|
|
/**
 * Register a free function as a `Function` entity (kind `function`)
 * contained in `parentId`.
 *
 * Two shapes are recognised: a node whose type is in adapter.functionNodes,
 * and — for adapters with `arrowFuncParent` — a declaration whose
 * declarators are initialised with a function value
 * (`const f = () => {}`). Every such declarator is registered.
 *
 * @param {object} node tree-sitter syntax node
 * @param {string} parentId id of the containing entity
 * @returns {string|null} id of the (first) registered function, or null when
 *   the node declares no function
 */
function extractFunction(node, parentId) {
  const exported = isExported(node);

  const register = (name, visibility) => {
    const id = `${parentId}:${name}`;
    addEntity({ id, type: 'Function', name, kind: 'function', visibility, line_range: lineRange(node) });
    addRel({ type: 'CONTAINS', source: parentId, target: id });
    return id;
  };

  if (adapter.functionNodes.includes(node.type)) {
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;
    const name = getText(nameNode);

    let visibility = exported ? 'public' : 'internal';
    // Go decides visibility purely from the first letter's case.
    if (lang === 'go') visibility = /^\p{Lu}/u.test(name) ? 'public' : 'internal';
    return register(name, visibility);
  }

  // JS/TS arrow functions / function expressions bound to a variable
  if (adapter.arrowFuncParent && node.type === adapter.arrowFuncParent) {
    let firstId = null;
    for (const child of node.children) {
      if (child.type !== 'variable_declarator') continue;
      const value = child.childForFieldName('value');
      if (!value || !adapter.arrowTypes.includes(value.type)) continue;
      const nameNode = child.childForFieldName('name');
      if (!nameNode) continue;
      const id = register(getText(nameNode), exported ? 'public' : 'internal');
      if (firstId === null) firstId = id;
    }
    return firstId;
  }

  return null;
}
|
|
|
|
// --- Class Field (arrow method vs property) ---
|
|
/**
 * Treat a class field initialised with a function value
 * (e.g. `handler = () => {}`) as a method; ignore every other field.
 *
 * @param {object} node tree-sitter class-field node
 * @param {string} parentId id of the owning class
 * @returns {string|null} the id returned by extractMethod, or null when the
 *   node is not a named, function-valued field
 */
function extractClassField(node, parentId) {
  if (!adapter.fieldNodes.includes(node.type)) return null;
  if (!node.childForFieldName('name')) return null;

  const value = node.childForFieldName('value');
  const isFunctionValued = Boolean(value) && adapter.arrowTypes.includes(value.type);
  // Non-function class properties are skipped.
  return isFunctionValued ? extractMethod(node, parentId) : null;
}
|
|
|
|
// --- Call Extraction ---
|
|
/**
 * If `node` is a call expression for the current language, record a `CALLS`
 * relationship from `parentId` to the callee's source text.
 *
 * Calls to the adapter's `requireFunc` are skipped (they are recorded as
 * imports elsewhere). Only `node` itself is inspected; callers recurse.
 *
 * @param {object} node tree-sitter syntax node
 * @param {string} parentId id of the entity the call is attributed to
 */
function extractCalls(node, parentId) {
  if (!adapter.callExpr || node.type !== adapter.callExpr) return;

  let funcName = null;
  if (lang === 'java') {
    // method_invocation: optional `object` receiver plus the method `name`
    const nameNode = node.childForFieldName('name');
    if (nameNode) {
      const obj = node.childForFieldName('object');
      funcName = obj ? `${getText(obj)}.${getText(nameNode)}` : getText(nameNode);
    }
  } else if (lang === 'bash') {
    // Prefer the command's `name` field so leading `VAR=value` assignments
    // are not mistaken for the command; fall back to the first named child.
    const cmd = node.childForFieldName('name') || node.namedChildren[0];
    funcName = cmd ? getText(cmd) : null;
  } else if (adapter.funcField) {
    const funcNode = node.childForFieldName(adapter.funcField);
    funcName = funcNode ? getText(funcNode) : null;
  }

  if (!funcName) return;
  if (adapter.requireFunc && funcName === adapter.requireFunc) return;
  addRel({ type: 'CALLS', source: parentId, target: funcName });
}
|
|
|
|
// --- YAML/HCL Config Extraction ---
|
|
/**
 * Extract config entities from the root node of a YAML or HCL parse tree.
 *
 * YAML: registers the file as a `yaml-config` entity and each top-level
 * mapping key as a `yaml-key` entity. HCL: registers the file as a
 * `terraform` entity and each top-level block as an `hcl-block` entity named
 * `<type>.<label>...`.
 *
 * NOTE(review): `extract` returns early for yaml/hcl before any tree is
 * walked, so this function looks unreachable from the code visible here —
 * confirm before relying on it.
 *
 * @param {object} node root syntax node
 * @returns {boolean} true when `lang` is yaml or hcl (node consumed)
 */
function extractConfig(node) {
  if (lang === 'yaml') {
    addEntity({ id: moduleId, type: 'Config', name: relPath, kind: 'yaml-config', visibility: 'public', line_range: lineRange(node) });
    if (node.type !== 'stream') return true;

    // Extract top-level keys as config entries
    for (const doc of node.namedChildren) {
      if (doc.type !== 'document') continue;
      const block = doc.namedChildren[0];
      if (!block || block.type !== 'block_node') continue;
      const mapping = block.namedChildren[0];
      if (!mapping || mapping.type !== 'block_mapping') continue;

      for (const pair of mapping.namedChildren) {
        if (pair.type !== 'block_mapping_pair') continue;
        const key = pair.childForFieldName('key');
        if (!key) continue;
        const keyName = getText(key);
        const keyId = `${moduleId}:${keyName}`;
        addEntity({ id: keyId, type: 'Config', name: keyName, kind: 'yaml-key', visibility: 'public', line_range: lineRange(pair) });
        addRel({ type: 'CONTAINS', source: moduleId, target: keyId });
      }
    }
    return true;
  }

  if (lang === 'hcl') {
    addEntity({ id: moduleId, type: 'Config', name: relPath, kind: 'terraform', visibility: 'public', line_range: lineRange(node) });

    // Extract top-level blocks (resource, data, variable, output, module, provider)
    for (const child of node.namedChildren) {
      if (child.type !== 'block') continue;
      const blockType = child.namedChildren[0]; // e.g., "resource"
      // Labels are the string/identifier children after the block type;
      // skipping index 0 keeps the type from being counted as a label too.
      const labels = child.namedChildren
        .slice(1)
        .filter((c) => c.type === 'string_lit' || c.type === 'identifier');
      const blockName = labels.map((l) => getText(l).replace(/"/g, '')).join('.');
      const fullName = blockType ? `${getText(blockType)}.${blockName}` : blockName;
      const blockId = `${moduleId}:${fullName}`;
      addEntity({ id: blockId, type: 'Config', name: fullName, kind: 'hcl-block', visibility: 'public', line_range: lineRange(child) });
      addRel({ type: 'CONTAINS', source: moduleId, target: blockId });
    }
    return true;
  }

  return false;
}
|
|
|
|
// --- Main Walker ---
|
|
/**
 * Recursive structural walker: registers the module, classes, methods and
 * functions found under `node`, attributing each to `parentId`, and hands
 * function/method bodies to walkBody for call extraction.
 *
 * Branch order matters: the first matching branch handles the node and
 * returns. The Go receiver-method branch is tested before the generic
 * method branch because Go's adapter also lists `method_declaration` in
 * `methodNodes`; otherwise Go methods would be attached to the module
 * instead of their receiver type.
 *
 * @param {object} node tree-sitter syntax node
 * @param {string} [parentId] id of the enclosing entity (unused for the root)
 */
function walk(node, parentId) {
  const isRoot = node.type === 'program' || node.type === 'source_file' || node.type === 'stream'
    || node.type === 'compilation_unit' || node.type === 'module';
  if (isRoot) {
    // Config files (YAML/HCL)
    if (extractConfig(node)) return;

    // Code files
    addEntity({ id: moduleId, type: 'Module', name: relPath, kind: 'module', visibility: 'public', line_range: lineRange(node) });
    for (const child of node.children) walk(child, moduleId);
    return;
  }

  // Export wrapper (JS/TS): look through it at the wrapped declaration
  if (adapter.exportWrapper && node.type === adapter.exportWrapper) {
    for (const child of node.children) {
      if (child.type !== 'export' && child.type !== 'default') walk(child, parentId);
    }
    return;
  }

  // Imports
  if (extractImports(node)) return;

  // Classes
  if (adapter.classNodes.includes(node.type)) {
    const classId = extractClass(node, parentId);
    if (classId) {
      const body = node.childForFieldName('body');
      if (body) {
        for (const child of body.namedChildren || body.children) walk(child, classId);
      }
      // Go type_declaration: walk type_spec children
      if (lang === 'go') {
        for (const child of node.namedChildren) {
          if (child.type !== 'type_spec') continue;
          const structBody = child.childForFieldName('type');
          if (!structBody) continue;
          for (const field of structBody.namedChildren) walk(field, classId);
        }
      }
    }
    return;
  }

  // Go: method_declaration (receiver-based) — must precede the generic
  // method branch below.
  if (lang === 'go' && node.type === 'method_declaration') {
    const nameNode = node.childForFieldName('name');
    if (nameNode) {
      const name = getText(nameNode);
      if (!name || name.length === 0) return;

      let receiverType = parentId;
      const receiver = node.childForFieldName('receiver');
      if (receiver) {
        for (const param of receiver.namedChildren) {
          const typeNode = param.childForFieldName('type');
          if (!typeNode) continue;
          // Strip pointer (*) and generic brackets safely
          const typeName = getText(typeNode).replace(/^\*+/, '').replace(/\[.*\]$/, '').trim();
          if (typeName.length > 0) receiverType = `${moduleId}:${typeName}`;
        }
      }

      const id = `${receiverType}:${name}`;
      const visibility = /^\p{Lu}/u.test(name) ? 'public' : 'internal';
      addEntity({ id, type: 'Function', name, kind: 'method', visibility, line_range: lineRange(node) });
      addRel({ type: 'CONTAINS', source: receiverType, target: id });
      const body = node.childForFieldName('body');
      if (body) walkBody(body, id);
    }
    return;
  }

  // Methods (inside class body)
  if (adapter.methodNodes.includes(node.type)) {
    const methodId = extractMethod(node, parentId);
    if (methodId) {
      const body = node.childForFieldName('body');
      if (body) walkBody(body, methodId);
    }
    return;
  }

  // Class fields (arrow methods vs properties)
  if (adapter.fieldNodes.includes(node.type)) {
    const methodId = extractClassField(node, parentId);
    if (methodId) {
      const value = node.childForFieldName('value');
      const body = value ? value.childForFieldName('body') : null;
      if (body) walkBody(body, methodId);
    }
    return;
  }

  // Python: function_definition can be top-level or method (inside class)
  if (lang === 'python' && node.type === 'function_definition') {
    const insideClass = Boolean(parentId) && parentId.includes(':') && parentId !== moduleId;
    const ownerId = insideClass ? extractMethod(node, parentId) : extractFunction(node, parentId);
    if (ownerId) {
      const body = node.childForFieldName('body');
      if (body) walkBody(body, ownerId);
    }
    return;
  }

  // Functions (top-level)
  const isArrowParent = Boolean(adapter.arrowFuncParent) && node.type === adapter.arrowFuncParent;
  if (adapter.functionNodes.includes(node.type) || isArrowParent) {
    const funcId = extractFunction(node, parentId);
    if (funcId) {
      // For lexical_declaration, walk the whole declaration
      const body = isArrowParent ? node : node.childForFieldName('body');
      if (body) walkBody(body, funcId);
      return;
    }
    // Module-level variable (JS/TS only): not a function, nothing to record
    if (parentId === moduleId && isArrowParent) return;
  }

  // Java: package_declaration
  if (lang === 'java' && node.type === 'package_declaration') return;

  // Top-level calls
  extractCalls(node, parentId);

  for (const child of node.children) walk(child, parentId);
}
|
|
|
|
// Walk function/method bodies for CALLS only
|
|
/**
 * Walk a function/method body depth-first and record CALLS only; every call
 * found anywhere below `node` is attributed to `parentId`.
 *
 * @param {object|null|undefined} node subtree root (no-op when missing)
 * @param {string} parentId id of the function/method owning the body
 */
function walkBody(node, parentId) {
  if (!node) return;
  extractCalls(node, parentId);
  for (const child of node.children) {
    walkBody(child, parentId);
  }
}
|
|
|
|
walk(tree.rootNode);
|
|
|
|
return { file: filePath, language: lang, entities, relationships };
|
|
}
|
|
|
|
// --- CLI ---
|
|
// When run directly (`node extract.js <file> [repo-root]`), extract one file
// and print the result as pretty-printed JSON on stdout.
if (require.main === module) {
  const filePath = process.argv[2];
  // Falls back to '/app/src' when no repo root argument is supplied.
  const repoRoot = process.argv[3] || '/app/src';
  if (!filePath) {
    console.error("Usage: node extract.js <file> [repo-root]");
    process.exit(1);
  }

  const result = extract(filePath, repoRoot);
  console.log(JSON.stringify(result, null, 2));
}

module.exports = { extract };
|