const fs = require('fs');
const path = require('path');
const Parser = require('tree-sitter');
const jsYaml = require('js-yaml');

// --- Language Grammars (tree-sitter for code only) ---
const GRAMMARS = {
  typescript: require('tree-sitter-typescript').typescript,
  tsx: require('tree-sitter-typescript').tsx,
  javascript: require('tree-sitter-javascript'),
  python: require('tree-sitter-python'),
  java: require('tree-sitter-java'),
  go: require('tree-sitter-go'),
  bash: require('tree-sitter-bash'),
};

const { extractYaml, extractHcl } = require('./extract-config.js');

// File extension -> language key. 'yaml' and 'hcl' are delegated to
// extract-config.js; every other value must have an entry in GRAMMARS and ADAPTERS.
const EXT_MAP = {
  '.ts': 'typescript',
  '.tsx': 'tsx',
  '.js': 'javascript',
  '.jsx': 'javascript',
  '.py': 'python',
  '.java': 'java',
  '.go': 'go',
  '.sh': 'bash',
  '.bash': 'bash',
  '.yaml': 'yaml',
  '.yml': 'yaml',
  '.tf': 'hcl',
  '.hcl': 'hcl',
  '.kcl': 'yaml', // KCL has no tree-sitter grammar; parse as YAML (structural approximation)
};

// --- Language Adapters ---
// Each adapter defines node types for that language's AST
const ADAPTERS = {
  typescript: {
    classNodes: ['class_declaration'],
    functionNodes: ['function_declaration'],
    arrowFuncParent: 'lexical_declaration',
    methodNodes: ['method_definition'],
    fieldNodes: ['public_field_definition'],
    importNodes: ['import_statement'],
    requireFunc: 'require',
    exportWrapper: 'export_statement',
    varDecl: ['lexical_declaration', 'variable_declaration'],
    callExpr: 'call_expression',
    funcField: 'function',
    nameField: 'name',
    bodyField: 'body',
    sourceField: 'source',
    valueField: 'value',
    arrowTypes: ['arrow_function', 'function'],
    accessModifier: 'accessibility_modifier',
    heritage: 'class_heritage',
    implementsClause: 'implements_clause',
  },
  python: {
    classNodes: ['class_definition'],
    functionNodes: ['function_definition'],
    arrowFuncParent: null,
    methodNodes: [], // methods are function_definition inside class
    fieldNodes: [],
    importNodes: ['import_statement', 'import_from_statement'],
    requireFunc: null,
    exportWrapper: null,
    varDecl: ['assignment', 'augmented_assignment'],
    callExpr: 'call',
    funcField: 'function',
    nameField: 'name',
    bodyField: 'body',
    sourceField: null,
    valueField: 'right',
    arrowTypes: ['lambda'],
    accessModifier: null,
    heritage: null,
    implementsClause: null,
  },
  java: {
    classNodes: ['class_declaration', 'interface_declaration', 'enum_declaration'],
    functionNodes: ['method_declaration', 'constructor_declaration'],
    arrowFuncParent: null,
    methodNodes: ['method_declaration', 'constructor_declaration'],
    fieldNodes: ['field_declaration'],
    importNodes: ['import_declaration'],
    requireFunc: null,
    exportWrapper: null,
    varDecl: ['local_variable_declaration', 'field_declaration'],
    callExpr: 'method_invocation',
    funcField: 'name',
    nameField: 'name',
    bodyField: 'body',
    sourceField: null,
    valueField: null,
    arrowTypes: ['lambda_expression'],
    accessModifier: 'modifiers',
    heritage: 'superclass',
    implementsClause: 'super_interfaces',
  },
  go: {
    classNodes: ['type_declaration'], // struct types
    functionNodes: ['function_declaration', 'method_declaration'],
    arrowFuncParent: null,
    methodNodes: ['method_declaration'],
    fieldNodes: [],
    importNodes: ['import_declaration'],
    requireFunc: null,
    exportWrapper: null,
    varDecl: ['var_declaration', 'short_var_declaration', 'const_declaration'],
    callExpr: 'call_expression',
    funcField: 'function',
    nameField: 'name',
    bodyField: 'body',
    sourceField: 'path',
    valueField: null,
    arrowTypes: ['func_literal'],
    accessModifier: null,
    heritage: null,
    implementsClause: null,
  },
  yaml: {
    classNodes: [],
    functionNodes: [],
    arrowFuncParent: null,
    methodNodes: [],
    fieldNodes: [],
    importNodes: [],
    requireFunc: null,
    exportWrapper: null,
    varDecl: [],
    callExpr: null,
    funcField: null,
    nameField: null,
    bodyField: null,
    sourceField: null,
    valueField: null,
    arrowTypes: [],
    accessModifier: null,
    heritage: null,
    implementsClause: null,
  },
  hcl: {
    classNodes: [],
    functionNodes: [],
    arrowFuncParent: null,
    methodNodes: [],
    fieldNodes: [],
    importNodes: [],
    requireFunc: null,
    exportWrapper: null,
    varDecl: [],
    callExpr: 'function_call',
    funcField: null,
    nameField: null,
    bodyField: 'body',
    sourceField: null,
    valueField: null,
    arrowTypes: [],
    accessModifier: null,
    heritage: null,
    implementsClause: null,
  },
};

// Alias adapters
ADAPTERS.tsx = ADAPTERS.typescript;
ADAPTERS.javascript = ADAPTERS.typescript;
ADAPTERS.bash = {
  classNodes: [],
  functionNodes: ['function_definition'],
  arrowFuncParent: null,
  methodNodes: [],
  fieldNodes: [],
  importNodes: [],
  requireFunc: null,
  exportWrapper: null,
  varDecl: ['variable_assignment'],
  callExpr: 'command',
  funcField: 'name',
  nameField: 'name',
  bodyField: 'body',
  sourceField: null,
  valueField: null,
  arrowTypes: [],
  accessModifier: null,
  heritage: null,
  implementsClause: null,
};

// Node types that walk() treats as the root of a file.
const ROOT_NODE_TYPES = new Set(['program', 'source_file', 'stream', 'compilation_unit', 'module']);

/**
 * Reports whether `name` occurs in `text` as a whole identifier rather than
 * as a fragment of a longer one (so `a` does not match inside `bar`).
 *
 * @param {string} text - Source text to search.
 * @param {string} name - Identifier to look for.
 * @returns {boolean} True when a whole-identifier occurrence exists.
 */
function mentionsIdentifier(text, name) {
  if (!name) return false;
  // Escape regex metacharacters; `$` is legal in JS identifiers.
  const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return new RegExp(`(^|[^\\w$])${escaped}(?![\\w$])`).test(text);
}

/**
 * Go visibility rule: an identifier is exported when its first character is
 * an uppercase letter. Names starting with `_` (and empty names) are not.
 *
 * @param {string} name - Go identifier.
 * @returns {boolean} True when the name starts with an uppercase letter.
 */
function isGoExportedName(name) {
  return /^\p{Lu}/u.test(name);
}

// --- Core Extractor ---

/**
 * Parses one source file with tree-sitter and returns the entities
 * (module, classes, functions, dependencies) and relationships
 * (CONTAINS, IMPORTS, IMPLEMENTS, CALLS) found in it.
 *
 * YAML and HCL files are delegated to extractYaml / extractHcl and their
 * return value is passed through unchanged.
 *
 * Exits the process with status 1 when the extension is not in EXT_MAP or
 * when no grammar/adapter is registered for the language. Read and parse
 * failures do not exit: they are logged and reported via an `error` field.
 *
 * @param {string} filePath - Path of the file to analyse.
 * @param {string} repoRoot - Root used to compute the relative module id.
 * @returns {{file: string, language: string, entities: object[], relationships: object[], error?: string}}
 */
function extract(filePath, repoRoot) {
  const ext = path.extname(filePath);
  const lang = EXT_MAP[ext];
  if (!lang) {
    console.error(`Unsupported extension: ${ext}`);
    process.exit(1);
  }

  if (lang === 'yaml') return extractYaml(filePath, repoRoot);
  if (lang === 'hcl') return extractHcl(filePath, repoRoot);

  const grammar = GRAMMARS[lang];
  const adapter = ADAPTERS[lang];
  if (!grammar || !adapter) {
    console.error(`No grammar/adapter for: ${lang}`);
    process.exit(1);
  }

  const parser = new Parser();
  parser.setLanguage(grammar);

  let sourceCode;
  try {
    sourceCode = fs.readFileSync(filePath, 'utf8');
  } catch (err) {
    console.error(`Failed to read ${filePath}: ${err.message}`);
    return { file: filePath, language: lang, entities: [], relationships: [], error: err.message };
  }

  let tree;
  try {
    tree = parser.parse(sourceCode);
  } catch (err) {
    console.error(`Failed to parse ${filePath}: ${err.message}`);
    return { file: filePath, language: lang, entities: [], relationships: [], error: err.message };
  }

  const relPath = path.relative(repoRoot, filePath);
  const moduleId = relPath;
  const entities = [];
  const relationships = [];

  /** Returns the source text covered by a syntax node. */
  function getText(node) {
    return sourceCode.substring(node.startIndex, node.endIndex);
  }

  /** Returns the 1-based [startLine, endLine] of a syntax node. */
  function lineRange(node) {
    return [node.startPosition.row + 1, node.endPosition.row + 1];
  }

  /**
   * Decides whether a declaration is visible outside its module.
   *
   * @param {object} node - Declaration node.
   * @param {string} [knownName] - Name to use when `node` itself has no
   *   `name` field (e.g. a lexical_declaration wrapping an arrow function).
   * @returns {boolean}
   */
  function isExported(node, knownName) {
    if (adapter.exportWrapper) {
      // ES6 export
      if (node.parent && node.parent.type === adapter.exportWrapper) return true;

      // CommonJS: module.exports = { ... } or exports.foo = ...
      let name = knownName;
      if (name === undefined) {
        const nameNode = node.childForFieldName('name');
        if (!nameNode) return false;
        name = getText(nameNode);
      }

      // Scan top-level assignments for module.exports references to this name.
      for (const child of tree.rootNode.children) {
        if (child.type !== 'expression_statement') continue;
        const expr = child.children[0];
        if (!expr || expr.type !== 'assignment_expression') continue;
        const left = expr.childForFieldName('left');
        if (!left) continue;
        const leftText = getText(left);

        // module.exports.foo = ... or exports.foo = ...
        if (leftText === `module.exports.${name}` || leftText === `exports.${name}`) return true;

        // module.exports = { foo, bar } or module.exports = foo
        if (leftText === 'module.exports') {
          const right = expr.childForFieldName('right');
          // Whole-identifier match: a plain substring test would report
          // `a` as exported by `module.exports = { bar }`.
          if (right && mentionsIdentifier(getText(right), name)) return true;
        }
      }
      return false;
    }

    // Go: capitalized name = exported
    if (lang === 'go') {
      const nameNode = node.childForFieldName('name');
      if (nameNode) return isGoExportedName(getText(nameNode));
    }

    // Java: check modifiers
    if (lang === 'java') {
      const mods = node.children.find((c) => c.type === 'modifiers');
      if (mods) return getText(mods).includes('public');
      return false;
    }

    return true; // Python: everything is public
  }

  // Ids / keys already emitted, so duplicates are dropped in O(1).
  const seenEntityIds = new Set();
  const seenRelKeys = new Set();

  /** Appends an entity unless one with the same id was already added. */
  function addEntity(e) {
    if (seenEntityIds.has(e.id)) return;
    seenEntityIds.add(e.id);
    entities.push(e);
  }

  /** Appends a relationship unless the same type/source/target was already added. */
  function addRel(r) {
    const key = `${r.type}:${r.source}->${r.target}`;
    if (seenRelKeys.has(key)) return;
    seenRelKeys.add(key);
    relationships.push(r);
  }

  /** Records a Dependency entity plus the IMPORTS edge from this module. */
  function addDependency(depName, kind, node) {
    const depId = `dep:${depName}`;
    addEntity({ id: depId, type: 'Dependency', name: depName, kind, visibility: 'internal', line_range: lineRange(node) });
    addRel({ type: 'IMPORTS', source: moduleId, target: depId });
  }

  /** Resolves a relative JS/TS specifier against this file's directory. */
  function resolveSpecifier(depName) {
    if (!depName.startsWith('.')) return depName;
    return path.posix.normalize(path.posix.join(path.dirname(relPath), depName));
  }

  // --- Import Extraction ---

  /**
   * Records the dependency expressed by `node`, if it is an import.
   *
   * @param {object} node - Any syntax node.
   * @returns {boolean} True when the node was consumed as an import and
   *   the walker should not descend into it.
   */
  function extractImports(node) {
    if (adapter.importNodes.includes(node.type)) {
      if (lang === 'typescript' || lang === 'tsx' || lang === 'javascript') {
        const sourceNode = node.childForFieldName('source');
        if (sourceNode) {
          const depName = getText(sourceNode).replace(/['"]/g, '');
          addDependency(resolveSpecifier(depName), 'import', node);
        }
        return true;
      }

      if (lang === 'python') {
        // import X or from X import Y
        const modNode = node.childForFieldName('module_name') || node.childForFieldName('name');
        let depName = 'unknown';
        if (modNode) {
          depName = getText(modNode);
        } else {
          // Fallback: grab dotted name from children
          const dotted = node.children.find((c) => c.type === 'dotted_name');
          if (dotted) depName = getText(dotted);
        }
        addDependency(depName, 'import', node);
        return true;
      }

      if (lang === 'java') {
        // import com.foo.Bar;
        const scopedId = node.children.find((c) => c.type === 'scoped_identifier');
        if (scopedId) addDependency(getText(scopedId), 'import', node);
        return true;
      }

      if (lang === 'go') {
        // import "fmt" or import ( "fmt" "os" )
        for (const child of node.namedChildren) {
          if (child.type !== 'import_spec' && child.type !== 'import_spec_list') continue;
          const specs = child.type === 'import_spec_list' ? child.namedChildren : [child];
          for (const spec of specs) {
            const pathNode = spec.childForFieldName('path');
            if (pathNode) addDependency(getText(pathNode).replace(/"/g, ''), 'import', spec);
          }
        }
        return true;
      }
    }

    // CommonJS require() for JS/TS
    if (adapter.requireFunc && (node.type === 'lexical_declaration' || node.type === 'variable_declaration')) {
      for (const child of node.children) {
        if (child.type !== 'variable_declarator') continue;
        const value = child.childForFieldName('value');
        if (!value || value.type !== 'call_expression') continue;
        const func = value.childForFieldName('function');
        if (!func || getText(func) !== adapter.requireFunc) continue;
        const args = value.childForFieldName('arguments');
        if (!args || args.namedChildCount === 0) continue;
        const arg = args.namedChildren[0];
        if (arg.type !== 'string') continue;
        const depName = getText(arg).replace(/['"]/g, '');
        addDependency(resolveSpecifier(depName), 'require', node);
        return true; // only the first require() in a declaration is recorded
      }
    }

    // Bash: source ./utils.sh -> IMPORTS
    if (lang === 'bash' && node.type === 'command') {
      const cmd = node.namedChildren[0];
      if (cmd && getText(cmd) === 'source') {
        const arg = node.namedChildren[1];
        if (arg) {
          addDependency(getText(arg), 'import', node);
          return true;
        }
      }
    }

    return false;
  }

  // --- Class Extraction ---

  /**
   * Records a Class entity for `node`, its CONTAINS edge, and IMPLEMENTS
   * edges for any declared base types.
   *
   * @param {object} node - Node carrying a `name` field (a class-like
   *   declaration, or a Go type_spec).
   * @param {string} parentId - Id of the enclosing entity.
   * @returns {string|null} The new entity id, or null when the node has no name.
   */
  function extractClass(node, parentId) {
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;

    const name = getText(nameNode);
    const id = `${parentId}:${name}`;
    const exported = isExported(node);

    let kind = 'class';
    if (lang === 'go') kind = 'struct';
    if (node.type === 'interface_declaration') kind = 'interface';
    if (node.type === 'enum_declaration') kind = 'enum';

    addEntity({ id, type: 'Class', name, kind, visibility: exported ? 'public' : 'internal', line_range: lineRange(node) });
    addRel({ type: 'CONTAINS', source: parentId, target: id });

    // Implements/extends
    if (adapter.heritage) {
      const heritage = node.children.filter((c) => c.type === adapter.heritage);
      for (const h of heritage) {
        for (const child of h.namedChildren) {
          if (adapter.implementsClause && child.type === adapter.implementsClause) {
            for (const impl of child.namedChildren) {
              addRel({ type: 'IMPLEMENTS', source: id, target: getText(impl) });
            }
          } else {
            // A clause node's text may start with the keyword itself; strip
            // it so the target is the type name (same rule as the Java branch).
            addRel({ type: 'IMPLEMENTS', source: id, target: getText(child).replace(/^extends\s+/, '') });
          }
        }
      }
    }

    // Java: superclass and super_interfaces
    if (lang === 'java') {
      const superclass = node.childForFieldName('superclass');
      if (superclass) {
        addRel({ type: 'IMPLEMENTS', source: id, target: getText(superclass).replace(/^extends\s+/, '') });
      }
      const superInterfaces = node.childForFieldName('interfaces');
      if (superInterfaces) {
        for (const entry of superInterfaces.namedChildren) {
          // The interfaces are wrapped in a single list node; emit one edge
          // per interface instead of one edge whose target is "A, B".
          const ifaces = entry.type.endsWith('type_list') ? entry.namedChildren : [entry];
          for (const iface of ifaces) {
            addRel({ type: 'IMPLEMENTS', source: id, target: getText(iface) });
          }
        }
      }
    }

    // Python: bases
    if (lang === 'python') {
      const argList = node.childForFieldName('superclasses');
      if (argList) {
        for (const base of argList.namedChildren) {
          addRel({ type: 'IMPLEMENTS', source: id, target: getText(base) });
        }
      }
    }

    return id;
  }

  // --- Method Extraction ---

  /**
   * Records a method as a Function entity (kind 'method') under `parentId`.
   *
   * @param {object} node - Method-like node with a `name` field.
   * @param {string} parentId - Id of the enclosing class.
   * @returns {string|null} The new entity id, or null when the node has no name.
   */
  function extractMethod(node, parentId) {
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;

    const name = getText(nameNode);
    const id = `${parentId}:${name}`;

    let visibility = 'public';
    if (adapter.accessModifier) {
      const modifier = node.children.find((c) => c.type === adapter.accessModifier);
      if (modifier) {
        const modText = getText(modifier);
        if (modText.includes('private')) visibility = 'private';
        else if (modText.includes('protected')) visibility = 'protected';
      }
    }

    // Python: _ prefix = private, __ = very private
    if (lang === 'python' && name.startsWith('_')) {
      visibility = name.startsWith('__') ? 'private' : 'protected';
    }

    addEntity({ id, type: 'Function', name, kind: 'method', visibility, line_range: lineRange(node) });
    addRel({ type: 'CONTAINS', source: parentId, target: id });
    return id;
  }

  /**
   * Records a Go method under its receiver type, so `(a *A) Run` and
   * `(b *B) Run` get distinct ids.
   *
   * @param {object} node - Go method_declaration node.
   * @param {string} parentId - Fallback owner when no receiver type is found.
   * @returns {string|null} The new entity id, or null when the name is missing/empty.
   */
  function extractGoMethod(node, parentId) {
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;
    const name = getText(nameNode);
    if (!name) return null;

    let receiverType = parentId;
    const receiver = node.childForFieldName('receiver');
    if (receiver) {
      for (const param of receiver.namedChildren) {
        const typeNode = param.childForFieldName('type');
        if (!typeNode) continue;
        // Strip pointer (*) and generic brackets safely
        const typeName = getText(typeNode).replace(/^\*+/, '').replace(/\[.*\]$/, '').trim();
        if (typeName.length > 0) receiverType = `${moduleId}:${typeName}`;
      }
    }

    const id = `${receiverType}:${name}`;
    const visibility = isGoExportedName(name) ? 'public' : 'internal';
    addEntity({ id, type: 'Function', name, kind: 'method', visibility, line_range: lineRange(node) });
    addRel({ type: 'CONTAINS', source: receiverType, target: id });
    return id;
  }

  // --- Function Extraction ---

  /**
   * Records a free function, or (JS/TS) the first arrow/function expression
   * bound in a lexical declaration.
   *
   * @param {object} node - Function declaration or lexical declaration.
   * @param {string} parentId - Id of the enclosing entity.
   * @returns {string|null} The new entity id, or null when nothing was recorded.
   */
  function extractFunction(node, parentId) {
    if (adapter.functionNodes.includes(node.type)) {
      const nameNode = node.childForFieldName('name');
      if (!nameNode) return null;
      const name = getText(nameNode);
      const id = `${parentId}:${name}`;
      // For Go, isExported() already applies the uppercase-first-letter rule.
      const visibility = isExported(node) ? 'public' : 'internal';
      addEntity({ id, type: 'Function', name, kind: 'function', visibility, line_range: lineRange(node) });
      addRel({ type: 'CONTAINS', source: parentId, target: id });
      return id;
    }

    // JS/TS arrow functions
    if (adapter.arrowFuncParent && node.type === adapter.arrowFuncParent) {
      for (const child of node.children) {
        if (child.type !== 'variable_declarator') continue;
        const value = child.childForFieldName('value');
        if (!value || !adapter.arrowTypes.includes(value.type)) continue;
        const nameNode = child.childForFieldName('name');
        if (!nameNode) continue;
        const name = getText(nameNode);
        const id = `${parentId}:${name}`;
        // The declaration node has no `name` field, so hand the declarator's
        // name to isExported() for the CommonJS check.
        const visibility = isExported(node, name) ? 'public' : 'internal';
        addEntity({ id, type: 'Function', name, kind: 'function', visibility, line_range: lineRange(node) });
        addRel({ type: 'CONTAINS', source: parentId, target: id });
        return id;
      }
    }

    return null;
  }

  // --- Class Field (arrow method vs property) ---

  /**
   * Treats a class field whose value is a function expression as a method.
   *
   * @returns {string|null} The method id, or null for non-function properties.
   */
  function extractClassField(node, parentId) {
    if (!adapter.fieldNodes.includes(node.type)) return null;
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;
    const value = node.childForFieldName('value');
    if (value && adapter.arrowTypes.includes(value.type)) {
      return extractMethod(node, parentId);
    }
    return null; // Skip non-function class properties
  }

  // --- Call Extraction ---

  /**
   * Adds a CALLS edge from `parentId` when `node` is a call expression.
   * Calls to the adapter's require function are skipped.
   */
  function extractCalls(node, parentId) {
    if (!adapter.callExpr || node.type !== adapter.callExpr) return;

    let funcName = null;
    if (lang === 'java') {
      const nameNode = node.childForFieldName('name');
      const obj = node.childForFieldName('object');
      if (nameNode) {
        funcName = obj ? `${getText(obj)}.${getText(nameNode)}` : getText(nameNode);
      }
    } else if (lang === 'bash') {
      const funcNode = node.namedChildren[0];
      funcName = funcNode ? getText(funcNode) : null;
    } else {
      // Python's adapter.funcField is 'function', so it shares this path.
      const funcNode = node.childForFieldName(adapter.funcField);
      funcName = funcNode ? getText(funcNode) : null;
    }

    if (!funcName) return;
    if (adapter.requireFunc && funcName === adapter.requireFunc) return;
    addRel({ type: 'CALLS', source: parentId, target: funcName });
  }

  // --- YAML/HCL Config Extraction ---

  /**
   * Extracts top-level config entries from a YAML or HCL tree.
   * NOTE: extract() returns early for 'yaml' and 'hcl', so with the current
   * EXT_MAP this always returns false; it is kept for tree-sitter based
   * config parsing.
   *
   * @returns {boolean} True when the file was handled as a config file.
   */
  function extractConfig(node) {
    if (lang === 'yaml') {
      addEntity({ id: moduleId, type: 'Config', name: relPath, kind: 'yaml-config', visibility: 'public', line_range: lineRange(node) });

      // Extract top-level keys as config entries
      if (node.type === 'stream') {
        for (const doc of node.namedChildren) {
          if (doc.type !== 'document') continue;
          const block = doc.namedChildren[0];
          if (!block || block.type !== 'block_node') continue;
          const mapping = block.namedChildren[0];
          if (!mapping || mapping.type !== 'block_mapping') continue;
          for (const pair of mapping.namedChildren) {
            if (pair.type !== 'block_mapping_pair') continue;
            const key = pair.childForFieldName('key');
            if (!key) continue;
            const keyName = getText(key);
            const keyId = `${moduleId}:${keyName}`;
            addEntity({ id: keyId, type: 'Config', name: keyName, kind: 'yaml-key', visibility: 'public', line_range: lineRange(pair) });
            addRel({ type: 'CONTAINS', source: moduleId, target: keyId });
          }
        }
      }
      return true;
    }

    if (lang === 'hcl') {
      addEntity({ id: moduleId, type: 'Config', name: relPath, kind: 'terraform', visibility: 'public', line_range: lineRange(node) });

      // Extract top-level blocks (resource, data, variable, output, module, provider)
      for (const child of node.namedChildren) {
        if (child.type !== 'block') continue;
        const blockType = child.namedChildren[0]; // e.g., "resource"
        const labels = child.namedChildren.filter((c) => c.type === 'string_lit' || c.type === 'identifier');
        const blockName = labels.map((l) => getText(l).replace(/"/g, '')).join('.');
        const fullName = blockType ? `${getText(blockType)}.${blockName}` : blockName;
        const blockId = `${moduleId}:${fullName}`;
        addEntity({ id: blockId, type: 'Config', name: fullName, kind: 'hcl-block', visibility: 'public', line_range: lineRange(child) });
        addRel({ type: 'CONTAINS', source: moduleId, target: blockId });
      }
      return true;
    }

    return false;
  }

  // --- Main Walker ---

  /**
   * Recursively visits declarations, recording entities and relationships.
   *
   * @param {object} node - Current syntax node.
   * @param {string} parentId - Id of the entity that owns `node`.
   */
  function walk(node, parentId) {
    if (ROOT_NODE_TYPES.has(node.type)) {
      // Config files (YAML/HCL)
      if (extractConfig(node)) return;

      // Code files
      addEntity({ id: moduleId, type: 'Module', name: relPath, kind: 'module', visibility: 'public', line_range: lineRange(node) });
      for (const child of node.children) {
        walk(child, moduleId);
      }
      return;
    }

    // Export wrapper (JS/TS)
    if (adapter.exportWrapper && node.type === adapter.exportWrapper) {
      for (const child of node.children) {
        if (child.type !== 'export' && child.type !== 'default') {
          walk(child, parentId);
        }
      }
      return;
    }

    // Imports
    if (extractImports(node)) return;

    // Classes
    if (adapter.classNodes.includes(node.type)) {
      if (lang === 'go') {
        // A Go type_declaration carries no name itself; the name and the
        // underlying type are on each nested type_spec.
        for (const spec of node.namedChildren) {
          if (spec.type !== 'type_spec') continue;
          const typeId = extractClass(spec, parentId);
          if (!typeId) continue;
          const structBody = spec.childForFieldName('type');
          if (structBody) {
            for (const field of structBody.namedChildren) {
              walk(field, typeId);
            }
          }
        }
        return;
      }

      const classId = extractClass(node, parentId);
      if (classId) {
        const body = node.childForFieldName('body');
        if (body) {
          for (const child of body.namedChildren || body.children) {
            walk(child, classId);
          }
        }
      }
      return;
    }

    // Go: method_declaration (receiver-based). Must run before the generic
    // method branch, which would otherwise claim the node and lose the receiver.
    if (lang === 'go' && node.type === 'method_declaration') {
      const methodId = extractGoMethod(node, parentId);
      if (methodId) {
        const body = node.childForFieldName('body');
        if (body) walkBody(body, methodId);
      }
      return;
    }

    // Methods (inside class body)
    if (adapter.methodNodes.includes(node.type)) {
      const methodId = extractMethod(node, parentId);
      if (methodId) {
        const body = node.childForFieldName('body');
        if (body) walkBody(body, methodId);
      }
      return;
    }

    // Class fields (arrow methods vs properties)
    if (adapter.fieldNodes.includes(node.type)) {
      const methodId = extractClassField(node, parentId);
      if (methodId) {
        const value = node.childForFieldName('value');
        if (value) {
          const body = value.childForFieldName('body');
          if (body) walkBody(body, methodId);
        }
      }
      return;
    }

    // Python: function_definition can be top-level or method (inside class)
    if (lang === 'python' && node.type === 'function_definition') {
      const insideClass = parentId && parentId.includes(':') && parentId !== moduleId;
      const ownerId = insideClass
        ? extractMethod(node, parentId) // Inside a class -> method
        : extractFunction(node, parentId); // Top-level -> function
      if (ownerId) {
        const body = node.childForFieldName('body');
        if (body) walkBody(body, ownerId);
      }
      return;
    }

    // Functions (top-level)
    const isArrowParent = Boolean(adapter.arrowFuncParent) && node.type === adapter.arrowFuncParent;
    if (adapter.functionNodes.includes(node.type) || isArrowParent) {
      const funcId = extractFunction(node, parentId);
      if (funcId) {
        const body = isArrowParent
          ? node // For lexical_declaration, walk the whole thing
          : node.childForFieldName('body');
        if (body) walkBody(body, funcId);
        return;
      }
      // Module-level variable (JS/TS only)
      if (parentId === moduleId && isArrowParent) {
        // Not a function, might be a module-level const
        return;
      }
    }

    // Java: package_declaration
    if (lang === 'java' && node.type === 'package_declaration') return;

    // Top-level calls
    extractCalls(node, parentId);
    for (const child of node.children) {
      walk(child, parentId);
    }
  }

  /** Walks a function/method body recording CALLS edges only. */
  function walkBody(node, parentId) {
    if (!node) return;
    extractCalls(node, parentId);
    for (const child of node.children) {
      walkBody(child, parentId);
    }
  }

  walk(tree.rootNode, moduleId);

  return { file: filePath, language: lang, entities, relationships };
}

// --- CLI ---
if (require.main === module) {
  const filePath = process.argv[2];
  const repoRoot = process.argv[3] || '/app/src';
  if (!filePath) {
    console.error('Usage: node extract.js <file-path> [repo-root]');
    process.exit(1);
  }
  const result = extract(filePath, repoRoot);
  console.log(JSON.stringify(result, null, 2));
}

module.exports = { extract };