Phase 1: extract.js — tree-sitter AST parser (TS/JS/Python/Go/Java/Bash) + config parsers (YAML/HCL).
Phase 2: graph.js — in-memory directed graph store with build/query/diff CLI.
Phase 3: namespace.js — cross-repo namespace registry with 3-tier resolution.
Phase 4: semantic-diff.js — categorized diffs with impact scoring (0-100).
Phase 5: pipeline.js — batch extraction, incremental diffing, benchmarking.
Benchmark: 4,325 files, 21,646 nodes, 133,979 edges in 67s (15ms/file).
BMad SPA reviews: all phases GO.
103 lines
2.8 KiB
JavaScript
103 lines
2.8 KiB
JavaScript
const fs = require('fs');
|
|
const path = require('path');
|
|
const jsYaml = require('js-yaml');
|
|
|
|
/**
 * Matches a mapping key that starts in column 0 of a YAML source line.
 * Capture groups: 1 = double-quoted key, 2 = single-quoted key, 3 = plain key.
 * Comment lines (leading `#`) and indented (nested) keys never match.
 */
const TOP_LEVEL_YAML_KEY = /^(?:"([^"]+)"|'([^']+)'|([^\s#].*?))\s*:(?:\s|$)/;

/**
 * Builds a lookup from top-level key text to the 0-based indexes of the
 * source lines on which that key appears, in ascending order.
 *
 * @param {string[]} lines - The YAML source split on '\n'.
 * @returns {Map<string, number[]>} Key text -> ascending line indexes.
 */
function indexTopLevelYamlKeys(lines) {
  const index = new Map();
  lines.forEach((line, i) => {
    const m = TOP_LEVEL_YAML_KEY.exec(line);
    if (!m) {
      return;
    }
    // Exactly one alternative matched, and each requires a non-empty key.
    const key = m[1] || m[2] || m[3];
    const hits = index.get(key);
    if (hits) {
      hits.push(i);
    } else {
      index.set(key, [i]);
    }
  });
  return index;
}

/**
 * Extracts a file-level Config entity plus one 'yaml-key' entity per
 * top-level key of every document in a YAML file, linking each key to the
 * file with a CONTAINS relationship.
 *
 * Line numbers are resolved textually (there is no YAML AST here): each key is
 * looked up among the column-0 keys of the source, searching from the point
 * where the previous document's keys ended. A key that cannot be located
 * (e.g. flow-style mappings, escaped or non-canonical key spellings) falls
 * back to the first line of its document's search window, which is line 1 for
 * a single-document file.
 *
 * @param {string} filePath - Path of the YAML file to read.
 * @param {string} repoRoot - Directory that entity ids are made relative to.
 * @returns {{file: string, language: string, entities: Object[], relationships: Object[]}}
 *   On a YAML parse error the error is logged and only the file-level entity
 *   is returned.
 * @throws {Error} If the file cannot be read (readFileSync is not guarded).
 */
function extractYaml(filePath, repoRoot) {
  const sourceCode = fs.readFileSync(filePath, 'utf8');
  const relPath = path.relative(repoRoot, filePath);
  const moduleId = relPath;
  const lines = sourceCode.split('\n');
  const entities = [];
  const relationships = [];

  entities.push({
    id: moduleId,
    type: 'Config',
    name: relPath,
    kind: 'yaml-config',
    visibility: 'public',
    line_range: [1, lines.length]
  });

  try {
    // Attempt to load multiple documents.
    // NOTE(review): js-yaml >= 4 parses with a safe schema by default; with
    // js-yaml 3.x, loadAll can instantiate JS-specific types from untrusted
    // input (safeLoadAll is the safe variant there) — confirm the installed version.
    const docs = jsYaml.loadAll(sourceCode);
    const keyLines = indexTopLevelYamlKeys(lines);

    // 0-based index of the first line that may belong to the current document.
    let docStart = 0;

    for (const doc of docs) {
      if (!doc || typeof doc !== 'object') {
        continue;
      }

      let lastHit = -1;
      for (const key of Object.keys(doc)) {
        const keyId = `${moduleId}:${key}`;

        // Search from the document start rather than from the previous key:
        // Object.keys() does not always follow source order (integer-like
        // keys are enumerated first).
        const candidates = keyLines.get(key) || [];
        const hit = candidates.find((idx) => idx >= docStart);
        const lineNum = hit === undefined
          ? Math.min(docStart + 1, lines.length)
          : hit + 1;
        if (hit !== undefined && hit > lastHit) {
          lastHit = hit;
        }

        entities.push({
          id: keyId,
          type: 'Config',
          name: key,
          kind: 'yaml-key',
          visibility: 'public',
          line_range: [lineNum, lineNum] // Key line only; value extent is not tracked
        });
        relationships.push({
          type: 'CONTAINS',
          source: moduleId,
          target: keyId
        });
      }

      // The next document's keys can only appear after this document's last key.
      if (lastHit >= 0) {
        docStart = lastHit + 1;
      }
    }
  } catch (e) {
    // Log warning, return base module
    console.error(`YAML parse error in ${relPath}: ${e.message}`);
  }

  return { file: filePath, language: 'yaml', entities, relationships };
}
|
|
|
|
/**
 * Scans an HCL/Terraform file line by line and reports a file-level Config
 * entity plus one 'hcl-block' entity for every labelled block header
 * (resource, data, module, variable, output, provider) that begins in
 * column 0 and has its opening brace on the same line.
 *
 * @param {string} filePath - Path of the HCL file to read.
 * @param {string} repoRoot - Directory that entity ids are made relative to.
 * @returns {{file: string, language: string, entities: Object[], relationships: Object[]}}
 *   Each block entity is linked to the file entity by a CONTAINS relationship.
 * @throws {Error} If the file cannot be read.
 */
function extractHcl(filePath, repoRoot) {
  const text = fs.readFileSync(filePath, 'utf8');
  const moduleId = path.relative(repoRoot, filePath);
  const rows = text.split('\n');

  // Shared shape for every entity emitted by this extractor.
  const makeEntity = (id, name, kind, firstLine, lastLine) => ({
    id,
    type: 'Config',
    name,
    kind,
    visibility: 'public',
    line_range: [firstLine, lastLine]
  });

  const entities = [makeEntity(moduleId, moduleId, 'terraform', 1, rows.length)];
  const relationships = [];

  // Header of a labelled block: keyword, one quoted label, optional second
  // quoted label, then '{' (e.g., resource "aws_s3_bucket" "my_bucket" {)
  const headerPattern = /^(resource|data|module|variable|output|provider)\s+"([^"]+)"(?:\s+"([^"]+)")?\s*\{/;

  rows.forEach((row, index) => {
    const found = headerPattern.exec(row);
    if (found === null) {
      return;
    }

    const [, keyword, firstLabel, secondLabel] = found;
    // The second label is absent (undefined) for single-label blocks; the
    // keyword and first label are always non-empty, so only it can be dropped.
    const fullName = [keyword, firstLabel, secondLabel].filter(Boolean).join('.');
    const blockId = `${moduleId}:${fullName}`;
    const headerLine = index + 1;

    // Only the header line is recorded; the end of the block is not located.
    entities.push(makeEntity(blockId, fullName, 'hcl-block', headerLine, headerLine));
    relationships.push({ type: 'CONTAINS', source: moduleId, target: blockId });
  });

  return { file: filePath, language: 'hcl', entities, relationships };
}
|
|
|
|
// Public API: the two config-file extractors defined in this module.
module.exports = { extractYaml, extractHcl };
|