- extract-patterns.js: mines layered arch, ArgoCD appsets, cloud regions, CIDR allocations, naming conventions, sync waves, tech stack from code
- agent-kb.js: token-efficient JSON rendering of same doc tree
- eval-confluence-ref-questions.json: 32 reference-only benchmark questions
- wiggum-v2.sh: Ralph Wiggum loop targeting confluence baseline (77.8%)
- docs/human-ux-spec.md: BMad UX designer spec for human doc structure
- Eval results: V2 at 28.7% vs confluence 77.8% baseline
- Hub/spoke ownership now correctly extracted (95% on that question)
- Naming conventions, regions, CIDRs surfaced in system-architecture.md
340 lines
10 KiB
JavaScript
340 lines
10 KiB
JavaScript
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
/**
|
|
* Phase 9: Terraform Extraction
|
|
* Multi-pass regex parser for .tf files.
|
|
* Extracts: modules, resources, variables, outputs, data sources, locals, providers.
|
|
* Detects cross-references: var.X, module.X, local.X, data.X, resource refs.
|
|
*/
|
|
|
|
// Terraform block keywords this module cares about, one per line for easy diffing.
const BLOCK_TYPES = [
  'resource',
  'data',
  'module',
  'variable',
  'output',
  'provider',
  'locals',
  'terraform',
];
|
|
|
|
/**
 * Find the line that holds the closing brace of the block opened at `startLine`.
 *
 * Braces are only counted when they are structural. Braces that appear inside
 * `#` / `//` line comments, block comments, double-quoted strings (including
 * `${...}` / `%{...}` template sequences) and heredoc bodies are ignored, so a
 * stray `}` in a description string or a commented-out line no longer ends the
 * block early.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} startLine - Zero-based index of the line containing the block's opening `{`.
 * @returns {number} Zero-based index of the line where brace depth returns to
 *   zero, or `lines.length - 1` when no matching brace is found.
 */
function findBlockEnd(lines, startLine) {
  let depth = 0;
  let inBlockComment = false;
  let heredocEnd = null;

  for (let i = startLine; i < lines.length; i++) {
    const line = lines[i];

    // Inside a heredoc every line is string content until the terminator line.
    if (heredocEnd !== null) {
      if (line.trim() === heredocEnd) heredocEnd = null;
      continue;
    }

    let j = 0;
    while (j < line.length) {
      if (inBlockComment) {
        const close = line.indexOf('*/', j);
        if (close === -1) break; // comment continues on the next line
        inBlockComment = false;
        j = close + 2;
        continue;
      }

      const ch = line[j];
      const next = line[j + 1];

      // Line comment: nothing after it on this line is structural.
      if (ch === '#' || (ch === '/' && next === '/')) break;

      if (ch === '/' && next === '*') {
        inBlockComment = true;
        j += 2;
        continue;
      }

      if (ch === '"') {
        j = skipQuotedString(line, j) + 1;
        continue;
      }

      if (ch === '<' && next === '<') {
        // A heredoc introducer must be the last thing on the line (trim also drops a trailing \r).
        const marker = /^<<-?([A-Za-z_][A-Za-z0-9_-]*)\s*$/.exec(line.slice(j));
        if (marker) {
          heredocEnd = marker[1];
          break;
        }
      }

      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) return i;
      }
      j++;
    }
  }

  return lines.length - 1; // fallback: unbalanced input
}

/**
 * Return the index of the quote that closes the string opened at `openIndex`,
 * or `line.length` when the string is not terminated on this line.
 */
function skipQuotedString(line, openIndex) {
  let k = openIndex + 1;
  while (k < line.length) {
    const ch = line[k];
    if (ch === '\\') {
      k += 2; // escaped character, e.g. \"
      continue;
    }
    if (ch === '"') return k;
    if (ch === '$' || ch === '%') {
      if (line[k + 1] === ch) {
        k += 2; // "$${" / "%%{" are literal escapes, not template openers
        continue;
      }
      if (line[k + 1] === '{') {
        k = skipInterpolation(line, k + 2);
        continue;
      }
    }
    k++;
  }
  return line.length;
}

/**
 * Skip a template sequence whose opening `${` / `%{` ends just before `from`.
 * Returns the index just past the matching `}` (or `line.length` if unterminated).
 * Strings nested inside the sequence are skipped so their quotes and braces
 * do not confuse the outer scan.
 */
function skipInterpolation(line, from) {
  let nested = 1;
  let k = from;
  while (k < line.length) {
    const ch = line[k];
    if (ch === '"') {
      k = skipQuotedString(line, k) + 1;
      continue;
    }
    if (ch === '{') {
      nested++;
    } else if (ch === '}') {
      nested--;
      if (nested === 0) return k + 1;
    }
    k++;
  }
  return line.length;
}
|
|
|
|
/**
 * Join a run of source lines into one string.
 *
 * Both bounds are inclusive, so the returned text contains the block's header
 * line (with the opening `{`) as well as the line holding the closing `}`.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} startLine - Zero-based index of the first line to include.
 * @param {number} endLine - Zero-based index of the last line to include.
 * @returns {string} The selected lines joined with '\n'.
 */
function extractBlockBody(lines, startLine, endLine) {
  return lines.slice(startLine, endLine + 1).join('\n');
}
|
|
|
|
/**
 * Pull `type`, `default` and `description` out of a variable block's text.
 *
 * Each attribute is taken from the first line that matches it; only the rest
 * of that line is captured, so multi-line values are truncated to their first
 * line. A default wrapped in double quotes is returned without the quotes.
 *
 * @param {string} body - Text of the variable block.
 * @returns {{type: ?string, default: ?string, description: ?string}}
 *   Attributes that were not found are `null`.
 */
function parseVariableBlock(body) {
  // First capture group of `pattern` in the body, or null when there is no match.
  const capture = (pattern) => {
    const found = body.match(pattern);
    return found ? found[1] : null;
  };

  const rawType = capture(/^\s*type\s*=\s*(.+)/m);
  const rawDefault = capture(/^\s*default\s*=\s*(.+)/m);
  const description = capture(/^\s*description\s*=\s*"([^"]+)"/m);

  return {
    type: rawType === null ? null : rawType.trim(),
    default: rawDefault === null ? null : rawDefault.trim().replace(/^"(.*)"$/, '$1'),
    description,
  };
}
|
|
|
|
/**
 * Read the quoted `source` and `version` attributes from a module block's text.
 *
 * @param {string} body - Text of the module block.
 * @returns {{source: ?string, version: ?string}} The first quoted value found
 *   for each attribute, or `null` when the attribute is absent or not a
 *   non-empty double-quoted literal.
 */
function parseModuleBlock(body) {
  const [sourceHit, versionHit] = [
    /^\s*source\s*=\s*"([^"]+)"/m,
    /^\s*version\s*=\s*"([^"]+)"/m,
  ].map((pattern) => body.match(pattern));

  return {
    source: sourceHit ? sourceHit[1] : null,
    version: versionHit ? versionHit[1] : null,
  };
}
|
|
|
|
/**
 * Read the `value` expression and quoted `description` from an output block's text.
 *
 * `value` is the trimmed remainder of the first matching line (multi-line
 * expressions are cut at the line break); `description` must be a non-empty
 * double-quoted literal.
 *
 * @param {string} body - Text of the output block.
 * @returns {{value: ?string, description: ?string}} Missing attributes are `null`.
 */
function parseOutputBlock(body) {
  const valueHit = body.match(/^\s*value\s*=\s*(.+)/m);
  const descriptionHit = body.match(/^\s*description\s*=\s*"([^"]+)"/m);

  return {
    value: valueHit === null ? null : valueHit[1].trim(),
    description: descriptionHit === null ? null : descriptionHit[1],
  };
}
|
|
|
|
/**
 * Collect the distinct Terraform references that appear in a block's text.
 *
 * Recognized forms and the `ref` key produced for each:
 *   var.NAME            -> { type: 'variable', ref: 'variable.NAME' }
 *   local.NAME          -> { type: 'local',    ref: 'local.NAME' }
 *   module.NAME         -> { type: 'module',   ref: 'module.NAME' }
 *   data.TYPE.NAME      -> { type: 'data',     ref: 'data.TYPE.NAME' }
 *   TYPE.NAME.<attr>    -> { type: 'resource', ref: 'TYPE.NAME' }  (TYPE must contain `_`)
 *
 * Every pattern must start at an identifier boundary: it may not be preceded
 * by a word character, `.` or `-`. Without that guard `data.aws_ami.ubuntu.id`
 * also produced a bogus resource ref (`ws_ami.ubuntu`) and `envvar.foo` was
 * read as `var.foo`.
 *
 * @param {string} body - Text of the block to scan.
 * @returns {Array<{type: string, ref: string}>} Unique references, grouped by
 *   pattern in the order listed above and by first appearance within each group.
 */
function extractReferences(body) {
  const refs = [];
  const seen = new Set();

  const patterns = [
    { regex: /(?<![\w.-])var\.([a-zA-Z0-9_-]+)/g, type: 'variable' },
    { regex: /(?<![\w.-])local\.([a-zA-Z0-9_-]+)/g, type: 'local' },
    { regex: /(?<![\w.-])module\.([a-zA-Z0-9_-]+)/g, type: 'module' },
    { regex: /(?<![\w.-])data\.([a-zA-Z0-9_-]+)\.([a-zA-Z0-9_-]+)/g, type: 'data' },
    // Resource references: aws_instance.web.id → resource ref.
    // The "not preceded by `.`" guard also excludes the tails of
    // data./module./var./local. expressions.
    { regex: /(?<![\w.-])([a-z][a-z0-9]*_[a-z][a-z0-9_]*)\.([a-zA-Z0-9_-]+)\./g, type: 'resource' },
  ];

  for (const { regex, type } of patterns) {
    let m;
    while ((m = regex.exec(body)) !== null) {
      let key;
      if (type === 'data') {
        key = `data.${m[1]}.${m[2]}`;
      } else if (type === 'resource') {
        key = `${m[1]}.${m[2]}`;
      } else {
        key = `${type}.${m[1]}`;
      }

      if (!seen.has(key)) {
        seen.add(key);
        refs.push({ type, ref: key });
      }
    }
  }

  return refs;
}
|
|
|
|
/**
 * Extract Terraform entities and relationships from a single .tf file.
 *
 * The file is scanned line by line for block headers (`resource`, `data`,
 * `module`, `variable`, `output`, `provider`, `locals`, `terraform`). Each
 * recognized block becomes a `Config` entity contained by the file entity,
 * and every reference found in the block text becomes an unresolved
 * `DEPENDS_ON` relationship (see `resolveReferences`). After a block is
 * recorded the scan jumps past its end, so blocks nested inside a recognized
 * block are not reported separately.
 *
 * @param {string} filePath - Path to the .tf file to read.
 * @param {string} repoRoot - Base path of the repository; entity ids use the
 *   file path relative to this directory.
 * @returns {{file: string, language: string, entities: Object[], relationships: Object[]}}
 *   `file` is `filePath` as given and `language` is always 'hcl'.
 * @throws {Error} Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function extractTerraform(filePath, repoRoot) {
  const sourceCode = fs.readFileSync(filePath, 'utf8');
  const relFilePath = path.relative(repoRoot, filePath);
  const lines = sourceCode.split('\n');
  const entities = [];
  const relationships = [];

  // File-level entity
  const fileId = relFilePath;
  entities.push({
    id: fileId,
    type: 'Config',
    name: relFilePath,
    kind: 'terraform',
    visibility: 'public',
    line_range: [1, lines.length]
  });

  // Pass 1: block declarations. Accepted header shapes:
  //   resource "type" "name" {     data "type" "name" {
  //   module "name" {              variable "name" {
  //   output "name" {              provider "name" {
  //   locals {                     terraform {
  const blockRegex = /^(resource|data|module|variable|output|provider|locals|terraform)\s*(?:"([^"]+)")?(?:\s+"([^"]+)")?\s*\{/;

  // True while scanning the interior of a /* ... */ comment that spans lines,
  // so commented-out blocks are not reported as real entities.
  let inBlockComment = false;

  for (let i = 0; i < lines.length; i++) {
    const trimmed = lines[i].trimStart();

    if (inBlockComment) {
      if (trimmed.includes('*/')) inBlockComment = false;
      continue;
    }

    // Skip comments
    if (trimmed.startsWith('#') || trimmed.startsWith('//')) continue;
    if (trimmed.startsWith('/*')) {
      inBlockComment = !trimmed.includes('*/', 2);
      continue;
    }

    const match = trimmed.match(blockRegex);
    if (!match) continue;

    const blockType = match[1];
    const label1 = match[2] || null;
    const label2 = match[3] || null;
    const blockEnd = findBlockEnd(lines, i);
    const body = extractBlockBody(lines, i, blockEnd);

    const described = describeTerraformBlock(blockType, label1, label2, body, fileId, i + 1);
    if (!described) continue;
    const { entityId, entityName, entityKind, extra } = described;

    entities.push({
      id: entityId,
      type: 'Config',
      name: entityName,
      kind: entityKind,
      visibility: 'public',
      line_range: [i + 1, blockEnd + 1],
      ...extra
    });

    relationships.push({
      type: 'CONTAINS',
      source: fileId,
      target: entityId
    });

    // Pass 2: cross-references found in the block text
    for (const ref of extractReferences(body)) {
      relationships.push({
        type: 'DEPENDS_ON',
        source: entityId,
        target: `${ref.ref}`, // Will be resolved to full IDs during graph merge
        _refType: ref.type,
        _unresolved: true
      });
    }

    // Skip to end of block
    i = blockEnd;
  }

  return { file: filePath, language: 'hcl', entities, relationships };
}

/**
 * Build the id, display name, kind and kind-specific fields for one block.
 * Returns null for a block type that is not handled.
 */
function describeTerraformBlock(blockType, label1, label2, body, fileId, lineNumber) {
  switch (blockType) {
    case 'resource': {
      const entityName = label2 ? `${label1}.${label2}` : label1;
      return {
        entityId: `${fileId}:resource.${entityName}`,
        entityName,
        entityKind: 'terraform-resource',
        extra: { resourceType: label1, resourceName: label2 }
      };
    }
    case 'data': {
      const entityName = label2 ? `${label1}.${label2}` : label1;
      return {
        entityId: `${fileId}:data.${entityName}`,
        entityName,
        entityKind: 'terraform-data',
        extra: { dataType: label1, dataName: label2 }
      };
    }
    case 'module': {
      const modInfo = parseModuleBlock(body);
      return {
        entityId: `${fileId}:module.${label1}`,
        entityName: label1,
        entityKind: 'terraform-module',
        extra: { source: modInfo.source, version: modInfo.version }
      };
    }
    case 'variable': {
      const varInfo = parseVariableBlock(body);
      return {
        entityId: `${fileId}:variable.${label1}`,
        entityName: label1,
        entityKind: 'terraform-variable',
        extra: { varType: varInfo.type, default: varInfo.default, description: varInfo.description }
      };
    }
    case 'output': {
      const outInfo = parseOutputBlock(body);
      return {
        entityId: `${fileId}:output.${label1}`,
        entityName: label1,
        entityKind: 'terraform-output',
        extra: { value: outInfo.value, description: outInfo.description }
      };
    }
    case 'provider':
      return {
        entityId: `${fileId}:provider.${label1}`,
        entityName: label1,
        entityKind: 'terraform-provider',
        extra: {}
      };
    case 'locals':
      return {
        // A file may hold several locals blocks, so the id is keyed by line number.
        entityId: `${fileId}:locals.${lineNumber}`,
        entityName: 'locals',
        entityKind: 'terraform-locals',
        extra: { localNames: collectTopLevelLocalNames(body) }
      };
    case 'terraform':
      return {
        entityId: `${fileId}:terraform`,
        entityName: 'terraform',
        entityKind: 'terraform-config',
        extra: {}
      };
    default:
      return null;
  }
}

/**
 * List the names assigned directly inside a locals block.
 * Only assignments at brace depth 1 count; keys of nested maps/objects
 * (e.g. `Name` in `tags = { Name = "x" }`) are not locals and are skipped.
 * Brace counting is the same simplified per-character scan used elsewhere
 * in this file (strings and comments are not special-cased).
 */
function collectTopLevelLocalNames(body) {
  const names = [];
  let depth = 0;
  for (const line of body.split('\n')) {
    if (depth === 1) {
      const assignment = /^\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=/.exec(line);
      if (assignment) names.push(assignment[1]);
    }
    for (const ch of line) {
      if (ch === '{') depth++;
      else if (ch === '}') depth--;
    }
  }
  return names;
}
|
|
|
|
/**
 * Resolve unresolved DEPENDS_ON relationships to full entity ids.
 *
 * Every relationship flagged `_unresolved` has its `target` (for example
 * `variable.region`, `module.vpc`, `aws_instance.web`) looked up among the
 * entities of all `results`. Definitions from files in the same directory as
 * the referencing file are preferred, because Terraform names are scoped to a
 * module directory; a repository-wide lookup is kept as a fallback. Without
 * the directory preference, two modules declaring a variable with the same
 * name would overwrite each other and references could point into the wrong
 * module.
 *
 * Relationships are updated in place: `target` is replaced when a definition
 * is found (left untouched otherwise), and the `_unresolved` / `_refType`
 * markers are always removed.
 *
 * @param {Array<{file: string, entities: Object[], relationships: Object[]}>} results
 *   Extraction results, as returned by `extractTerraform`.
 * @returns {Array} The same `results` array.
 */
function resolveReferences(results) {
  const globalLookup = new Map();
  const lookupByDir = new Map();

  const dirOf = (result) => (typeof result.file === 'string' ? path.dirname(result.file) : '');

  // Build lookups: shortRef → full entityId, both per directory and repo-wide.
  for (const result of results) {
    const dir = dirOf(result);
    let scoped = lookupByDir.get(dir);
    if (!scoped) {
      scoped = new Map();
      lookupByDir.set(dir, scoped);
    }
    const register = (key, id) => {
      globalLookup.set(key, id);
      scoped.set(key, id);
    };

    for (const entity of result.entities) {
      switch (entity.kind) {
        case 'terraform-variable':
          register(`variable.${entity.name}`, entity.id);
          break;
        case 'terraform-module':
          register(`module.${entity.name}`, entity.id);
          break;
        case 'terraform-resource':
          register(`resource.${entity.name}`, entity.id);
          // Also register as type.name, the form resource refs are extracted in.
          if (entity.resourceType && entity.resourceName) {
            register(`${entity.resourceType}.${entity.resourceName}`, entity.id);
          }
          break;
        case 'terraform-data':
          register(`data.${entity.name}`, entity.id);
          break;
        case 'terraform-locals':
          if (entity.localNames) {
            for (const localName of entity.localNames) {
              register(`local.${localName}`, entity.id);
            }
          }
          break;
        default:
          break;
      }
    }
  }

  const firstHit = (lookup, keys) => {
    for (const key of keys) {
      const id = lookup.get(key);
      if (id) return id;
    }
    return undefined;
  };

  // Resolve
  for (const result of results) {
    const scoped = lookupByDir.get(dirOf(result));
    for (const rel of result.relationships) {
      if (!rel._unresolved) continue;

      const keys = [rel.target, `${rel._refType}.${rel.target}`];
      const resolved = firstHit(scoped, keys) || firstHit(globalLookup, keys);
      if (resolved) {
        rel.target = resolved;
      }
      delete rel._unresolved;
      delete rel._refType;
    }
  }

  return results;
}
|
|
|
|
// CLI: `node extract-terraform.js <file.tf> [repoRoot]` prints the extraction
// result for one file as pretty-printed JSON. Runs only when this file is the
// entry script, not when it is required as a module.
if (require.main === module) {
  const [filePath, rootArg] = process.argv.slice(2);
  const repoRoot = rootArg || process.cwd();

  if (!filePath) {
    console.error('Usage: node extract-terraform.js <file.tf> [repoRoot]');
    process.exit(1);
  }

  console.log(JSON.stringify(extractTerraform(filePath, repoRoot), null, 2));
}
|
|
|
|
module.exports = { extractTerraform, resolveReferences };
|