feat: confluence benchmark, pattern extractor, agent KB, UX spec
- extract-patterns.js: mines layered arch, ArgoCD appsets, cloud regions, CIDR allocations, naming conventions, sync waves, tech stack from code - agent-kb.js: token-efficient JSON rendering of same doc tree - eval-confluence-ref-questions.json: 32 reference-only benchmark questions - wiggum-v2.sh: Ralph Wiggum loop targeting confluence baseline (77.8%) - docs/human-ux-spec.md: BMad UX designer spec for human doc structure - Eval results: V2 at 28.7% vs confluence 77.8% baseline - Hub/spoke ownership now correctly extracted (95% on that question) - Naming conventions, regions, CIDRs surfaced in system-architecture.md
This commit is contained in:
339
extract-terraform.js
Normal file
339
extract-terraform.js
Normal file
@@ -0,0 +1,339 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
/**
|
||||
* Phase 9: Terraform Extraction
|
||||
* Multi-pass regex parser for .tf files.
|
||||
* Extracts: modules, resources, variables, outputs, data sources, locals, providers.
|
||||
* Detects cross-references: var.X, module.X, local.X, data.X, resource refs.
|
||||
*/
|
||||
|
||||
// HCL block keywords this extractor cares about. The same eight keywords
// appear in the block-header regex inside extractTerraform.
const BLOCK_TYPES = [
  'resource',
  'data',
  'module',
  'variable',
  'output',
  'provider',
  'locals',
  'terraform',
];
|
||||
|
||||
/**
 * Find the line on which the block opened at `startLine` is closed.
 *
 * Braces are only counted in code positions. Text inside double-quoted
 * strings, `#` and `//` line comments, slash-star block comments and heredoc
 * bodies is ignored, so a stray `}` in a string or comment can no longer end
 * the block early.
 *
 * @param {string[]} lines - The file split into lines.
 * @param {number} startLine - 0-based index of the line holding the opening `{`.
 * @returns {number} 0-based index of the line holding the matching `}`, or
 *   `lines.length - 1` when the block is never closed.
 */
function findBlockEnd(lines, startLine) {
  let depth = 0;
  let inBlockComment = false;
  let heredocEnd = null; // terminator word while inside a heredoc body

  for (let i = startLine; i < lines.length; i++) {
    const line = lines[i];

    // Heredoc bodies are raw text: skip them until the terminator line.
    if (heredocEnd !== null) {
      if (line.trim() === heredocEnd) heredocEnd = null;
      continue;
    }

    // Quoted strings cannot span lines, so string state restarts on each line.
    let inString = false;

    for (let j = 0; j < line.length; j++) {
      const ch = line[j];
      const next = line[j + 1];

      if (inBlockComment) {
        if (ch === '*' && next === '/') {
          inBlockComment = false;
          j++;
        }
        continue;
      }

      if (inString) {
        if (ch === '\\') j++; // skip the escaped character
        else if (ch === '"') inString = false;
        continue;
      }

      if (ch === '"') {
        inString = true;
        continue;
      }

      // Line comment: nothing else on this line is code.
      if (ch === '#' || (ch === '/' && next === '/')) break;

      if (ch === '/' && next === '*') {
        inBlockComment = true;
        j++;
        continue;
      }

      if (ch === '<' && next === '<') {
        const heredoc = /^<<-?([A-Za-z_][A-Za-z0-9_-]*)/.exec(line.slice(j));
        if (heredoc) {
          heredocEnd = heredoc[1];
          break;
        }
      }

      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) return i;
      }
    }
  }

  return lines.length - 1; // unterminated block: fall back to the last line
}
|
||||
|
||||
/**
 * Return the text of a block as a single newline-joined string.
 *
 * The slice is inclusive on both ends, so the result contains the header line
 * at `startLine` and the closing line at `endLine`, not just the text between
 * the braces.
 *
 * @param {string[]} lines - The file split into lines.
 * @param {number} startLine - 0-based index of the first line to include.
 * @param {number} endLine - 0-based index of the last line to include.
 * @returns {string} The selected lines joined with '\n'.
 */
function extractBlockBody(lines, startLine, endLine) {
  const stop = endLine + 1;
  return lines.slice(startLine, stop).join('\n');
}
|
||||
|
||||
/**
 * Pull the `type`, `default` and `description` attributes out of a variable
 * block's text.
 *
 * Each attribute is taken from the first line that starts with its name. Only
 * the remainder of that line is captured for `type` and `default`; a
 * `default` wrapped in double quotes has the quotes removed. `description`
 * is only recognised when written as a double-quoted string.
 *
 * @param {string} body - Text of the variable block.
 * @returns {{type: ?string, default: ?string, description: ?string}}
 *   Attributes that were not found are null.
 */
function parseVariableBlock(body) {
  const typeHit = body.match(/^\s*type\s*=\s*(.+)/m);
  const defaultHit = body.match(/^\s*default\s*=\s*(.+)/m);
  const descHit = body.match(/^\s*description\s*=\s*"([^"]+)"/m);

  return {
    type: typeHit ? typeHit[1].trim() : null,
    default: defaultHit ? defaultHit[1].trim().replace(/^"(.*)"$/, '$1') : null,
    description: descHit ? descHit[1] : null,
  };
}
|
||||
|
||||
/**
 * Pull the `source` and `version` attributes out of a module block's text.
 *
 * Both attributes are only recognised when written as double-quoted strings
 * at the start of a line; the quotes are not part of the returned value.
 *
 * @param {string} body - Text of the module block.
 * @returns {{source: ?string, version: ?string}} Missing attributes are null.
 */
function parseModuleBlock(body) {
  const sourceHit = body.match(/^\s*source\s*=\s*"([^"]+)"/m);
  const versionHit = body.match(/^\s*version\s*=\s*"([^"]+)"/m);

  return {
    source: sourceHit ? sourceHit[1] : null,
    version: versionHit ? versionHit[1] : null,
  };
}
|
||||
|
||||
/**
 * Pull the `value` and `description` attributes out of an output block's text.
 *
 * `value` is the trimmed remainder of the first line that starts with
 * `value =`. `description` is only recognised as a double-quoted string.
 *
 * @param {string} body - Text of the output block.
 * @returns {{value: ?string, description: ?string}} Missing attributes are null.
 */
function parseOutputBlock(body) {
  const valueHit = body.match(/^\s*value\s*=\s*(.+)/m);
  const descHit = body.match(/^\s*description\s*=\s*"([^"]+)"/m);

  return {
    value: valueHit ? valueHit[1].trim() : null,
    description: descHit ? descHit[1] : null,
  };
}
|
||||
|
||||
/**
 * Collect the distinct cross-references that appear in a block's text.
 *
 * Recognised forms and the `ref` string produced for each:
 *   var.NAME          -> { type: 'variable', ref: 'variable.NAME' }
 *   local.NAME        -> { type: 'local',    ref: 'local.NAME' }
 *   module.NAME       -> { type: 'module',   ref: 'module.NAME' }
 *   data.TYPE.NAME    -> { type: 'data',     ref: 'data.TYPE.NAME' }
 *   TYPE.NAME.<attr>  -> { type: 'resource', ref: 'TYPE.NAME' }
 *     (TYPE must contain an underscore, and a further `.` must follow NAME)
 *
 * Every pattern must start at an identifier boundary: the match may not be
 * preceded by a word character, `-` or `.`. Without that, the tail of a
 * longer name was reported as a reference of its own (for example
 * `metadata.0.name` as data source `0.name`, or `data.aws_ami.ubuntu.id` as
 * resource `ws_ami.ubuntu`).
 *
 * Results are grouped in the pattern order above, each group in order of
 * first appearance, with duplicates removed.
 *
 * @param {string} body - Text of the block to scan.
 * @returns {Array<{type: string, ref: string}>} Distinct references.
 */
function extractReferences(body) {
  const patterns = [
    { regex: /(?<![\w.-])var\.([a-zA-Z0-9_-]+)/g, type: 'variable' },
    { regex: /(?<![\w.-])local\.([a-zA-Z0-9_-]+)/g, type: 'local' },
    { regex: /(?<![\w.-])module\.([a-zA-Z0-9_-]+)/g, type: 'module' },
    { regex: /(?<![\w.-])data\.([a-zA-Z0-9_-]+)\.([a-zA-Z0-9_-]+)/g, type: 'data' },
    // Resource references: aws_instance.web.id → resource ref.
    // The boundary lookbehind also rejects names reached through var./local./module./data.
    { regex: /(?<![\w.-])([a-z][a-z0-9]*_[a-z][a-z0-9_]*)\.([a-zA-Z0-9_-]+)\./g, type: 'resource' },
  ];

  const refs = [];
  const seen = new Set();

  for (const { regex, type } of patterns) {
    let m;
    while ((m = regex.exec(body)) !== null) {
      let key;
      if (type === 'data') {
        key = `data.${m[1]}.${m[2]}`;
      } else if (type === 'resource') {
        key = `${m[1]}.${m[2]}`;
      } else {
        key = `${type}.${m[1]}`;
      }

      if (seen.has(key)) continue;
      seen.add(key);
      refs.push({ type, ref: key });
    }
  }

  return refs;
}
|
||||
|
||||
/**
 * Parse one Terraform (.tf) file into graph entities and relationships.
 *
 * The result always starts with a file-level `Config` entity. Each block
 * header found outside an already-extracted block (resource, data, module,
 * variable, output, provider, locals, terraform) adds one entity, a CONTAINS
 * edge from the file, and one DEPENDS_ON edge per cross-reference in the
 * block text. DEPENDS_ON targets are left in short reference form and are
 * tagged with `_unresolved` and `_refType`.
 *
 * Lines that begin with `#`, `//` or the slash-star opener are skipped. Only
 * the line on which a block comment opens is skipped, not the lines that
 * continue it.
 *
 * @param {string} filePath - Absolute path to the .tf file (read as UTF-8).
 * @param {string} repoRoot - Base path of the repository; ids are relative to it.
 * @returns {Object} { file, language: 'hcl', entities: [...], relationships: [...] }
 */
function extractTerraform(filePath, repoRoot) {
  const sourceCode = fs.readFileSync(filePath, 'utf8');
  const relFilePath = path.relative(repoRoot, filePath);
  const lines = sourceCode.split('\n');
  const fileId = relFilePath;

  // The file itself is the first entity; every block hangs off it.
  const entities = [
    {
      id: fileId,
      type: 'Config',
      name: relFilePath,
      kind: 'terraform',
      visibility: 'public',
      line_range: [1, lines.length],
    },
  ];
  const relationships = [];

  // Header shapes: `resource "type" "name" {`, `module "name" {`, `locals {`, ...
  const blockRegex = /^(resource|data|module|variable|output|provider|locals|terraform)\s*(?:"([^"]+)")?(?:\s+"([^"]+)")?\s*\{/;
  const commentOpeners = ['#', '//', '/*'];
  const kindByBlockType = {
    resource: 'terraform-resource',
    data: 'terraform-data',
    module: 'terraform-module',
    variable: 'terraform-variable',
    output: 'terraform-output',
    provider: 'terraform-provider',
    locals: 'terraform-locals',
    terraform: 'terraform-config',
  };

  let cursor = 0;
  while (cursor < lines.length) {
    const trimmed = lines[cursor].trimStart();
    const header = commentOpeners.some((opener) => trimmed.startsWith(opener))
      ? null
      : trimmed.match(blockRegex);

    if (!header) {
      cursor += 1;
      continue;
    }

    const blockType = header[1];
    const label1 = header[2] || null;
    const label2 = header[3] || null;
    const blockEnd = findBlockEnd(lines, cursor);
    const body = extractBlockBody(lines, cursor, blockEnd);

    const entityKind = kindByBlockType[blockType];
    let entityName = label1;
    let entityId;
    let extra = {};

    if (blockType === 'resource') {
      if (label2) entityName = `${label1}.${label2}`;
      entityId = `${fileId}:resource.${entityName}`;
      extra = { resourceType: label1, resourceName: label2 };
    } else if (blockType === 'data') {
      if (label2) entityName = `${label1}.${label2}`;
      entityId = `${fileId}:data.${entityName}`;
      extra = { dataType: label1, dataName: label2 };
    } else if (blockType === 'module') {
      entityId = `${fileId}:module.${entityName}`;
      const parsed = parseModuleBlock(body);
      extra = { source: parsed.source, version: parsed.version };
    } else if (blockType === 'variable') {
      entityId = `${fileId}:variable.${entityName}`;
      const parsed = parseVariableBlock(body);
      extra = { varType: parsed.type, default: parsed.default, description: parsed.description };
    } else if (blockType === 'output') {
      entityId = `${fileId}:output.${entityName}`;
      const parsed = parseOutputBlock(body);
      extra = { value: parsed.value, description: parsed.description };
    } else if (blockType === 'provider') {
      entityId = `${fileId}:provider.${entityName}`;
    } else if (blockType === 'locals') {
      entityName = 'locals';
      // locals blocks have no label, so the 1-based start line keeps ids distinct.
      entityId = `${fileId}:locals.${cursor + 1}`;

      // Every `name =` at the start of a line inside the block counts as a local name.
      const localNames = [];
      const assignment = /^\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=/gm;
      let hit = assignment.exec(body);
      while (hit !== null) {
        localNames.push(hit[1]);
        hit = assignment.exec(body);
      }
      extra = { localNames };
    } else {
      // Only `terraform` remains: blockRegex admits no other keyword.
      entityName = 'terraform';
      entityId = `${fileId}:terraform`;
    }

    entities.push({
      id: entityId,
      type: 'Config',
      name: entityName,
      kind: entityKind,
      visibility: 'public',
      line_range: [cursor + 1, blockEnd + 1],
      ...extra,
    });

    relationships.push({ type: 'CONTAINS', source: fileId, target: entityId });

    // Second pass over the same text: record what this block refers to.
    for (const { type: refType, ref } of extractReferences(body)) {
      relationships.push({
        type: 'DEPENDS_ON',
        source: entityId,
        target: `${ref}`, // Will be resolved to full IDs during graph merge
        _refType: refType,
        _unresolved: true,
      });
    }

    // Resume scanning on the line after the block's closing brace.
    cursor = blockEnd + 1;
  }

  return { file: filePath, language: 'hcl', entities, relationships };
}
|
||||
|
||||
/**
 * Resolve unresolved DEPENDS_ON references within a set of extraction results.
 *
 * Each short reference (`variable.X`, `module.X`, `local.X`, `data.T.N`,
 * `T.N`) is replaced by the full id of the entity that declares it. Entities
 * declared in the same directory as the referencing file are preferred, so
 * two directories that both declare `variable "region"` no longer resolve to
 * whichever was processed last. If the directory has no match, the lookup
 * falls back to all results, where the last declaration wins as before.
 *
 * The `results` array is modified in place: resolved targets are rewritten
 * and the `_unresolved` / `_refType` markers are removed from every
 * relationship that carried them, whether or not a match was found.
 *
 * @param {Array<Object>} results - Objects shaped like extractTerraform's
 *   return value. `file` is optional; results without a string `file` share
 *   one scope.
 * @returns {Array<Object>} The same `results` array.
 */
function resolveReferences(results) {
  // Directory that scopes a result's declarations and references.
  const scopeOf = (result) => (typeof result.file === 'string' ? path.dirname(result.file) : '');

  // Short reference keys under which an entity can be looked up.
  const shortRefsOf = (entity) => {
    switch (entity.kind) {
      case 'terraform-variable':
        return [`variable.${entity.name}`];
      case 'terraform-module':
        return [`module.${entity.name}`];
      case 'terraform-resource': {
        const keys = [`resource.${entity.name}`];
        // Also register as type.name for resource refs
        if (entity.resourceType && entity.resourceName) {
          keys.push(`${entity.resourceType}.${entity.resourceName}`);
        }
        return keys;
      }
      case 'terraform-data':
        return [`data.${entity.name}`];
      case 'terraform-locals':
        return (entity.localNames || []).map((localName) => `local.${localName}`);
      default:
        return [];
    }
  };

  // Build lookups: shortRef → full entityId, per directory and overall.
  const globalLookup = new Map();
  const lookupByScope = new Map();
  for (const result of results) {
    const scope = scopeOf(result);
    if (!lookupByScope.has(scope)) lookupByScope.set(scope, new Map());
    const scopedLookup = lookupByScope.get(scope);

    for (const entity of result.entities) {
      for (const key of shortRefsOf(entity)) {
        scopedLookup.set(key, entity.id);
        globalLookup.set(key, entity.id);
      }
    }
  }

  // Resolve
  for (const result of results) {
    const scopedLookup = lookupByScope.get(scopeOf(result));

    for (const rel of result.relationships) {
      if (!rel._unresolved) continue;

      // Resource refs are stored as `type.name`; also try the prefixed form.
      const prefixed = `${rel._refType}.${rel.target}`;
      const resolved =
        scopedLookup.get(rel.target) ||
        scopedLookup.get(prefixed) ||
        globalLookup.get(rel.target) ||
        globalLookup.get(prefixed);

      if (resolved) {
        rel.target = resolved;
      }
      delete rel._unresolved;
      delete rel._refType;
    }
  }

  return results;
}
|
||||
|
||||
// CLI: `node extract-terraform.js <file.tf> [repoRoot]`
// Runs only when this file is the entry script; prints the extraction result
// for one file as pretty-printed JSON. repoRoot defaults to the current
// working directory.
if (require.main === module) {
  const [, , filePath, rootArg] = process.argv;
  const repoRoot = rootArg || process.cwd();

  if (!filePath) {
    console.error('Usage: node extract-terraform.js <file.tf> [repoRoot]');
    process.exit(1);
  }

  const extraction = extractTerraform(filePath, repoRoot);
  console.log(JSON.stringify(extraction, null, 2));
}

module.exports = { extractTerraform, resolveReferences };
|
||||
Reference in New Issue
Block a user