feat: confluence benchmark, pattern extractor, agent KB, UX spec

- extract-patterns.js: mines layered arch, ArgoCD appsets, cloud regions,
  CIDR allocations, naming conventions, sync waves, tech stack from code
- agent-kb.js: token-efficient JSON rendering of same doc tree
- eval-confluence-ref-questions.json: 32 reference-only benchmark questions
- wiggum-v2.sh: Ralph Wiggum loop targeting confluence baseline (77.8%)
- docs/human-ux-spec.md: BMad UX designer spec for human doc structure
- Eval results: V2 at 28.7% vs confluence 77.8% baseline
- Hub/spoke ownership now correctly extracted (95% on that question)
- Naming conventions, regions, CIDRs surfaced in system-architecture.md
This commit is contained in:
Jarvis Prime
2026-03-10 14:20:35 +00:00
parent 049609a358
commit 0265ec7a60
844 changed files with 2129910 additions and 30 deletions

339
extract-terraform.js Normal file
View File

@@ -0,0 +1,339 @@
const fs = require('fs');
const path = require('path');
/**
* Phase 9: Terraform Extraction
* Multi-pass regex parser for .tf files.
* Extracts: modules, resources, variables, outputs, data sources, locals, providers.
* Detects cross-references: var.X, module.X, local.X, data.X, resource refs.
*/
// Top-level HCL block keywords this extractor recognises.
// NOTE(review): this constant is not referenced elsewhere in the visible source;
// the `blockRegex` inside extractTerraform spells out the same keywords by hand —
// confirm whether the two are meant to be kept in sync.
const BLOCK_TYPES = ['resource', 'data', 'module', 'variable', 'output', 'provider', 'locals', 'terraform'];
/**
 * Find the line on which the block opened at `startLine` is closed.
 *
 * Only structural braces are counted. Braces that appear inside quoted strings
 * (including `${...}` / `%{...}` template sequences), `#` and `//` line
 * comments, block comments and heredoc bodies are ignored, so a value such as
 * `"}"` or a commented-out `}` no longer terminates the block early.
 *
 * @param {string[]} lines - File content split into lines.
 * @param {number} startLine - Zero-based index of the line holding the opening brace.
 * @returns {number} Zero-based index of the line holding the matching closing
 *   brace, or `lines.length - 1` when the block is never closed.
 */
function findBlockEnd(lines, startLine) {
  let depth = 0;
  let inBlockComment = false; // block comments may span several lines
  let heredocEnd = null; // delimiter of the heredoc currently being skipped

  for (let i = startLine; i < lines.length; i++) {
    const line = lines[i];

    if (heredocEnd !== null) {
      // Heredoc bodies are opaque text; only the delimiter line ends them.
      if (line.trim() === heredocEnd) heredocEnd = null;
      continue;
    }

    // Quoted strings cannot span lines in HCL, so the string/template state
    // restarts on every line (this also recovers from an unterminated quote).
    const ctx = [];

    for (let j = 0; j < line.length; j++) {
      const ch = line[j];
      const next = line[j + 1];

      if (inBlockComment) {
        if (ch === '*' && next === '/') {
          inBlockComment = false;
          j++;
        }
        continue;
      }

      const top = ctx[ctx.length - 1];

      if (top !== undefined && top.kind === 'string') {
        if (ch === '\\') {
          j++; // skip the escaped character
        } else if (ch === '"') {
          ctx.pop();
        } else if (ch === '$' || ch === '%') {
          if (next === ch && line[j + 2] === '{') {
            j += 2; // `$${` / `%%{` is an escaped literal, not a template
          } else if (next === '{') {
            ctx.push({ kind: 'template', braces: 0 });
            j++;
          }
        }
        continue;
      }

      if (ch === '"') {
        ctx.push({ kind: 'string' });
        continue;
      }

      if (top !== undefined && top.kind === 'template') {
        // Inside `${ ... }`: track nested braces until the template closes.
        if (ch === '{') {
          top.braces++;
        } else if (ch === '}') {
          if (top.braces === 0) ctx.pop();
          else top.braces--;
        }
        continue;
      }

      // Structural (non-string) context from here on.
      if (ch === '#' || (ch === '/' && next === '/')) break; // rest of line is a comment
      if (ch === '/' && next === '*') {
        inBlockComment = true;
        j++;
        continue;
      }
      if (ch === '<' && next === '<') {
        const heredoc = /^<<-?([A-Za-z_][A-Za-z0-9_]*)/.exec(line.slice(j));
        if (heredoc) {
          heredocEnd = heredoc[1];
          break;
        }
        continue;
      }

      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) return i;
      }
    }
  }
  return lines.length - 1; // fallback: unterminated block runs to end of file
}
/**
 * Return the text of a block as a single newline-joined string.
 *
 * The result spans `startLine` through `endLine` inclusive, so it contains the
 * block's header line and its closing-brace line as well as the lines between.
 *
 * @param {string[]} lines - File content split into lines.
 * @param {number} startLine - Zero-based index of the block's first line.
 * @param {number} endLine - Zero-based index of the block's last line.
 * @returns {string} The selected lines joined with `\n`.
 */
function extractBlockBody(lines, startLine, endLine) {
  return lines.slice(startLine, endLine + 1).join('\n');
}
/**
 * Pull `type`, `default` and `description` out of a variable block's text.
 *
 * Each attribute is read from the first line that starts with its name; `type`
 * and `default` capture the rest of that line only. A default that is wrapped
 * in double quotes has the surrounding quotes removed.
 *
 * @param {string} body - Text of the variable block.
 * @returns {{type: (string|null), default: (string|null), description: (string|null)}}
 *   Attributes that are absent are reported as `null`.
 */
function parseVariableBlock(body) {
  const [, rawType] = body.match(/^\s*type\s*=\s*(.+)/m) || [];
  const [, rawDefault] = body.match(/^\s*default\s*=\s*(.+)/m) || [];
  const [, description = null] = body.match(/^\s*description\s*=\s*"([^"]+)"/m) || [];

  return {
    type: rawType === undefined ? null : rawType.trim(),
    default: rawDefault === undefined ? null : rawDefault.trim().replace(/^"(.*)"$/, '$1'),
    description,
  };
}
/**
 * Pull the quoted `source` and `version` attributes out of a module block's text.
 *
 * @param {string} body - Text of the module block.
 * @returns {{source: (string|null), version: (string|null)}} Attributes that
 *   are absent (or not written as a non-empty quoted string) are `null`.
 */
function parseModuleBlock(body) {
  const [, source = null] = body.match(/^\s*source\s*=\s*"([^"]+)"/m) || [];
  const [, version = null] = body.match(/^\s*version\s*=\s*"([^"]+)"/m) || [];
  return { source, version };
}
/**
 * Pull `value` and `description` out of an output block's text.
 *
 * `value` is the trimmed remainder of the first line that starts with
 * `value =`; `description` is the content of a non-empty quoted string.
 *
 * @param {string} body - Text of the output block.
 * @returns {{value: (string|null), description: (string|null)}} Attributes
 *   that are absent are reported as `null`.
 */
function parseOutputBlock(body) {
  const [, rawValue] = body.match(/^\s*value\s*=\s*(.+)/m) || [];
  const [, description = null] = body.match(/^\s*description\s*=\s*"([^"]+)"/m) || [];
  return {
    value: rawValue === undefined ? null : rawValue.trim(),
    description,
  };
}
/**
 * Extract all cross-references from a block body.
 *
 * Recognised forms are `var.X`, `local.X`, `module.X`, `data.TYPE.NAME` and
 * resource references of the shape `type_name.NAME.` (a provider-style type
 * containing an underscore, followed by a name and a further attribute).
 *
 * Every pattern is anchored with a left boundary so a match can only begin at
 * the start of an identifier chain. Without it the scan could start in the
 * middle of a token: `data.aws_ami.ubuntu.id` would additionally yield the
 * bogus resource `ws_ami.ubuntu`, and `tfvar.x` would yield `variable.x`.
 *
 * @param {string} body - Text to scan.
 * @returns {Array<{type: string, ref: string}>} Unique references in pattern
 *   order (variable, local, module, data, resource), then first occurrence.
 *   `ref` is `variable.X`, `local.X`, `module.X`, `data.TYPE.NAME` or
 *   `TYPE.NAME` for resources.
 */
function extractReferences(body) {
  const refs = [];
  const seen = new Set();
  // `(?<![A-Za-z0-9_.-])` = not preceded by an identifier character or a dot,
  // i.e. the match starts a fresh traversal rather than continuing another one.
  const patterns = [
    { regex: /(?<![A-Za-z0-9_.-])var\.([a-zA-Z0-9_-]+)/g, type: 'variable' },
    { regex: /(?<![A-Za-z0-9_.-])local\.([a-zA-Z0-9_-]+)/g, type: 'local' },
    { regex: /(?<![A-Za-z0-9_.-])module\.([a-zA-Z0-9_-]+)/g, type: 'module' },
    { regex: /(?<![A-Za-z0-9_.-])data\.([a-zA-Z0-9_-]+)\.([a-zA-Z0-9_-]+)/g, type: 'data' },
    // Resource references: aws_instance.web.id → resource ref
    { regex: /(?<![A-Za-z0-9_.-])([a-z][a-z0-9]*_[a-z][a-z0-9_]*)\.([a-zA-Z0-9_-]+)\./g, type: 'resource' },
  ];

  for (const { regex, type } of patterns) {
    let m;
    while ((m = regex.exec(body)) !== null) {
      let key;
      if (type === 'data') key = `data.${m[1]}.${m[2]}`;
      else if (type === 'resource') key = `${m[1]}.${m[2]}`;
      else key = `${type}.${m[1]}`;

      if (!seen.has(key)) {
        seen.add(key);
        refs.push({ type, ref: key });
      }
    }
  }
  return refs;
}
/**
 * Collect the names assigned directly inside a `locals` block.
 *
 * Only assignments at brace depth 1 (immediately inside `locals { ... }`) are
 * local values; assignments at greater depth are keys of nested objects
 * (e.g. `Name` inside a tags map) and are skipped. Brace counting here is
 * simplified and does not special-case strings or heredocs.
 *
 * @param {string} body - Text of the locals block, header line included.
 * @returns {string[]} Local value names in declaration order.
 */
function collectTopLevelLocalNames(body) {
  const names = [];
  let depth = 0;
  for (const line of body.split('\n')) {
    if (depth === 1) {
      const assignment = line.match(/^\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=/);
      if (assignment) names.push(assignment[1]);
    }
    for (const ch of line) {
      if (ch === '{') depth++;
      else if (ch === '}') depth--;
    }
  }
  return names;
}

/**
 * Extract all Terraform entities and relationships from a .tf file.
 *
 * One `Config` entity is emitted for the file itself and one per top-level
 * block (resource, data, module, variable, output, provider, locals,
 * terraform). Each block gets a `CONTAINS` relationship from the file and one
 * unresolved `DEPENDS_ON` relationship per reference found in its text; those
 * carry `_refType` / `_unresolved` markers for a later resolution step.
 *
 * @param {string} filePath - Absolute path to the .tf file.
 * @param {string} repoRoot - Base path of the repository.
 * @returns {Object} { file, language: 'hcl', entities: [...], relationships: [...] }
 *   where `file` is `filePath` as given and entity ids are prefixed with the
 *   path relative to `repoRoot`.
 * @throws {Error} Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function extractTerraform(filePath, repoRoot) {
  const sourceCode = fs.readFileSync(filePath, 'utf8');
  const relFilePath = path.relative(repoRoot, filePath);
  const lines = sourceCode.split('\n');
  const entities = [];
  const relationships = [];

  // File-level entity
  const fileId = relFilePath;
  entities.push({
    id: fileId,
    type: 'Config',
    name: relFilePath,
    kind: 'terraform',
    visibility: 'public',
    line_range: [1, lines.length]
  });

  // Pass 1: Extract block declarations
  // Patterns:
  //   resource "type" "name" {
  //   data "type" "name" {
  //   module "name" {
  //   variable "name" {
  //   output "name" {
  //   provider "name" {
  //   locals {
  //   terraform {
  const blockRegex = /^(resource|data|module|variable|output|provider|locals|terraform)\s*(?:"([^"]+)")?(?:\s+"([^"]+)")?\s*\{/;

  // True while scanning the interior of a multi-line /* ... */ comment, so
  // commented-out declarations are not reported as entities.
  let inBlockComment = false;

  for (let i = 0; i < lines.length; i++) {
    const trimmed = lines[i].trimStart();

    if (inBlockComment) {
      if (trimmed.includes('*/')) inBlockComment = false;
      continue;
    }

    // Skip comments
    if (trimmed.startsWith('#') || trimmed.startsWith('//')) continue;
    if (trimmed.startsWith('/*')) {
      if (!trimmed.includes('*/', 2)) inBlockComment = true;
      continue;
    }

    const match = trimmed.match(blockRegex);
    if (!match) continue;

    const blockType = match[1];
    const label1 = match[2] || null;
    const label2 = match[3] || null;
    const blockEnd = findBlockEnd(lines, i);
    const body = extractBlockBody(lines, i, blockEnd);

    let entityId, entityName, entityKind, extra = {};
    switch (blockType) {
      case 'resource': {
        entityName = label2 ? `${label1}.${label2}` : label1;
        entityId = `${fileId}:resource.${entityName}`;
        entityKind = 'terraform-resource';
        extra = { resourceType: label1, resourceName: label2 };
        break;
      }
      case 'data': {
        entityName = label2 ? `${label1}.${label2}` : label1;
        entityId = `${fileId}:data.${entityName}`;
        entityKind = 'terraform-data';
        extra = { dataType: label1, dataName: label2 };
        break;
      }
      case 'module': {
        entityName = label1;
        entityId = `${fileId}:module.${entityName}`;
        entityKind = 'terraform-module';
        const modInfo = parseModuleBlock(body);
        extra = { source: modInfo.source, version: modInfo.version };
        break;
      }
      case 'variable': {
        entityName = label1;
        entityId = `${fileId}:variable.${entityName}`;
        entityKind = 'terraform-variable';
        const varInfo = parseVariableBlock(body);
        extra = { varType: varInfo.type, default: varInfo.default, description: varInfo.description };
        break;
      }
      case 'output': {
        entityName = label1;
        entityId = `${fileId}:output.${entityName}`;
        entityKind = 'terraform-output';
        const outInfo = parseOutputBlock(body);
        extra = { value: outInfo.value, description: outInfo.description };
        break;
      }
      case 'provider': {
        entityName = label1;
        entityId = `${fileId}:provider.${entityName}`;
        entityKind = 'terraform-provider';
        break;
      }
      case 'locals': {
        entityName = 'locals';
        // A file may hold several locals blocks; the 1-based start line keeps ids distinct.
        entityId = `${fileId}:locals.${i + 1}`;
        entityKind = 'terraform-locals';
        extra = { localNames: collectTopLevelLocalNames(body) };
        break;
      }
      case 'terraform': {
        entityName = 'terraform';
        entityId = `${fileId}:terraform`;
        entityKind = 'terraform-config';
        break;
      }
      default:
        continue;
    }

    entities.push({
      id: entityId,
      type: 'Config',
      name: entityName,
      kind: entityKind,
      visibility: 'public',
      line_range: [i + 1, blockEnd + 1],
      ...extra
    });
    relationships.push({
      type: 'CONTAINS',
      source: fileId,
      target: entityId
    });

    // Pass 2: Extract cross-references from block body
    const refs = extractReferences(body);
    for (const ref of refs) {
      relationships.push({
        type: 'DEPENDS_ON',
        source: entityId,
        target: `${ref.ref}`, // Will be resolved to full IDs during graph merge
        _refType: ref.type,
        _unresolved: true
      });
    }

    // Skip to end of block so nested blocks are not re-scanned as top-level ones
    i = blockEnd;
  }

  return { file: filePath, language: 'hcl', entities, relationships };
}
/**
 * Resolve unresolved DEPENDS_ON references within a set of extraction results.
 *
 * Short references (`variable.X`, `module.X`, `local.X`, `data.TYPE.NAME`,
 * `TYPE.NAME`) are replaced with the full entity id they point to. A match in
 * the same directory as the referencing file is preferred, so identically
 * named variables, locals or modules declared in different module directories
 * no longer overwrite one another. When the directory holds no match, the
 * lookup falls back to the last entity registered under that key across all
 * results. References with no match keep their short form.
 *
 * The relationship objects are mutated in place: `target` is rewritten when
 * resolved, and the `_unresolved` / `_refType` markers are removed either way.
 *
 * @param {Array<{file?: string, entities: Object[], relationships: Object[]}>} results
 *   Extraction results, as produced by extractTerraform.
 * @returns {Array} The same `results` array.
 */
function resolveReferences(results) {
  // Results without a string `file` share one anonymous scope.
  const scopeOf = (result) => (typeof result.file === 'string' ? path.dirname(result.file) : '');

  // shortRef → full entityId, across everything and per directory
  const globalLookup = new Map();
  const scopedLookups = new Map();
  const register = (scope, key, id) => {
    globalLookup.set(key, id);
    let scoped = scopedLookups.get(scope);
    if (!scoped) {
      scoped = new Map();
      scopedLookups.set(scope, scoped);
    }
    scoped.set(key, id);
  };

  for (const result of results) {
    const scope = scopeOf(result);
    for (const entity of result.entities) {
      switch (entity.kind) {
        case 'terraform-variable':
          register(scope, `variable.${entity.name}`, entity.id);
          break;
        case 'terraform-module':
          register(scope, `module.${entity.name}`, entity.id);
          break;
        case 'terraform-resource':
          register(scope, `resource.${entity.name}`, entity.id);
          // Also register as type.name for resource refs
          if (entity.resourceType && entity.resourceName) {
            register(scope, `${entity.resourceType}.${entity.resourceName}`, entity.id);
          }
          break;
        case 'terraform-data':
          register(scope, `data.${entity.name}`, entity.id);
          break;
        case 'terraform-locals':
          if (entity.localNames) {
            for (const ln of entity.localNames) {
              register(scope, `local.${ln}`, entity.id);
            }
          }
          break;
        default:
          break;
      }
    }
  }

  // Resolve
  for (const result of results) {
    const scoped = scopedLookups.get(scopeOf(result)) || new Map();
    for (const rel of result.relationships) {
      if (!rel._unresolved) continue;

      const direct = rel.target;
      const prefixed = `${rel._refType}.${rel.target}`;
      const resolved =
        scoped.get(direct) || scoped.get(prefixed) ||
        globalLookup.get(direct) || globalLookup.get(prefixed);
      if (resolved) {
        rel.target = resolved;
      }
      delete rel._unresolved;
      delete rel._refType;
    }
  }
  return results;
}
// CLI entry point: `node extract-terraform.js <file.tf> [repoRoot]`.
// Runs only when this file is executed directly, not when it is required.
if (require.main === module) {
  const [tfFile, rootArg] = process.argv.slice(2);
  if (!tfFile) {
    console.error('Usage: node extract-terraform.js <file.tf> [repoRoot]');
    process.exit(1);
  }
  // repoRoot defaults to the current working directory when omitted or empty.
  const extraction = extractTerraform(tfFile, rootArg || process.cwd());
  console.log(JSON.stringify(extraction, null, 2));
}

module.exports = { extractTerraform, resolveReferences };