/** * extract-deep.js — Deep extraction of specific config values, script parameters, * and operational details that the high-level extractors miss. * * Targets the specific data points that Confluence reference docs contain * but our generated docs don't surface. */ const fs = require('fs'); const path = require('path'); const IGNORE_DIRS = new Set([ 'node_modules', '.git', 'venv', '.terraform', '__pycache__', '_bmad', '_bmad-output', '.codex', '.claude', '.cursor', '.gemini', '.kiro', '.agents', 'dist', 'build', 'coverage' ]); function walk(dir, filter, results = []) { try { for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { if (IGNORE_DIRS.has(entry.name)) continue; const full = path.join(dir, entry.name); if (entry.isDirectory()) { walk(full, filter, results); } else if (filter(entry.name)) { results.push(full); } } } catch {} return results; } /** * Extract EKS/GKE/AKS addon versions from values.yaml files. */ function extractAddonVersions(srcRoot) { const addons = []; const files = walk(srcRoot, f => f === 'values.yaml'); for (const file of files) { try { const content = fs.readFileSync(file, 'utf8'); const relPath = path.relative(srcRoot, file); // Match addon blocks with name + version const lines = content.split('\n'); for (let i = 0; i < lines.length; i++) { const nameMatch = lines[i].match(/^\s*-?\s*name:\s*["']?([^\s"']+)/); if (nameMatch) { // Look for version on next few lines for (let j = i + 1; j < Math.min(i + 5, lines.length); j++) { const verMatch = lines[j].match(/version:\s*["']?([^\s"']+)/); if (verMatch) { addons.push({ name: nameMatch[1], version: verMatch[1], file: relPath, }); break; } } } } } catch {} } return addons; } /** * Extract Terraform resource configurations (RDS, VPC, etc.) with specific values. */ function extractTerraformConfigs(srcRoot) { const configs = []; const files = walk(srcRoot, f => f.endsWith('.tf')); for (const file of files) { try { const content = fs.readFileSync(file, 'utf8'); const relPath = path.relative(srcRoot, file); // Extract key config values const patterns = [ { key: 'backup_retention_period', regex: /backup_retention_period\s*=\s*(\d+)/ }, { key: 'backup_window', regex: /backup_window\s*=\s*"([^"]+)"/ }, { key: 'engine_version', regex: /engine_version\s*=\s*"([^"]+)"/ }, { key: 'instance_class', regex: /instance_class\s*=\s*"([^"]+)"/ }, { key: 'allocated_storage', regex: /allocated_storage\s*=\s*(\d+)/ }, { key: 'multi_az', regex: /multi_az\s*=\s*(true|false)/ }, { key: 'deletion_protection', regex: /deletion_protection\s*=\s*(true|false)/ }, { key: 'node_count', regex: /(?:node_count|desired_size|min_size|max_size)\s*=\s*(\d+)/ }, { key: 'machine_type', regex: /(?:machine_type|instance_type|vm_size)\s*=\s*"([^"]+)"/ }, ]; for (const p of patterns) { const match = content.match(p.regex); if (match) { configs.push({ key: p.key, value: match[1], file: relPath }); } } } catch {} } return configs; } /** * Extract script parameters (timeouts, retries, poll intervals). */ function extractScriptParams(srcRoot) { const params = []; const files = walk(srcRoot, f => f.endsWith('.sh') || f.endsWith('.py')); for (const file of files) { try { const content = fs.readFileSync(file, 'utf8'); const relPath = path.relative(srcRoot, file); const lines = content.split('\n'); for (const line of lines) { // Match variable assignments with numeric values and comments const match = line.match(/^([A-Z_]+)\s*=\s*(\d+)\s*(?:#\s*(.+))?/); if (match) { params.push({ name: match[1], value: match[2], comment: match[3] || '', file: relPath, }); } } } catch {} } return params; } /** * Extract Helm template specific values (product IDs, OU IDs, etc.). */ function extractHelmTemplateValues(srcRoot) { const values = []; const files = walk(srcRoot, f => f.endsWith('.yaml') || f.endsWith('.yml')); for (const file of files) { try { const content = fs.readFileSync(file, 'utf8'); const relPath = path.relative(srcRoot, file); // Extract specific identifiers const patterns = [ { key: 'product_id', regex: /(?:product[_-]?id|productId)\s*[:=]\s*"?([a-z]+-[a-z0-9]+)"?/i }, { key: 'ou_id', regex: /(?:ou[_-]?id|organizationalUnit)\s*[:=]\s*"?(ou-[a-z0-9-]+)"?/i }, { key: 'account_id', regex: /(?:account[_-]?id|accountId)\s*[:=]\s*"?(\d{12})"?/ }, { key: 'host_project', regex: /(?:hostProject|host_project)\s*[:=]\s*"?([a-z][-a-z0-9]+)"?/ }, { key: 'shared_vpc', regex: /sharedVpc[\s\S]*?enabled:\s*(true|false)/m }, ]; for (const p of patterns) { const match = content.match(p.regex); if (match) { values.push({ key: p.key, value: match[1], file: relPath }); } } } catch {} } // Deduplicate const seen = new Set(); return values.filter(v => { const k = `${v.key}:${v.value}`; if (seen.has(k)) return false; seen.add(k); return true; }); } /** * Extract state management services from Helm chart names. */ function extractStateServices(srcRoot) { const stateCharts = ['elasticsearch', 'hazelcast', 'redis', 'milvus', 'cassandra', 'kafka', 'rabbitmq', 'postgresql', 'mysql', 'mongodb']; const found = []; const files = walk(srcRoot, f => f === 'Chart.yaml'); for (const file of files) { try { const content = fs.readFileSync(file, 'utf8'); const nameMatch = content.match(/name:\s*(.+)/); if (nameMatch) { const name = nameMatch[1].trim(); if (stateCharts.some(s => name.toLowerCase().includes(s))) { const relPath = path.relative(srcRoot, file); const versionMatch = content.match(/(?:appVersion|version):\s*(.+)/); found.push({ name, version: versionMatch ? versionMatch[1].trim() : null, path: path.dirname(relPath), }); } } } catch {} } return found; } /** * Run all deep extractors. */ function extractDeep(srcRoot) { console.log('Running deep extraction...'); const addons = extractAddonVersions(srcRoot); console.log(` Addon versions: ${addons.length}`); const tfConfigs = extractTerraformConfigs(srcRoot); console.log(` TF configs: ${tfConfigs.length}`); const scriptParams = extractScriptParams(srcRoot); console.log(` Script params: ${scriptParams.length}`); const helmValues = extractHelmTemplateValues(srcRoot); console.log(` Helm template values: ${helmValues.length}`); const stateServices = extractStateServices(srcRoot); console.log(` State services: ${stateServices.length}`); return { addons, tfConfigs, scriptParams, helmValues, stateServices }; } module.exports = { extractDeep, extractAddonVersions, extractTerraformConfigs, extractScriptParams, extractHelmTemplateValues, extractStateServices };