Files
dev-intel-v2/extract-deep.js

228 lines
7.3 KiB
JavaScript
Raw Normal View History

/**
* extract-deep.js - Deep extraction of specific config values, script parameters,
* and operational details that the high-level extractors miss.
*
* Targets the specific data points that Confluence reference docs contain
* but our generated docs don't surface.
*/
const fs = require('fs');
const path = require('path');
// Entry names that the directory traversal skips. Matching is by exact
// base name only, at any depth of the tree.
const IGNORE_DIRS = new Set([
  'node_modules',
  '.git',
  'venv',
  '.terraform',
  '__pycache__',
  '_bmad',
  '_bmad-output',
  '.codex',
  '.claude',
  '.cursor',
  '.gemini',
  '.kiro',
  '.agents',
  'dist',
  'build',
  'coverage',
]);
/**
 * Recursively collect paths of files under `dir` whose base name passes `filter`.
 *
 * Any entry (file or directory) whose name is listed in IGNORE_DIRS is
 * skipped. Errors raised while listing or processing a directory are
 * swallowed, so a failing directory contributes only what was gathered
 * before the failure and the walk carries on elsewhere.
 *
 * @param {string} dir - Directory to scan.
 * @param {(name: string) => boolean} filter - Predicate applied to each file's base name.
 * @param {string[]} [results=[]] - Accumulator; matching paths are appended in place.
 * @returns {string[]} The `results` array that was passed in (or the fresh default).
 */
function walk(dir, filter, results = []) {
  const visit = (dirent) => {
    if (IGNORE_DIRS.has(dirent.name)) return;
    const childPath = path.join(dir, dirent.name);
    if (!dirent.isDirectory()) {
      if (filter(dirent.name)) results.push(childPath);
      return;
    }
    // Descend before looking at the next sibling so result order follows
    // a depth-first traversal in directory-listing order.
    walk(childPath, filter, results);
  };

  try {
    fs.readdirSync(dir, { withFileTypes: true }).forEach(visit);
  } catch {
    // Best-effort traversal: ignore whatever went wrong in this directory.
  }
  return results;
}
/**
 * Extract EKS/GKE/AKS addon versions from values.yaml files.
 *
 * Every `values.yaml` under `srcRoot` is scanned line by line. A `name:` key
 * (optionally written as a list item) opens a candidate entry, and the next
 * four lines are searched for a `version:` key. The search stops early when
 * another `name:` key is reached at the same or a shallower column, so an
 * entry that has no version of its own never borrows the version of the
 * sibling entry that follows it. A more deeply indented `name:` does not
 * stop the search.
 *
 * Any failure while reading or processing a file is ignored (best-effort scan).
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {{name: string, version: string, file: string}[]} One record per
 *   name/version pair found; `file` is the path relative to `srcRoot`.
 */
function extractAddonVersions(srcRoot) {
  // Group 1 is the indentation / list-marker prefix (its length is the key's
  // column), group 2 is the name value.
  const nameRe = /^(\s*-?\s*)name:\s*["']?([^\s"']+)/;
  const versionRe = /version:\s*["']?([^\s"']+)/;
  const lookaheadLines = 4;

  const addons = [];
  const files = walk(srcRoot, f => f === 'values.yaml');
  for (const file of files) {
    try {
      const content = fs.readFileSync(file, 'utf8');
      const relPath = path.relative(srcRoot, file);
      const lines = content.split('\n');
      for (let i = 0; i < lines.length; i++) {
        const nameMatch = lines[i].match(nameRe);
        if (!nameMatch) continue;

        const keyColumn = nameMatch[1].length;
        const end = Math.min(i + 1 + lookaheadLines, lines.length);
        for (let j = i + 1; j < end; j++) {
          // A sibling (or outer) `name:` starts a different entry: stop here
          // rather than attributing that entry's version to this one.
          const nextName = lines[j].match(nameRe);
          if (nextName && nextName[1].length <= keyColumn) break;

          const verMatch = lines[j].match(versionRe);
          if (verMatch) {
            addons.push({
              name: nameMatch[2],
              version: verMatch[1],
              file: relPath,
            });
            break;
          }
        }
      }
    } catch {
      // Best-effort: skip files that cannot be read or processed.
    }
  }
  return addons;
}
/**
 * Extract selected Terraform settings (database, node pool and instance
 * sizing values) from `.tf` files.
 *
 * For every `.tf` file under `srcRoot`, each known setting is looked up once
 * and only its first occurrence in that file is recorded. The `node_count`
 * key covers `node_count`, `desired_size`, `min_size` and `max_size`; the
 * `machine_type` key covers `machine_type`, `instance_type` and `vm_size`.
 * Any failure while handling a file is ignored (best-effort scan).
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {{key: string, value: string, file: string}[]} Captured values as
 *   strings; `file` is the path relative to `srcRoot`.
 */
function extractTerraformConfigs(srcRoot) {
  // [output key, pattern] pairs, tried in this order against each file.
  const settingPatterns = [
    ['backup_retention_period', /backup_retention_period\s*=\s*(\d+)/],
    ['backup_window', /backup_window\s*=\s*"([^"]+)"/],
    ['engine_version', /engine_version\s*=\s*"([^"]+)"/],
    ['instance_class', /instance_class\s*=\s*"([^"]+)"/],
    ['allocated_storage', /allocated_storage\s*=\s*(\d+)/],
    ['multi_az', /multi_az\s*=\s*(true|false)/],
    ['deletion_protection', /deletion_protection\s*=\s*(true|false)/],
    ['node_count', /(?:node_count|desired_size|min_size|max_size)\s*=\s*(\d+)/],
    ['machine_type', /(?:machine_type|instance_type|vm_size)\s*=\s*"([^"]+)"/],
  ];

  const found = [];
  const isTerraformFile = (name) => name.endsWith('.tf');
  for (const tfPath of walk(srcRoot, isTerraformFile)) {
    try {
      const text = fs.readFileSync(tfPath, 'utf8');
      const file = path.relative(srcRoot, tfPath);
      for (const [key, regex] of settingPatterns) {
        const hit = text.match(regex);
        if (hit) {
          found.push({ key, value: hit[1], file });
        }
      }
    } catch {
      // Best-effort: ignore any failure on this file and move on.
    }
  }
  return found;
}
/**
 * Extract script parameters (timeouts, retries, poll intervals).
 *
 * Scans every `.sh` and `.py` file under `srcRoot` for lines that begin with
 * an upper-case variable assigned a numeric literal, optionally followed by
 * a `#` comment, e.g. `MAX_RETRIES=5  # attempts`.
 *
 * - Names may contain digits after the first character (`S3_RETRIES`).
 * - The value is captured whole: integers and simple decimals (`1.5`) are
 *   kept intact, while dotted tokens such as versions or IP addresses
 *   (`1.2.3`, `10.0.0.1`) are rejected instead of being truncated to their
 *   leading digits.
 *
 * Any failure while reading or processing a file is ignored (best-effort scan).
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {{name: string, value: string, comment: string, file: string}[]}
 *   One record per matching line; `value` is the literal as written,
 *   `comment` is the trailing comment text or '' when there is none, and
 *   `file` is the path relative to `srcRoot`.
 */
function extractScriptParams(srcRoot) {
  // The negative lookahead after the number makes the literal all-or-nothing:
  // it may not be followed by another digit or a dot, which blocks partial
  // matches such as "1" out of "1.2.3".
  const assignmentRe =
    /^([A-Z_][A-Z0-9_]*)\s*=\s*(\d+(?:\.\d+)?)(?![\d.])\s*(?:#\s*(.+))?/;

  const params = [];
  const files = walk(srcRoot, f => f.endsWith('.sh') || f.endsWith('.py'));
  for (const file of files) {
    try {
      const content = fs.readFileSync(file, 'utf8');
      const relPath = path.relative(srcRoot, file);
      for (const line of content.split('\n')) {
        const match = line.match(assignmentRe);
        if (match) {
          params.push({
            name: match[1],
            value: match[2],
            comment: match[3] || '',
            file: relPath,
          });
        }
      }
    } catch {
      // Best-effort: skip files that cannot be read or processed.
    }
  }
  return params;
}
/**
 * Extract specific identifiers (product IDs, OU IDs, account IDs, host
 * project, shared-VPC flag) from YAML files.
 *
 * Each `.yaml` / `.yml` file under `srcRoot` is checked once per identifier
 * and only the first occurrence in that file is taken. The combined list is
 * then de-duplicated on the `key:value` pair, keeping the first record seen,
 * so the same value appearing in several files is reported once with the
 * first file it was found in. Any failure on a file is ignored.
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {{key: string, value: string, file: string}[]} Unique key/value
 *   records; `file` is the path relative to `srcRoot`.
 */
function extractHelmTemplateValues(srcRoot) {
  // [output key, pattern] pairs, tried in this order against each file.
  const identifierPatterns = [
    ['product_id', /(?:product[_-]?id|productId)\s*[:=]\s*"?([a-z]+-[a-z0-9]+)"?/i],
    ['ou_id', /(?:ou[_-]?id|organizationalUnit)\s*[:=]\s*"?(ou-[a-z0-9-]+)"?/i],
    ['account_id', /(?:account[_-]?id|accountId)\s*[:=]\s*"?(\d{12})"?/],
    ['host_project', /(?:hostProject|host_project)\s*[:=]\s*"?([a-z][-a-z0-9]+)"?/],
    ['shared_vpc', /sharedVpc[\s\S]*?enabled:\s*(true|false)/m],
  ];

  const collected = [];
  const isYaml = (name) => name.endsWith('.yaml') || name.endsWith('.yml');
  for (const yamlPath of walk(srcRoot, isYaml)) {
    try {
      const text = fs.readFileSync(yamlPath, 'utf8');
      const file = path.relative(srcRoot, yamlPath);
      for (const [key, regex] of identifierPatterns) {
        const hit = text.match(regex);
        if (hit) {
          collected.push({ key, value: hit[1], file });
        }
      }
    } catch {
      // Best-effort: ignore any failure on this file and move on.
    }
  }

  // First record wins for each key/value pair; a Map keeps first-seen order.
  const firstByPair = new Map();
  for (const record of collected) {
    const pair = `${record.key}:${record.value}`;
    if (!firstByPair.has(pair)) {
      firstByPair.set(pair, record);
    }
  }
  return [...firstByPair.values()];
}
/**
 * Extract state management services from Helm chart names.
 *
 * Reads every `Chart.yaml` under `srcRoot` and reports the charts whose
 * top-level `name` contains one of the known stateful-service keywords
 * (case-insensitive substring match).
 *
 * Only keys that start at the beginning of a line are considered, so `name:`
 * and `version:` entries nested under e.g. `dependencies:` are never mistaken
 * for the chart's own metadata. The reported version is whichever of the
 * top-level `appVersion` / `version` keys appears first in the file.
 * Surrounding quotes and trailing ` # comment` text are stripped from both
 * values. Any failure while handling a file is ignored (best-effort scan).
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {{name: string, version: (string|null), path: string}[]} One
 *   record per matching chart; `path` is the chart directory relative to
 *   `srcRoot`, `version` is null when no top-level version key is present.
 */
function extractStateServices(srcRoot) {
  const stateCharts = ['elasticsearch', 'hazelcast', 'redis', 'milvus', 'cassandra', 'kafka', 'rabbitmq', 'postgresql', 'mysql', 'mongodb'];

  // Reduce a raw YAML scalar to its bare text: drop one pair of matching
  // surrounding quotes and any trailing " # comment".
  const cleanScalar = (raw) => {
    const text = raw.trim();
    const quoted = text.match(/^(["'])(.*?)\1(?:\s+#.*)?$/);
    if (quoted) return quoted[2];
    return text.replace(/\s+#.*$/, '').trim();
  };

  const found = [];
  const files = walk(srcRoot, f => f === 'Chart.yaml');
  for (const file of files) {
    try {
      const content = fs.readFileSync(file, 'utf8');
      // `^` with the `m` flag restricts the match to top-level keys.
      const nameMatch = content.match(/^name:\s*(.+)/m);
      if (!nameMatch) continue;

      const name = cleanScalar(nameMatch[1]);
      const lowered = name.toLowerCase();
      if (!stateCharts.some(s => lowered.includes(s))) continue;

      const relPath = path.relative(srcRoot, file);
      const versionMatch = content.match(/^(?:appVersion|version):\s*(.+)/m);
      found.push({
        name,
        version: versionMatch ? cleanScalar(versionMatch[1]) : null,
        path: path.dirname(relPath),
      });
    } catch {
      // Best-effort: skip charts that cannot be read or processed.
    }
  }
  return found;
}
/**
 * Run all deep extractors.
 *
 * Invokes each extractor in a fixed order with the same `srcRoot`, logging
 * the number of items each one returned right after it finishes.
 *
 * @param {string} srcRoot - Root directory handed to every extractor.
 * @returns {{addons: Array, tfConfigs: Array, scriptParams: Array, helmValues: Array, stateServices: Array}}
 *   The raw result of each extractor under its own key.
 */
function extractDeep(srcRoot) {
  console.log('Running deep extraction...');

  // [result key, log label, extractor] - executed top to bottom.
  const stages = [
    ['addons', ' Addon versions: ', extractAddonVersions],
    ['tfConfigs', ' TF configs: ', extractTerraformConfigs],
    ['scriptParams', ' Script params: ', extractScriptParams],
    ['helmValues', ' Helm template values: ', extractHelmTemplateValues],
    ['stateServices', ' State services: ', extractStateServices],
  ];

  const report = {};
  for (const [field, label, runExtractor] of stages) {
    const items = runExtractor(srcRoot);
    console.log(`${label}${items.length}`);
    report[field] = items;
  }
  return report;
}
module.exports = { extractDeep, extractAddonVersions, extractTerraformConfigs, extractScriptParams, extractHelmTemplateValues, extractStateServices };