/**
 * extract-patterns.js — Mine architectural patterns from code artifacts.
 *
 * Extracts:
 * - Layered architecture from repo/dir naming conventions
 * - Hub/spoke model from ArgoCD ApplicationSet configs
 * - Cloud regions from terraform configs + values.yaml
 * - CIDR allocations from terraform variables
 * - Naming conventions from scripts + terraform
 * - Sync-wave ordering from Helm template annotations
 * - Release/deployment patterns from CI configs + scripts
 * - Tech stack from Helm chart images + dependencies
 */

const fs = require('fs');
const path = require('path');

// Directory-name prefixes and the layer each one maps to. `order` is the sort
// key used when layers are reported; the first matching entry wins.
const LAYER_PATTERNS = [
  { pattern: /^app[-_]/, layer: 'Application', order: 1 },
  { pattern: /^compute[-_]/, layer: 'Compute', order: 2 },
  { pattern: /^network[-_]/, layer: 'Network', order: 3 },
  { pattern: /^account[-_]/, layer: 'Account', order: 4 },
  { pattern: /^control[-_]/, layer: 'Control Plane', order: 5 },
  { pattern: /^runtime/, layer: 'Runtime (shared)', order: 0 },
  { pattern: /^ipam[-_]/, layer: 'IPAM', order: 3.5 },
  { pattern: /^skills/, layer: 'Skills/Tooling', order: 6 },
  { pattern: /^docs/, layer: 'Documentation', order: 7 },
];

/**
 * Group the top-level directories of `srcRoot` into architecture layers.
 *
 * Only directories whose name does not start with `.` are considered. Each
 * one is assigned to the first LAYER_PATTERNS entry whose regex matches its
 * name; directories matching no entry are left out.
 *
 * @param {string} srcRoot - Directory whose immediate children are inspected.
 * @returns {Array<{layer: string, repos: string[], order: number}>} Layers
 *   sorted by ascending `order`.
 * @throws Whatever `fs.readdirSync` throws when `srcRoot` cannot be read.
 */
function extractLayers(srcRoot) {
  const grouped = new Map();

  for (const entry of fs.readdirSync(srcRoot, { withFileTypes: true })) {
    if (!entry.isDirectory() || entry.name.startsWith('.')) continue;

    const hit = LAYER_PATTERNS.find(({ pattern }) => pattern.test(entry.name));
    if (hit === undefined) continue;

    let bucket = grouped.get(hit.layer);
    if (bucket === undefined) {
      bucket = { order: hit.order, repos: [] };
      grouped.set(hit.layer, bucket);
    }
    bucket.repos.push(entry.name);
  }

  return [...grouped]
    .sort(([, left], [, right]) => left.order - right.order)
    .map(([layer, { repos, order }]) => ({ layer, repos, order }));
}
*/ function extractArgoCDAppSets(srcRoot) { const appsets = []; const walkDir = (dir) => { try { for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { const full = path.join(dir, entry.name); if (entry.isDirectory()) { if (['node_modules', '.git', 'venv', '.terraform', '__pycache__'].includes(entry.name)) continue; walkDir(full); } else if (entry.name.endsWith('-appset.yaml') || entry.name.endsWith('-appset.yml')) { try { const content = fs.readFileSync(full, 'utf8'); const name = content.match(/name:\s*['"]?([^\s'"]+)/)?.[1] || entry.name; const namespace = content.match(/namespace:\s*['"]?([^\s'"]+)/)?.[1] || ''; const repoURL = content.match(/repoURL:\s*['"]?([^\s'"]+)/)?.[1] || ''; const targetRevision = content.match(/targetRevision:\s*['"]?([^\s'"]+)/)?.[1] || ''; const destServer = content.match(/server:\s*['"]?([^\s'"]+)/)?.[1] || ''; const relPath = path.relative(srcRoot, full); // Determine if hub or spoke based on path const isHub = relPath.includes('hub') || relPath.includes('control-plane'); appsets.push({ name, namespace, repoURL, targetRevision, destServer, file: relPath, location: isHub ? 'hub' : 'spoke', repoName: repoURL.match(/\/([^/]+?)(?:\.git)?$/)?.[1] || repoURL, }); } catch {} } } } catch {} }; walkDir(srcRoot); return appsets; } /** * Extract cloud regions from terraform configs and values.yaml files. 
*/ function extractCloudRegions(srcRoot) { const regions = { aws: new Set(), azure: new Set(), gcp: new Set() }; const walkDir = (dir) => { try { for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { const full = path.join(dir, entry.name); if (entry.isDirectory()) { if (['node_modules', '.git', 'venv', '.terraform', '__pycache__'].includes(entry.name)) continue; walkDir(full); } else if (entry.name.endsWith('.tf') || entry.name === 'values.yaml' || entry.name === 'variables.tf') { try { const content = fs.readFileSync(full, 'utf8'); // AWS regions const awsMatches = content.match(/us-east-[12]|us-west-[12]|eu-west-[123]|eu-central-[12]|ap-southeast-[12]|ap-northeast-[123]/g); if (awsMatches) awsMatches.forEach(r => regions.aws.add(r)); // Azure regions const azureMatches = content.match(/(?:centralus|eastus[2]?|westus[23]?|westeurope|northeurope|southeastasia|australiaeast)/g); if (azureMatches) azureMatches.forEach(r => regions.azure.add(r)); // GCP regions const gcpMatches = content.match(/us-central1|us-east[14]|us-west[14]|europe-west[1-6]|asia-east[12]|asia-southeast[12]/g); if (gcpMatches) gcpMatches.forEach(r => regions.gcp.add(r)); } catch {} } } } catch {} }; walkDir(srcRoot); return { aws: [...regions.aws].sort(), azure: [...regions.azure].sort(), gcp: [...regions.gcp].sort(), }; } /** * Extract CIDR allocations from terraform variables and configs. 
*/ function extractCIDRAllocations(srcRoot) { const cidrs = []; const walkDir = (dir) => { try { for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { const full = path.join(dir, entry.name); if (entry.isDirectory()) { if (['node_modules', '.git', 'venv', '.terraform', '__pycache__'].includes(entry.name)) continue; walkDir(full); } else if (entry.name.endsWith('.tf') || entry.name.endsWith('.tfvars')) { try { const content = fs.readFileSync(full, 'utf8'); const relPath = path.relative(srcRoot, full); const lines = content.split('\n'); for (let i = 0; i < lines.length; i++) { const line = lines[i]; const cidrMatch = line.match(/(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\/\d{1,2})/); if (cidrMatch) { const cidr = cidrMatch[1]; let context = line.includes('#') ? line.substring(line.indexOf('#') + 1).trim() : ''; if (!context) { for (let j = Math.max(0, i - 3); j < i; j++) { if (lines[j].trim().startsWith('#')) { context = lines[j].replace(/^#\s*/, '').trim(); break; } } } if (!context) context = line.trim(); cidrs.push({ cidr, context, file: relPath }); } } } catch {} } } } catch {} }; walkDir(srcRoot); const unique = {}; for (const c of cidrs) { if (!unique[c.cidr]) unique[c.cidr] = []; unique[c.cidr].push(c); } return Object.entries(unique).map(([cidr, refs]) => { refs.sort((a, b) => { const aIsCode = a.context.includes('=') || a.context.includes('"'); const bIsCode = b.context.includes('=') || b.context.includes('"'); if (!aIsCode && bIsCode) return -1; if (aIsCode && !bIsCode) return 1; return 0; }); return { cidr, refs }; }); } /** * Extract naming conventions from scripts and terraform. 
*/ function extractNamingConventions(srcRoot) { const conventions = []; const walkDir = (dir) => { try { for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { const full = path.join(dir, entry.name); if (entry.isDirectory()) { if (['node_modules', '.git', 'venv', '.terraform', '__pycache__'].includes(entry.name)) continue; walkDir(full); } else if (entry.name.endsWith('.sh') || entry.name.endsWith('.py') || entry.name.endsWith('.tf')) { try { const content = fs.readFileSync(full, 'utf8'); const relPath = path.relative(srcRoot, full); // Only match lines that explicitly describe naming conventions with template patterns const lines = content.split('\n'); for (const line of lines) { const trimmed = line.trim(); // Must contain a template-like pattern AND a convention keyword if (trimmed.match(/convention|naming|format/i) && trimmed.match(/\{(phase|region|cloud|index|env)\}/i)) { conventions.push({ pattern: trimmed.substring(0, 200), file: relPath }); } // Also match explicit naming examples like "aws-{phase}-{region-code}-{index}-vpc" if (trimmed.match(/(?:aws|azr|gcp)-\{.*\}-\{.*\}/)) { conventions.push({ pattern: trimmed.substring(0, 200), file: relPath }); } } } catch {} } } } catch {} }; walkDir(srcRoot); // Deduplicate const seen = new Set(); return conventions.filter(c => { const key = c.pattern; if (seen.has(key)) return false; seen.add(key); return true; }); } /** * Extract sync-wave ordering from Helm templates. 
*/ function extractSyncWaves(srcRoot) { const waves = {}; const walkDir = (dir) => { try { for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { const full = path.join(dir, entry.name); if (entry.isDirectory()) { if (['node_modules', '.git', 'venv', '.terraform', '__pycache__'].includes(entry.name)) continue; walkDir(full); } else if (entry.name.endsWith('.yaml') || entry.name.endsWith('.yml')) { try { const content = fs.readFileSync(full, 'utf8'); const waveMatch = content.match(/sync-wave:\s*["']?(-?\d+)["']?/); if (waveMatch) { const wave = parseInt(waveMatch[1]); const kind = content.match(/kind:\s*(\w+)/)?.[1] || 'Unknown'; const name = content.match(/name:\s*['"]?([^\s'"]+)/)?.[1] || entry.name; const relPath = path.relative(srcRoot, full); if (!waves[wave]) waves[wave] = []; waves[wave].push({ kind, name, file: relPath }); } } catch {} } } } catch {} }; walkDir(srcRoot); return Object.entries(waves) .sort((a, b) => Number(a[0]) - Number(b[0])) .map(([wave, resources]) => ({ wave: Number(wave), resources })); } /** * Extract tech stack from Helm chart images and package.json. 
*/ function extractTechStack(srcRoot) { const images = new Set(); const packages = {}; const walkDir = (dir) => { try { for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { const full = path.join(dir, entry.name); if (entry.isDirectory()) { if (['node_modules', '.git', 'venv', '.terraform', '__pycache__'].includes(entry.name)) continue; walkDir(full); } else if (entry.name === 'values.yaml') { try { const content = fs.readFileSync(full, 'utf8'); const imgMatches = content.match(/image:\s*['"]?([^\s'"]+)/g); if (imgMatches) imgMatches.forEach(m => { const img = m.replace(/image:\s*['"]?/, '').replace(/['"]$/, ''); if (img && !img.includes('{{') && !img.includes('__helm')) images.add(img); }); const repoMatches = content.match(/repository:\s*['"]?([^\s'"]+)/g); if (repoMatches) repoMatches.forEach(m => { const repo = m.replace(/repository:\s*['"]?/, '').replace(/['"]$/, ''); if (repo && !repo.includes('{{') && !repo.includes('__helm') && repo.includes('/')) images.add(repo); }); } catch {} } else if (entry.name === 'package.json') { try { const pkg = JSON.parse(fs.readFileSync(full, 'utf8')); const relPath = path.relative(srcRoot, full); if (pkg.dependencies) { for (const [name, ver] of Object.entries(pkg.dependencies)) { if (!packages[name]) packages[name] = []; packages[name].push({ version: ver, file: relPath }); } } } catch {} } } } catch {} }; walkDir(srcRoot); return { containerImages: [...images].sort(), npmPackages: Object.entries(packages).sort((a, b) => b[1].length - a[1].length).slice(0, 30) .map(([name, refs]) => ({ name, count: refs.length, versions: [...new Set(refs.map(r => r.version))] })), }; } /** * Run all pattern extractors and return a unified result. 
*/ function extractAllPatterns(srcRoot) { console.log('Extracting architectural patterns...'); const layers = extractLayers(srcRoot); console.log(` Layers: ${layers.length}`); const appsets = extractArgoCDAppSets(srcRoot); console.log(` ApplicationSets: ${appsets.length}`); const regions = extractCloudRegions(srcRoot); console.log(` Regions: AWS=${regions.aws.length} Azure=${regions.azure.length} GCP=${regions.gcp.length}`); const cidrs = extractCIDRAllocations(srcRoot); console.log(` CIDR allocations: ${cidrs.length}`); const naming = extractNamingConventions(srcRoot); console.log(` Naming conventions: ${naming.length}`); const syncWaves = extractSyncWaves(srcRoot); console.log(` Sync waves: ${syncWaves.length} distinct waves`); const techStack = extractTechStack(srcRoot); console.log(` Container images: ${techStack.containerImages.length}, NPM packages: ${techStack.npmPackages.length}`); return { layers, appsets, regions, cidrs, naming, syncWaves, techStack }; } module.exports = { extractAllPatterns, extractLayers, extractArgoCDAppSets, extractCloudRegions, extractCIDRAllocations, extractNamingConventions, extractSyncWaves, extractTechStack };