334 lines
13 KiB
JavaScript
334 lines
13 KiB
JavaScript
|
|
/**
|
||
|
|
* extract-patterns.js — Mine architectural patterns from code artifacts.
|
||
|
|
*
|
||
|
|
* Extracts:
|
||
|
|
* - Layered architecture from repo/dir naming conventions
|
||
|
|
* - Hub/spoke model from ArgoCD ApplicationSet configs
|
||
|
|
* - Cloud regions from terraform configs + values.yaml
|
||
|
|
* - CIDR allocations from terraform variables
|
||
|
|
* - Naming conventions from scripts + terraform
|
||
|
|
* - Sync-wave ordering from Helm template annotations
|
||
|
|
* - Release/deployment patterns from CI configs + scripts
|
||
|
|
* - Tech stack from Helm chart images + dependencies
|
||
|
|
*/
|
||
|
|
|
||
|
|
const fs = require('fs');
|
||
|
|
const path = require('path');
|
||
|
|
|
||
|
|
// Directory-name prefixes and the architectural layer each one indicates.
// Columns: [name pattern, layer label, sort order]. extractLayers() assigns a
// directory to the first row whose pattern matches and sorts layers by order.
const LAYER_PATTERNS = [
  [/^app[-_]/, 'Application', 1],
  [/^compute[-_]/, 'Compute', 2],
  [/^network[-_]/, 'Network', 3],
  [/^account[-_]/, 'Account', 4],
  [/^control[-_]/, 'Control Plane', 5],
  [/^runtime/, 'Runtime (shared)', 0],
  [/^ipam[-_]/, 'IPAM', 3.5],
  [/^skills/, 'Skills/Tooling', 6],
  [/^docs/, 'Documentation', 7],
].map(([pattern, layer, order]) => ({ pattern, layer, order }));
|
||
|
|
|
||
|
|
/**
 * Infer the layered architecture from the names of srcRoot's top-level
 * directories.
 *
 * Every non-hidden directory is assigned to the first LAYER_PATTERNS entry
 * whose pattern matches its name; directories that match nothing are ignored.
 *
 * @param {string} srcRoot - Directory whose immediate children are inspected.
 * @returns {Array<{layer: string, repos: string[], order: number}>} One entry
 *   per layer that had at least one matching directory, sorted by ascending
 *   `order`.
 */
function extractLayers(srcRoot) {
  const grouped = {};

  for (const entry of fs.readdirSync(srcRoot, { withFileTypes: true })) {
    if (!entry.isDirectory() || entry.name.startsWith('.')) continue;

    // First matching pattern wins, mirroring the order of LAYER_PATTERNS.
    const hit = LAYER_PATTERNS.find(({ pattern }) => pattern.test(entry.name));
    if (!hit) continue;

    if (!grouped[hit.layer]) grouped[hit.layer] = { order: hit.order, repos: [] };
    grouped[hit.layer].repos.push(entry.name);
  }

  const result = [];
  for (const [layer, { repos, order }] of Object.entries(grouped)) {
    result.push({ layer, repos, order });
  }
  result.sort((x, y) => x.order - y.order);
  return result;
}
|
||
|
|
|
||
|
|
/**
 * Extract ArgoCD ApplicationSet configs to infer hub/spoke ownership.
 *
 * Walks srcRoot recursively (skipping dependency/VCS directories) and, for
 * every file named `*-appset.yaml` or `*-appset.yml`, pulls a few fields out
 * of the raw text with regexes. The YAML is not parsed: each field is the
 * first textual occurrence of its key in the file.
 *
 * A file counts as "hub" when its path (relative to srcRoot) contains `hub`
 * as a whole token or contains `control-plane`; everything else is "spoke".
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {Array<{name: string, namespace: string, repoURL: string,
 *   targetRevision: string, destServer: string, file: string,
 *   location: ('hub'|'spoke'), repoName: string}>} One entry per appset file,
 *   with `file` relative to srcRoot. Unreadable files and directories are
 *   skipped rather than reported.
 */
function extractArgoCDAppSets(srcRoot) {
  const SKIP_DIRS = new Set(['node_modules', '.git', 'venv', '.terraform', '__pycache__']);
  // "hub" must be a whole path token (bounded by a separator or the path
  // edge). A plain substring test would classify ".github/..." as hub.
  const HUB_TOKEN = /(?:^|[\\/._-])hub(?:[\\/._-]|$)/;
  const appsets = [];

  // First value following `key:` in the text, or undefined when absent.
  const firstValue = (content, key) =>
    content.match(new RegExp(`${key}:\\s*['"]?([^\\s'"]+)`))?.[1];

  const readAppSet = (full, fileName) => {
    const content = fs.readFileSync(full, 'utf8');
    const relPath = path.relative(srcRoot, full);
    const repoURL = firstValue(content, 'repoURL') || '';
    const isHub = HUB_TOKEN.test(relPath) || relPath.includes('control-plane');

    return {
      name: firstValue(content, 'name') || fileName,
      namespace: firstValue(content, 'namespace') || '',
      repoURL,
      targetRevision: firstValue(content, 'targetRevision') || '',
      destServer: firstValue(content, 'server') || '',
      file: relPath,
      location: isHub ? 'hub' : 'spoke',
      // Last URL path segment without a trailing ".git"; falls back to the URL.
      repoName: repoURL.match(/\/([^/]+?)(?:\.git)?$/)?.[1] || repoURL,
    };
  };

  const walkDir = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return; // best-effort scan: skip directories that cannot be listed
    }
    for (const entry of entries) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (!SKIP_DIRS.has(entry.name)) walkDir(full);
      } else if (entry.name.endsWith('-appset.yaml') || entry.name.endsWith('-appset.yml')) {
        try {
          appsets.push(readAppSet(full, entry.name));
        } catch {
          // best-effort scan: skip files that cannot be read
        }
      }
    }
  };

  walkDir(srcRoot);
  return appsets;
}
|
||
|
|
|
||
|
|
/**
 * Extract cloud regions from terraform configs and values.yaml files.
 *
 * Recursively scans srcRoot for `*.tf` files and files named `values.yaml`
 * and collects every occurrence of a known AWS, Azure or GCP region name.
 * Only the fixed set of regions listed in the patterns below is recognised.
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {{aws: string[], azure: string[], gcp: string[]}} Distinct region
 *   names per provider, each list sorted. Unreadable files and directories
 *   are skipped rather than reported.
 */
function extractCloudRegions(srcRoot) {
  const SKIP_DIRS = new Set(['node_modules', '.git', 'venv', '.terraform', '__pycache__']);
  const REGION_PATTERNS = {
    // (?!\d) keeps a longer numeric suffix from being reported as a shorter
    // region (e.g. "europe-west12" must not yield "europe-west1").
    aws: /(?:us-east-[12]|us-west-[12]|eu-west-[123]|eu-central-[12]|ap-southeast-[12]|ap-northeast-[123])(?!\d)/g,
    // The lookbehind stops "northcentralus" / "southcentralus" /
    // "westcentralus" from being reported as "centralus".
    azure: /(?<!north|south|west)centralus|eastus2?|westus[23]?|westeurope|northeurope|southeastasia|australiaeast/g,
    gcp: /(?:us-central1|us-east[14]|us-west[14]|europe-west[1-6]|asia-east[12]|asia-southeast[12])(?!\d)/g,
  };
  const regions = { aws: new Set(), azure: new Set(), gcp: new Set() };

  const scanFile = (full) => {
    const content = fs.readFileSync(full, 'utf8');
    for (const [provider, pattern] of Object.entries(REGION_PATTERNS)) {
      for (const region of content.match(pattern) ?? []) {
        regions[provider].add(region);
      }
    }
  };

  const walkDir = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return; // best-effort scan: skip directories that cannot be listed
    }
    for (const entry of entries) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (!SKIP_DIRS.has(entry.name)) walkDir(full);
      } else if (entry.name.endsWith('.tf') || entry.name === 'values.yaml') {
        try {
          scanFile(full);
        } catch {
          // best-effort scan: skip files that cannot be read
        }
      }
    }
  };

  walkDir(srcRoot);

  return {
    aws: [...regions.aws].sort(),
    azure: [...regions.azure].sort(),
    gcp: [...regions.gcp].sort(),
  };
}
|
||
|
|
|
||
|
|
/**
 * Extract CIDR allocations from terraform variables and configs.
 *
 * Recursively scans srcRoot for `*.tf` and `*.tfvars` files. On each line
 * that mentions `cidr`, `CIDR`, `subnet` or `network`, every IPv4 CIDR that
 * appears after the first such keyword is recorded, so a list like
 * `cidr_blocks = ["10.0.0.0/16", "10.1.0.0/16"]` yields both blocks.
 * Values that cannot be IPv4 CIDRs (an octet above 255 or a prefix above 32)
 * are ignored.
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {Array<{cidr: string, refs: Array<{context: string, file: string}>}>}
 *   One entry per distinct CIDR in first-seen order. `context` is the line
 *   text from the keyword through the CIDR (at most 100 characters) and
 *   `file` is relative to srcRoot. Unreadable files and directories are
 *   skipped rather than reported.
 */
function extractCIDRAllocations(srcRoot) {
  const SKIP_DIRS = new Set(['node_modules', '.git', 'venv', '.terraform', '__pycache__']);
  const KEYWORD = /cidr|CIDR|subnet|network/;
  const CIDR = /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\/\d{1,2}/g;
  const refsByCidr = new Map();

  const isValidCidr = (cidr) => {
    const [address, prefix] = cidr.split('/');
    return Number(prefix) <= 32 && address.split('.').every((octet) => Number(octet) <= 255);
  };

  const scanFile = (full) => {
    const content = fs.readFileSync(full, 'utf8');
    const relPath = path.relative(srcRoot, full);
    for (const line of content.split(/\r?\n/)) {
      const keywordAt = line.search(KEYWORD);
      if (keywordAt === -1) continue;

      // Only text after the keyword is considered, so a CIDR that precedes
      // the keyword on the same line is not attributed to it.
      const tail = line.slice(keywordAt);
      for (const hit of tail.matchAll(CIDR)) {
        const cidr = hit[0];
        if (!isValidCidr(cidr)) continue;
        const context = tail.slice(0, hit.index + cidr.length).trim().substring(0, 100);
        if (!refsByCidr.has(cidr)) refsByCidr.set(cidr, []);
        refsByCidr.get(cidr).push({ context, file: relPath });
      }
    }
  };

  const walkDir = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return; // best-effort scan: skip directories that cannot be listed
    }
    for (const entry of entries) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (!SKIP_DIRS.has(entry.name)) walkDir(full);
      } else if (entry.name.endsWith('.tf') || entry.name.endsWith('.tfvars')) {
        try {
          scanFile(full);
        } catch {
          // best-effort scan: skip files that cannot be read
        }
      }
    }
  };

  walkDir(srcRoot);

  return [...refsByCidr].map(([cidr, refs]) => ({ cidr, refs }));
}
|
||
|
|
|
||
|
|
/**
 * Extract naming conventions from scripts and terraform.
 *
 * Recursively scans srcRoot for `.sh`, `.py` and `.tf` files and keeps lines
 * that either (a) mention convention/naming/format together with a
 * `{phase|region|cloud|index|env}` placeholder, or (b) contain an explicit
 * provider-prefixed template such as "aws-{phase}-{region-code}-{index}-vpc".
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {Array<{pattern: string, file: string}>} Matching lines (trimmed,
 *   at most 200 characters) with the file they were first seen in, relative
 *   to srcRoot. Each distinct line text appears once. Unreadable files and
 *   directories are skipped.
 */
function extractNamingConventions(srcRoot) {
  const ignoredDirs = ['node_modules', '.git', 'venv', '.terraform', '__pycache__'];
  const scannedSuffixes = ['.sh', '.py', '.tf'];
  const found = [];

  // Collect every qualifying line of one file into `found`.
  function scanFile(absPath) {
    const text = fs.readFileSync(absPath, 'utf8');
    const file = path.relative(srcRoot, absPath);

    text.split('\n').forEach((rawLine) => {
      const candidate = rawLine.trim();
      const snippet = candidate.substring(0, 200);

      // A convention keyword AND a template-like placeholder on the same line.
      const mentionsKeyword = /convention|naming|format/i.test(candidate);
      const hasPlaceholder = /\{(phase|region|cloud|index|env)\}/i.test(candidate);
      if (mentionsKeyword && hasPlaceholder) {
        found.push({ pattern: snippet, file });
      }

      // Explicit examples like "aws-{phase}-{region-code}-{index}-vpc".
      if (/(?:aws|azr|gcp)-\{.*\}-\{.*\}/.test(candidate)) {
        found.push({ pattern: snippet, file });
      }
    });
  }

  function visit(dir) {
    let children;
    try {
      children = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return;
    }
    for (const child of children) {
      const absPath = path.join(dir, child.name);
      if (child.isDirectory()) {
        if (ignoredDirs.includes(child.name)) continue;
        visit(absPath);
        continue;
      }
      if (!scannedSuffixes.some((suffix) => child.name.endsWith(suffix))) continue;
      try {
        scanFile(absPath);
      } catch {}
    }
  }

  visit(srcRoot);

  // Keep only the first occurrence of each distinct line text.
  const firstByPattern = new Map();
  for (const item of found) {
    if (!firstByPattern.has(item.pattern)) firstByPattern.set(item.pattern, item);
  }
  return [...firstByPattern.values()];
}
|
||
|
|
|
||
|
|
/**
 * Extract sync-wave ordering from Helm templates.
 *
 * Recursively scans srcRoot for `.yaml` / `.yml` files. Each file is split
 * into YAML documents on `---` separator lines, and every document carrying a
 * `sync-wave:` annotation contributes one resource. `kind` and `name` are
 * taken from that same document, so a multi-document file reports each
 * resource under its own wave. Fields are read with regexes (the YAML is not
 * parsed): `kind` and `name` are the first textual occurrence in the
 * document.
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {Array<{wave: number, resources: Array<{kind: string, name: string,
 *   file: string}>}>} Waves in ascending numeric order; `file` is relative to
 *   srcRoot. `kind` falls back to 'Unknown' and `name` to the file name.
 *   Unreadable files and directories are skipped rather than reported.
 */
function extractSyncWaves(srcRoot) {
  const SKIP_DIRS = new Set(['node_modules', '.git', 'venv', '.terraform', '__pycache__']);
  // A YAML document separator on its own line, optionally followed by a comment.
  const DOC_SEPARATOR = /^---[ \t]*(?:#.*)?\r?$/m;
  const byWave = new Map();

  const scanFile = (full, fileName) => {
    const content = fs.readFileSync(full, 'utf8');
    const relPath = path.relative(srcRoot, full);

    for (const doc of content.split(DOC_SEPARATOR)) {
      const waveMatch = doc.match(/sync-wave:\s*["']?(-?\d+)["']?/);
      if (!waveMatch) continue;

      const wave = Number.parseInt(waveMatch[1], 10);
      const kind = doc.match(/kind:\s*(\w+)/)?.[1] || 'Unknown';
      const name = doc.match(/name:\s*['"]?([^\s'"]+)/)?.[1] || fileName;

      if (!byWave.has(wave)) byWave.set(wave, []);
      byWave.get(wave).push({ kind, name, file: relPath });
    }
  };

  const walkDir = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return; // best-effort scan: skip directories that cannot be listed
    }
    for (const entry of entries) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (!SKIP_DIRS.has(entry.name)) walkDir(full);
      } else if (entry.name.endsWith('.yaml') || entry.name.endsWith('.yml')) {
        try {
          scanFile(full, entry.name);
        } catch {
          // best-effort scan: skip files that cannot be read
        }
      }
    }
  };

  walkDir(srcRoot);

  return [...byWave]
    .sort((a, b) => a[0] - b[0])
    .map(([wave, resources]) => ({ wave, resources }));
}
|
||
|
|
|
||
|
|
/**
 * Extract tech stack from Helm chart images and package.json.
 *
 * Recursively scans srcRoot:
 * - In files named `values.yaml`, collects the value of every `image:` key
 *   and every `repository:` key whose value contains a `/`. Templated values
 *   (containing `{{` or `__helm`) are ignored. The value must be on the same
 *   line as the key, so a mapping such as `image:\n  repository: ...` does
 *   not record the nested key name as an image.
 * - In files named `package.json`, collects the entries of `dependencies`.
 *
 * @param {string} srcRoot - Root directory to scan.
 * @returns {{containerImages: string[], npmPackages: Array<{name: string,
 *   count: number, versions: string[]}>}} `containerImages` is sorted and
 *   de-duplicated. `npmPackages` holds the 30 packages referenced by the most
 *   package.json files, most-referenced first, with the distinct version
 *   ranges seen. Unreadable or unparsable files are skipped.
 */
function extractTechStack(srcRoot) {
  const SKIP_DIRS = new Set(['node_modules', '.git', 'venv', '.terraform', '__pycache__']);
  // [ \t]* rather than \s*: the value has to follow the key on the same line.
  const IMAGE = /image:[ \t]*['"]?([^\s'"]+)/g;
  const REPOSITORY = /repository:[ \t]*['"]?([^\s'"]+)/g;

  const images = new Set();
  // A Map (not a plain object) so dependency names such as "constructor"
  // cannot collide with Object.prototype members.
  const packages = new Map();

  // A usable literal: not Helm-templated and not the start of a YAML comment.
  const isLiteral = (value) =>
    !value.includes('{{') && !value.includes('__helm') && !value.startsWith('#');

  const scanValues = (full) => {
    const content = fs.readFileSync(full, 'utf8');
    for (const [, image] of content.matchAll(IMAGE)) {
      if (isLiteral(image)) images.add(image);
    }
    for (const [, repo] of content.matchAll(REPOSITORY)) {
      // Only registry-style paths count; a bare `repository: nginx` is skipped.
      if (isLiteral(repo) && repo.includes('/')) images.add(repo);
    }
  };

  const scanPackageJson = (full) => {
    const pkg = JSON.parse(fs.readFileSync(full, 'utf8'));
    if (!pkg || !pkg.dependencies) return;
    const relPath = path.relative(srcRoot, full);
    for (const [name, version] of Object.entries(pkg.dependencies)) {
      if (!packages.has(name)) packages.set(name, []);
      packages.get(name).push({ version, file: relPath });
    }
  };

  const walkDir = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return; // best-effort scan: skip directories that cannot be listed
    }
    for (const entry of entries) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (!SKIP_DIRS.has(entry.name)) walkDir(full);
        continue;
      }
      try {
        if (entry.name === 'values.yaml') scanValues(full);
        else if (entry.name === 'package.json') scanPackageJson(full);
      } catch {
        // best-effort scan: skip files that cannot be read or parsed
      }
    }
  };

  walkDir(srcRoot);

  return {
    containerImages: [...images].sort(),
    npmPackages: [...packages]
      .sort((a, b) => b[1].length - a[1].length)
      .slice(0, 30)
      .map(([name, refs]) => ({
        name,
        count: refs.length,
        versions: [...new Set(refs.map((ref) => ref.version))],
      })),
  };
}
|
||
|
|
|
||
|
|
/**
 * Run all pattern extractors and return a unified result.
 *
 * The extractors run one after another in a fixed order, and a one-line
 * summary is written to the console after each finishes.
 *
 * @param {string} srcRoot - Root directory handed to every extractor.
 * @returns {{layers: *, appsets: *, regions: *, cidrs: *, naming: *,
 *   syncWaves: *, techStack: *}} Each extractor's return value under its own key.
 */
function extractAllPatterns(srcRoot) {
  const result = {};
  console.log('Extracting architectural patterns...');

  result.layers = extractLayers(srcRoot);
  console.log(` Layers: ${result.layers.length}`);

  result.appsets = extractArgoCDAppSets(srcRoot);
  console.log(` ApplicationSets: ${result.appsets.length}`);

  result.regions = extractCloudRegions(srcRoot);
  const { aws, azure, gcp } = result.regions;
  console.log(` Regions: AWS=${aws.length} Azure=${azure.length} GCP=${gcp.length}`);

  result.cidrs = extractCIDRAllocations(srcRoot);
  console.log(` CIDR allocations: ${result.cidrs.length}`);

  result.naming = extractNamingConventions(srcRoot);
  console.log(` Naming conventions: ${result.naming.length}`);

  result.syncWaves = extractSyncWaves(srcRoot);
  console.log(` Sync waves: ${result.syncWaves.length} distinct waves`);

  result.techStack = extractTechStack(srcRoot);
  const { containerImages, npmPackages } = result.techStack;
  console.log(` Container images: ${containerImages.length}, NPM packages: ${npmPackages.length}`);

  return result;
}
|
||
|
|
|
||
|
|
module.exports = { extractAllPatterns, extractLayers, extractArgoCDAppSets, extractCloudRegions, extractCIDRAllocations, extractNamingConventions, extractSyncWaves, extractTechStack };
|