/** * Phase 8: Helm Chart Extractor * * Extracts structure from Helm charts with Go template syntax: * - Chart.yaml: metadata, dependencies * - values.yaml: contract surface (configurable parameters) * - templates/: K8s resource types, service interactions * * Strategy: strip Go templates before YAML parse, regex-extract K8s kinds from templates. */ const fs = require('fs'); const path = require('path'); const jsYaml = require('js-yaml'); /** Strip Go template directives so js-yaml can parse the structural YAML */ function stripGoTemplates(source) { // Replace {{- ... -}} and {{ ... }} with empty string or placeholder // Multi-line blocks: {{- if ... }} ... {{- end }} let out = source; // Remove template comments {{/* ... */}} out = out.replace(/\{\{\/\*[\s\S]*?\*\/\}\}/g, ''); // Replace {{ expr }} with a safe YAML placeholder // For values in mapping positions, replace with a quoted string out = out.replace(/\{\{-?\s*[\s\S]*?\s*-?\}\}/g, '"__helm_tpl__"'); // Clean up lines that are entirely template control flow (if/range/end/define/with) // These often leave broken YAML structure out = out.split('\n').map(line => { const trimmed = line.trim(); // Lines that are just template placeholders or empty after stripping if (trimmed === '"__helm_tpl__"') return ''; // Lines starting with - "__helm_tpl__" that break list structure if (trimmed === '- "__helm_tpl__"' && !line.includes(':')) return ''; return line; }).join('\n'); return out; } /** Parse Chart.yaml — plain YAML, no templates */ function parseChartYaml(chartDir) { const chartPath = path.join(chartDir, 'Chart.yaml'); if (!fs.existsSync(chartPath)) return null; try { const doc = jsYaml.load(fs.readFileSync(chartPath, 'utf8')); return { name: doc.name || path.basename(chartDir), version: doc.version || '0.0.0', appVersion: doc.appVersion || '', description: doc.description || '', type: doc.type || 'application', dependencies: (doc.dependencies || []).map(d => ({ name: d.name, version: d.version || '', repository: d.repository || '', condition: d.condition || '', isLocal: (d.repository || '').startsWith('file://'), })), }; } catch (e) { return { name: path.basename(chartDir), version: '0.0.0', description: '', type: 'application', dependencies: [], _parseError: e.message }; } } /** Extract top-level keys from values.yaml as the chart's contract surface */ function parseValuesYaml(chartDir) { const valuesPath = path.join(chartDir, 'values.yaml'); if (!fs.existsSync(valuesPath)) return { keys: [], raw: '' }; const raw = fs.readFileSync(valuesPath, 'utf8'); const stripped = stripGoTemplates(raw); try { const doc = jsYaml.load(stripped); if (!doc || typeof doc !== 'object') return { keys: [], raw }; const keys = []; for (const [key, val] of Object.entries(doc)) { const type = Array.isArray(val) ? 'list' : typeof val === 'object' && val !== null ? 'object' : typeof val; keys.push({ name: key, type, hasDefault: val !== null && val !== undefined && val !== '' && val !== '__helm_tpl__', defaultValue: typeof val === 'string' && val === '__helm_tpl__' ? '(templated)' : typeof val === 'object' ? undefined : val, }); } return { keys, raw }; } catch (e) { // Fallback: regex extract top-level keys const keys = []; const keyRegex = /^([a-zA-Z_][a-zA-Z0-9_-]*):/gm; let m; while ((m = keyRegex.exec(raw)) !== null) { if (!keys.find(k => k.name === m[1])) { keys.push({ name: m[1], type: 'unknown', hasDefault: true }); } } return { keys, raw, _parseError: e.message }; } } /** Scan template files for K8s resource kinds and .Values references */ function parseTemplates(chartDir) { const templatesDir = path.join(chartDir, 'templates'); if (!fs.existsSync(templatesDir)) return { resources: [], valuesRefs: new Set(), templateFiles: [] }; const resources = []; const valuesRefs = new Set(); const templateFiles = []; const seenResources = new Set(); function scanDir(dir) { let entries; try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; } for (const e of entries) { const fp = path.join(dir, e.name); if (e.isDirectory()) { scanDir(fp); continue; } if (!e.name.endsWith('.yaml') && !e.name.endsWith('.yml') && !e.name.endsWith('.tpl')) continue; const relTpl = path.relative(chartDir, fp); templateFiles.push(relTpl); let content; try { content = fs.readFileSync(fp, 'utf8'); } catch { continue; } // Extract K8s resource kinds via "kind: " pattern const kindRegex = /^\s*kind:\s*([A-Z][a-zA-Z]+)/gm; let km; while ((km = kindRegex.exec(content)) !== null) { const kind = km[1]; const key = `${kind}:${relTpl}`; if (!seenResources.has(key)) { seenResources.add(key); // Try to extract the name const nameRegex = /name:\s*(?:\{\{[^}]*\}\}|"[^"]*"|'[^']*'|([a-zA-Z0-9_.-]+))/; const nameMatch = content.slice(Math.max(0, km.index - 200), km.index + 500).match(nameRegex); resources.push({ kind, file: relTpl, name: nameMatch ? (nameMatch[1] || '(templated)') : '(unknown)', }); } } // Extract .Values.xxx references const valRegex = /\.Values\.([a-zA-Z_][a-zA-Z0-9_.]*)/g; let vm; while ((vm = valRegex.exec(content)) !== null) { valuesRefs.add(vm[1].split('.')[0]); // top-level key } } } scanDir(templatesDir); return { resources, valuesRefs: Array.from(valuesRefs), templateFiles }; } /** Extract interactions: what external services/endpoints does this chart reference? */ function extractInteractions(chartDir, chartMeta) { const interactions = []; const templatesDir = path.join(chartDir, 'templates'); if (!fs.existsSync(templatesDir)) return interactions; function scanDir(dir) { let entries; try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; } for (const e of entries) { const fp = path.join(dir, e.name); if (e.isDirectory()) { scanDir(fp); continue; } let content; try { content = fs.readFileSync(fp, 'utf8'); } catch { continue; } // Service references: ..svc.cluster.local const svcRegex = /([a-z][a-z0-9-]+)\.(?:\{\{[^}]*\}\}|[a-z0-9-]+)\.svc\.cluster\.local/g; let sm; while ((sm = svcRegex.exec(content)) !== null) { interactions.push({ type: 'k8s-service', target: sm[1], file: path.relative(chartDir, fp) }); } // ConfigMap/Secret references in envFrom or volumeMounts const configRefRegex = /configMapKeyRef:\s*\n\s*name:\s*([^\n]+)|secretKeyRef:\s*\n\s*name:\s*([^\n]+)/g; let cr; while ((cr = configRefRegex.exec(content)) !== null) { let ref = (cr[1] || cr[2] || '').trim().replace(/["'{}]/g, ''); if (ref.includes('helm_tpl') || ref.includes('__helm')) continue; // Strip Go template noise — if it's mostly template syntax, skip it if (ref.includes('tpl') || ref.includes('.Values') || ref.includes('include') || ref.includes('$')) continue; if (!ref || ref.length < 2) continue; interactions.push({ type: 'config-ref', target: ref, file: path.relative(chartDir, fp) }); } // Port references (containerPort, port, targetPort) const portRegex = /(?:containerPort|port|targetPort):\s*(\d+)/g; let pr; while ((pr = portRegex.exec(content)) !== null) { interactions.push({ type: 'port', target: pr[1], file: path.relative(chartDir, fp) }); } } } scanDir(templatesDir); // Deduplicate const seen = new Set(); return interactions.filter(i => { const key = `${i.type}:${i.target}`; if (seen.has(key)) return false; seen.add(key); return true; }); } /** * Discover and extract all Helm charts under a root directory. * @param {string} rootDir - Root directory to scan * @param {Set} ignoreDirs - Directory names to skip * @returns {Array} Array of chart descriptors */ function discoverCharts(rootDir, ignoreDirs) { const charts = []; const ignore = ignoreDirs || new Set(['node_modules', '.git', 'venv', '__pycache__', '.terraform']); function walk(dir, depth) { if (depth > 10) return; // safety let entries; try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; } // Check if this directory is a chart const hasChart = entries.some(e => e.isFile() && e.name === 'Chart.yaml'); if (hasChart) { const chartMeta = parseChartYaml(dir); const values = parseValuesYaml(dir); const templates = parseTemplates(dir); const interactions = extractInteractions(dir, chartMeta); charts.push({ dir: path.relative(rootDir, dir), chart: chartMeta, values, templates, interactions, }); } // Recurse into subdirectories for (const e of entries) { if (!e.isDirectory()) continue; if (ignore.has(e.name)) continue; walk(path.join(dir, e.name), depth + 1); } } walk(rootDir, 0); return charts; } /** * Convert chart data into graph entities and relationships for the pipeline. * @param {Array} charts - From discoverCharts * @param {string} rootDir - Root directory * @returns {{ entities: Array, relationships: Array }} */ function chartsToGraph(charts, rootDir) { const entities = []; const relationships = []; for (const c of charts) { const chartId = `helm:${c.chart.name}@${c.dir}`; // Chart as a module entities.push({ id: chartId, type: 'HelmChart', name: c.chart.name, kind: 'helm-chart', visibility: 'public', description: c.chart.description, version: c.chart.version, appVersion: c.chart.appVersion, chartType: c.chart.type, dir: c.dir, }); // Values as contract fields for (const key of c.values.keys) { const keyId = `${chartId}:values:${key.name}`; entities.push({ id: keyId, type: 'HelmValue', name: key.name, kind: 'helm-value', visibility: 'public', valueType: key.type, hasDefault: key.hasDefault, }); relationships.push({ type: 'CONTAINS', source: chartId, target: keyId }); } // K8s resources for (const res of c.templates.resources) { const resId = `${chartId}:resource:${res.kind}:${res.file}`; entities.push({ id: resId, type: 'K8sResource', name: `${res.kind}`, kind: 'k8s-resource', visibility: 'public', resourceKind: res.kind, file: res.file, }); relationships.push({ type: 'PRODUCES', source: chartId, target: resId }); } // Dependencies (chart → chart) for (const dep of c.chart.dependencies) { // Find the dependency chart const depChart = charts.find(dc => dc.chart.name === dep.name); if (depChart) { const depId = `helm:${depChart.chart.name}@${depChart.dir}`; relationships.push({ type: 'DEPENDS_ON', source: chartId, target: depId, condition: dep.condition }); } else { // External dependency const extId = `helm-ext:${dep.name}`; if (!entities.find(e => e.id === extId)) { entities.push({ id: extId, type: 'HelmChart', name: dep.name, kind: 'helm-chart-external', visibility: 'public', version: dep.version, repository: dep.repository, }); } relationships.push({ type: 'DEPENDS_ON', source: chartId, target: extId, condition: dep.condition }); } } // Service interactions for (const interaction of c.interactions) { if (interaction.type === 'k8s-service') { // Find chart that produces this service const targetChart = charts.find(tc => { return tc.templates.resources.some(r => r.kind === 'Service') && tc.chart.name.includes(interaction.target); }); if (targetChart && targetChart.chart.name !== c.chart.name) { const targetId = `helm:${targetChart.chart.name}@${targetChart.dir}`; relationships.push({ type: 'CALLS', source: chartId, target: targetId, via: interaction.target }); } } } } return { entities, relationships }; } /** * Generate a Mermaid diagram showing cross-chart interactions. * Groups charts by subsystem (top-level dir) and shows dependency/service edges. */ function generateHelmDiagram(charts) { const lines = ['graph TD']; // Use dir-based IDs to avoid collisions between same-named charts function chartId(c) { return c.dir.replace(/[^a-zA-Z0-9]/g, '_'); } // Group charts by subsystem (first path segment) const groups = {}; for (const c of charts) { const sub = c.dir.split('/')[0] || 'root'; if (!groups[sub]) groups[sub] = []; groups[sub].push(c); } // Emit subgraphs for (const [sub, subCharts] of Object.entries(groups)) { const safeSubId = sub.replace(/[^a-zA-Z0-9]/g, '_'); lines.push(` subgraph ${safeSubId}["${sub}"]`); for (const c of subCharts) { const id = chartId(c); const resCount = c.templates.resources.length; lines.push(` ${id}["${c.chart.name}
${resCount} resources"]`); } lines.push(' end'); } // Build lookup: chart name → chart objects (may be multiple) const nameIndex = {}; for (const c of charts) { if (!nameIndex[c.chart.name]) nameIndex[c.chart.name] = []; nameIndex[c.chart.name].push(c); } // Emit dependency edges const seenEdges = new Set(); for (const c of charts) { const srcId = chartId(c); for (const dep of c.chart.dependencies) { // Find dep chart — prefer one in same subsystem tree const candidates = nameIndex[dep.name] || []; let target = candidates.find(dc => c.dir.startsWith(dc.dir.split('/')[0])) || candidates[0]; if (!target || chartId(target) === srcId) continue; const tgtId = chartId(target); const edgeKey = `${srcId}->${tgtId}`; if (!seenEdges.has(edgeKey)) { seenEdges.add(edgeKey); lines.push(` ${srcId} -->|depends| ${tgtId}`); } } } // Emit shared-secret edges (charts that reference the same config-ref) const configUsers = {}; for (const c of charts) { for (const i of c.interactions) { if (i.type === 'config-ref') { if (!configUsers[i.target]) configUsers[i.target] = []; configUsers[i.target].push(c); } } } for (const [secret, users] of Object.entries(configUsers)) { // Deduplicate by chart dir const unique = [...new Map(users.map(u => [u.dir, u])).values()]; if (unique.length > 1) { for (let i = 0; i < unique.length - 1; i++) { const srcId = chartId(unique[i]); const tgtId = chartId(unique[i + 1]); const edgeKey = `${srcId}<->${tgtId}:${secret}`; if (!seenEdges.has(edgeKey)) { seenEdges.add(edgeKey); lines.push(` ${srcId} -.-|${secret}| ${tgtId}`); } } } } // Emit k8s-service edges for (const c of charts) { const srcId = chartId(c); for (const i of c.interactions) { if (i.type === 'k8s-service') { const tgtId = i.target.replace(/[^a-zA-Z0-9]/g, '_'); const edgeKey = `${srcId}--svc-->${tgtId}`; if (!seenEdges.has(edgeKey)) { seenEdges.add(edgeKey); lines.push(` ${srcId} ==>|svc: ${i.target}| ${tgtId}`); } } } } return lines.join('\n'); } module.exports = { stripGoTemplates, parseChartYaml, parseValuesYaml, parseTemplates, extractInteractions, discoverCharts, chartsToGraph, generateHelmDiagram };