Files
dev-intel-v2/extract-helm.js
Jarvis Prime f49a6c2dd9 Phase 8: Helm chart extraction with Go template support
- extract-helm.js: strips Go templates, parses Chart.yaml/values.yaml/templates
- Extracts K8s resource kinds, cross-chart interactions, shared secrets, ports
- generateHelmDiagram() for Mermaid interaction graphs
- Integrated into sysdoc.js: Helm entities merge into main knowledge graph
- Dir-based filenames to handle duplicate chart names
- .gitignore for node_modules, snapshots, venv, wasm
- 76 charts, 1813 entities, 1769 relationships on Foxtrot
2026-03-09 20:03:04 +00:00

479 lines
16 KiB
JavaScript

/**
* Phase 8: Helm Chart Extractor
*
* Extracts structure from Helm charts with Go template syntax:
* - Chart.yaml: metadata, dependencies
* - values.yaml: contract surface (configurable parameters)
* - templates/: K8s resource types, service interactions
*
* Strategy: strip Go templates before YAML parse, regex-extract K8s kinds from templates.
*/
const fs = require('fs');
const path = require('path');
const jsYaml = require('js-yaml');
/**
 * Strip Go template directives so js-yaml can parse the structural YAML.
 *
 * Template comments are removed outright. Every other `{{ ... }}` action
 * (including multi-line ones) becomes a `__helm_tpl__` placeholder: quoted when
 * it stands in plain YAML, bare when it already sits inside a quoted scalar so
 * that `name: "{{ .Values.x }}"` stays valid YAML instead of turning into
 * `name: ""__helm_tpl__""`. Lines that consist solely of a placeholder (typical
 * for if/range/end/define/with control flow) are blanked.
 *
 * @param {string} source - YAML text that may contain Go template actions
 * @returns {string} Text with template actions replaced by placeholders
 */
function stripGoTemplates(source) {
  const PLACEHOLDER = '__helm_tpl__';
  const QUOTED_PLACEHOLDER = `"${PLACEHOLDER}"`;
  // NUL is not a legal character in a YAML stream, so it is used as a
  // temporary marker while the quoting context of each action is determined.
  const MARK = '\u0000';

  const marked = source
    // Remove template comments {{/* ... */}}
    .replace(/\{\{\/\*[\s\S]*?\*\/\}\}/g, '')
    // Collapse every remaining action to a single marker character
    .replace(/\{\{-?\s*[\s\S]*?\s*-?\}\}/g, MARK);

  // Expand markers on one line, tracking whether we are inside a quoted scalar.
  const renderLine = (line) => {
    if (!line.includes(MARK)) return line;
    let rendered = '';
    let openQuote = null;
    for (let i = 0; i < line.length; i += 1) {
      const ch = line[i];
      if (ch === MARK) {
        rendered += openQuote ? PLACEHOLDER : QUOTED_PLACEHOLDER;
      } else if (openQuote === '"') {
        rendered += ch;
        if (ch === '\\' && i + 1 < line.length && line[i + 1] !== MARK) {
          // Copy the escaped character verbatim so \" does not close the scalar
          i += 1;
          rendered += line[i];
        } else if (ch === '"') {
          openQuote = null;
        }
      } else if (openQuote === "'") {
        rendered += ch;
        if (ch === "'") {
          if (line[i + 1] === "'") {
            // '' is an escaped quote inside a single-quoted scalar
            i += 1;
            rendered += "'";
          } else {
            openQuote = null;
          }
        }
      } else {
        rendered += ch;
        // A quote only opens a scalar at a token start; this keeps apostrophes
        // in plain text (e.g. "don't") from being mistaken for quoting.
        const atTokenStart = i === 0 || /[\s\[{,:]/.test(line[i - 1]);
        if ((ch === '"' || ch === "'") && atTokenStart) {
          openQuote = ch;
        }
      }
    }
    return rendered;
  };

  return marked
    .split('\n')
    .map((line) => {
      const rendered = renderLine(line);
      const trimmed = rendered.trim();
      // Lines that were pure control flow, or list items that were nothing but
      // a template, would leave broken YAML structure behind: drop them.
      if (trimmed === QUOTED_PLACEHOLDER || trimmed === `- ${QUOTED_PLACEHOLDER}`) {
        return '';
      }
      return rendered;
    })
    .join('\n');
}
/**
 * Parse Chart.yaml (plain YAML, no templates) into a normalized descriptor.
 *
 * @param {string} chartDir - Directory expected to contain Chart.yaml
 * @returns {object|null} null when Chart.yaml does not exist; otherwise an
 *   object with name, version, appVersion, description, type and dependencies.
 *   When the file cannot be read/parsed or is not a YAML mapping, the same
 *   shape is returned with defaults plus a `_parseError` message.
 */
function parseChartYaml(chartDir) {
  const chartPath = path.join(chartDir, 'Chart.yaml');
  if (!fs.existsSync(chartPath)) return null;

  const dirName = path.basename(chartDir);
  // Same keys as the success path so consumers never see a missing field.
  const fallback = (message) => ({
    name: dirName,
    version: '0.0.0',
    appVersion: '',
    description: '',
    type: 'application',
    dependencies: [],
    _parseError: message,
  });

  try {
    const doc = jsYaml.load(fs.readFileSync(chartPath, 'utf8'));
    if (!doc || typeof doc !== 'object' || Array.isArray(doc)) {
      return fallback('Chart.yaml is empty or not a YAML mapping');
    }
    // Tolerate a malformed dependencies field instead of discarding all metadata.
    const deps = Array.isArray(doc.dependencies) ? doc.dependencies : [];
    return {
      name: doc.name || dirName,
      version: doc.version || '0.0.0',
      appVersion: doc.appVersion || '',
      description: doc.description || '',
      type: doc.type || 'application',
      dependencies: deps
        .filter((d) => d && typeof d === 'object')
        .map((d) => ({
          name: d.name,
          version: d.version || '',
          repository: d.repository || '',
          condition: d.condition || '',
          isLocal: String(d.repository || '').startsWith('file://'),
        })),
    };
  } catch (e) {
    return fallback(e.message);
  }
}
/**
 * Extract top-level keys from values.yaml as the chart's contract surface.
 *
 * @param {string} chartDir - Directory expected to contain values.yaml
 * @returns {{keys: Array<object>, raw: string, _parseError?: string}}
 *   `keys` holds one entry per top-level key ({ name, type, hasDefault,
 *   defaultValue }); `raw` is the unmodified file text ('' when the file is
 *   missing). If YAML parsing fails, keys are recovered by regex with type
 *   'unknown' and `_parseError` carries the parser message.
 */
function parseValuesYaml(chartDir) {
  const valuesPath = path.join(chartDir, 'values.yaml');
  if (!fs.existsSync(valuesPath)) return { keys: [], raw: '' };
  const raw = fs.readFileSync(valuesPath, 'utf8');
  const stripped = stripGoTemplates(raw);
  try {
    const doc = jsYaml.load(stripped);
    // Only a mapping has named keys; a top-level list would otherwise be
    // reported as keys "0", "1", ...
    if (!doc || typeof doc !== 'object' || Array.isArray(doc)) return { keys: [], raw };
    const keys = Object.entries(doc).map(([name, val]) => {
      const isTemplated = val === '__helm_tpl__';
      let defaultValue = val;
      if (isTemplated) {
        defaultValue = '(templated)';
      } else if (typeof val === 'object') {
        // Objects, lists and null carry no scalar default
        defaultValue = undefined;
      }
      return {
        name,
        // typeof null is 'object', which matches how null values were typed before
        type: Array.isArray(val) ? 'list' : typeof val,
        hasDefault: val !== null && val !== undefined && val !== '' && !isTemplated,
        defaultValue,
      };
    });
    return { keys, raw };
  } catch (e) {
    // Fallback: regex extract top-level (column 0) keys from the raw text
    const seen = new Set();
    const keys = [];
    for (const [, name] of raw.matchAll(/^([a-zA-Z_][a-zA-Z0-9_-]*):/gm)) {
      if (seen.has(name)) continue;
      seen.add(name);
      keys.push({ name, type: 'unknown', hasDefault: true });
    }
    return { keys, raw, _parseError: e.message };
  }
}
/**
 * Scan a chart's templates/ tree for K8s resource kinds and .Values references.
 *
 * Only files ending in .yaml, .yml or .tpl are inspected. Detection is
 * regex-based on the raw (un-rendered) text, so results are heuristic.
 *
 * @param {string} chartDir - Chart root directory
 * @returns {{resources: Array<{kind: string, file: string, name: string}>,
 *            valuesRefs: string[], templateFiles: string[]}}
 *   `resources` has one entry per distinct kind per file; `valuesRefs` lists the
 *   top-level `.Values` keys referenced; `templateFiles` are paths relative to
 *   chartDir. All three are empty arrays when templates/ does not exist.
 */
function parseTemplates(chartDir) {
  const templatesDir = path.join(chartDir, 'templates');
  const resources = [];
  const valuesRefs = new Set();
  const templateFiles = [];
  // Return an array here too, so valuesRefs has the same type on every path.
  if (!fs.existsSync(templatesDir)) {
    return { resources, valuesRefs: [], templateFiles };
  }

  const kindRegex = /^\s*kind:\s*([A-Z][a-zA-Z]+)/gm;
  const nameRegex = /name:\s*(?:\{\{[^}]*\}\}|"[^"]*"|'[^']*'|([a-zA-Z0-9_.-]+))/;
  const valRegex = /\.Values\.([a-zA-Z_][a-zA-Z0-9_.]*)/g;
  const seenResources = new Set();

  function scanDir(dir) {
    let entries;
    // Unreadable directories/files are skipped: extraction is best-effort.
    try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; }
    for (const e of entries) {
      const fp = path.join(dir, e.name);
      if (e.isDirectory()) { scanDir(fp); continue; }
      if (!/\.(?:yaml|yml|tpl)$/.test(e.name)) continue;
      const relTpl = path.relative(chartDir, fp);
      templateFiles.push(relTpl);
      let content;
      try { content = fs.readFileSync(fp, 'utf8'); } catch { continue; }

      // Extract K8s resource kinds via "kind: <Kind>" pattern
      for (const km of content.matchAll(kindRegex)) {
        const kind = km[1];
        const key = `${kind}:${relTpl}`;
        if (seenResources.has(key)) continue;
        seenResources.add(key);
        // Look for the first "name:" in a window around the kind line; a
        // templated or quoted name leaves the capture group empty.
        const windowText = content.slice(Math.max(0, km.index - 200), km.index + 500);
        const nameMatch = windowText.match(nameRegex);
        resources.push({
          kind,
          file: relTpl,
          name: nameMatch ? (nameMatch[1] || '(templated)') : '(unknown)',
        });
      }

      // Extract .Values.xxx references, keeping only the top-level key
      for (const vm of content.matchAll(valRegex)) {
        valuesRefs.add(vm[1].split('.')[0]);
      }
    }
  }

  scanDir(templatesDir);
  return { resources, valuesRefs: Array.from(valuesRefs), templateFiles };
}
/**
 * Extract interactions: which external services, config objects and ports do
 * this chart's templates mention?
 *
 * Every file under templates/ is read (no extension filter) and matched with
 * three regex heuristics: `<svc>.<ns>.svc.cluster.local` host names,
 * configMapKeyRef/secretKeyRef names, and numeric port fields.
 *
 * @param {string} chartDir - Chart root directory
 * @param {object} chartMeta - Chart descriptor; accepted for interface
 *   compatibility, not consulted here
 * @returns {Array<{type: string, target: string, file: string}>} First
 *   occurrence of each distinct (type, target) pair
 */
function extractInteractions(chartDir, chartMeta) {
  const templatesDir = path.join(chartDir, 'templates');
  if (!fs.existsSync(templatesDir)) return [];

  const collected = [];
  // Fragments that indicate the reference is template syntax rather than a literal name
  const templateNoise = ['helm_tpl', '__helm', 'tpl', '.Values', 'include', '$'];

  const inspectFile = (filePath) => {
    let text;
    try {
      text = fs.readFileSync(filePath, 'utf8');
    } catch {
      return;
    }
    const file = path.relative(chartDir, filePath);

    // Service references: <service>.<namespace>.svc.cluster.local
    const servicePattern = /([a-z][a-z0-9-]+)\.(?:\{\{[^}]*\}\}|[a-z0-9-]+)\.svc\.cluster\.local/g;
    for (const [, service] of text.matchAll(servicePattern)) {
      collected.push({ type: 'k8s-service', target: service, file });
    }

    // ConfigMap/Secret names referenced through configMapKeyRef / secretKeyRef
    const keyRefPattern = /configMapKeyRef:\s*\n\s*name:\s*([^\n]+)|secretKeyRef:\s*\n\s*name:\s*([^\n]+)/g;
    for (const [, fromConfigMap, fromSecret] of text.matchAll(keyRefPattern)) {
      const ref = (fromConfigMap || fromSecret || '').trim().replace(/["'{}]/g, '');
      const looksTemplated = templateNoise.some((fragment) => ref.includes(fragment));
      if (looksTemplated || ref.length < 2) continue;
      collected.push({ type: 'config-ref', target: ref, file });
    }

    // Port references (containerPort, port, targetPort)
    const portPattern = /(?:containerPort|port|targetPort):\s*(\d+)/g;
    for (const [, port] of text.matchAll(portPattern)) {
      collected.push({ type: 'port', target: port, file });
    }
  };

  const visit = (dir) => {
    let listing;
    try {
      listing = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return;
    }
    listing.forEach((item) => {
      const full = path.join(dir, item.name);
      if (item.isDirectory()) {
        visit(full);
      } else {
        inspectFile(full);
      }
    });
  };

  visit(templatesDir);

  // Deduplicate on type + target, keeping the first hit (and its file)
  const firstByKey = new Map();
  for (const hit of collected) {
    const key = `${hit.type}:${hit.target}`;
    if (!firstByKey.has(key)) firstByKey.set(key, hit);
  }
  return [...firstByKey.values()];
}
/**
 * Walk a directory tree and build a descriptor for every Helm chart found.
 *
 * A directory counts as a chart when it directly contains a file named
 * Chart.yaml. The walk continues below chart directories as well, and stops
 * descending past depth 10. Unreadable directories are skipped.
 *
 * @param {string} rootDir - Root directory to scan
 * @param {Set<string>} ignoreDirs - Directory names to skip (a built-in set is
 *   used when omitted)
 * @returns {Array<object>} Chart descriptors: { dir, chart, values, templates,
 *   interactions }, with `dir` relative to rootDir
 */
function discoverCharts(rootDir, ignoreDirs) {
  const MAX_DEPTH = 10; // safety limit on recursion
  const skipNames = ignoreDirs || new Set(['node_modules', '.git', 'venv', '__pycache__', '.terraform']);
  const found = [];

  const visit = (currentDir, level) => {
    if (level > MAX_DEPTH) return;

    let listing;
    try {
      listing = fs.readdirSync(currentDir, { withFileTypes: true });
    } catch {
      return;
    }

    // Is this directory itself a chart?
    const isChartDir = listing.some((item) => item.name === 'Chart.yaml' && item.isFile());
    if (isChartDir) {
      const chart = parseChartYaml(currentDir);
      found.push({
        dir: path.relative(rootDir, currentDir),
        chart,
        values: parseValuesYaml(currentDir),
        templates: parseTemplates(currentDir),
        interactions: extractInteractions(currentDir, chart),
      });
    }

    // Descend into every subdirectory that is not on the skip list
    listing
      .filter((item) => item.isDirectory() && !skipNames.has(item.name))
      .forEach((item) => visit(path.join(currentDir, item.name), level + 1));
  };

  visit(rootDir, 0);
  return found;
}
/**
 * Turn chart descriptors into graph entities and relationships for the pipeline.
 *
 * Per chart this emits: one HelmChart entity; one HelmValue entity (CONTAINS)
 * per values key; one K8sResource entity (PRODUCES) per template resource; a
 * DEPENDS_ON edge per declared dependency (to a discovered chart with that
 * name, or to a `helm-ext:` entity created on first use); and a CALLS edge for
 * each k8s-service interaction that can be matched to another chart which
 * ships a Service and whose name contains the service name.
 *
 * @param {Array} charts - From discoverCharts
 * @param {string} rootDir - Root directory (not used by this function)
 * @returns {{ entities: Array, relationships: Array }}
 */
function chartsToGraph(charts, rootDir) {
  const entities = [];
  const relationships = [];
  // Chart ids embed the directory so same-named charts stay distinct
  const idOf = (entry) => `helm:${entry.chart.name}@${entry.dir}`;

  charts.forEach((current) => {
    const ownerId = idOf(current);
    const { chart, values, templates, interactions } = current;

    // The chart itself, modelled as a module
    entities.push({
      id: ownerId,
      type: 'HelmChart',
      name: chart.name,
      kind: 'helm-chart',
      visibility: 'public',
      description: chart.description,
      version: chart.version,
      appVersion: chart.appVersion,
      chartType: chart.type,
      dir: current.dir,
    });

    // values.yaml keys as contract fields
    values.keys.forEach(({ name, type, hasDefault }) => {
      const valueId = `${ownerId}:values:${name}`;
      entities.push({
        id: valueId,
        type: 'HelmValue',
        name,
        kind: 'helm-value',
        visibility: 'public',
        valueType: type,
        hasDefault,
      });
      relationships.push({ type: 'CONTAINS', source: ownerId, target: valueId });
    });

    // Kubernetes resources rendered by the chart
    templates.resources.forEach(({ kind, file }) => {
      const resourceId = `${ownerId}:resource:${kind}:${file}`;
      entities.push({
        id: resourceId,
        type: 'K8sResource',
        name: `${kind}`,
        kind: 'k8s-resource',
        visibility: 'public',
        resourceKind: kind,
        file,
      });
      relationships.push({ type: 'PRODUCES', source: ownerId, target: resourceId });
    });

    // Chart -> chart dependencies
    chart.dependencies.forEach((dep) => {
      const local = charts.find((candidate) => candidate.chart.name === dep.name);
      let targetId;
      if (local) {
        targetId = idOf(local);
      } else {
        // Not among the discovered charts: represent it as an external chart
        targetId = `helm-ext:${dep.name}`;
        const alreadyKnown = entities.some((entity) => entity.id === targetId);
        if (!alreadyKnown) {
          entities.push({
            id: targetId,
            type: 'HelmChart',
            name: dep.name,
            kind: 'helm-chart-external',
            visibility: 'public',
            version: dep.version,
            repository: dep.repository,
          });
        }
      }
      relationships.push({ type: 'DEPENDS_ON', source: ownerId, target: targetId, condition: dep.condition });
    });

    // Service interactions resolved to the chart that provides the service
    interactions
      .filter((interaction) => interaction.type === 'k8s-service')
      .forEach((interaction) => {
        const provider = charts.find((candidate) => {
          const shipsService = candidate.templates.resources.some((r) => r.kind === 'Service');
          return shipsService && candidate.chart.name.includes(interaction.target);
        });
        if (!provider || provider.chart.name === chart.name) return;
        relationships.push({ type: 'CALLS', source: ownerId, target: idOf(provider), via: interaction.target });
      });
  });

  return { entities, relationships };
}
/**
 * Generate a Mermaid diagram showing cross-chart interactions.
 *
 * Charts are grouped into subgraphs by subsystem (first path segment of their
 * dir, or "root" for a chart located at the scan root). Three edge kinds are
 * emitted: `-->|depends|` for declared dependencies that resolve to a
 * discovered chart, `-.-|<name>|` chaining charts that reference the same
 * config-ref, and `==>|svc: <name>|` from a chart to a node named after each
 * referenced cluster service.
 *
 * @param {Array<object>} charts - Chart descriptors from discoverCharts
 * @returns {string} Mermaid `graph TD` source
 */
function generateHelmDiagram(charts) {
  const lines = ['graph TD'];
  const sanitize = (text) => text.replace(/[^a-zA-Z0-9]/g, '_');
  // Accept both separators: path.relative yields backslashes on Windows
  const subsystemOf = (c) => c.dir.split(/[\\/]/)[0] || 'root';
  // Use dir-based IDs to avoid collisions between same-named charts. A chart at
  // the scan root has dir '' and would otherwise get an empty (invalid) id.
  const chartId = (c) => sanitize(c.dir) || 'root';

  // Group charts by subsystem. Maps keep insertion order and are safe for
  // names such as "constructor" that collide with Object.prototype members.
  const groups = new Map();
  for (const c of charts) {
    const sub = subsystemOf(c);
    if (!groups.has(sub)) groups.set(sub, []);
    groups.get(sub).push(c);
  }

  // Emit subgraphs. The sg_ prefix keeps a subgraph id distinct from the id of
  // a chart that sits directly in that top-level directory; Mermaid rejects a
  // node that shares its id with the subgraph containing it.
  for (const [sub, subCharts] of groups) {
    lines.push(` subgraph sg_${sanitize(sub)}["${sub}"]`);
    for (const c of subCharts) {
      const resCount = c.templates.resources.length;
      lines.push(` ${chartId(c)}["${c.chart.name}<br/>${resCount} resources"]`);
    }
    lines.push(' end');
  }

  // Build lookup: chart name → chart objects (may be multiple)
  const nameIndex = new Map();
  for (const c of charts) {
    if (!nameIndex.has(c.chart.name)) nameIndex.set(c.chart.name, []);
    nameIndex.get(c.chart.name).push(c);
  }

  // Emit dependency edges
  const seenEdges = new Set();
  for (const c of charts) {
    const srcId = chartId(c);
    const srcSubsystem = subsystemOf(c);
    for (const dep of c.chart.dependencies) {
      // Prefer a candidate in the same subsystem. Compare whole segments: a
      // prefix test would treat "web2/..." as belonging to subsystem "web".
      const candidates = nameIndex.get(dep.name) || [];
      const target = candidates.find((dc) => subsystemOf(dc) === srcSubsystem) || candidates[0];
      if (!target || chartId(target) === srcId) continue;
      const tgtId = chartId(target);
      const edgeKey = `${srcId}->${tgtId}`;
      if (!seenEdges.has(edgeKey)) {
        seenEdges.add(edgeKey);
        lines.push(` ${srcId} -->|depends| ${tgtId}`);
      }
    }
  }

  // Emit shared-secret edges (charts that reference the same config-ref)
  const configUsers = new Map();
  for (const c of charts) {
    for (const i of c.interactions) {
      if (i.type !== 'config-ref') continue;
      if (!configUsers.has(i.target)) configUsers.set(i.target, []);
      configUsers.get(i.target).push(c);
    }
  }
  for (const [secret, users] of configUsers) {
    // Deduplicate by chart dir, then chain consecutive users together
    const unique = [...new Map(users.map((u) => [u.dir, u])).values()];
    for (let i = 0; i < unique.length - 1; i += 1) {
      const srcId = chartId(unique[i]);
      const tgtId = chartId(unique[i + 1]);
      const edgeKey = `${srcId}<->${tgtId}:${secret}`;
      if (!seenEdges.has(edgeKey)) {
        seenEdges.add(edgeKey);
        lines.push(` ${srcId} -.-|${secret}| ${tgtId}`);
      }
    }
  }

  // Emit k8s-service edges; the target node is named after the service itself
  for (const c of charts) {
    const srcId = chartId(c);
    for (const i of c.interactions) {
      if (i.type !== 'k8s-service') continue;
      const tgtId = sanitize(i.target);
      const edgeKey = `${srcId}--svc-->${tgtId}`;
      if (!seenEdges.has(edgeKey)) {
        seenEdges.add(edgeKey);
        lines.push(` ${srcId} ==>|svc: ${i.target}| ${tgtId}`);
      }
    }
  }

  return lines.join('\n');
}
// Public API of the Helm extractor: the template stripper, the three per-chart
// parsers, interaction extraction, chart discovery, and the graph / Mermaid
// diagram builders defined above.
module.exports = {
stripGoTemplates,
parseChartYaml,
parseValuesYaml,
parseTemplates,
extractInteractions,
discoverCharts,
chartsToGraph,
generateHelmDiagram
};