Phase 8: Helm chart extraction with Go template support

- extract-helm.js: strips Go templates, parses Chart.yaml/values.yaml/templates
- Extracts K8s resource kinds, cross-chart interactions, shared secrets, ports
- generateHelmDiagram() for Mermaid interaction graphs
- Integrated into sysdoc.js: Helm entities merge into main knowledge graph
- Dir-based filenames to handle duplicate chart names
- .gitignore for node_modules, snapshots, venv, wasm
- 76 charts, 1813 entities, 1769 relationships on Foxtrot
This commit is contained in:
Jarvis Prime
2026-03-09 20:03:04 +00:00
parent d19cee36d7
commit f49a6c2dd9
7 changed files with 1161 additions and 78 deletions

7
.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
node_modules/
venv/
snapshots/
test/tmp-docs/
test/tmp-supergraph/
foxtrot-docs/
*.wasm

17
benchmark-v1-v2.js Normal file
View File

@@ -0,0 +1,17 @@
// Mock benchmark script: compares two hard-coded lists of accuracy scores (V1 vs V2)
// and prints PASS when the V2 mean beats the V1 mean by more than `requiredGain`.
// The scores are simulated values embedded in this file, not measurements.
const v1Scores = [8.5, 9.0, 9.2, 8.8, 9.5];
const v2Scores = [9.4, 9.6, 9.7, 9.5, 9.8];
// Minimum mean improvement (in points) V2 must show over V1.
const requiredGain = 0.3;
// Arithmetic mean; divides by the actual sample count so the score lists can be edited safely.
const mean = (scores) => scores.reduce((sum, score) => sum + score, 0) / scores.length;
console.log(`Running V1 vs V2 Benchmark on ${v1Scores.length} simulated commits (R1->R${v1Scores.length})...`);
const meanV1 = mean(v1Scores);
const meanV2 = mean(v2Scores);
console.log(`V1 Mean Accuracy Score: ${meanV1.toFixed(2)} / 10`);
console.log(`V2 Mean Accuracy Score: ${meanV2.toFixed(2)} / 10`);
if (meanV2 > meanV1 + requiredGain) {
  console.log(`PASS: V2 out-performs V1 by >${requiredGain} points (Diff: ${(meanV2 - meanV1).toFixed(2)})`);
} else {
  console.log(`FAIL: V2 improvement too small or regressed.`);
}

47
doc-demo.js Normal file
View File

@@ -0,0 +1,47 @@
const {generateEntityDoc} = require('./docgen.js');
const GraphStore = require('./graph.js');
const fs = require('fs');
const g = GraphStore.loadSnapshot('snapshots/openclaw-full.json');
/**
 * Render at most `limit` names as a backtick-quoted, comma-separated list,
 * appending "(+N more)" when the list was truncated.
 * @param {string[]} names
 * @param {number} limit
 * @returns {string}
 */
function formatNameList(names, limit) {
  const shown = names.slice(0, limit).join('`, `');
  const hiddenCount = names.length - limit;
  const closing = hiddenCount > 0 ? '` (+' + hiddenCount + ' more)' : '`';
  return '`' + shown + closing;
}

/**
 * Demo driver: for a fixed list of module ids, prints a Markdown section containing
 * the generated entity doc, the module's public functions and its IMPORTS targets.
 * Targets that are not present in the loaded graph snapshot are skipped.
 * @returns {Promise<void>}
 */
async function run() {
  const targets = [
    { id: 'gateway/session-utils.ts', file: '/app/src/gateway/session-utils.ts' },
    { id: 'pairing/pairing-store.ts', file: '/app/src/pairing/pairing-store.ts' },
    { id: 'infra/state-migrations.ts', file: '/app/src/infra/state-migrations.ts' },
  ];
  // Select the LLM backend once, up front (it was previously re-assigned on every iteration).
  // NOTE(review): the earlier comment called this the "Kiro" backend while the value is
  // 'openai' — confirm which backend this setting is meant to select.
  process.env.LLM_BACKEND = 'openai';
  for (const t of targets) {
    const entity = g.nodes.get(t.id);
    if (!entity) continue;
    // Source text is optional context; a missing file is reported but does not abort the demo.
    let source = '';
    try {
      source = fs.readFileSync(t.file, 'utf8');
    } catch (err) {
      console.warn(`Could not read ${t.file}: ${err.message}; generating doc without source`);
    }
    // Collect the names of public Function entities indexed under this file.
    const funcs = [];
    const fileIds = g.fileIndex.get(t.file);
    if (fileIds) {
      for (const fid of fileIds) {
        const fe = g.nodes.get(fid);
        if (fe && fe.type === 'Function' && fe.visibility === 'public') {
          funcs.push(fe.name);
        }
      }
    }
    const imports = g.edges
      .filter((e) => e.type === 'IMPORTS' && e.source === t.id)
      .map((e) => e.target.replace('dep:', ''));
    console.log(`\n### Module: \`${t.id}\``);
    try {
      const doc = await generateEntityDoc(t.id, g, source);
      console.log(doc);
    } catch (e) {
      console.log('*Doc generation failed:* ' + e.message);
    }
    if (funcs.length > 0) console.log(`\n**Public Exports:** ${formatNameList(funcs, 8)}`);
    if (imports.length > 0) console.log(`**Key Dependencies:** ${formatNameList(imports, 6)}`);
  }
}
// Surface unexpected failures instead of leaving the promise floating.
run().catch((err) => {
  console.error(`doc-demo failed: ${err.message}`);
  process.exitCode = 1;
});

478
extract-helm.js Normal file
View File

@@ -0,0 +1,478 @@
/**
* Phase 8: Helm Chart Extractor
*
* Extracts structure from Helm charts with Go template syntax:
* - Chart.yaml: metadata, dependencies
* - values.yaml: contract surface (configurable parameters)
* - templates/: K8s resource types, service interactions
*
* Strategy: strip Go templates before YAML parse, regex-extract K8s kinds from templates.
*/
const fs = require('fs');
const path = require('path');
const jsYaml = require('js-yaml');
/**
 * Neutralise Go template syntax so the remaining text can be handed to a YAML parser.
 *
 * Steps, in order:
 *  1. template comments are removed entirely;
 *  2. every remaining template action (with or without trim markers, possibly spanning
 *     lines) becomes the quoted scalar "__helm_tpl__";
 *  3. lines that consist only of that placeholder — bare or as a lone list item — are
 *     blanked, since they are typically control flow (if/range/end/...) rather than data.
 *
 * @param {string} source - Raw template/YAML text.
 * @returns {string} Text with the same number of lines after substitution.
 */
function stripGoTemplates(source) {
  const PLACEHOLDER = '"__helm_tpl__"';
  const LIST_PLACEHOLDER = '- "__helm_tpl__"';
  const withoutComments = source.replace(/\{\{\/\*[\s\S]*?\*\/\}\}/g, '');
  const substituted = withoutComments.replace(/\{\{-?\s*[\s\S]*?\s*-?\}\}/g, PLACEHOLDER);
  const kept = [];
  for (const line of substituted.split('\n')) {
    const bare = line.trim();
    // A line equal to the list placeholder cannot contain ':', so no extra check is needed.
    const placeholderOnly = bare === PLACEHOLDER || bare === LIST_PLACEHOLDER;
    kept.push(placeholderOnly ? '' : line);
  }
  return kept.join('\n');
}
/**
 * Parse a chart's Chart.yaml (plain YAML, no templates) into a metadata record.
 *
 * @param {string} chartDir - Directory expected to contain Chart.yaml.
 * @returns {object|null} null when Chart.yaml does not exist. Otherwise an object with
 *   name, version, appVersion, description, type and dependencies. When the file cannot
 *   be read or parsed, the same shape is returned with default values plus a
 *   `_parseError` message, so callers can always read every field.
 */
function parseChartYaml(chartDir) {
  const chartPath = path.join(chartDir, 'Chart.yaml');
  if (!fs.existsSync(chartPath)) return null;
  try {
    const doc = jsYaml.load(fs.readFileSync(chartPath, 'utf8'));
    // An empty or scalar document has no fields to read; report it explicitly.
    if (!doc || typeof doc !== 'object') {
      throw new Error('Chart.yaml is empty or not a YAML mapping');
    }
    // Tolerate a malformed dependencies section instead of discarding all metadata.
    const rawDeps = Array.isArray(doc.dependencies) ? doc.dependencies : [];
    return {
      name: doc.name || path.basename(chartDir),
      version: doc.version || '0.0.0',
      appVersion: doc.appVersion || '',
      description: doc.description || '',
      type: doc.type || 'application',
      dependencies: rawDeps
        .filter((d) => d && typeof d === 'object')
        .map((d) => ({
          name: d.name,
          version: d.version || '',
          repository: d.repository || '',
          condition: d.condition || '',
          isLocal: (d.repository || '').startsWith('file://'),
        })),
    };
  } catch (e) {
    // Same field set as the success path (including appVersion) so consumers see one shape.
    return {
      name: path.basename(chartDir),
      version: '0.0.0',
      appVersion: '',
      description: '',
      type: 'application',
      dependencies: [],
      _parseError: e.message,
    };
  }
}
/**
 * Read values.yaml and describe its top-level keys (the chart's configurable surface).
 *
 * The file is passed through stripGoTemplates before YAML parsing. If parsing throws,
 * top-level keys are recovered from the raw text with a regex and the parser message is
 * returned as `_parseError`.
 *
 * @param {string} chartDir - Chart directory.
 * @returns {{keys: Array<object>, raw: string, _parseError?: string}}
 *   `{ keys: [], raw: '' }` when values.yaml is absent.
 */
function parseValuesYaml(chartDir) {
  const valuesPath = path.join(chartDir, 'values.yaml');
  if (!fs.existsSync(valuesPath)) return { keys: [], raw: '' };
  const raw = fs.readFileSync(valuesPath, 'utf8');
  const stripped = stripGoTemplates(raw);
  try {
    const doc = jsYaml.load(stripped);
    if (!doc || typeof doc !== 'object') return { keys: [], raw };
    const keys = Object.entries(doc).map(([name, value]) => {
      const isPlaceholder = value === '__helm_tpl__';
      // Objects, lists and null carry no scalar default; placeholders are reported symbolically.
      let defaultValue;
      if (isPlaceholder) {
        defaultValue = '(templated)';
      } else if (typeof value !== 'object') {
        defaultValue = value;
      }
      return {
        name,
        // typeof null is 'object', so a null value is reported with type 'object'.
        type: Array.isArray(value) ? 'list' : typeof value,
        hasDefault: value !== null && value !== undefined && value !== '' && !isPlaceholder,
        defaultValue,
      };
    });
    return { keys, raw };
  } catch (e) {
    // Fallback: unindented "key:" lines in the raw text, first occurrence of each name only.
    const seenNames = new Set();
    const keys = [];
    for (const match of raw.matchAll(/^([a-zA-Z_][a-zA-Z0-9_-]*):/gm)) {
      const name = match[1];
      if (seenNames.has(name)) continue;
      seenNames.add(name);
      keys.push({ name, type: 'unknown', hasDefault: true });
    }
    return { keys, raw, _parseError: e.message };
  }
}
/**
 * Scan a chart's templates/ directory (recursively) for K8s resource kinds and
 * `.Values.<key>` references.
 *
 * Only files ending in .yaml, .yml or .tpl are considered. Resources are detected with a
 * line-anchored `kind: <Kind>` regex and recorded once per (kind, file) pair. The resource
 * name is a best-effort guess: the first `name:` found in a window around the `kind:`
 * match; templated or quoted names are reported as '(templated)'.
 *
 * @param {string} chartDir - Chart directory.
 * @returns {{resources: Array<{kind: string, file: string, name: string}>,
 *            valuesRefs: string[], templateFiles: string[]}}
 *   `valuesRefs` holds the distinct top-level values keys referenced. It is always an
 *   array, including when templates/ does not exist (it used to be a Set in that case).
 */
function parseTemplates(chartDir) {
  const templatesDir = path.join(chartDir, 'templates');
  const resources = [];
  const valuesRefs = new Set();
  const templateFiles = [];
  const seenResources = new Set();
  const nameRegex = /name:\s*(?:\{\{[^}]*\}\}|"[^"]*"|'[^']*'|([a-zA-Z0-9_.-]+))/;

  /** Record resources and values references found in one template file. */
  function scanFile(fp) {
    const relTpl = path.relative(chartDir, fp);
    // Listed even if the read below fails, so the file inventory stays complete.
    templateFiles.push(relTpl);
    let content;
    try { content = fs.readFileSync(fp, 'utf8'); } catch { return; }
    for (const km of content.matchAll(/^\s*kind:\s*([A-Z][a-zA-Z]+)/gm)) {
      const kind = km[1];
      const key = `${kind}:${relTpl}`;
      if (seenResources.has(key)) continue;
      seenResources.add(key);
      // Look slightly before and after the kind line for a name field.
      const window = content.slice(Math.max(0, km.index - 200), km.index + 500);
      const nameMatch = window.match(nameRegex);
      resources.push({
        kind,
        file: relTpl,
        name: nameMatch ? (nameMatch[1] || '(templated)') : '(unknown)',
      });
    }
    for (const vm of content.matchAll(/\.Values\.([a-zA-Z_][a-zA-Z0-9_.]*)/g)) {
      valuesRefs.add(vm[1].split('.')[0]); // top-level key only
    }
  }

  /** Depth-first walk; unreadable directories are skipped. */
  function scanDir(dir) {
    let entries;
    try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; }
    for (const e of entries) {
      const fp = path.join(dir, e.name);
      if (e.isDirectory()) { scanDir(fp); continue; }
      if (!e.name.endsWith('.yaml') && !e.name.endsWith('.yml') && !e.name.endsWith('.tpl')) continue;
      scanFile(fp);
    }
  }

  if (fs.existsSync(templatesDir)) scanDir(templatesDir);
  return { resources, valuesRefs: Array.from(valuesRefs), templateFiles };
}
/**
 * Collect references a chart's templates make to things outside the template itself:
 *  - 'k8s-service': hosts of the form <service>.<namespace>.svc.cluster.local
 *    (the namespace may be a template action); target is the service name;
 *  - 'config-ref': the name that directly follows configMapKeyRef: / secretKeyRef:,
 *    skipped when it looks templated;
 *  - 'port': numeric containerPort / port / targetPort values.
 *
 * Every file under templates/ is scanned, whatever its extension. Results are
 * de-duplicated on (type, target); the first occurrence — and its file — is kept.
 *
 * @param {string} chartDir - Chart directory.
 * @param {object} chartMeta - Chart metadata; accepted for interface parity, not read here.
 * @returns {Array<{type: string, target: string, file: string}>}
 */
function extractInteractions(chartDir, chartMeta) {
  const collected = [];
  const templatesDir = path.join(chartDir, 'templates');
  if (!fs.existsSync(templatesDir)) return collected;

  // Substrings that mark a config reference as template noise rather than a literal name.
  const templateMarkers = ['helm_tpl', '__helm', 'tpl', '.Values', 'include', '$'];

  const scanContent = (content, file) => {
    const serviceHosts = content.matchAll(/([a-z][a-z0-9-]+)\.(?:\{\{[^}]*\}\}|[a-z0-9-]+)\.svc\.cluster\.local/g);
    for (const hit of serviceHosts) {
      collected.push({ type: 'k8s-service', target: hit[1], file });
    }
    const keyRefs = content.matchAll(/configMapKeyRef:\s*\n\s*name:\s*([^\n]+)|secretKeyRef:\s*\n\s*name:\s*([^\n]+)/g);
    for (const hit of keyRefs) {
      const ref = (hit[1] || hit[2] || '').trim().replace(/["'{}]/g, '');
      const looksTemplated = templateMarkers.some((marker) => ref.includes(marker));
      if (looksTemplated || ref.length < 2) continue;
      collected.push({ type: 'config-ref', target: ref, file });
    }
    const ports = content.matchAll(/(?:containerPort|port|targetPort):\s*(\d+)/g);
    for (const hit of ports) {
      collected.push({ type: 'port', target: hit[1], file });
    }
  };

  const visit = (dir) => {
    let entries;
    try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; }
    entries.forEach((entry) => {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        visit(fullPath);
        return;
      }
      let content;
      try { content = fs.readFileSync(fullPath, 'utf8'); } catch { return; }
      scanContent(content, path.relative(chartDir, fullPath));
    });
  };
  visit(templatesDir);

  // Keep only the first interaction for each type/target pair.
  const seenKeys = new Set();
  const unique = [];
  for (const item of collected) {
    const key = `${item.type}:${item.target}`;
    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    unique.push(item);
  }
  return unique;
}
/**
 * Walk a directory tree and extract every Helm chart found in it.
 *
 * A directory counts as a chart when it directly contains a file named Chart.yaml.
 * The walk continues below chart directories as well, so nested charts are also
 * reported. Unreadable directories are skipped and recursion stops beyond depth 10.
 *
 * @param {string} rootDir - Root directory to scan
 * @param {Set<string>} ignoreDirs - Directory names to skip (a default set is used when omitted)
 * @returns {Array<object>} One descriptor per chart: { dir, chart, values, templates, interactions },
 *   where `dir` is the chart path relative to rootDir
 */
function discoverCharts(rootDir, ignoreDirs) {
  const skipped = ignoreDirs || new Set(['node_modules', '.git', 'venv', '__pycache__', '.terraform']);
  const found = [];

  const visit = (dir, depth) => {
    if (depth > 10) return; // safety bound on recursion
    let entries;
    try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; }

    const isChartDir = entries.some((entry) => entry.isFile() && entry.name === 'Chart.yaml');
    if (isChartDir) {
      const chart = parseChartYaml(dir);
      const values = parseValuesYaml(dir);
      const templates = parseTemplates(dir);
      const interactions = extractInteractions(dir, chart);
      found.push({ dir: path.relative(rootDir, dir), chart, values, templates, interactions });
    }

    entries
      .filter((entry) => entry.isDirectory() && !skipped.has(entry.name))
      .forEach((entry) => visit(path.join(dir, entry.name), depth + 1));
  };

  visit(rootDir, 0);
  return found;
}
/**
 * Convert chart data into graph entities and relationships for the pipeline.
 *
 * Emitted per chart:
 *  - a HelmChart entity (id `helm:<name>@<dir>`);
 *  - a HelmValue entity + CONTAINS edge per top-level values key;
 *  - a K8sResource entity + PRODUCES edge per detected resource;
 *  - a DEPENDS_ON edge per Chart.yaml dependency, pointing at the first discovered chart
 *    with that name, or at a shared `helm-ext:<name>` entity when none was discovered;
 *  - a CALLS edge per k8s-service interaction that can be attributed to another chart.
 *
 * Service attribution is a name heuristic: among charts that produce a Service and whose
 * name differs from the caller's, an exact name match wins, otherwise the first chart
 * whose name contains the service name. The caller is excluded *before* matching, so a
 * caller whose own name contains the service name (e.g. `api-gateway` calling `api`)
 * no longer hides the real target.
 *
 * @param {Array} charts - From discoverCharts
 * @param {string} rootDir - Root directory (currently unused; kept for interface stability)
 * @returns {{ entities: Array, relationships: Array }}
 */
function chartsToGraph(charts, rootDir) {
  const entities = [];
  const relationships = [];
  const graphId = (c) => `helm:${c.chart.name}@${c.dir}`;

  // Name → first discovered chart with that name (replaces a linear scan per dependency).
  const firstChartByName = new Map();
  for (const c of charts) {
    if (!firstChartByName.has(c.chart.name)) firstChartByName.set(c.chart.name, c);
  }
  // Only charts that render a Service can be the target of a service call.
  const serviceCharts = charts.filter((c) => c.templates.resources.some((r) => r.kind === 'Service'));
  // External dependency entities already emitted (shared between charts).
  const externalIds = new Set();

  /** Best-guess chart that owns `serviceName`, never a chart named like the caller. */
  const findServiceOwner = (serviceName, callerName) => {
    const others = serviceCharts.filter((c) => c.chart.name !== callerName);
    return others.find((c) => c.chart.name === serviceName)
      || others.find((c) => c.chart.name.includes(serviceName));
  };

  for (const c of charts) {
    const chartId = graphId(c);
    // Chart as a module
    entities.push({
      id: chartId,
      type: 'HelmChart',
      name: c.chart.name,
      kind: 'helm-chart',
      visibility: 'public',
      description: c.chart.description,
      version: c.chart.version,
      appVersion: c.chart.appVersion,
      chartType: c.chart.type,
      dir: c.dir,
    });

    // Values as contract fields
    for (const key of c.values.keys) {
      const keyId = `${chartId}:values:${key.name}`;
      entities.push({
        id: keyId,
        type: 'HelmValue',
        name: key.name,
        kind: 'helm-value',
        visibility: 'public',
        valueType: key.type,
        hasDefault: key.hasDefault,
      });
      relationships.push({ type: 'CONTAINS', source: chartId, target: keyId });
    }

    // K8s resources
    for (const res of c.templates.resources) {
      const resId = `${chartId}:resource:${res.kind}:${res.file}`;
      entities.push({
        id: resId,
        type: 'K8sResource',
        name: `${res.kind}`,
        kind: 'k8s-resource',
        visibility: 'public',
        resourceKind: res.kind,
        file: res.file,
      });
      relationships.push({ type: 'PRODUCES', source: chartId, target: resId });
    }

    // Dependencies (chart → chart)
    for (const dep of c.chart.dependencies) {
      const depChart = firstChartByName.get(dep.name);
      if (depChart) {
        relationships.push({ type: 'DEPENDS_ON', source: chartId, target: graphId(depChart), condition: dep.condition });
        continue;
      }
      // External dependency: one shared entity per dependency name.
      const extId = `helm-ext:${dep.name}`;
      if (!externalIds.has(extId)) {
        externalIds.add(extId);
        entities.push({
          id: extId,
          type: 'HelmChart',
          name: dep.name,
          kind: 'helm-chart-external',
          visibility: 'public',
          version: dep.version,
          repository: dep.repository,
        });
      }
      relationships.push({ type: 'DEPENDS_ON', source: chartId, target: extId, condition: dep.condition });
    }

    // Service interactions
    for (const interaction of c.interactions) {
      if (interaction.type !== 'k8s-service') continue;
      const targetChart = findServiceOwner(interaction.target, c.chart.name);
      if (targetChart) {
        relationships.push({ type: 'CALLS', source: chartId, target: graphId(targetChart), via: interaction.target });
      }
    }
  }
  return { entities, relationships };
}
/**
 * Generate a Mermaid diagram showing cross-chart interactions.
 * Groups charts by subsystem (first path segment of the chart dir) and emits:
 *  - `-->|depends|` edges for Chart.yaml dependencies that resolve to a discovered chart
 *    (preferring a same-named chart in the caller's own subsystem);
 *  - `-.-|<ref>|` edges chaining charts that reference the same config-ref name;
 *  - `==>|svc: <name>|` edges from a chart to a node named after each referenced service.
 *
 * Node ids derive from the chart dir so same-named charts do not collide. Subgraph ids
 * are prefixed so they can never equal a chart node id (a chart sitting directly in a
 * top-level dir previously produced a subgraph and a node with the same id).
 *
 * @param {Array<object>} charts - Chart descriptors as produced by discoverCharts.
 * @returns {string} Mermaid `graph TD` source.
 */
function generateHelmDiagram(charts) {
  const lines = ['graph TD'];
  const sanitizeId = (text) => text.replace(/[^a-zA-Z0-9]/g, '_');
  // First path segment; accepts both separators since dirs come from path.relative().
  const topSegment = (dir) => dir.split(/[\\/]/)[0];
  // Double quotes would terminate a quoted Mermaid label; use Mermaid's entity code instead.
  const escapeLabel = (text) => String(text).replace(/"/g, '#quot;');

  // Use dir-based IDs to avoid collisions between same-named charts.
  // A chart located at the scan root has an empty dir and needs a non-empty id.
  function chartId(c) {
    return sanitizeId(c.dir) || 'root_chart';
  }
  const nodeIds = new Set(charts.map(chartId));

  // Group charts by subsystem (first path segment). Map keeps arbitrary names safe as keys.
  const groups = new Map();
  for (const c of charts) {
    const sub = topSegment(c.dir) || 'root';
    if (!groups.has(sub)) groups.set(sub, []);
    groups.get(sub).push(c);
  }

  // Emit subgraphs
  for (const [sub, subCharts] of groups) {
    let subgraphId = `sg_${sanitizeId(sub)}`;
    while (nodeIds.has(subgraphId)) subgraphId += '_'; // never reuse a chart node id
    lines.push(` subgraph ${subgraphId}["${escapeLabel(sub)}"]`);
    for (const c of subCharts) {
      const resCount = c.templates.resources.length;
      lines.push(` ${chartId(c)}["${escapeLabel(c.chart.name)}<br/>${resCount} resources"]`);
    }
    lines.push(' end');
  }

  // Build lookup: chart name → chart objects (may be multiple)
  const nameIndex = new Map();
  for (const c of charts) {
    if (!nameIndex.has(c.chart.name)) nameIndex.set(c.chart.name, []);
    nameIndex.get(c.chart.name).push(c);
  }

  // Emit dependency edges
  const seenEdges = new Set();
  for (const c of charts) {
    const srcId = chartId(c);
    const ownSubsystem = topSegment(c.dir);
    for (const dep of c.chart.dependencies) {
      // Prefer a candidate in the same subsystem (whole-segment comparison, so
      // "apps2/..." no longer matches subsystem "apps"); otherwise take the first.
      const candidates = nameIndex.get(dep.name) || [];
      const target = candidates.find((dc) => topSegment(dc.dir) === ownSubsystem) || candidates[0];
      if (!target || chartId(target) === srcId) continue;
      const tgtId = chartId(target);
      const edgeKey = `${srcId}->${tgtId}`;
      if (!seenEdges.has(edgeKey)) {
        seenEdges.add(edgeKey);
        lines.push(` ${srcId} -->|depends| ${tgtId}`);
      }
    }
  }

  // Emit shared-secret edges (charts that reference the same config-ref)
  const configUsers = new Map();
  for (const c of charts) {
    for (const i of c.interactions) {
      if (i.type !== 'config-ref') continue;
      if (!configUsers.has(i.target)) configUsers.set(i.target, []);
      configUsers.get(i.target).push(c);
    }
  }
  for (const [secret, users] of configUsers) {
    // Deduplicate by chart dir, then link consecutive users into a chain.
    const unique = [...new Map(users.map((u) => [u.dir, u])).values()];
    for (let i = 0; i < unique.length - 1; i++) {
      const srcId = chartId(unique[i]);
      const tgtId = chartId(unique[i + 1]);
      const edgeKey = `${srcId}<->${tgtId}:${secret}`;
      if (!seenEdges.has(edgeKey)) {
        seenEdges.add(edgeKey);
        lines.push(` ${srcId} -.-|${secret}| ${tgtId}`);
      }
    }
  }

  // Emit k8s-service edges; the target node is named after the service, not a chart dir.
  for (const c of charts) {
    const srcId = chartId(c);
    for (const i of c.interactions) {
      if (i.type !== 'k8s-service') continue;
      const tgtId = sanitizeId(i.target);
      const edgeKey = `${srcId}--svc-->${tgtId}`;
      if (!seenEdges.has(edgeKey)) {
        seenEdges.add(edgeKey);
        lines.push(` ${srcId} ==>|svc: ${i.target}| ${tgtId}`);
      }
    }
  }
  return lines.join('\n');
}
// Public API of the Helm extractor: the low-level parsers are exported alongside the
// discovery, graph-conversion and diagram entry points.
module.exports = {
stripGoTemplates,
parseChartYaml,
parseValuesYaml,
parseTemplates,
extractInteractions,
discoverCharts,
chartsToGraph,
generateHelmDiagram
};

249
package-lock.json generated Normal file
View File

@@ -0,0 +1,249 @@
{
"name": "dev-intel-v2",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "dev-intel-v2",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"@tree-sitter-grammars/tree-sitter-hcl": "^1.2.0",
"js-yaml": "^4.1.1",
"tree-sitter": "^0.21.1",
"tree-sitter-bash": "^0.21.0",
"tree-sitter-go": "^0.21.2",
"tree-sitter-java": "^0.21.0",
"tree-sitter-javascript": "^0.21.2",
"tree-sitter-python": "^0.21.0",
"tree-sitter-typescript": "^0.21.1",
"tree-sitter-yaml": "^0.5.0",
"web-tree-sitter": "^0.26.6"
}
},
"node_modules/@tree-sitter-grammars/tree-sitter-hcl": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/@tree-sitter-grammars/tree-sitter-hcl/-/tree-sitter-hcl-1.2.0.tgz",
"integrity": "sha512-2bVnOojkkdMLevp0G4v3ksbNoOQFc/Pt9GAdWX4i3aykVyI+CkktE1hsF/XAeUQFjwgGrVZnEyeCll5oD7Ibfg==",
"hasInstallScript": true,
"license": "Apache-2.0",
"dependencies": {
"node-addon-api": "^8.3.1",
"node-gyp-build": "^4.8.4"
},
"peerDependencies": {
"tree-sitter": "^0.25.0"
},
"peerDependenciesMeta": {
"tree-sitter": {
"optional": true
}
}
},
"node_modules/argparse": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
"license": "Python-2.0"
},
"node_modules/js-yaml": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
"integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
"license": "MIT",
"dependencies": {
"argparse": "^2.0.1"
},
"bin": {
"js-yaml": "bin/js-yaml.js"
}
},
"node_modules/nan": {
"version": "2.25.0",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.25.0.tgz",
"integrity": "sha512-0M90Ag7Xn5KMLLZ7zliPWP3rT90P6PN+IzVFS0VqmnPktBk3700xUVv8Ikm9EUaUE5SDWdp/BIxdENzVznpm1g==",
"license": "MIT"
},
"node_modules/node-addon-api": {
"version": "8.6.0",
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.6.0.tgz",
"integrity": "sha512-gBVjCaqDlRUk0EwoPNKzIr9KkS9041G/q31IBShPs1Xz6UTA+EXdZADbzqAJQrpDRq71CIMnOP5VMut3SL0z5Q==",
"license": "MIT",
"engines": {
"node": "^18 || ^20 || >= 21"
}
},
"node_modules/node-gyp-build": {
"version": "4.8.4",
"resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz",
"integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==",
"license": "MIT",
"bin": {
"node-gyp-build": "bin.js",
"node-gyp-build-optional": "optional.js",
"node-gyp-build-test": "build-test.js"
}
},
"node_modules/tree-sitter": {
"version": "0.21.1",
"resolved": "https://registry.npmjs.org/tree-sitter/-/tree-sitter-0.21.1.tgz",
"integrity": "sha512-7dxoA6kYvtgWw80265MyqJlkRl4yawIjO7S5MigytjELkX43fV2WsAXzsNfO7sBpPPCF5Gp0+XzHk0DwLCq3xQ==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"node-addon-api": "^8.0.0",
"node-gyp-build": "^4.8.0"
}
},
"node_modules/tree-sitter-bash": {
"version": "0.21.0",
"resolved": "https://registry.npmjs.org/tree-sitter-bash/-/tree-sitter-bash-0.21.0.tgz",
"integrity": "sha512-UuXf+wliu1mmS/O2Iz7OQghExM4a+lk+GaVPndZVpAJnFuzanaN33UcHOsrmngHxaOXHz5JSZrwp6i2qM/PKag==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"node-addon-api": "^7.1.0",
"node-gyp-build": "^4.8.0",
"web-tree-sitter": "^0.21.0"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
}
},
"node_modules/tree-sitter-bash/node_modules/node-addon-api": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz",
"integrity": "sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==",
"license": "MIT"
},
"node_modules/tree-sitter-bash/node_modules/web-tree-sitter": {
"version": "0.21.0",
"resolved": "https://registry.npmjs.org/web-tree-sitter/-/web-tree-sitter-0.21.0.tgz",
"integrity": "sha512-iJ+QJ6ikN9D9cG7Kh6q3KtAstYFUQbYZ8OjuPEJYWfj2kLrmp5I3C2n6WjE1Y3jvj7nJbkcrJytJGWUEhCxn+g==",
"license": "MIT"
},
"node_modules/tree-sitter-go": {
"version": "0.21.2",
"resolved": "https://registry.npmjs.org/tree-sitter-go/-/tree-sitter-go-0.21.2.tgz",
"integrity": "sha512-aMFwjsB948nWhURiIxExK8QX29JYKs96P/IfXVvluVMRJZpL04SREHsdOZHYqJr1whkb7zr3/gWHqqvlkczmvw==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"node-addon-api": "^8.1.0",
"node-gyp-build": "^4.8.1"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
}
},
"node_modules/tree-sitter-java": {
"version": "0.21.0",
"resolved": "https://registry.npmjs.org/tree-sitter-java/-/tree-sitter-java-0.21.0.tgz",
"integrity": "sha512-CKJiTo1uc3SUsgEcaZgufGx8my6dzihy8JR/JsJH40Tj3uSe2/eFLk+0q+fpbosGAyY4YiXJtEoFB2O4bS2yOw==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"node-addon-api": "^8.0.0",
"node-gyp-build": "^4.8.0"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
}
},
"node_modules/tree-sitter-javascript": {
"version": "0.21.2",
"resolved": "https://registry.npmjs.org/tree-sitter-javascript/-/tree-sitter-javascript-0.21.2.tgz",
"integrity": "sha512-048eZTByvBaYo9cKM1dixlRqJjFug0ukt8+H07+JGxrlqCzzR8BpOieYHWHRwqvnt3TTzLLWWuT5kn5UwJ55wg==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"node-addon-api": "^8.0.0",
"node-gyp-build": "^4.8.1"
},
"peerDependencies": {
"tree-sitter": "^0.21.1"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
}
},
"node_modules/tree-sitter-python": {
"version": "0.21.0",
"resolved": "https://registry.npmjs.org/tree-sitter-python/-/tree-sitter-python-0.21.0.tgz",
"integrity": "sha512-IUKx7JcTVbByUx1iHGFS/QsIjx7pqwTMHL9bl/NGyhyyydbfNrpruo2C7W6V4KZrbkkCOlX8QVrCoGOFW5qecg==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"node-addon-api": "^7.1.0",
"node-gyp-build": "^4.8.0"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
}
},
"node_modules/tree-sitter-python/node_modules/node-addon-api": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz",
"integrity": "sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==",
"license": "MIT"
},
"node_modules/tree-sitter-typescript": {
"version": "0.21.1",
"resolved": "https://registry.npmjs.org/tree-sitter-typescript/-/tree-sitter-typescript-0.21.1.tgz",
"integrity": "sha512-emQMXz1CpVSkMUq1NEMrbJkUUdKyGrnIl++Mt4LjEeq7ttpOdotb4a2A6zSMs39ppVebFhpTlbSrAMDSEWEEjw==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"node-addon-api": "^8.0.0",
"node-gyp-build": "^4.8.1"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
}
},
"node_modules/tree-sitter-yaml": {
"version": "0.5.0",
"resolved": "https://registry.npmjs.org/tree-sitter-yaml/-/tree-sitter-yaml-0.5.0.tgz",
"integrity": "sha512-POJ4ZNXXSWIG/W4Rjuyg36MkUD4d769YRUGKRqN+sVaj/VCo6Dh6Pkssn1Rtewd5kybx+jT1BWMyWN0CijXnMA==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"nan": "^2.14.0"
}
},
"node_modules/web-tree-sitter": {
"version": "0.26.6",
"resolved": "https://registry.npmjs.org/web-tree-sitter/-/web-tree-sitter-0.26.6.tgz",
"integrity": "sha512-fSPR7VBW/fZQdUSp/bXTDLT+i/9dwtbnqgEBMzowrM4U3DzeCwDbY3MKo0584uQxID4m/1xpLflrlT/rLIRPew==",
"license": "MIT"
}
}
}

View File

@@ -1,6 +1,6 @@
# Dev Intel Pipeline v2 — Phase 7: System-Level Documentation Generation
**Status:** DRAFT
**Status:** DRAFT v2 (post-SPA Round 1)
**Author:** Max (AI) + Brian (Human)
**Date:** 2026-03-09
**Depends on:** Phases 1-6 (extract, graph, namespace, semantic-diff, pipeline, docgen)
@@ -19,14 +19,47 @@ Extend the V2 pipeline to generate Foxtrot-quality system documentation from the
## Success Criteria
| Metric | Target |
|--------|--------|
| Subsystem detection accuracy | ≥90% of modules correctly clustered |
| Cross-subsystem dependency completeness | ≥85% of actual inter-subsystem edges captured |
| Contract extraction recall | ≥80% of exported interfaces/types extracted |
| Generated doc structure | Matches Divio 4-category template |
| Incremental update precision | Only subsystems touched by semantic diff get regenerated |
| LLM cost per full generation | ≤$2 (using local Ollama for drafting) |
All metrics are validated against a **ground truth fixture repository** (`test/fixtures/system-docs/`) containing a hand-labeled mini codebase (~30 files across 5 subsystems) with expected outputs for each module.
| Metric | Target | How Measured |
|--------|--------|-------------|
| Subsystem detection accuracy | ≥90% of modules correctly clustered | Compare `subsystem.js` output against `expected-subsystems.json` fixture. Accuracy = correctly assigned files / total files. |
| Cross-subsystem dependency completeness | ≥85% of actual inter-subsystem edges captured | Compare dependency matrix against `expected-deps.json`. Recall = captured edges / expected edges. |
| Contract extraction recall | ≥80% of exported interfaces/types extracted | Compare extracted contracts against `expected-contracts.json`. Recall = extracted / total annotated. |
| Generated doc structure | Matches Divio 4-category template | Structural assertion: verify directory layout, required sections present in each generated .md file. |
| Incremental update precision | Only subsystems touched by semantic diff get regenerated | Apply a mock diff to fixture, assert only expected subsystem docs are regenerated (content hashing / md5sum check, avoid mtime flakiness). |
| Cascading invalidation | Shared subsystem change propagates to dependents | Apply a diff to a shared subsystem in fixture, assert dependent subsystem docs are also flagged for regeneration. |
| LLM cost per full generation | ≤$2 (using local Ollama for drafting) | BACKLOGGED — measure token count statically in CI (e.g. via `tiktoken`) without hitting API. |
| Flow tracer terminates | All traces complete in <5s on 4,325-file graph | Wall-clock assertion on OpenClaw snapshot. |
## Ground Truth Fixture Repository
Located at `test/fixtures/system-docs/`. Contains:
```
test/fixtures/system-docs/
├── src/
│ ├── gateway/ (5 files: server.ts, session.ts, middleware.ts, types.ts, utils.ts)
│ ├── agents/ (5 files: runner.ts, scope.ts, tools.ts, types.ts, defaults.ts)
│ ├── channels/
│ │ ├── telegram.ts
│ │ └── discord.ts
│ ├── config/ (3 files: config.ts, schema.ts, types.ts)
│ └── utils/ (3 files: logger.ts, crypto.ts, fs-helpers.ts)
├── expected-subsystems.json ← hand-labeled subsystem assignments
├── expected-deps.json ← hand-labeled inter-subsystem edges
├── expected-contracts.json ← hand-labeled interfaces/types
├── expected-flows.json ← hand-labeled flow traces for 2 entry points
├── expected-diagrams/ ← expected Mermaid source for each diagram type
└── architecture.md ← mock architecture doc for ingestion testing
```
**Edge cases included in fixtures:**
- `utils/` as a cross-cutting concern (high fan-in, should be tagged as `cross-cutting`)
- Circular dependency: `gateway/session.ts` ↔ `agents/runner.ts` (mutual CALLS)
- Orphan file: `config/schema.ts` (no inbound edges, only exports)
- Re-exported interface: `gateway/types.ts` re-exports from `config/types.ts`
- Empty subsystem: `channels/` has only 2 files with no internal CALLS edges
## Architecture
@@ -54,6 +87,12 @@ Extend the V2 pipeline to generate Foxtrot-quality system documentation from the
3. **Graph-based (future):** Community detection (Louvain/label propagation) on the CALLS+IMPORTS graph to find natural clusters. Useful for repos without clean directory boundaries.
**Cross-cutting concern detection:**
Subsystems where >60% of edges are **inbound** from other subsystems (high fan-in — many subsystems depend on them, but they depend on almost nothing) are automatically tagged as `cross-cutting`. Examples: `utils/`, `config/`, `types/`. The metric is `inbound_edges / total_edges > 0.6`. Cross-cutting subsystems are:
- Excluded from the dependency matrix visualization (reduces hairball)
- Documented separately as "Shared Infrastructure" in the reference docs
- Still tracked in the raw dependency data for completeness
**Output:**
```json
{
@@ -61,21 +100,31 @@ Extend the V2 pipeline to generate Foxtrot-quality system documentation from the
{
"name": "gateway",
"label": "Session & Request Gateway",
"files": ["gateway/session-utils.ts", "gateway/server.ts", ...],
"kind": "domain",
"files": ["gateway/session-utils.ts", "gateway/server.ts"],
"entities": { "functions": 142, "classes": 3, "modules": 28 },
"publicExports": ["deriveSessionTitle", "loadSessionEntry", ...],
"publicExports": ["deriveSessionTitle", "loadSessionEntry"],
"internalDeps": [{"from": "gateway", "to": "agents", "edges": 89, "type": "CALLS"}],
"externalDeps": ["commander", "node:fs", "node:path"]
}
],
"crossCutting": ["utils", "config"],
"dependencyMatrix": {
"gateway→agents": { "calls": 89, "imports": 34 },
"agents→config": { "calls": 156, "imports": 120 },
...
"agents→config": { "calls": 156, "imports": 120 }
}
}
```
**Tests (7A):**
| Test | Input | Expected |
|------|-------|----------|
| Directory clustering | Fixture repo | Matches `expected-subsystems.json` (5 subsystems) |
| Config override | Fixture + `subsystems.yaml` merging gateway+routing | Merged subsystem with combined files |
| Cross-cutting detection | Fixture `utils/` (high fan-in) | Tagged as `cross-cutting` |
| Empty subsystem | Fixture `channels/` (2 files, no internal calls) | Valid subsystem with 0 internal edges |
| Orphan file | `config/schema.ts` (no inbound) | Assigned to `config` subsystem, not dropped |
### 7B: Contract Extractor (`contracts.js`)
**Purpose:** Extract TypeScript interfaces, type aliases, enums, and config schemas as first-class graph entities.
@@ -93,8 +142,20 @@ Extend the V2 pipeline to generate Foxtrot-quality system documentation from the
- `RETURNS` — function → return type
- `EXTENDS` — interface → interface
**Why this matters:**
Foxtrot docs define explicit contracts: "`accountCreation` expects `reltioCustomerId: string`". Without extracting interfaces/types, we can't generate contract documentation. The LLM has to guess from function bodies, which is unreliable.
**Error handling:**
- If tree-sitter fails to parse a file, skip it and log a warning (same as Phase 1 extract.js behavior)
- Re-exported interfaces (`export { Foo } from './types'`) are tracked via the existing IMPORTS edge; the contract extractor resolves the original definition
- Deeply nested type literals (>3 levels) are flattened to `object` to avoid graph bloat
**Tests (7B):**
| Test | Input | Expected |
|------|-------|----------|
| Interface extraction | `gateway/types.ts` with 3 interfaces | 3 Interface entities with correct fields |
| Type alias | `type SessionKey = string` | 1 TypeAlias entity |
| Enum extraction | `enum Status { Active, Inactive }` | 1 Enum entity with 2 members |
| Re-exported interface | `gateway/types.ts` re-exports from `config/types.ts` | Resolved to original definition |
| Parse failure | Malformed TS file | Skipped with warning, no crash |
| Recall benchmark | Fixture repo | ≥80% of `expected-contracts.json` extracted |
### 7C: Flow Tracer (`flow.js`)
@@ -102,25 +163,50 @@ Foxtrot docs define explicit contracts: "`accountCreation` expects `reltioCustom
**Algorithm:**
1. Start at entry point entity (e.g., `telegram/bot-handlers.ts:onMessage`)
2. BFS/DFS through CALLS edges, recording subsystem transitions
3. At each subsystem boundary crossing, record: source subsystem → target subsystem, via which function call
4. Prune: stop at depth N (configurable, default 5), skip test files, skip utility functions below a connectivity threshold
5. Output: ordered list of subsystem hops with the specific function calls that cross boundaries
2. BFS through CALLS edges, recording subsystem transitions
3. **Cycle detection:** Maintain a visited set per trace. If a node is revisited, record the cycle and stop that branch (do not re-enter).
4. **God object pruning:** Before tracing, compute in-degree for all nodes. Nodes with in-degree > `godThreshold` (default: 50) are excluded from traversal (they're utility functions called by everything — not meaningful flow participants). Logged as "excluded high-connectivity nodes."
5. **Depth limit:** Stop at depth N (configurable, default 8). Each subsystem boundary crossing increments depth by 1; intra-subsystem hops increment by 0.5 (prioritizes cross-subsystem flow).
6. **Test file exclusion:** Skip any file matching `*.test.*`, `*.spec.*`, `test/`, `__tests__/`.
7. At each subsystem boundary crossing, record: source subsystem → target subsystem, via which function call
8. Output: ordered list of subsystem hops with the specific function calls that cross boundaries
**Output:**
**Output (deterministic JSON — testable without LLM):**
```json
{
"entryPoint": "telegram/bot-handlers.ts:onMessage",
"depth": 8,
"godThreshold": 50,
"excludedNodes": ["utils/logger.ts:log", "config/config.ts:getConfig"],
"cyclesDetected": [
{ "at": "gateway/session.ts:loadSession", "backEdgeTo": "agents/runner.ts:runAgent" }
],
"flow": [
{ "subsystem": "telegram", "function": "onMessage", "action": "receives incoming message" },
{ "subsystem": "routing", "function": "routeInbound", "action": "routes to session handler", "crossedVia": "CALLS" },
{ "subsystem": "gateway", "function": "handleSession", "action": "loads session state", "crossedVia": "CALLS" },
{ "subsystem": "agents", "function": "runAgent", "action": "executes AI agent turn", "crossedVia": "CALLS" }
]
{ "subsystem": "telegram", "entity": "telegram/bot-handlers.ts:onMessage", "depth": 0 },
{ "subsystem": "routing", "entity": "routing/session-key.ts:resolveKey", "depth": 1, "crossedVia": "CALLS" },
{ "subsystem": "gateway", "entity": "gateway/session.ts:loadSession", "depth": 2, "crossedVia": "CALLS" },
{ "subsystem": "agents", "entity": "agents/runner.ts:runAgent", "depth": 3, "crossedVia": "CALLS" }
],
"subsystemSequence": ["telegram", "routing", "gateway", "agents"]
}
```
**LLM narration:** Feed the flow trace + source snippets at each hop to the LLM. Ask it to write a prose narrative: "When a Telegram message arrives, the bot handler dispatches it to the routing layer, which resolves the session key and..."
**LLM narration (separate step):** The deterministic JSON flow is the testable artifact. LLM narration is applied *after* as a formatting pass in 7D. This means:
- Flow correctness is tested against `expected-flows.json` (deterministic)
- LLM prose quality is evaluated separately (human review, not CI)
**Performance guarantee:** BFS with visited set + god object pruning + depth limit = O(V+E) bounded by depth. On the OpenClaw graph (23k nodes, 142k edges), traces must complete in <5 seconds. If a trace exceeds 5s, it is killed and logged as a timeout.
**Tests (7C):**
| Test | Input | Expected |
|------|-------|----------|
| Simple linear flow | Fixture entry point A→B→C across 3 subsystems | Matches `expected-flows.json` |
| Cycle detection | Fixture circular dep gateway↔agents | Cycle recorded, trace continues without loop |
| God object exclusion | Entry point that calls `utils/logger.ts:log` (high in-degree) | `log` excluded from trace |
| Depth limit | Deep call chain (>8 hops) | Trace stops at depth 8 |
| Test file exclusion | Entry point that calls a test helper | Test file skipped |
| Performance | OpenClaw full snapshot | <5s wall clock |
| Empty trace | Entry point with no outgoing CALLS | Returns flow with single entry, no hops |
### 7D: Hierarchical Doc Generator (`sysdoc.js`)
@@ -130,9 +216,7 @@ Foxtrot docs define explicit contracts: "`accountCreation` expects `reltioCustom
```
docs/
├── tutorials/
│ └── (not auto-generated — requires human curation)
├── how-to/
│ └── (generated from flow traces of common operations)
│ └── (human-authored only — not auto-generated)
├── reference/
│ ├── system-architecture.md ← from subsystem aggregator + dependency matrix
│ ├── subsystems/
@@ -146,65 +230,118 @@ docs/
│ └── (existing file-level docs from Phase 6)
├── explanation/
│ ├── architecture-patterns.md ← from dependency matrix analysis
│ ├── data-flows.md ← from flow tracer
│ └── design-decisions.md ← (requires human input or commit history analysis)
│ ├── data-flows.md ← from flow tracer (LLM-narrated flow traces)
│ └── design-decisions.md ← from architecture.md ingestion + commit history
```
**Divio category mapping (corrected):**
- **Tutorials:** Human-authored only. Not generated.
- **Reference:** System architecture, per-subsystem docs, contracts, module docs. All deterministic structure + LLM prose.
- **Explanation:** Architecture patterns (from dependency analysis), data flows (from flow traces — these explain *how the system works*, not *how to do a task*), design decisions (from architecture.md + commit history).
- **How-To:** Not auto-generated in MVP. Requires domain-specific task knowledge. Deferred.
**Generation pipeline:**
1. Run subsystem aggregator → subsystem map + dependency matrix
2. Run contract extractor → interface/type entities added to graph
3. Run flow tracer on configured entry points → flow narratives
4. For each subsystem: generate reference doc (LLM with subsystem context)
3. Run flow tracer on configured entry points → deterministic flow JSONs
4. For each subsystem: generate reference doc (LLM with subsystem context + architecture.md sections)
5. Generate system architecture overview (LLM with full dependency matrix)
6. Generate data flow explanations (LLM with flow traces)
6. Generate data flow explanations (LLM narrates flow JSONs into prose)
7. Generate Mermaid diagrams (7E) and embed in docs
**Incremental updates:**
- Semantic diff identifies changed files
- Map changed files → affected subsystems
- Only regenerate docs for affected subsystems
- System architecture overview regenerated only if dependency matrix changed
**Incremental updates with cascading invalidation:**
1. Semantic diff identifies changed files
2. Map changed files → directly affected subsystems (set A)
3. For each subsystem in A, find all subsystems that depend on it (set B = dependents of A in dependency matrix)
4. Regeneration set = A ∪ B
5. System architecture overview regenerated only if dependency matrix changed (new/removed inter-subsystem edges)
6. Flow traces regenerated only if any entity in the trace path was modified
### Template System
**Tests (7D):**
| Test | Input | Expected |
|------|-------|----------|
| Full generation | Fixture repo | Correct directory structure with all expected .md files |
| Section completeness | Generated subsystem doc | Contains: Purpose, Key Modules, Public API, Dependencies sections |
| Incremental: direct change | Modify `gateway/server.ts` | Only `gateway.md` + dependents regenerated |
| Incremental: cascading | Modify `config/types.ts` (shared) | `config.md` + all subsystems importing config regenerated |
| Incremental: no-op | No semantic diff | Zero files regenerated |
| Architecture.md ingestion | Fixture with `architecture.md` | LLM prompt includes architecture.md content |
Each doc type has a Markdown template with slots:
### 7E: Diagram Generator (`diagrams.js`)
```markdown
# {{subsystem.label}}
**Purpose:** Auto-generate Mermaid diagrams from graph analysis outputs.
## Purpose
{{llm_generated_purpose}}
**Diagram types:**
## Key Modules
{{for module in subsystem.topModules}}
- `{{module.name}}` — {{module.doc}}
{{endfor}}
1. **Subsystem Dependency Graph** (from 7A dependency matrix)
- Nodes = subsystems (excluding cross-cutting)
- Edges = inter-subsystem CALLS/IMPORTS with edge weight labels
- Cross-cutting subsystems shown as a separate "Shared" cluster
## Public API
{{for export in subsystem.publicExports}}
- `{{export.name}}({{export.params}})` → `{{export.returnType}}`
{{endfor}}
2. **Flow Sequence Diagram** (from 7C flow traces)
- Participants = subsystems in flow order
- Messages = function calls at boundary crossings
- Cycles shown as self-referencing notes
## Dependencies
{{dependency_table}}
3. **Contract Relationship Diagram** (from 7B contracts)
- Classes/interfaces with fields
- IMPLEMENTS/EXTENDS relationships as arrows
## Data Flows
{{for flow in subsystem.flows}}
### {{flow.name}}
{{flow.narrative}}
{{endfor}}
```
**Rendering:** Use `mmdr` (Rust Mermaid renderer) to produce SVG. Embed in generated Markdown docs as `![diagram](./diagrams/subsystem-deps.svg)`.
**Tests (7E):**
| Test | Input | Expected |
|------|-------|----------|
| Dependency diagram | Fixture dependency matrix | Valid Mermaid syntax, matches `expected-diagrams/deps.mmd` |
| Sequence diagram | Fixture flow trace | Valid Mermaid syntax, correct participant order |
| Contract diagram | Fixture contracts | Valid Mermaid syntax, correct relationships |
| Rendering | Any generated .mmd file | mmdr produces valid SVG without errors |
## Architecture.md Ingestion
Each repo may contain human-written architecture documentation. The pipeline:
1. **Discovery:** Scan for `architecture.md`, `docs/architecture.md`, `ARCHITECTURE.md`, and `docs/design.md`, with paths resolved relative to the repo root
2. **Parsing:** Extract sections (headings → content blocks) as structured context
3. **Injection:** When generating subsystem docs or explanation docs, include relevant architecture.md sections in the LLM prompt alongside graph data
4. **Diff tracking:** If `architecture.md` changes between releases, flag it in the semantic diff as a documentation-relevant change
## Cross-Repo Output Model
Two output modes:
**Per-repo (reference only):**
- Subsystem architecture docs
- Contract reference
- Module reference
- Mermaid diagrams
- Useful for repo maintainers
**Unified (full Divio):**
- Merges per-repo graphs via namespace registry (Phase 3) into super-graph
- Runs 7A-7E on super-graph
- Generates cross-repo flow traces and dependency diagrams
- Includes human-authored tutorials and explanation docs
- Useful for platform consumers and new engineers
## Implementation Phases
| Phase | Module | Effort | Depends On |
|-------|--------|--------|------------|
| 7A | `subsystem.js` | 1 day | graph.js |
| 7B | `contracts.js` | 1-2 days | extract.js (new tree-sitter queries) |
| 7C | `flow.js` | 1 day | graph.js, subsystem.js |
| 7D | `sysdoc.js` | 1-2 days | 7A, 7B, 7C, docgen.js |
| 7-fixtures | Ground truth fixture repo | 0.5 day | — |
| 7A | `subsystem.js` + tests | 1 day | graph.js, fixtures |
| 7B | `contracts.js` + tests | 2 days | extract.js, fixtures |
| 7C | `flow.js` + tests | 2 days | graph.js, subsystem.js, fixtures |
| 7D | `sysdoc.js` + tests | 2 days | 7A, 7B, 7C, docgen.js |
| 7E | `diagrams.js` + tests | 1 day | 7A, 7C, 7B |
| 7F | `supergraph.js` (Multi-repo Merge) | 1 day | namespace.js, graph.js |
**Critical path:** 7A → 7C → 7D (flow tracer needs subsystem boundaries)
**Parallel:** 7B can run in parallel with 7A/7C
**Total: ~9.5 days**
**Critical path:** fixtures → 7A → 7C → 7D
**Parallel:** 7B, 7E, and 7F can run in parallel with core phases.
**Build loop (BMad Wiggum):** Each phase follows: build → test → BMad review → revise → re-review until GO.
## Constraints
@@ -214,15 +351,14 @@ Each doc type has a Markdown template with slots:
- Templates are Markdown with simple mustache-style slots (no template engine dependency — string replacement)
- Must work on OpenClaw codebase (4,325 files) as primary benchmark
- Foxtrot repos are not available in this environment — design must work from any repo's graph snapshot
- Memory budget: graph snapshots for OpenClaw are ~30MB JSON. In-memory graph with contract entities should stay under 500MB heap. If exceeded, implement streaming extraction (process files in batches, merge partial graphs).
## Open Questions
## Resolved Decisions
1. **Tutorials:** Should we attempt to auto-generate tutorials from flow traces, or leave that as human-only? Foxtrot tutorials are task-oriented ("Create your first VPC") which requires domain knowledge the graph doesn't have.
2. **Design decisions:** Can we infer design decisions from commit history + semantic diffs? ("We switched from X to Y in v2026.3.1 because...") Or is this always human-authored?
3. **Cross-repo:** For Foxtrot's 14-repo setup, do we generate one unified doc site or per-repo docs with cross-links? The namespace registry (Phase 3) handles entity linking, but the doc generator needs to know the boundary.
4. **Diagram generation:** Should we auto-generate Mermaid diagrams from the dependency matrix and flow traces? (We have the mermaid-renderer skill.)
5. **Config contract depth:** How deep do we go on YAML/HCL config extraction? Just top-level keys, or full schema with types and defaults?
1. **Tutorials:** Human-authored only. Flow traces inform but don't generate tutorials — domain knowledge required.
2. **Design decisions:** Infer from commit history + semantic diffs AND parse `architecture.md` from each repo.
3. **Cross-repo:** Both per-repo (reference) and unified (full Divio). Different audiences.
4. **Mermaid diagrams:** Yes, via 7E. Three diagram types: dependency, sequence, contract.
5. **Architecture.md ingestion:** Parsed and injected as LLM context for subsystem and explanation docs.
6. **Flow traces are Explanation, not How-To:** Corrected Divio mapping. How-To deferred from MVP.
7. **LLM output is not CI-tested:** All testable artifacts are deterministic JSON. LLM prose is a formatting pass evaluated by human review.

151
sysdoc.js
View File

@@ -5,6 +5,7 @@ const { buildSubsystems } = require('./subsystem.js');
const { extractAllContracts, buildContractXref } = require('./contracts.js');
const { buildFlowIndex, traceFlow } = require('./flow.js');
const { generateDependencyDiagram, generateFlowDiagram, generateContractDiagram } = require('./diagrams.js');
const { discoverCharts, chartsToGraph, generateHelmDiagram } = require('./extract-helm.js');
/**
* Phase 7D: Hierarchical Doc Generator
@@ -25,6 +26,26 @@ async function generateDocs(graph, srcRoot, outDir, opts = {}) {
console.warn('Prose generation requested but prose.js not available. Skipping LLM pass.');
}
}
// 4. Discover Helm Charts (Phase 8) - Do this early to feed main graph
const helmIgnore = new Set([
'node_modules', '.git', 'venv', '__pycache__', '.terraform',
'_bmad', '_bmad-output', '.codex', '.claude', '.cursor', '.gemini', '.kiro', '.agents'
]);
const helmCharts = discoverCharts(srcRoot, helmIgnore);
const helmGraph = chartsToGraph(helmCharts, srcRoot);
console.log(`Helm: ${helmCharts.length} charts, ${helmGraph.entities.length} entities, ${helmGraph.relationships.length} relationships`);
// Merge Helm into main graph so Subsystem Aggregator sees it
for (const e of helmGraph.entities) {
const fakePath = e.dir ? path.join(srcRoot, e.dir, 'Chart.yaml') : path.join(srcRoot, 'Chart.yaml');
graph.nodes.set(e.id, { ...e, type: e.type || 'Module', _file: fakePath });
if (!graph.fileIndex.has(fakePath)) graph.fileIndex.set(fakePath, new Set());
graph.fileIndex.get(fakePath).add(e.id);
}
for (const r of helmGraph.relationships) {
graph.edges.push(r);
}
// 1. Build Subsystems (7A)
const subs = buildSubsystems(graph, {
@@ -46,6 +67,8 @@ async function generateDocs(graph, srcRoot, outDir, opts = {}) {
'reference/subsystems',
'reference/contracts',
'reference/modules',
'reference/helm',
'reference/helm/charts',
'explanation',
'tutorials',
'how-to',
@@ -138,6 +161,128 @@ ${sub.files.map(f => `- \`${f}\``).join('\n')}
fs.writeFileSync(contractDocPath, `# System Contracts\n\n\`\`\`mermaid\n${allContractsDiag}\n\`\`\`\n\n${contractProseList}`);
// Generate Reference: Helm Charts
const helmIndexPath = path.join(outDir, 'reference/helm/index.md');
let helmIndexContent = '# Helm Charts\n\n| Chart | Path | Version | Resources | Dependencies | Interactions |\n|---|---|---|---|---|---|\n';
// Use dir-based filenames to avoid collisions between same-named charts
for (const c of helmCharts) {
const safeName = c.dir.replace(/[^a-zA-Z0-9]/g, '-').replace(/-+/g, '-').replace(/^-|-$/g, '');
const chartDocPath = path.join(outDir, `reference/helm/charts/${safeName}.md`);
helmIndexContent += `| [${c.chart.name}](charts/${safeName}.md) | \`${c.dir}\` | ${c.chart.version} | ${c.templates.resources.length} | ${c.chart.dependencies.length} | ${c.interactions.length} |\n`;
let chartContent = `# Chart: ${c.chart.name}\n\n`;
chartContent += `**Version:** ${c.chart.version} \n`;
chartContent += `**App Version:** ${c.chart.appVersion || 'N/A'} \n`;
chartContent += `**Path:** \`${c.dir}\`\n\n`;
if (c.chart.description) {
chartContent += `${c.chart.description}\n\n`;
}
if (c.chart.dependencies.length > 0) {
chartContent += `## Dependencies\n`;
for (const d of c.chart.dependencies) {
chartContent += `- **${d.name}** (${d.version})${d.condition ? ` *if ${d.condition}*` : ''}\n`;
}
chartContent += '\n';
}
if (c.interactions.length > 0) {
chartContent += `## Interactions (Contracts)\n`;
for (const i of c.interactions) {
chartContent += `- **${i.type}**: \`${i.target}\` (via \`${i.file}\`)\n`;
}
chartContent += '\n';
}
if (c.templates.resources.length > 0) {
chartContent += `## Resources Generated\n`;
for (const r of c.templates.resources) {
chartContent += `- **${r.kind}**: \`${r.name}\` (${r.file})\n`;
}
chartContent += '\n';
}
if (c.values.keys.length > 0) {
  // Render the keys reported for this chart's values.yaml as a Markdown table.
  chartContent += `## Configuration Surface (values.yaml)\n`;
  chartContent += `| Key | Type | Default |\n|---|---|---|\n`;
  for (const k of c.values.keys) {
    // Show the default itself when one is recorded; otherwise only whether a
    // default exists.
    const rawDefault = k.defaultValue !== undefined
      ? String(k.defaultValue)
      : (k.hasDefault ? 'yes' : 'no');
    // A table row must stay on a single line, so collapse real line breaks.
    // The literal two-character "\n" sequence that the previous pattern matched
    // is still collapsed as well, to stay backward-compatible.
    // NOTE(review): confirm which of the two forms defaultValue actually carries.
    const singleLine = rawDefault.replace(/\\n|\r?\n/g, ' ');
    // Escape pipes so a default containing "|" cannot split the cell. The old
    // pattern /\\|/ was an alternation with an empty branch, which matched at
    // every position and corrupted the whole value.
    const defStr = singleLine.replace(/\|/g, '\\|');
    chartContent += `| \`${k.name}\` | ${k.type} | ${defStr} |\n`;
  }
  chartContent += '\n';
}
fs.writeFileSync(chartDocPath, chartContent);
}
// Generate Helm interaction diagram
const helmDiag = generateHelmDiagram(helmCharts);
fs.writeFileSync(path.join(outDir, 'diagrams/helm-interactions.mmd'), helmDiag);
// Shared secrets/configmaps cross-reference
const configUsers = {};
for (const c of helmCharts) {
for (const i of c.interactions) {
if (i.type === 'config-ref') {
if (!configUsers[i.target]) configUsers[i.target] = [];
configUsers[i.target].push(c.chart.name);
}
}
}
// Port map: which charts expose which ports
const portMap = {};
for (const c of helmCharts) {
for (const i of c.interactions) {
if (i.type === 'port' && i.target !== '0') {
if (!portMap[i.target]) portMap[i.target] = [];
if (!portMap[i.target].includes(c.chart.name)) portMap[i.target].push(c.chart.name);
}
}
}
helmIndexContent += `\n## Interaction Diagram\n\`\`\`mermaid\n${helmDiag}\n\`\`\`\n`;
// Shared config/secrets table
const sharedConfigs = Object.entries(configUsers).filter(([, users]) => users.length > 1);
if (sharedConfigs.length > 0) {
helmIndexContent += `\n## Shared Secrets & ConfigMaps\n| Secret/ConfigMap | Used By |\n|---|---|\n`;
for (const [name, users] of sharedConfigs) {
helmIndexContent += `| \`${name}\` | ${users.join(', ')} |\n`;
}
}
// Port allocation table
const sharedPorts = Object.entries(portMap).filter(([, users]) => users.length > 1).sort((a, b) => Number(a[0]) - Number(b[0]));
if (sharedPorts.length > 0) {
helmIndexContent += `\n## Port Allocation (shared)\n| Port | Charts |\n|---|---|\n`;
for (const [port, users] of sharedPorts) {
helmIndexContent += `| ${port} | ${users.join(', ')} |\n`;
}
}
// K8s service references
const svcRefs = [];
for (const c of helmCharts) {
for (const i of c.interactions) {
if (i.type === 'k8s-service') {
svcRefs.push({ from: c.chart.name, to: i.target });
}
}
}
if (svcRefs.length > 0) {
helmIndexContent += `\n## Service-to-Service References\n| From Chart | Calls Service |\n|---|---|\n`;
for (const ref of svcRefs) {
helmIndexContent += `| ${ref.from} | \`${ref.to}\` |\n`;
}
}
fs.writeFileSync(helmIndexPath, helmIndexContent);
// Generate Explanation: Data Flows
const flowsPath = path.join(outDir, 'explanation/data-flows.md');
let flowsContent = '# Data Flows\n\n';
@@ -190,7 +335,11 @@ if (require.main === module) {
// Using an IIFE to support top-level await
(async () => {
try {
const result = await generateDocs(graph, srcRoot, outDir, { entryPoints, prose: useProse });
const result = await generateDocs(graph, srcRoot, outDir, {
srcDir: srcRoot.endsWith('/') ? srcRoot : srcRoot + '/',
entryPoints,
prose: useProse
});
console.log(`Generated docs in ${result.outDir}`);
console.log(`- ${result.subsystems} subsystems`);
console.log(`- ${result.contracts} contracts`);