/**
 * Eval Generator: Repo-Agnostic Question Bank
 *
 * Generates ground-truth Q&A pairs from graph + Helm data.
 * Questions target what the docs actually cover: subsystems, charts,
 * dependencies, interactions, contracts, resource types.
 *
 * Usage: node eval-generator.js <snapshot> <srcRoot> [output.json] [--dry-run]
 *
 *   <snapshot>     graph snapshot handed to GraphStore.loadSnapshot
 *   <srcRoot>      repository root scanned for Helm charts
 *   [output.json]  optional file to write; without it the JSON goes to stdout
 *   --dry-run      print the summary only; write no file and emit no JSON
 */

const fs = require('fs');
const path = require('path');
const GraphStore = require('./graph.js');
const { discoverCharts, chartsToGraph } = require('./extract-helm.js');
const { buildSubsystems } = require('./subsystem.js');

// Directory names passed to discoverCharts as its ignore set.
const HELM_IGNORE = new Set([
  'node_modules', '.git', 'venv', '__pycache__', '.terraform',
  '_bmad', '_bmad-output', '.codex', '.claude', '.cursor',
  '.gemini', '.kiro', '.agents'
]);

/** Renders `[label, value]` pairs as one `label: value` line per pair. */
function formatPairs(pairs) {
  return pairs.map(([label, value]) => `${label}: ${value}`).join('\n');
}

/** Substitutes a readable marker for a missing (null/undefined) chart field. */
function orNotSet(value) {
  return value ?? 'not set';
}

/**
 * Tallies resources by their `kind`.
 *
 * A Map is used instead of a plain object so that kinds named like
 * Object.prototype members (e.g. "constructor") are counted correctly.
 *
 * @param {Array<{kind: string}>} resources
 * @returns {Array<[string, number]>} `[kind, count]` pairs, highest count first;
 *   ties keep first-seen order (Array.prototype.sort is stable).
 */
function rankKinds(resources) {
  const counts = new Map();
  for (const { kind } of resources) {
    counts.set(kind, (counts.get(kind) ?? 0) + 1);
  }
  return [...counts].sort((a, b) => b[1] - a[1]);
}

/**
 * Groups the names of charts by the target of their interactions of one type.
 *
 * Users are held in a Set so a chart that references the same target several
 * times is counted once; targets are keyed by their string form.
 *
 * @param {Array<object>} charts - chart records with `chart.name` and optional `interactions`
 * @param {string} type - interaction type to collect (e.g. 'config-ref', 'port')
 * @param {(interaction: object) => boolean} [accept] - extra per-interaction filter
 * @returns {Map<string, Set<string>>} target → distinct chart names
 */
function groupChartsByTarget(charts, type, accept = () => true) {
  const byTarget = new Map();
  for (const c of charts) {
    for (const interaction of c.interactions || []) {
      if (interaction.type !== type || !accept(interaction)) continue;
      const key = String(interaction.target);
      if (!byTarget.has(key)) byTarget.set(key, new Set());
      byTarget.get(key).add(c.chart.name);
    }
  }
  return byTarget;
}

/** Keeps only the targets used by more than one distinct chart. */
function sharedTargets(byTarget) {
  return [...byTarget].filter(([, users]) => users.size > 1);
}

/**
 * Builds the evaluation question bank for one repository.
 *
 * Loads the graph snapshot, discovers Helm charts under `srcRoot`, merges the
 * chart entities/relationships into the graph, aggregates subsystems, and then
 * emits questions category by category. Ids (`q-001`, `q-002`, ...) follow
 * emission order.
 *
 * @param {string} snapshotPath - snapshot passed to GraphStore.loadSnapshot
 * @param {string} srcRoot - repository root passed to discoverCharts/chartsToGraph
 * @returns {Array<object>} questions with keys id, category, difficulty, audience,
 *   question, expected_answer, answer, answerType, source, source_entity
 */
function generateQuestions(snapshotPath, srcRoot) {
  const questions = [];
  let nextIndex = 1;

  // Single place that fixes the key order and id sequence of every question.
  // `answer` defaults to the expected answer; only two questions differ.
  const addQuestion = ({
    category, difficulty, audience, question,
    expectedAnswer, answer = expectedAnswer,
    answerType, source, sourceEntity
  }) => {
    questions.push({
      id: `q-${String(nextIndex++).padStart(3, '0')}`,
      category,
      difficulty,
      audience,
      question,
      expected_answer: expectedAnswer,
      answer,
      answerType,
      source,
      source_entity: sourceEntity
    });
  };

  // Load graph
  const graph = GraphStore.loadSnapshot(snapshotPath);

  // Load Helm data
  const charts = discoverCharts(srcRoot, HELM_IGNORE);

  // Merge Helm into graph (same as sysdoc.js)
  if (charts.length > 0) {
    const helmGraph = chartsToGraph(charts, srcRoot);
    for (const entity of helmGraph.entities) {
      // Helm entities are indexed under the Chart.yaml of their directory.
      const fakePath = entity.dir
        ? path.join(srcRoot, entity.dir, 'Chart.yaml')
        : path.join(srcRoot, 'Chart.yaml');
      graph.nodes.set(entity.id, { ...entity, type: entity.type || 'Module', _file: fakePath });
      if (!graph.fileIndex.has(fakePath)) graph.fileIndex.set(fakePath, new Set());
      graph.fileIndex.get(fakePath).add(entity.id);
    }
    for (const relationship of helmGraph.relationships) {
      graph.edges.push(relationship);
    }
  }

  // Build subsystems (srcDir is always passed with a trailing slash)
  const root = srcRoot || '';
  const subs = buildSubsystems(graph, {
    srcDir: root.endsWith('/') ? root : `${root}/`,
    minTraffic: 3,
    crossCuttingThreshold: 0.6
  });

  // ─── Category 1: Structural ───

  // Total chart count
  if (charts.length > 0) {
    addQuestion({
      category: 'structural',
      difficulty: 'easy',
      audience: ['human', 'machine'],
      question: 'How many Helm charts are defined across this repository?',
      expectedAnswer: String(charts.length),
      answerType: 'exact',
      source: 'Chart.yaml discovery',
      sourceEntity: 'all charts'
    });
  }

  // Subsystem count
  if (subs.subsystems.length > 0) {
    addQuestion({
      category: 'structural',
      difficulty: 'easy',
      audience: ['human', 'machine'],
      question: 'How many subsystems does this codebase contain?',
      expectedAnswer: String(subs.subsystems.length),
      answerType: 'exact',
      source: 'subsystem aggregation',
      sourceEntity: 'all subsystems'
    });
  }

  // Top charts by K8s resources (filter() already yields a fresh array to sort)
  const topByResources = charts
    .filter((c) => c.templates && c.templates.resources)
    .sort((a, b) => b.templates.resources.length - a.templates.resources.length)
    .slice(0, 5);
  if (topByResources.length > 0 && topByResources[0].templates.resources.length > 0) {
    addQuestion({
      category: 'structural',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which 5 Helm charts produce the most Kubernetes resources? List them with their resource counts.',
      expectedAnswer: topByResources
        .map((c) => `${c.chart.name} (${c.dir}): ${c.templates.resources.length}`)
        .join('\n'),
      answerType: 'ranked-list',
      source: 'template scanning',
      sourceEntity: topByResources.map((c) => c.chart.name).join(', ')
    });
  }

  // Largest subsystem
  const [largestSub] = [...subs.subsystems].sort((a, b) => b.files.length - a.files.length);
  if (largestSub) {
    addQuestion({
      category: 'structural',
      difficulty: 'easy',
      audience: ['human'],
      question: 'Which subsystem contains the most files, and how many?',
      expectedAnswer: `${largestSub.name}: ${largestSub.files.length} files`,
      answerType: 'exact',
      source: 'subsystem aggregation',
      sourceEntity: largestSub.name
    });
  }

  // ─── Category 2: Dependencies ───
  const chartsWithDeps = charts.filter(
    (c) => c.chart.dependencies && c.chart.dependencies.length > 0
  );
  for (const c of chartsWithDeps.slice(0, 5)) {
    addQuestion({
      category: 'dependency',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: `What are the dependencies of the "${c.chart.name}" chart (at ${c.dir})?`,
      expectedAnswer: c.chart.dependencies
        .map((d) => {
          const condition = d.condition ? ` [condition: ${d.condition}]` : '';
          return `${d.name} (${orNotSet(d.version)})${condition}`;
        })
        .join('\n'),
      answerType: 'list',
      source: `${c.dir}/Chart.yaml`,
      sourceEntity: c.chart.name
    });
  }

  // ─── Category 3: Contracts (shared secrets/configs) ───
  // "Shared" means referenced by at least two distinct charts.
  const sharedSecrets = sharedTargets(groupChartsByTarget(charts, 'config-ref'));
  if (sharedSecrets.length > 0) {
    addQuestion({
      category: 'contract',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which secrets or ConfigMaps are shared across multiple Helm charts? List each with the charts that use it.',
      expectedAnswer: formatPairs(sharedSecrets.map(([name, users]) => [name, [...users].join(', ')])),
      answerType: 'list',
      source: 'template interaction scanning',
      sourceEntity: sharedSecrets.map(([name]) => name).join(', ')
    });
  }

  // ─── Category 4: Configuration Surface ───
  // Top charts by config surface (most values.yaml keys)
  const chartsByValues = charts
    .filter((c) => c.values && c.values.keys && c.values.keys.length > 5)
    .sort((a, b) => b.values.keys.length - a.values.keys.length);
  for (const chart of chartsByValues.slice(0, 5)) {
    addQuestion({
      category: 'configuration',
      difficulty: 'easy',
      audience: ['machine'],
      question: `How many top-level configuration keys does the ${chart.chart.name} chart (at ${chart.dir}) expose in its values.yaml?`,
      expectedAnswer: String(chart.values.keys.length),
      answerType: 'exact',
      source: `${chart.dir}/values.yaml`,
      sourceEntity: chart.chart.name
    });
  }

  // ─── Category 5: Interactions (service-to-service) ───
  const svcRefs = charts.flatMap((c) =>
    (c.interactions || [])
      .filter((i) => i.type === 'k8s-service')
      .map((i) => ({ from: c.chart.name, dir: c.dir, to: i.target }))
  );
  if (svcRefs.length > 0) {
    addQuestion({
      category: 'interaction',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which Helm charts reference Kubernetes services from other components? List each chart and the service it calls.',
      expectedAnswer: svcRefs.map((r) => `${r.from} (${r.dir}) → ${r.to}`).join('\n'),
      answerType: 'list',
      source: 'template interaction scanning',
      sourceEntity: svcRefs.map((r) => r.from).join(', ')
    });
  }

  // Shared ports (a target of '0' is skipped), listed in ascending numeric order
  const sharedPorts = sharedTargets(
    groupChartsByTarget(charts, 'port', (i) => i.target !== '0')
  ).sort((a, b) => Number(a[0]) - Number(b[0]));
  if (sharedPorts.length > 0) {
    addQuestion({
      category: 'interaction',
      difficulty: 'hard',
      audience: ['human', 'machine'],
      question: 'Which network ports are used by multiple Helm charts? List each port and the charts that expose it.',
      expectedAnswer: sharedPorts
        .map(([port, users]) => `Port ${port}: ${[...users].join(', ')}`)
        .join('\n'),
      answerType: 'list',
      source: 'template port scanning',
      sourceEntity: 'shared ports'
    });
  }

  // ─── Category 6: Resource Types ───
  const topKinds = rankKinds(charts.flatMap((c) => c.templates?.resources || [])).slice(0, 10);
  if (topKinds.length > 0) {
    addQuestion({
      category: 'resource',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'What are the most common Kubernetes resource types generated across all Helm charts?',
      expectedAnswer: formatPairs(topKinds),
      answerType: 'ranked-list',
      source: 'template resource scanning',
      sourceEntity: 'all charts'
    });
  }

  // Per-chart resource breakdown for top 3 charts by resource count
  for (const chart of topByResources.slice(0, 3)) {
    addQuestion({
      category: 'resource',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: `What Kubernetes resource types does the ${chart.chart.name} chart generate? List each type and count.`,
      expectedAnswer: formatPairs(rankKinds(chart.templates.resources)),
      answerType: 'list',
      source: `${chart.dir}/templates/`,
      sourceEntity: chart.chart.name
    });
  }

  // ─── Category 7: Cross-Subsystem ───
  if (subs.crossCutting && subs.crossCutting.length > 0) {
    const crossCutting = subs.crossCutting.join(', ');
    addQuestion({
      category: 'cross-subsystem',
      difficulty: 'easy',
      audience: ['human'],
      question: 'Which subsystems are identified as cross-cutting concerns?',
      expectedAnswer: crossCutting,
      answerType: 'list',
      source: 'subsystem aggregation',
      sourceEntity: crossCutting
    });
  }

  // Dependency matrix questions (matrix keys have the form "source→target")
  if (subs.dependencyMatrix) {
    const weight = (dep) => dep.calls + dep.imports;
    const heavyDeps = Object.entries(subs.dependencyMatrix)
      .filter(([, dep]) => weight(dep) > 3)
      .sort((a, b) => weight(b[1]) - weight(a[1]))
      .slice(0, 5);
    if (heavyDeps.length > 0) {
      // Ask about the target of the single heaviest edge.
      const targetSub = heavyDeps[0][0].split('→')[1];
      const depsForTarget = heavyDeps.filter(([key]) => key.endsWith(`→${targetSub}`));
      if (depsForTarget.length > 0) {
        addQuestion({
          category: 'cross-subsystem',
          difficulty: 'hard',
          audience: ['human', 'machine'],
          question: `Which subsystems depend on ${targetSub}, and how heavily (by call+import count)?`,
          expectedAnswer: formatPairs(
            depsForTarget.map(([key, dep]) => [key.split('→')[0], weight(dep)])
          ),
          answerType: 'list',
          source: 'dependency matrix',
          sourceEntity: targetSub
        });
      }
    }
  }

  // ─── Category 8: Architectural ───

  // Empty subsystems (Helm-only)
  const emptySubs = subs.subsystems.filter(
    (s) => s.entities.functions === 0 && s.entities.modules === 0
  );
  if (emptySubs.length > 0) {
    const emptyNames = emptySubs.map((s) => s.name).join(', ');
    addQuestion({
      category: 'architectural',
      difficulty: 'hard',
      audience: ['human'],
      question: `The following subsystems have 0 detected functions and 0 modules: ${emptyNames}. Why might this be the case, and what do they actually contain?`,
      expectedAnswer: 'These subsystems primarily contain Helm charts with Go-templated YAML, Terraform HCL, and/or Crossplane compositions. The code analysis pipeline detects functions/modules from Python, Go, TypeScript, and shell scripts — but Helm templates use Go template syntax which does not produce traditional function/module entities. Their content is captured through the Helm chart extraction phase instead.',
      answer: 'These subsystems primarily contain Helm charts, Terraform, or Crossplane compositions rather than traditional code.',
      answerType: 'explanation',
      source: 'architectural analysis',
      sourceEntity: emptyNames
    });
  }

  // Chart version for top chart
  if (charts.length > 0) {
    const topChart = topByResources[0] || charts[0];
    addQuestion({
      category: 'architectural',
      difficulty: 'easy',
      audience: ['human', 'machine'],
      question: `What is the current version and appVersion of the ${topChart.chart.name} Helm chart?`,
      // A missing field is reported as "not set" rather than the literal "undefined".
      expectedAnswer: `version: ${orNotSet(topChart.chart.version)}, appVersion: ${orNotSet(topChart.chart.appVersion)}`,
      answerType: 'exact',
      source: `${topChart.dir}/Chart.yaml`,
      sourceEntity: topChart.chart.name
    });
  }

  // ─── Category 9: Scenario-Based ───

  // Secret rotation scenario
  if (sharedSecrets.length > 0) {
    const [secretName, secretUsers] = sharedSecrets[0];
    addQuestion({
      category: 'scenario',
      difficulty: 'hard',
      audience: ['human'],
      question: `If you need to rotate the "${secretName}" shared secret, which Helm charts would be affected and need redeployment?`,
      expectedAnswer: [...secretUsers].join(', '),
      answerType: 'list',
      source: 'template interaction scanning',
      sourceEntity: secretName
    });
  }

  // Deployment scenario for top chart
  if (topByResources.length > 0) {
    const chart = topByResources[0];
    const valueKeys = chart.values?.keys || [];
    const shownKeys = valueKeys.slice(0, 10).map((k) => k.name).join(', ');
    const hiddenKeys = valueKeys.length > 10 ? ` (+${valueKeys.length - 10} more)` : '';
    const interactions = (chart.interactions || [])
      .map((i) => `${i.type}: ${i.target}`)
      .join(', ');
    const headline = `Chart: ${chart.chart.name} (${chart.dir}), Version: ${orNotSet(chart.chart.version)}`;
    addQuestion({
      category: 'scenario',
      difficulty: 'hard',
      audience: ['human'],
      question: `A new engineer needs to deploy the ${chart.chart.name} application. What charts, configuration values, and external dependencies should they understand first?`,
      expectedAnswer: [
        headline,
        `Key values: ${shownKeys}${hiddenKeys}`,
        `Resources generated: ${chart.templates.resources.length} K8s resources`,
        `Interactions: ${interactions || 'none detected'}`
      ].join('\n'),
      answer: headline,
      answerType: 'explanation',
      source: `${chart.dir}`,
      sourceEntity: chart.chart.name
    });
  }

  return questions;
}

module.exports = { generateQuestions };

// CLI entry point: runs only when this file is executed directly.
if (require.main === module) {
  const argv = process.argv.slice(2);
  const dryRun = argv.includes('--dry-run');
  // Anything starting with '-' is a flag; the rest are positionals.
  const [snapshotPath, srcRoot, outFile] = argv.filter((a) => !a.startsWith('-'));

  if (!snapshotPath || !srcRoot) {
    console.error('Usage: node eval-generator.js <snapshot> <srcRoot> [output.json] [--dry-run]');
    process.exit(1);
  }
  if (!fs.existsSync(snapshotPath)) {
    console.error(`Snapshot not found: ${snapshotPath}`);
    process.exit(1);
  }

  const questions = generateQuestions(snapshotPath, srcRoot);

  // Summary
  const perCategory = new Map();
  const perDifficulty = { easy: 0, medium: 0, hard: 0 };
  for (const q of questions) {
    perCategory.set(q.category, (perCategory.get(q.category) ?? 0) + 1);
    if (Object.hasOwn(perDifficulty, q.difficulty)) perDifficulty[q.difficulty] += 1;
  }
  console.log(`Generated ${questions.length} questions:`);
  for (const [cat, count] of [...perCategory].sort((a, b) => b[1] - a[1])) {
    console.log(` ${cat}: ${count}`);
  }
  console.log(`Difficulty: easy=${perDifficulty.easy}, medium=${perDifficulty.medium}, hard=${perDifficulty.hard}`);

  if (dryRun) {
    // Honour the documented flag: report what would be generated, emit nothing.
    console.log('\nDry run: no output written');
  } else {
    const result = { generated: new Date().toISOString(), count: questions.length, questions };
    const json = JSON.stringify(result, null, 2);
    if (outFile) {
      fs.writeFileSync(outFile, json);
      console.log(`\nWritten to ${outFile}`);
    } else {
      console.log(json);
    }
  }
}