/**
 * Eval Harness: Question Bank Generator
 *
 * Generates ground-truth Q&A pairs directly from source code and Helm charts.
 * Questions are categorized by type and difficulty.
 *
 * Usage: node eval-questions.js <srcRoot> <snapshotPath> [output.json]
 */

const fs = require('fs');
const path = require('path');
const { discoverCharts } = require('./extract-helm.js');
const GraphStore = require('./graph.js');
const { buildSubsystems } = require('./subsystem.js');

/**
 * Groups chart names by the interaction target they reference.
 *
 * Each chart name is recorded at most once per target, so the length of a
 * user list is the number of distinct chart names touching that target.
 * A Map is used (rather than a plain object) so that targets named like
 * Object.prototype members (e.g. "constructor") cannot collide.
 *
 * @param {Array<object>} charts - Chart records; each is read for
 *   `interactions` (items with `type` and `target`) and `chart.name`.
 * @param {string} type - Interaction type to select (e.g. 'config-ref').
 * @param {(target: *) => boolean} [accept] - Optional extra filter on the raw target.
 * @returns {Map<string, string[]>} Target (stringified) → distinct chart names,
 *   in first-seen order.
 */
function collectTargetUsers(charts, type, accept = () => true) {
  const usersByTarget = new Map();
  for (const c of charts) {
    for (const i of c.interactions) {
      if (i.type !== type || !accept(i.target)) continue;
      // Stringify the key so lookups behave like the object-key coercion callers expect.
      const key = String(i.target);
      const users = usersByTarget.get(key) ?? [];
      if (!users.includes(c.chart.name)) users.push(c.chart.name);
      usersByTarget.set(key, users);
    }
  }
  return usersByTarget;
}

/**
 * Counts resources per `kind` and returns the tallies, most frequent first.
 *
 * @param {Array<{kind: *}>} resources - Resource records to tally.
 * @returns {Array<[*, number]>} `[kind, count]` pairs sorted by descending count;
 *   ties keep first-seen order (Array.prototype.sort is stable).
 */
function countKinds(resources) {
  const counts = new Map();
  for (const r of resources) {
    counts.set(r.kind, (counts.get(r.kind) ?? 0) + 1);
  }
  return [...counts.entries()].sort((a, b) => b[1] - a[1]);
}

/**
 * Builds the evaluation question bank from discovered Helm charts and the
 * subsystem view derived from a graph snapshot.
 *
 * @param {string} srcRoot - Root directory passed to `discoverCharts`; also used
 *   (with a trailing '/' ensured) as `srcDir` for `buildSubsystems`.
 * @param {string} snapshotPath - Path handed to `GraphStore.loadSnapshot`.
 * @returns {Array<object>} Question records, each with `id`, `category`,
 *   `difficulty`, `audience`, `question`, `answer`, `answerType` and `source`.
 */
function generateQuestions(srcRoot, snapshotPath) {
  const questions = [];
  const helmIgnore = new Set([
    'node_modules', '.git', 'venv', '__pycache__', '.terraform',
    '_bmad', '_bmad-output', '.codex', '.claude', '.cursor',
    '.gemini', '.kiro', '.agents'
  ]);

  // Load Helm data
  const charts = discoverCharts(srcRoot, helmIgnore);

  // Load graph + subsystems
  const graph = GraphStore.loadSnapshot(snapshotPath);
  const subs = buildSubsystems(graph, {
    srcDir: srcRoot.endsWith('/') ? srcRoot : srcRoot + '/',
    minTraffic: 3,
    crossCuttingThreshold: 0.6
  });

  // ─── Category 1: Structural (chart metadata) ───

  // Q: How many Helm charts exist?
  questions.push({
    id: 'struct-001',
    category: 'structural',
    difficulty: 'easy',
    audience: ['human', 'machine'],
    question: 'How many Helm charts are defined across the Foxtrot monorepo?',
    answer: String(charts.length),
    answerType: 'exact',
    source: 'Chart.yaml discovery',
  });

  // Q: Which charts have the most K8s resources?
  const topByResources = [...charts]
    .sort((a, b) => b.templates.resources.length - a.templates.resources.length)
    .slice(0, 5);
  questions.push({
    id: 'struct-002',
    category: 'structural',
    difficulty: 'medium',
    audience: ['human', 'machine'],
    question: 'Which 5 Helm charts produce the most Kubernetes resources? List them with their resource counts.',
    answer: topByResources.map(c => `${c.chart.name} (${c.dir}): ${c.templates.resources.length}`).join('\n'),
    answerType: 'ranked-list',
    source: 'template scanning',
  });

  // Q: How many subsystems are detected?
  questions.push({
    id: 'struct-003',
    category: 'structural',
    difficulty: 'easy',
    audience: ['human', 'machine'],
    question: 'How many subsystems does the Foxtrot codebase contain?',
    answer: String(subs.subsystems.length),
    answerType: 'exact',
    source: 'subsystem aggregation',
  });

  // Q: Which subsystem has the most files?
  // Skipped when no subsystems were detected: there is no "largest" to report.
  const [topBySub] = [...subs.subsystems].sort((a, b) => b.files.length - a.files.length);
  if (topBySub) {
    questions.push({
      id: 'struct-004',
      category: 'structural',
      difficulty: 'easy',
      audience: ['human'],
      question: 'Which subsystem contains the most files, and how many?',
      answer: `${topBySub.name}: ${topBySub.files.length} files`,
      answerType: 'exact',
      source: 'subsystem aggregation',
    });
  }

  // ─── Category 2: Dependencies (chart-to-chart) ───

  // Charts with dependencies
  const chartsWithDeps = charts.filter(c => c.chart.dependencies.length > 0);
  for (const c of chartsWithDeps.slice(0, 5)) {
    questions.push({
      id: `dep-${c.chart.name.replace(/[^a-z0-9]/g, '-')}-001`,
      category: 'dependency',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: `What are the dependencies of the "${c.chart.name}" chart (at ${c.dir})?`,
      answer: c.chart.dependencies
        .map(d => `${d.name} (${d.version})${d.condition ? ` [condition: ${d.condition}]` : ''}`)
        .join('\n'),
      answerType: 'list',
      source: `${c.dir}/Chart.yaml`,
    });
  }

  // ─── Category 3: Contracts (shared secrets, config refs) ───

  // Shared secrets: a target counts as shared only when at least two
  // distinct chart names reference it (user lists are de-duplicated).
  const configUsers = collectTargetUsers(charts, 'config-ref');
  const sharedSecrets = [...configUsers.entries()].filter(([, users]) => users.length > 1);
  if (sharedSecrets.length > 0) {
    questions.push({
      id: 'contract-001',
      category: 'contract',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which secrets or ConfigMaps are shared across multiple Helm charts? List each with the charts that use it.',
      answer: sharedSecrets.map(([name, users]) => `${name}: ${users.join(', ')}`).join('\n'),
      answerType: 'list',
      source: 'template interaction scanning',
    });
  }

  // ─── Category 4: Configuration Surface (values.yaml) ───

  // Specific value questions from important charts
  const importantCharts = ['mdm-app', 'ai-app', 'elasticsearch', 'hazelcast', 'cassandra', 'kong', 'redis'];
  for (const name of importantCharts) {
    const chart = charts.find(c => c.chart.name === name && c.values.keys.length > 10);
    if (!chart) continue;

    // What's the default domain?
    const domainKey = chart.values.keys.find(k => k.name === 'domain' || k.name === 'rootDomain');
    if (domainKey && domainKey.defaultValue) {
      questions.push({
        id: `config-${name}-domain`,
        category: 'configuration',
        difficulty: 'easy',
        audience: ['human'],
        question: `What is the default value of "${domainKey.name}" in the ${name} chart?`,
        answer: String(domainKey.defaultValue),
        answerType: 'exact',
        source: `${chart.dir}/values.yaml`,
      });
    }

    // How many configurable values?
    questions.push({
      id: `config-${name}-count`,
      category: 'configuration',
      difficulty: 'easy',
      audience: ['machine'],
      question: `How many top-level configuration keys does the ${name} chart (at ${chart.dir}) expose in its values.yaml?`,
      answer: String(chart.values.keys.length),
      answerType: 'exact',
      source: `${chart.dir}/values.yaml`,
    });
  }

  // ─── Category 5: Interactions (service-to-service) ───

  const svcRefs = charts.flatMap(c =>
    c.interactions
      .filter(i => i.type === 'k8s-service')
      .map(i => ({ from: c.chart.name, dir: c.dir, to: i.target }))
  );
  if (svcRefs.length > 0) {
    questions.push({
      id: 'interaction-001',
      category: 'interaction',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which Helm charts reference Kubernetes services from other components? List each chart and the service it calls.',
      answer: svcRefs.map(r => `${r.from} (${r.dir}) → ${r.to}`).join('\n'),
      answerType: 'list',
      source: 'template interaction scanning',
    });
  }

  // Port allocation (targets equal to '0' are excluded)
  const portMap = collectTargetUsers(charts, 'port', target => target !== '0');
  const sharedPorts = [...portMap.entries()].filter(([, users]) => users.length > 1);
  if (sharedPorts.length > 0) {
    questions.push({
      id: 'interaction-002',
      category: 'interaction',
      difficulty: 'hard',
      audience: ['human', 'machine'],
      question: 'Which network ports are used by multiple Helm charts? List each port and the charts that expose it.',
      answer: sharedPorts
        .sort((a, b) => Number(a[0]) - Number(b[0]))
        .map(([port, users]) => `Port ${port}: ${users.join(', ')}`)
        .join('\n'),
      answerType: 'list',
      source: 'template port scanning',
    });
  }

  // ─── Category 6: Resource Types ───

  const topKinds = countKinds(charts.flatMap(c => c.templates.resources)).slice(0, 10);
  questions.push({
    id: 'resource-001',
    category: 'resource',
    difficulty: 'medium',
    audience: ['human', 'machine'],
    question: 'What are the 10 most common Kubernetes resource types generated across all Foxtrot Helm charts?',
    answer: topKinds.map(([kind, count]) => `${kind}: ${count}`).join('\n'),
    answerType: 'ranked-list',
    source: 'template resource scanning',
  });

  // Specific chart resource questions
  for (const name of ['mdm-app', 'cassandra', 'jenkins']) {
    const chart = charts.find(c => c.chart.name === name && c.templates.resources.length > 5);
    if (!chart) continue;
    questions.push({
      id: `resource-${name}-001`,
      category: 'resource',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: `What Kubernetes resource types does the ${name} chart generate? List each type and count.`,
      answer: countKinds(chart.templates.resources).map(([k, v]) => `${k}: ${v}`).join('\n'),
      answerType: 'list',
      source: `${chart.dir}/templates/`,
    });
  }

  // ─── Category 7: Cross-Subsystem (code-level) ───

  // Which subsystems depend on app-tools?
  const appToolsDeps = Object.entries(subs.dependencyMatrix)
    .filter(([k, v]) => k.endsWith('→app-tools') && (v.calls + v.imports) > 0)
    .map(([k, v]) => ({ from: k.split('→')[0], weight: v.calls + v.imports }));
  if (appToolsDeps.length > 0) {
    questions.push({
      id: 'cross-sub-001',
      category: 'cross-subsystem',
      difficulty: 'hard',
      audience: ['human', 'machine'],
      question: 'Which subsystems depend on app-tools, and how heavily (by call+import count)?',
      answer: appToolsDeps.sort((a, b) => b.weight - a.weight).map(d => `${d.from}: ${d.weight}`).join('\n'),
      answerType: 'list',
      source: 'dependency matrix',
    });
  }

  // Cross-cutting concerns
  if (subs.crossCutting.length > 0) {
    questions.push({
      id: 'cross-sub-002',
      category: 'cross-subsystem',
      difficulty: 'easy',
      audience: ['human'],
      question: 'Which subsystems are identified as cross-cutting concerns?',
      answer: subs.crossCutting.join(', '),
      answerType: 'list',
      source: 'subsystem aggregation',
    });
  }

  // ─── Category 8: Architectural Reasoning ───

  // Why are certain subsystems empty?
  const emptySubs = subs.subsystems.filter(s => s.entities.functions === 0 && s.entities.modules === 0);
  if (emptySubs.length > 0) {
    questions.push({
      id: 'arch-001',
      category: 'architectural',
      difficulty: 'hard',
      audience: ['human'],
      question: `The following subsystems have 0 detected functions and 0 modules: ${emptySubs.map(s => s.name).join(', ')}. Why might this be the case, and what do they actually contain?`,
      answer: `These subsystems primarily contain Helm charts with Go-templated YAML, Terraform HCL, and Crossplane compositions. The code analysis pipeline detects functions/modules from Python, Go, TypeScript, and shell scripts — but Helm templates use Go template syntax ({{ }}) which doesn't produce traditional function/module entities. Their content is captured through the Helm chart extraction phase instead.`,
      answerType: 'explanation',
      source: 'architectural analysis',
    });
  }

  // Chart version question
  const mdmChart = charts.find(c => c.chart.name === 'mdm-app');
  if (mdmChart) {
    questions.push({
      id: 'arch-002',
      category: 'architectural',
      difficulty: 'easy',
      audience: ['human', 'machine'],
      question: 'What is the current version and appVersion of the mdm-app Helm chart?',
      answer: `version: ${mdmChart.chart.version}, appVersion: ${mdmChart.chart.appVersion}`,
      answerType: 'exact',
      source: `${mdmChart.dir}/Chart.yaml`,
    });
  }

  // ─── Category 9: Scenario-Based (human-focused) ───

  questions.push({
    id: 'scenario-001',
    category: 'scenario',
    difficulty: 'hard',
    audience: ['human'],
    question: 'If you need to rotate the "vault-secret" shared secret, which Helm charts would be affected and need redeployment?',
    // Chart names are already distinct here; the answer is empty when nothing references the secret.
    answer: (configUsers.get('vault-secret') ?? []).join(', '),
    answerType: 'list',
    source: 'template interaction scanning',
  });

  questions.push({
    id: 'scenario-002',
    category: 'scenario',
    difficulty: 'hard',
    audience: ['human'],
    question: 'A new engineer needs to deploy the MDM application stack. What charts, configuration values, and external dependencies should they understand first?',
    answer: mdmChart
      ? `Chart: ${mdmChart.chart.name} (${mdmChart.dir}), Version: ${mdmChart.chart.version}\nKey values: ${mdmChart.values.keys.slice(0, 10).map(k => k.name).join(', ')} (+${Math.max(0, mdmChart.values.keys.length - 10)} more)\nResources generated: ${mdmChart.templates.resources.length} K8s resources\nInteractions: ${mdmChart.interactions.map(i => `${i.type}: ${i.target}`).join(', ') || 'none detected'}`
      : 'mdm-app chart not found',
    answerType: 'explanation',
    source: `${mdmChart?.dir || 'N/A'}`,
  });

  // Cassandra ports
  const cassChart = charts.find(c => c.chart.name === 'cassandra');
  if (cassChart) {
    const cassPorts = cassChart.interactions.filter(i => i.type === 'port').map(i => i.target);
    questions.push({
      id: 'scenario-003',
      category: 'scenario',
      difficulty: 'medium',
      audience: ['human'],
      question: 'What network ports does the Cassandra chart expose, and what are they typically used for?',
      answer: `Ports: ${cassPorts.join(', ')}\n7000: Cassandra inter-node cluster communication\n7001: Cassandra SSL inter-node\n7199: JMX monitoring\n9042: CQL native transport (client connections)\n9160: Thrift client API (legacy)`,
      answerType: 'explanation',
      source: `${cassChart.dir}/templates/`,
    });
  }

  return questions;
}

// CLI entry point: generate the bank, print a summary, and write it as JSON.
if (require.main === module) {
  const srcRoot = process.argv[2];
  const snapshotPath = process.argv[3];
  const outPath = process.argv[4] || './eval-questions.json';

  if (!srcRoot || !snapshotPath) {
    console.error('Usage: node eval-questions.js <srcRoot> <snapshotPath> [output.json]');
    process.exit(1);
  }

  const questions = generateQuestions(srcRoot, snapshotPath);

  // Summary
  const cats = {};
  for (const q of questions) {
    cats[q.category] = (cats[q.category] || 0) + 1;
  }

  console.log(`Generated ${questions.length} questions:`);
  for (const [cat, count] of Object.entries(cats).sort((a, b) => b[1] - a[1])) {
    console.log(`  ${cat}: ${count}`);
  }
  console.log(`\nDifficulty: easy=${questions.filter(q => q.difficulty === 'easy').length}, medium=${questions.filter(q => q.difficulty === 'medium').length}, hard=${questions.filter(q => q.difficulty === 'hard').length}`);
  console.log(`Audience: human=${questions.filter(q => q.audience.includes('human')).length}, machine=${questions.filter(q => q.audience.includes('machine')).length}`);

  fs.writeFileSync(outPath, JSON.stringify({ generated: new Date().toISOString(), count: questions.length, questions }, null, 2));
  console.log(`\nWritten to ${outPath}`);
}

module.exports = { generateQuestions };