393 lines
15 KiB
JavaScript
393 lines
15 KiB
JavaScript
|
|
/**
|
||
|
|
* Eval Harness: Question Bank Generator
|
||
|
|
*
|
||
|
|
* Generates ground-truth Q&A pairs directly from source code and Helm charts.
|
||
|
|
* Questions are categorized by type and difficulty.
|
||
|
|
*
|
||
|
|
* Usage: node eval-questions.js <foxtrot-root> <snapshot.json> [output.json]
|
||
|
|
*/
|
||
|
|
|
||
|
|
const fs = require('fs');
|
||
|
|
const path = require('path');
|
||
|
|
const { discoverCharts } = require('./extract-helm.js');
|
||
|
|
const GraphStore = require('./graph.js');
|
||
|
|
const { buildSubsystems } = require('./subsystem.js');
|
||
|
|
|
||
|
|
/**
 * Groups chart names by the interaction target they reference.
 *
 * Uses a Map of Sets so that (a) a chart referencing the same target several
 * times is counted once, and (b) targets named like Object.prototype members
 * (e.g. "constructor") cannot collide with inherited properties.
 *
 * @param {Array<Object>} charts - Chart records; each is read for
 *   `interactions[].type`, `interactions[].target` and `chart.name`.
 * @param {string} type - Interaction type to collect (e.g. 'config-ref').
 * @param {(target: *) => boolean} [accept] - Optional extra filter on the target.
 * @returns {Map<string, Set<string>>} target -> distinct chart names, in first-seen order.
 */
function groupChartsByTarget(charts, type, accept = () => true) {
  const usersByTarget = new Map();
  for (const c of charts) {
    for (const i of c.interactions) {
      if (i.type !== type || !accept(i.target)) continue;
      // Normalise to a string key, matching how a plain-object key would coerce.
      const key = String(i.target);
      if (!usersByTarget.has(key)) usersByTarget.set(key, new Set());
      usersByTarget.get(key).add(c.chart.name);
    }
  }
  return usersByTarget;
}

/**
 * Tallies resources by their `kind`.
 *
 * @param {Array<Object>} resources - Resource records; only `kind` is read.
 * @returns {Array<[string, number]>} [kind, count] pairs, highest count first
 *   (ties keep first-seen order because Array.prototype.sort is stable).
 */
function countResourceKinds(resources) {
  const counts = new Map();
  for (const { kind } of resources) {
    counts.set(kind, (counts.get(kind) ?? 0) + 1);
  }
  return [...counts].sort((a, b) => b[1] - a[1]);
}

/**
 * Generates the ground-truth question bank for the eval harness.
 *
 * Discovers Helm charts under `srcRoot`, loads the graph snapshot, builds the
 * subsystem view, and derives question/answer records from that data. Questions
 * whose underlying data is absent (no shared secrets, chart not found, no
 * subsystems, ...) are skipped rather than emitted, except scenario-001 and
 * scenario-002 which are always emitted.
 *
 * @param {string} srcRoot - Repository root handed to `discoverCharts`; also
 *   used (with a trailing slash enforced) as `srcDir` for `buildSubsystems`.
 * @param {string} snapshotPath - Path handed to `GraphStore.loadSnapshot`.
 * @returns {Array<Object>} Question records with the fields `id`, `category`,
 *   `difficulty`, `audience`, `question`, `answer`, `answerType` and `source`.
 */
function generateQuestions(srcRoot, snapshotPath) {
  const questions = [];
  const helmIgnore = new Set([
    'node_modules', '.git', 'venv', '__pycache__', '.terraform',
    '_bmad', '_bmad-output', '.codex', '.claude', '.cursor', '.gemini', '.kiro', '.agents'
  ]);

  // Load Helm data
  const charts = discoverCharts(srcRoot, helmIgnore);

  // Load graph + subsystems
  const graph = GraphStore.loadSnapshot(snapshotPath);
  const subs = buildSubsystems(graph, {
    srcDir: srcRoot.endsWith('/') ? srcRoot : `${srcRoot}/`,
    minTraffic: 3,
    crossCuttingThreshold: 0.6
  });

  // ─── Category 1: Structural (chart metadata) ───

  // Q: How many Helm charts exist?
  questions.push({
    id: 'struct-001',
    category: 'structural',
    difficulty: 'easy',
    audience: ['human', 'machine'],
    question: 'How many Helm charts are defined across the Foxtrot monorepo?',
    answer: String(charts.length),
    answerType: 'exact',
    source: 'Chart.yaml discovery',
  });

  // Q: Which charts have the most K8s resources?
  const topByResources = [...charts]
    .sort((a, b) => b.templates.resources.length - a.templates.resources.length)
    .slice(0, 5);
  questions.push({
    id: 'struct-002',
    category: 'structural',
    difficulty: 'medium',
    audience: ['human', 'machine'],
    question: 'Which 5 Helm charts produce the most Kubernetes resources? List them with their resource counts.',
    answer: topByResources.map(c => `${c.chart.name} (${c.dir}): ${c.templates.resources.length}`).join('\n'),
    answerType: 'ranked-list',
    source: 'template scanning',
  });

  // Q: How many subsystems are detected?
  questions.push({
    id: 'struct-003',
    category: 'structural',
    difficulty: 'easy',
    audience: ['human', 'machine'],
    question: 'How many subsystems does the Foxtrot codebase contain?',
    answer: String(subs.subsystems.length),
    answerType: 'exact',
    source: 'subsystem aggregation',
  });

  // Q: Which subsystem has the most files?
  // Guarded: with zero subsystems there is no "largest" one, and dereferencing
  // the missing element would abort the whole generation run.
  const topBySub = [...subs.subsystems].sort((a, b) => b.files.length - a.files.length)[0];
  if (topBySub) {
    questions.push({
      id: 'struct-004',
      category: 'structural',
      difficulty: 'easy',
      audience: ['human'],
      question: 'Which subsystem contains the most files, and how many?',
      answer: `${topBySub.name}: ${topBySub.files.length} files`,
      answerType: 'exact',
      source: 'subsystem aggregation',
    });
  }

  // ─── Category 2: Dependencies (chart-to-chart) ───

  // Charts with dependencies (only the first 5 get a question)
  const chartsWithDeps = charts.filter(c => c.chart.dependencies.length > 0);
  for (const c of chartsWithDeps.slice(0, 5)) {
    questions.push({
      id: `dep-${c.chart.name.replace(/[^a-z0-9]/g, '-')}-001`,
      category: 'dependency',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: `What are the dependencies of the "${c.chart.name}" chart (at ${c.dir})?`,
      answer: c.chart.dependencies.map(d => `${d.name} (${d.version})${d.condition ? ` [condition: ${d.condition}]` : ''}`).join('\n'),
      answerType: 'list',
      source: `${c.dir}/Chart.yaml`,
    });
  }

  // ─── Category 3: Contracts (shared secrets, config refs) ───

  // Shared secrets: a target is "shared" only when two or more DISTINCT charts
  // reference it; repeated references from one chart do not count.
  const configUsers = groupChartsByTarget(charts, 'config-ref');
  const sharedSecrets = [...configUsers].filter(([, users]) => users.size > 1);
  if (sharedSecrets.length > 0) {
    questions.push({
      id: 'contract-001',
      category: 'contract',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which secrets or ConfigMaps are shared across multiple Helm charts? List each with the charts that use it.',
      answer: sharedSecrets.map(([name, users]) => `${name}: ${[...users].join(', ')}`).join('\n'),
      answerType: 'list',
      source: 'template interaction scanning',
    });
  }

  // ─── Category 4: Configuration Surface (values.yaml) ───

  // Specific value questions from important charts
  const importantCharts = ['mdm-app', 'ai-app', 'elasticsearch', 'hazelcast', 'cassandra', 'kong', 'redis'];
  for (const name of importantCharts) {
    // Only charts with more than 10 value keys are considered.
    const chart = charts.find(c => c.chart.name === name && c.values.keys.length > 10);
    if (!chart) continue;

    // What's the default domain?
    const domainKey = chart.values.keys.find(k => k.name === 'domain' || k.name === 'rootDomain');
    if (domainKey?.defaultValue) {
      questions.push({
        id: `config-${name}-domain`,
        category: 'configuration',
        difficulty: 'easy',
        audience: ['human'],
        question: `What is the default value of "${domainKey.name}" in the ${name} chart?`,
        answer: String(domainKey.defaultValue),
        answerType: 'exact',
        source: `${chart.dir}/values.yaml`,
      });
    }

    // How many configurable values?
    questions.push({
      id: `config-${name}-count`,
      category: 'configuration',
      difficulty: 'easy',
      audience: ['machine'],
      question: `How many top-level configuration keys does the ${name} chart (at ${chart.dir}) expose in its values.yaml?`,
      answer: String(chart.values.keys.length),
      answerType: 'exact',
      source: `${chart.dir}/values.yaml`,
    });
  }

  // ─── Category 5: Interactions (service-to-service) ───

  const svcRefs = [];
  for (const c of charts) {
    for (const i of c.interactions) {
      if (i.type === 'k8s-service') {
        svcRefs.push({ from: c.chart.name, dir: c.dir, to: i.target });
      }
    }
  }
  if (svcRefs.length > 0) {
    questions.push({
      id: 'interaction-001',
      category: 'interaction',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which Helm charts reference Kubernetes services from other components? List each chart and the service it calls.',
      answer: svcRefs.map(r => `${r.from} (${r.dir}) → ${r.to}`).join('\n'),
      answerType: 'list',
      source: 'template interaction scanning',
    });
  }

  // Port allocation (targets equal to '0' are excluded)
  const portMap = groupChartsByTarget(charts, 'port', target => target !== '0');
  const sharedPorts = [...portMap]
    .filter(([, users]) => users.size > 1)
    .sort((a, b) => Number(a[0]) - Number(b[0]));
  if (sharedPorts.length > 0) {
    questions.push({
      id: 'interaction-002',
      category: 'interaction',
      difficulty: 'hard',
      audience: ['human', 'machine'],
      question: 'Which network ports are used by multiple Helm charts? List each port and the charts that expose it.',
      answer: sharedPorts.map(([port, users]) => `Port ${port}: ${[...users].join(', ')}`).join('\n'),
      answerType: 'list',
      source: 'template port scanning',
    });
  }

  // ─── Category 6: Resource Types ───

  const topKinds = countResourceKinds(charts.flatMap(c => c.templates.resources)).slice(0, 10);
  questions.push({
    id: 'resource-001',
    category: 'resource',
    difficulty: 'medium',
    audience: ['human', 'machine'],
    question: 'What are the 10 most common Kubernetes resource types generated across all Foxtrot Helm charts?',
    answer: topKinds.map(([kind, count]) => `${kind}: ${count}`).join('\n'),
    answerType: 'ranked-list',
    source: 'template resource scanning',
  });

  // Specific chart resource questions (only charts with more than 5 resources)
  for (const name of ['mdm-app', 'cassandra', 'jenkins']) {
    const chart = charts.find(c => c.chart.name === name && c.templates.resources.length > 5);
    if (!chart) continue;

    questions.push({
      id: `resource-${name}-001`,
      category: 'resource',
      difficulty: 'medium',
      audience: ['human', 'machine'],
      question: `What Kubernetes resource types does the ${name} chart generate? List each type and count.`,
      answer: countResourceKinds(chart.templates.resources).map(([k, v]) => `${k}: ${v}`).join('\n'),
      answerType: 'list',
      source: `${chart.dir}/templates/`,
    });
  }

  // ─── Category 7: Cross-Subsystem (code-level) ───

  // Which subsystems depend on app-tools?
  // dependencyMatrix keys are read as "<from>→<to>"; values carry calls/imports counts.
  const appToolsDeps = Object.entries(subs.dependencyMatrix)
    .filter(([k, v]) => k.endsWith('→app-tools') && (v.calls + v.imports) > 0)
    .map(([k, v]) => ({ from: k.split('→')[0], weight: v.calls + v.imports }))
    .sort((a, b) => b.weight - a.weight);
  if (appToolsDeps.length > 0) {
    questions.push({
      id: 'cross-sub-001',
      category: 'cross-subsystem',
      difficulty: 'hard',
      audience: ['human', 'machine'],
      question: 'Which subsystems depend on app-tools, and how heavily (by call+import count)?',
      answer: appToolsDeps.map(d => `${d.from}: ${d.weight}`).join('\n'),
      answerType: 'list',
      source: 'dependency matrix',
    });
  }

  // Cross-cutting concerns
  if (subs.crossCutting.length > 0) {
    questions.push({
      id: 'cross-sub-002',
      category: 'cross-subsystem',
      difficulty: 'easy',
      audience: ['human'],
      question: 'Which subsystems are identified as cross-cutting concerns?',
      answer: subs.crossCutting.join(', '),
      answerType: 'list',
      source: 'subsystem aggregation',
    });
  }

  // ─── Category 8: Architectural Reasoning ───

  // Why are certain subsystems empty?
  const emptySubs = subs.subsystems.filter(s => s.entities.functions === 0 && s.entities.modules === 0);
  if (emptySubs.length > 0) {
    questions.push({
      id: 'arch-001',
      category: 'architectural',
      difficulty: 'hard',
      audience: ['human'],
      question: `The following subsystems have 0 detected functions and 0 modules: ${emptySubs.map(s => s.name).join(', ')}. Why might this be the case, and what do they actually contain?`,
      answer: `These subsystems primarily contain Helm charts with Go-templated YAML, Terraform HCL, and Crossplane compositions. The code analysis pipeline detects functions/modules from Python, Go, TypeScript, and shell scripts — but Helm templates use Go template syntax ({{ }}) which doesn't produce traditional function/module entities. Their content is captured through the Helm chart extraction phase instead.`,
      answerType: 'explanation',
      source: 'architectural analysis',
    });
  }

  // Chart version question
  const mdmChart = charts.find(c => c.chart.name === 'mdm-app');
  if (mdmChart) {
    questions.push({
      id: 'arch-002',
      category: 'architectural',
      difficulty: 'easy',
      audience: ['human', 'machine'],
      question: 'What is the current version and appVersion of the mdm-app Helm chart?',
      answer: `version: ${mdmChart.chart.version}, appVersion: ${mdmChart.chart.appVersion}`,
      answerType: 'exact',
      source: `${mdmChart.dir}/Chart.yaml`,
    });
  }

  // ─── Category 9: Scenario-Based (human-focused) ───

  // Always emitted; the answer is an empty string when nothing references 'vault-secret'.
  questions.push({
    id: 'scenario-001',
    category: 'scenario',
    difficulty: 'hard',
    audience: ['human'],
    question: 'If you need to rotate the "vault-secret" shared secret, which Helm charts would be affected and need redeployment?',
    answer: [...(configUsers.get('vault-secret') ?? [])].join(', '),
    answerType: 'list',
    source: 'template interaction scanning',
  });

  // Always emitted; falls back to a fixed message when mdm-app is missing.
  const mdmOverview = mdmChart
    ? [
      `Chart: ${mdmChart.chart.name} (${mdmChart.dir}), Version: ${mdmChart.chart.version}`,
      `Key values: ${mdmChart.values.keys.slice(0, 10).map(k => k.name).join(', ')} (+${Math.max(0, mdmChart.values.keys.length - 10)} more)`,
      `Resources generated: ${mdmChart.templates.resources.length} K8s resources`,
      `Interactions: ${mdmChart.interactions.map(i => `${i.type}: ${i.target}`).join(', ') || 'none detected'}`,
    ].join('\n')
    : 'mdm-app chart not found';
  questions.push({
    id: 'scenario-002',
    category: 'scenario',
    difficulty: 'hard',
    audience: ['human'],
    question: 'A new engineer needs to deploy the MDM application stack. What charts, configuration values, and external dependencies should they understand first?',
    answer: mdmOverview,
    answerType: 'explanation',
    source: `${mdmChart?.dir || 'N/A'}`,
  });

  // Cassandra ports
  const cassChart = charts.find(c => c.chart.name === 'cassandra');
  if (cassChart) {
    // De-duplicated so a port declared in several templates is listed once.
    const cassPorts = [...new Set(
      cassChart.interactions.filter(i => i.type === 'port').map(i => i.target)
    )];
    questions.push({
      id: 'scenario-003',
      category: 'scenario',
      difficulty: 'medium',
      audience: ['human'],
      question: 'What network ports does the Cassandra chart expose, and what are they typically used for?',
      answer: `Ports: ${cassPorts.join(', ')}\n7000: Cassandra inter-node cluster communication\n7001: Cassandra SSL inter-node\n7199: JMX monitoring\n9042: CQL native transport (client connections)\n9160: Thrift client API (legacy)`,
      answerType: 'explanation',
      source: `${cassChart.dir}/templates/`,
    });
  }

  return questions;
}
|
||
|
|
|
||
|
|
// CLI entry point: runs only when this file is executed directly, not when required.
if (require.main === module) {
  const [, , srcRoot, snapshotPath, outArg] = process.argv;
  const outPath = outArg || './eval-questions.json';

  // Both positional arguments are mandatory; the output path is optional.
  if (!srcRoot || !snapshotPath) {
    console.error('Usage: node eval-questions.js <foxtrot-root> <snapshot.json> [output.json]');
    process.exit(1);
  }

  const questions = generateQuestions(srcRoot, snapshotPath);

  // Summary: number of questions per category, largest category first.
  const perCategory = questions.reduce((acc, { category }) => {
    acc[category] = (acc[category] || 0) + 1;
    return acc;
  }, {});
  const rankedCategories = Object.entries(perCategory).sort((a, b) => b[1] - a[1]);

  console.log(`Generated ${questions.length} questions:`);
  rankedCategories.forEach(([cat, count]) => {
    console.log(` ${cat}: ${count}`);
  });

  // Small counting helpers for the difficulty / audience breakdown lines.
  const withDifficulty = (level) => questions.filter(q => q.difficulty === level).length;
  const forAudience = (who) => questions.filter(q => q.audience.includes(who)).length;

  console.log(`\nDifficulty: easy=${withDifficulty('easy')}, medium=${withDifficulty('medium')}, hard=${withDifficulty('hard')}`);
  console.log(`Audience: human=${forAudience('human')}, machine=${forAudience('machine')}`);

  // Persist the bank together with a generation timestamp and the total count.
  const payload = {
    generated: new Date().toISOString(),
    count: questions.length,
    questions,
  };
  fs.writeFileSync(outPath, JSON.stringify(payload, null, 2));
  console.log(`\nWritten to ${outPath}`);
}
|
||
|
|
|
||
|
|
module.exports = { generateQuestions };
|