Files
dev-intel-v2/eval-generator.js

413 lines
17 KiB
JavaScript
Raw Normal View History

/**
* Eval Generator: Repo-Agnostic Question Bank
*
* Generates ground-truth Q&A pairs from graph + Helm data.
* Questions target what the docs actually cover: subsystems, charts,
* dependencies, interactions, contracts, resource types.
*
* Usage: node eval-generator.js <snapshot.json> <repo-root> [output.json] [--dry-run]
*/
const fs = require('fs');
const path = require('path');
const GraphStore = require('./graph.js');
const { discoverCharts, chartsToGraph } = require('./extract-helm.js');
const { buildSubsystems } = require('./subsystem.js');
// Directory names handed to discoverCharts() as its ignore set when scanning
// the repository for Helm charts.
const HELM_IGNORE = new Set([
  'node_modules',
  '.git',
  'venv',
  '__pycache__',
  '.terraform',
  '_bmad',
  '_bmad-output',
  '.codex',
  '.claude',
  '.cursor',
  '.gemini',
  '.kiro',
  '.agents',
]);
/**
 * Build the ground-truth question bank for a repository.
 *
 * Loads the graph snapshot, discovers Helm charts under `srcRoot`, merges the
 * chart entities/relationships into the graph, builds subsystems, and then
 * derives questions (structural, dependency, contract, configuration,
 * interaction, resource, cross-subsystem, architectural, scenario) from that
 * data. A category is skipped when the data needed to answer it is absent.
 *
 * @param {string} snapshotPath - Path handed to GraphStore.loadSnapshot().
 * @param {string} srcRoot - Repository root handed to discoverCharts().
 * @returns {Array<object>} Question records with the keys id, category,
 *   difficulty, audience, question, expected_answer, answer, answerType,
 *   source and source_entity. Ids are sequential (`q-001`, `q-002`, ...).
 */
function generateQuestions(snapshotPath, srcRoot) {
  const questions = [];
  let qIdx = 1;
  const qid = () => `q-${String(qIdx++).padStart(3, '0')}`;

  // Single place that shapes a question record, so each answer is computed
  // once. `answer` defaults to the full expected answer; only the two
  // explanation-style questions pass a shorter one.
  const addQuestion = ({
    category, difficulty, audience, question,
    expected, answer = expected, answerType, source, sourceEntity
  }) => {
    questions.push({
      id: qid(),
      category,
      difficulty,
      audience,
      question,
      expected_answer: expected,
      answer,
      answerType,
      source,
      source_entity: sourceEntity
    });
  };

  // Load graph
  const graph = GraphStore.loadSnapshot(snapshotPath);

  // Load Helm data
  const charts = discoverCharts(srcRoot, HELM_IGNORE);

  // Merge Helm into graph (same as sysdoc.js)
  if (charts.length > 0) {
    const helmGraph = chartsToGraph(charts, srcRoot);
    for (const e of helmGraph.entities) {
      // Helm entities are indexed under their chart's Chart.yaml path.
      const fakePath = e.dir ? path.join(srcRoot, e.dir, 'Chart.yaml') : path.join(srcRoot, 'Chart.yaml');
      graph.nodes.set(e.id, { ...e, type: e.type || 'Module', _file: fakePath });
      if (!graph.fileIndex.has(fakePath)) graph.fileIndex.set(fakePath, new Set());
      graph.fileIndex.get(fakePath).add(e.id);
    }
    for (const r of helmGraph.relationships) {
      graph.edges.push(r);
    }
  }

  // Build subsystems (srcDir is always passed with a trailing slash)
  const rootDir = srcRoot || '';
  const subs = buildSubsystems(graph, {
    srcDir: rootDir.endsWith('/') ? rootDir : `${rootDir}/`,
    minTraffic: 3,
    crossCuttingThreshold: 0.6
  });

  // ─── Category 1: Structural ───
  // Total chart count
  if (charts.length > 0) {
    addQuestion({
      category: 'structural', difficulty: 'easy',
      audience: ['human', 'machine'],
      question: 'How many Helm charts are defined across this repository?',
      expected: String(charts.length),
      answerType: 'exact',
      source: 'Chart.yaml discovery',
      sourceEntity: 'all charts'
    });
  }

  // Subsystem count
  if (subs.subsystems.length > 0) {
    addQuestion({
      category: 'structural', difficulty: 'easy',
      audience: ['human', 'machine'],
      question: 'How many subsystems does this codebase contain?',
      expected: String(subs.subsystems.length),
      answerType: 'exact',
      source: 'subsystem aggregation',
      sourceEntity: 'all subsystems'
    });
  }

  // Top charts by K8s resources
  const topByResources = [...charts]
    .filter(c => c.templates && c.templates.resources)
    .sort((a, b) => b.templates.resources.length - a.templates.resources.length)
    .slice(0, 5);
  if (topByResources.length > 0 && topByResources[0].templates.resources.length > 0) {
    addQuestion({
      category: 'structural', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which 5 Helm charts produce the most Kubernetes resources? List them with their resource counts.',
      expected: topByResources.map(c => `${c.chart.name} (${c.dir}): ${c.templates.resources.length}`).join('\n'),
      answerType: 'ranked-list',
      source: 'template scanning',
      sourceEntity: topByResources.map(c => c.chart.name).join(', ')
    });
  }

  // Largest subsystem
  const sortedSubs = [...subs.subsystems].sort((a, b) => b.files.length - a.files.length);
  if (sortedSubs.length > 0) {
    const largest = sortedSubs[0];
    addQuestion({
      category: 'structural', difficulty: 'easy',
      audience: ['human'],
      question: 'Which subsystem contains the most files, and how many?',
      expected: `${largest.name}: ${largest.files.length} files`,
      answerType: 'exact',
      source: 'subsystem aggregation',
      sourceEntity: largest.name
    });
  }

  // ─── Category 2: Dependencies ───
  const chartsWithDeps = charts.filter(c => c.chart.dependencies && c.chart.dependencies.length > 0);
  for (const c of chartsWithDeps.slice(0, 5)) {
    addQuestion({
      category: 'dependency', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: `What are the dependencies of the "${c.chart.name}" chart (at ${c.dir})?`,
      expected: c.chart.dependencies.map(d => `${d.name} (${d.version})${d.condition ? ` [condition: ${d.condition}]` : ''}`).join('\n'),
      answerType: 'list',
      source: `${c.dir}/Chart.yaml`,
      sourceEntity: c.chart.name
    });
  }

  // ─── Category 3: Contracts (shared secrets/configs) ───
  // Each chart is recorded at most once per target (mirrors the port map
  // below). Otherwise a single chart referencing the same secret twice would
  // pass the "> 1 users" test and be reported as shared.
  const configUsers = {};
  for (const c of charts) {
    for (const i of (c.interactions || [])) {
      if (i.type !== 'config-ref') continue;
      if (!configUsers[i.target]) configUsers[i.target] = [];
      if (!configUsers[i.target].includes(c.chart.name)) configUsers[i.target].push(c.chart.name);
    }
  }
  const sharedSecrets = Object.entries(configUsers).filter(([, users]) => users.length > 1);
  if (sharedSecrets.length > 0) {
    addQuestion({
      category: 'contract', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which secrets or ConfigMaps are shared across multiple Helm charts? List each with the charts that use it.',
      expected: sharedSecrets.map(([name, users]) => `${name}: ${users.join(', ')}`).join('\n'),
      answerType: 'list',
      source: 'template interaction scanning',
      sourceEntity: sharedSecrets.map(([n]) => n).join(', ')
    });
  }

  // ─── Category 4: Configuration Surface ───
  // Top charts by config surface (most values.yaml keys)
  const chartsByValues = [...charts]
    .filter(c => c.values && c.values.keys && c.values.keys.length > 5)
    .sort((a, b) => b.values.keys.length - a.values.keys.length);
  for (const chart of chartsByValues.slice(0, 5)) {
    addQuestion({
      category: 'configuration', difficulty: 'easy',
      audience: ['machine'],
      question: `How many top-level configuration keys does the ${chart.chart.name} chart (at ${chart.dir}) expose in its values.yaml?`,
      expected: String(chart.values.keys.length),
      answerType: 'exact',
      source: `${chart.dir}/values.yaml`,
      sourceEntity: chart.chart.name
    });
  }

  // ─── Category 5: Interactions (service-to-service) ───
  const svcRefs = [];
  for (const c of charts) {
    for (const i of (c.interactions || [])) {
      if (i.type === 'k8s-service') {
        svcRefs.push({ from: c.chart.name, dir: c.dir, to: i.target });
      }
    }
  }
  if (svcRefs.length > 0) {
    addQuestion({
      category: 'interaction', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which Helm charts reference Kubernetes services from other components? List each chart and the service it calls.',
      expected: svcRefs.map(r => `${r.from} (${r.dir}) → ${r.to}`).join('\n'),
      answerType: 'list',
      source: 'template interaction scanning',
      sourceEntity: svcRefs.map(r => r.from).join(', ')
    });
  }

  // Shared ports (port '0' is ignored)
  const portMap = {};
  for (const c of charts) {
    for (const i of (c.interactions || [])) {
      if (i.type === 'port' && i.target !== '0') {
        if (!portMap[i.target]) portMap[i.target] = [];
        if (!portMap[i.target].includes(c.chart.name)) portMap[i.target].push(c.chart.name);
      }
    }
  }
  const sharedPorts = Object.entries(portMap)
    .filter(([, users]) => users.length > 1)
    .sort((a, b) => Number(a[0]) - Number(b[0]));
  if (sharedPorts.length > 0) {
    addQuestion({
      category: 'interaction', difficulty: 'hard',
      audience: ['human', 'machine'],
      question: 'Which network ports are used by multiple Helm charts? List each port and the charts that expose it.',
      expected: sharedPorts.map(([port, users]) => `Port ${port}: ${users.join(', ')}`).join('\n'),
      answerType: 'list',
      source: 'template port scanning',
      sourceEntity: 'shared ports'
    });
  }

  // ─── Category 6: Resource Types ───
  const kindCounts = {};
  for (const c of charts) {
    for (const r of (c.templates?.resources || [])) {
      kindCounts[r.kind] = (kindCounts[r.kind] || 0) + 1;
    }
  }
  const topKinds = Object.entries(kindCounts).sort((a, b) => b[1] - a[1]).slice(0, 10);
  if (topKinds.length > 0) {
    addQuestion({
      category: 'resource', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'What are the most common Kubernetes resource types generated across all Helm charts?',
      expected: topKinds.map(([kind, count]) => `${kind}: ${count}`).join('\n'),
      answerType: 'ranked-list',
      source: 'template resource scanning',
      sourceEntity: 'all charts'
    });
  }

  // Per-chart resource breakdown for top 3 charts by resource count
  for (const chart of topByResources.slice(0, 3)) {
    // A chart with no resources would yield a question whose expected answer
    // is the empty string, which is useless as ground truth; skip it.
    if (chart.templates.resources.length === 0) continue;
    const kinds = {};
    for (const r of chart.templates.resources) {
      kinds[r.kind] = (kinds[r.kind] || 0) + 1;
    }
    addQuestion({
      category: 'resource', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: `What Kubernetes resource types does the ${chart.chart.name} chart generate? List each type and count.`,
      expected: Object.entries(kinds).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k}: ${v}`).join('\n'),
      answerType: 'list',
      source: `${chart.dir}/templates/`,
      sourceEntity: chart.chart.name
    });
  }

  // ─── Category 7: Cross-Subsystem ───
  if (subs.crossCutting && subs.crossCutting.length > 0) {
    addQuestion({
      category: 'cross-subsystem', difficulty: 'easy',
      audience: ['human'],
      question: 'Which subsystems are identified as cross-cutting concerns?',
      expected: subs.crossCutting.join(', '),
      answerType: 'list',
      source: 'subsystem aggregation',
      sourceEntity: subs.crossCutting.join(', ')
    });
  }

  // Dependency matrix questions (keys are read as "<from>→<to>")
  if (subs.dependencyMatrix) {
    const weight = v => v.calls + v.imports;
    const heavyDeps = Object.entries(subs.dependencyMatrix)
      .filter(([, v]) => weight(v) > 3)
      .sort((a, b) => weight(b[1]) - weight(a[1]))
      .slice(0, 5);
    if (heavyDeps.length > 0) {
      const targetSub = heavyDeps[0][0].split('→')[1];
      // Match on the arrow plus the name so that e.g. "x→hardcore" is not
      // mistaken for a dependent of "core".
      const targetSuffix = `→${targetSub}`;
      const depsForTarget = heavyDeps.filter(([k]) => k.endsWith(targetSuffix));
      if (depsForTarget.length > 0) {
        addQuestion({
          category: 'cross-subsystem', difficulty: 'hard',
          audience: ['human', 'machine'],
          question: `Which subsystems depend on ${targetSub}, and how heavily (by call+import count)?`,
          expected: depsForTarget.map(([k, v]) => `${k.split('→')[0]}: ${weight(v)}`).join('\n'),
          answerType: 'list',
          source: 'dependency matrix',
          sourceEntity: targetSub
        });
      }
    }
  }

  // ─── Category 8: Architectural ───
  // Empty subsystems (Helm-only)
  const emptySubs = subs.subsystems.filter(s => s.entities.functions === 0 && s.entities.modules === 0);
  if (emptySubs.length > 0) {
    const emptyNames = emptySubs.map(s => s.name).join(', ');
    addQuestion({
      category: 'architectural', difficulty: 'hard',
      audience: ['human'],
      question: `The following subsystems have 0 detected functions and 0 modules: ${emptyNames}. Why might this be the case, and what do they actually contain?`,
      expected: 'These subsystems primarily contain Helm charts with Go-templated YAML, Terraform HCL, and/or Crossplane compositions. The code analysis pipeline detects functions/modules from Python, Go, TypeScript, and shell scripts — but Helm templates use Go template syntax which does not produce traditional function/module entities. Their content is captured through the Helm chart extraction phase instead.',
      answer: 'These subsystems primarily contain Helm charts, Terraform, or Crossplane compositions rather than traditional code.',
      answerType: 'explanation',
      source: 'architectural analysis',
      sourceEntity: emptyNames
    });
  }

  // Chart version for top chart (falls back to the first discovered chart)
  if (charts.length > 0) {
    const topChart = topByResources[0] || charts[0];
    addQuestion({
      category: 'architectural', difficulty: 'easy',
      audience: ['human', 'machine'],
      question: `What is the current version and appVersion of the ${topChart.chart.name} Helm chart?`,
      expected: `version: ${topChart.chart.version}, appVersion: ${topChart.chart.appVersion}`,
      answerType: 'exact',
      source: `${topChart.dir}/Chart.yaml`,
      sourceEntity: topChart.chart.name
    });
  }

  // ─── Category 9: Scenario-Based ───
  // Secret rotation scenario
  if (sharedSecrets.length > 0) {
    const [secretName, secretUsers] = sharedSecrets[0];
    addQuestion({
      category: 'scenario', difficulty: 'hard',
      audience: ['human'],
      question: `If you need to rotate the "${secretName}" shared secret, which Helm charts would be affected and need redeployment?`,
      expected: secretUsers.join(', '),
      answerType: 'list',
      source: 'template interaction scanning',
      sourceEntity: secretName
    });
  }

  // Deployment scenario for top chart
  if (topByResources.length > 0) {
    const chart = topByResources[0];
    const valueKeys = chart.values?.keys || [];
    const headline = `Chart: ${chart.chart.name} (${chart.dir}), Version: ${chart.chart.version}`;
    const overflow = chart.values?.keys?.length > 10 ? ` (+${chart.values.keys.length - 10} more)` : '';
    const interactions = (chart.interactions || []).map(i => `${i.type}: ${i.target}`).join(', ') || 'none detected';
    addQuestion({
      category: 'scenario', difficulty: 'hard',
      audience: ['human'],
      question: `A new engineer needs to deploy the ${chart.chart.name} application. What charts, configuration values, and external dependencies should they understand first?`,
      expected: [
        headline,
        `Key values: ${valueKeys.slice(0, 10).map(k => k.name).join(', ')}${overflow}`,
        `Resources generated: ${chart.templates.resources.length} K8s resources`,
        `Interactions: ${interactions}`
      ].join('\n'),
      answer: headline,
      answerType: 'explanation',
      source: `${chart.dir}`,
      sourceEntity: chart.chart.name
    });
  }

  return questions;
}
module.exports = { generateQuestions };

// CLI entry point: runs only when this file is executed directly.
//   node eval-generator.js <snapshot.json> <repo-root> [output.json] [--dry-run]
// Prints a per-category and per-difficulty summary, then either writes the
// question bank to [output.json] or, when no output file is given, prints the
// JSON to stdout.
if (require.main === module) {
  const cliArgs = process.argv.slice(2);
  // Anything starting with '-' is a flag; the rest are positional arguments.
  const args = cliArgs.filter(a => !a.startsWith('-'));
  // The usage string advertises --dry-run, so honour it: generate and
  // summarise, but do not touch the output file.
  const dryRun = cliArgs.includes('--dry-run');
  const [snapshotPath, srcRoot] = args;
  const outFile = args[2] || null;

  if (!snapshotPath || !srcRoot) {
    console.error('Usage: node eval-generator.js <snapshot.json> <repo-root> [output.json] [--dry-run]');
    process.exit(1);
  }
  if (!fs.existsSync(snapshotPath)) {
    console.error(`Snapshot not found: ${snapshotPath}`);
    process.exit(1);
  }

  const questions = generateQuestions(snapshotPath, srcRoot);

  // Summary: tally categories and difficulties in a single pass.
  const cats = {};
  const difficulty = { easy: 0, medium: 0, hard: 0 };
  for (const q of questions) {
    cats[q.category] = (cats[q.category] || 0) + 1;
    if (Object.hasOwn(difficulty, q.difficulty)) difficulty[q.difficulty] += 1;
  }
  console.log(`Generated ${questions.length} questions:`);
  for (const [cat, count] of Object.entries(cats).sort((a, b) => b[1] - a[1])) {
    console.log(` ${cat}: ${count}`);
  }
  console.log(`Difficulty: easy=${difficulty.easy}, medium=${difficulty.medium}, hard=${difficulty.hard}`);

  const result = { generated: new Date().toISOString(), count: questions.length, questions };
  const json = JSON.stringify(result, null, 2);
  if (outFile && dryRun) {
    console.log(`\nDry run: skipped writing ${outFile}`);
  } else if (outFile) {
    fs.writeFileSync(outFile, json);
    console.log(`\nWritten to ${outFile}`);
  } else {
    console.log(json);
  }
}