feat: repo-agnostic refactor (BMad spec-test-build loop)

- NEW: repo-profiler.js — deterministic archetype detection (Infra, Frontend, Backend, etc.)
- NEW: extract-dynamic.js — generic extractor replacing hardcoded Foxtrot patterns
- NEW: eval-generator.js — dynamic ground-truth question generation from any repo graph
- NEW: specs/bmad-agnostic-refactor-spec.md — full BMad spec with acceptance criteria
- REFACTORED: prose.js — two-pass LLM synthesis with rich context (shared secrets, ports, service refs)
- REFACTORED: sysdoc.js — wired repo-profiler + extract-dynamic, --legacy escape hatch
- REFACTORED: wiggum-v2.sh — uses eval-generator before benchmarks
- FIXED: graph.js — _edgeSet rebuilt on loadSnapshot() (edge dedup was broken)
- FIXED: graph.js — recursive sortKeys() for deep equality in diffing
- FIXED: prose.js — robust JSON array extraction from LLM output
- FIXED: ratchet.js — syntax validation (node --check) before saving LLM mutations
- FIXED: extract-dynamic.js — centralized state services regex, added console.warn for silent failures
- TESTS: test-eval-generator, test-repo-profiler, test-synthesis-quality + mock fixtures

Eval: 81.5% on Foxtrot (fully repo-agnostic, no hardcoded reference pages)
BMad reviews: Architect B+, Dev Lead B-, TEA B-
2026-03-11 14:40:31 +00:00
parent 15fb1a753b
commit b8403be96c
26 changed files with 4653 additions and 1037 deletions
--- a/eval-generator.js
+++ b/eval-generator.js
@@ -0,0 +1,412 @@
+/**
+ * Eval Generator: Repo-Agnostic Question Bank
+ * 
+ * Generates ground-truth Q&A pairs from graph + Helm data.
+ * Questions target what the docs actually cover: subsystems, charts,
+ * dependencies, interactions, contracts, resource types.
+ * 
+ * Usage: node eval-generator.js <snapshot.json> <repo-root> [output.json] [--dry-run]
+ */
+
+const fs = require('fs');
+const path = require('path');
+const GraphStore = require('./graph.js');
+const { discoverCharts, chartsToGraph } = require('./extract-helm.js');
+const { buildSubsystems } = require('./subsystem.js');
+
+// Directory names passed to discoverCharts() as the ignore set for Helm chart discovery.
+const HELM_IGNORE = new Set([
+  // Dependency, VCS and tool-generated directories
+  'node_modules',
+  '.git',
+  'venv',
+  '__pycache__',
+  '.terraform',
+  // BMad and agent/editor tooling directories
+  '_bmad',
+  '_bmad-output',
+  '.codex',
+  '.claude',
+  '.cursor',
+  '.gemini',
+  '.kiro',
+  '.agents'
+]);
+
+/**
+ * Build a bank of ground-truth evaluation questions for one repository.
+ *
+ * Loads the graph snapshot, discovers Helm charts under `srcRoot`, merges the
+ * chart entities and relationships into the graph, aggregates subsystems, and
+ * then emits questions whose answers are computed from that data.
+ *
+ * Each question stores the same text in `expected_answer` and `answer`, except
+ * the two `explanation` questions, where `answer` is a shortened form.
+ *
+ * @param {string} snapshotPath - Snapshot file handed to GraphStore.loadSnapshot().
+ * @param {string} srcRoot - Repository root that is scanned for Helm charts.
+ * @returns {Array<Object>} Question records with sequential ids (`q-001`, `q-002`, ...).
+ */
+function generateQuestions(snapshotPath, srcRoot) {
+  const questions = [];
+  let qIdx = 1;
+  const qid = () => `q-${String(qIdx++).padStart(3, '0')}`;
+
+  // Load graph
+  const graph = GraphStore.loadSnapshot(snapshotPath);
+
+  // Load Helm data
+  const charts = discoverCharts(srcRoot, HELM_IGNORE);
+
+  // Merge Helm into graph (same as sysdoc.js)
+  if (charts.length > 0) {
+    const helmGraph = chartsToGraph(charts, srcRoot);
+    for (const e of helmGraph.entities) {
+      // Helm entities are indexed under the Chart.yaml of their chart directory.
+      const fakePath = e.dir ? path.join(srcRoot, e.dir, 'Chart.yaml') : path.join(srcRoot, 'Chart.yaml');
+      graph.nodes.set(e.id, { ...e, type: e.type || 'Module', _file: fakePath });
+      if (!graph.fileIndex.has(fakePath)) graph.fileIndex.set(fakePath, new Set());
+      graph.fileIndex.get(fakePath).add(e.id);
+    }
+    // NOTE(review): edges are appended straight onto graph.edges; confirm this does
+    // not bypass the edge de-duplication that GraphStore maintains.
+    for (const r of helmGraph.relationships) {
+      graph.edges.push(r);
+    }
+  }
+
+  // Build subsystems (srcDir is always passed with a trailing slash)
+  const root = srcRoot || '';
+  const subs = buildSubsystems(graph, {
+    srcDir: root.endsWith('/') ? root : `${root}/`,
+    minTraffic: 3,
+    crossCuttingThreshold: 0.6
+  });
+
+  // ─── Category 1: Structural ───
+
+  // Total chart count
+  if (charts.length > 0) {
+    const answer = String(charts.length);
+    questions.push({
+      id: qid(), category: 'structural', difficulty: 'easy',
+      audience: ['human', 'machine'],
+      question: 'How many Helm charts are defined across this repository?',
+      expected_answer: answer,
+      answer,
+      answerType: 'exact',
+      source: 'Chart.yaml discovery',
+      source_entity: 'all charts'
+    });
+  }
+
+  // Subsystem count
+  if (subs.subsystems.length > 0) {
+    const answer = String(subs.subsystems.length);
+    questions.push({
+      id: qid(), category: 'structural', difficulty: 'easy',
+      audience: ['human', 'machine'],
+      question: 'How many subsystems does this codebase contain?',
+      expected_answer: answer,
+      answer,
+      answerType: 'exact',
+      source: 'subsystem aggregation',
+      source_entity: 'all subsystems'
+    });
+  }
+
+  // Top charts by K8s resources (filter() already returns a fresh array, so sorting is safe)
+  const resourceCount = c => c.templates.resources.length;
+  const topByResources = charts
+    .filter(c => c.templates && c.templates.resources)
+    .sort((a, b) => resourceCount(b) - resourceCount(a))
+    .slice(0, 5);
+  if (topByResources.length > 0 && resourceCount(topByResources[0]) > 0) {
+    const answer = topByResources.map(c => `${c.chart.name} (${c.dir}): ${resourceCount(c)}`).join('\n');
+    questions.push({
+      id: qid(), category: 'structural', difficulty: 'medium',
+      audience: ['human', 'machine'],
+      question: 'Which 5 Helm charts produce the most Kubernetes resources? List them with their resource counts.',
+      expected_answer: answer,
+      answer,
+      answerType: 'ranked-list',
+      source: 'template scanning',
+      source_entity: topByResources.map(c => c.chart.name).join(', ')
+    });
+  }
+
+  // Largest subsystem
+  const sortedSubs = [...subs.subsystems].sort((a, b) => b.files.length - a.files.length);
+  if (sortedSubs.length > 0) {
+    const largest = sortedSubs[0];
+    const answer = `${largest.name}: ${largest.files.length} files`;
+    questions.push({
+      id: qid(), category: 'structural', difficulty: 'easy',
+      audience: ['human'],
+      question: 'Which subsystem contains the most files, and how many?',
+      expected_answer: answer,
+      answer,
+      answerType: 'exact',
+      source: 'subsystem aggregation',
+      source_entity: largest.name
+    });
+  }
+
+  // ─── Category 2: Dependencies ───
+
+  const chartsWithDeps = charts.filter(c => c.chart.dependencies && c.chart.dependencies.length > 0);
+  for (const c of chartsWithDeps.slice(0, 5)) {
+    const answer = c.chart.dependencies
+      .map(d => `${d.name} (${d.version})${d.condition ? ` [condition: ${d.condition}]` : ''}`)
+      .join('\n');
+    questions.push({
+      id: qid(), category: 'dependency', difficulty: 'medium',
+      audience: ['human', 'machine'],
+      question: `What are the dependencies of the "${c.chart.name}" chart (at ${c.dir})?`,
+      expected_answer: answer,
+      answer,
+      answerType: 'list',
+      source: `${c.dir}/Chart.yaml`,
+      source_entity: c.chart.name
+    });
+  }
+
+  // ─── Category 3: Contracts (shared secrets/configs) ───
+
+  // Null-prototype dictionary: keys come from repo content and must not collide
+  // with Object.prototype members such as "constructor".
+  const configUsers = Object.create(null);
+  for (const c of charts) {
+    for (const i of (c.interactions || [])) {
+      if (i.type !== 'config-ref') continue;
+      if (!configUsers[i.target]) configUsers[i.target] = [];
+      // Record each chart once per target; otherwise one chart referencing the
+      // same secret twice would be counted as a "shared" secret below.
+      if (!configUsers[i.target].includes(c.chart.name)) configUsers[i.target].push(c.chart.name);
+    }
+  }
+  const sharedSecrets = Object.entries(configUsers).filter(([, users]) => users.length > 1);
+  if (sharedSecrets.length > 0) {
+    const answer = sharedSecrets.map(([name, users]) => `${name}: ${users.join(', ')}`).join('\n');
+    questions.push({
+      id: qid(), category: 'contract', difficulty: 'medium',
+      audience: ['human', 'machine'],
+      question: 'Which secrets or ConfigMaps are shared across multiple Helm charts? List each with the charts that use it.',
+      expected_answer: answer,
+      answer,
+      answerType: 'list',
+      source: 'template interaction scanning',
+      source_entity: sharedSecrets.map(([n]) => n).join(', ')
+    });
+  }
+
+  // ─── Category 4: Configuration Surface ───
+
+  // Top charts by config surface (most values.yaml keys); charts with 5 or fewer keys are skipped
+  const chartsByValues = charts
+    .filter(c => c.values && c.values.keys && c.values.keys.length > 5)
+    .sort((a, b) => b.values.keys.length - a.values.keys.length);
+
+  for (const chart of chartsByValues.slice(0, 5)) {
+    const answer = String(chart.values.keys.length);
+    questions.push({
+      id: qid(), category: 'configuration', difficulty: 'easy',
+      audience: ['machine'],
+      question: `How many top-level configuration keys does the ${chart.chart.name} chart (at ${chart.dir}) expose in its values.yaml?`,
+      expected_answer: answer,
+      answer,
+      answerType: 'exact',
+      source: `${chart.dir}/values.yaml`,
+      source_entity: chart.chart.name
+    });
+  }
+
+  // ─── Category 5: Interactions (service-to-service) ───
+
+  const svcRefs = [];
+  for (const c of charts) {
+    for (const i of (c.interactions || [])) {
+      if (i.type === 'k8s-service') {
+        svcRefs.push({ from: c.chart.name, dir: c.dir, to: i.target });
+      }
+    }
+  }
+  if (svcRefs.length > 0) {
+    const answer = svcRefs.map(r => `${r.from} (${r.dir}) → ${r.to}`).join('\n');
+    questions.push({
+      id: qid(), category: 'interaction', difficulty: 'medium',
+      audience: ['human', 'machine'],
+      question: 'Which Helm charts reference Kubernetes services from other components? List each chart and the service it calls.',
+      expected_answer: answer,
+      answer,
+      answerType: 'list',
+      source: 'template interaction scanning',
+      source_entity: svcRefs.map(r => r.from).join(', ')
+    });
+  }
+
+  // Shared ports (a target of '0' is ignored)
+  const portMap = Object.create(null);
+  for (const c of charts) {
+    for (const i of (c.interactions || [])) {
+      if (i.type !== 'port' || i.target === '0') continue;
+      if (!portMap[i.target]) portMap[i.target] = [];
+      if (!portMap[i.target].includes(c.chart.name)) portMap[i.target].push(c.chart.name);
+    }
+  }
+  const sharedPorts = Object.entries(portMap)
+    .filter(([, users]) => users.length > 1)
+    .sort((a, b) => Number(a[0]) - Number(b[0]));
+  if (sharedPorts.length > 0) {
+    const answer = sharedPorts.map(([port, users]) => `Port ${port}: ${users.join(', ')}`).join('\n');
+    questions.push({
+      id: qid(), category: 'interaction', difficulty: 'hard',
+      audience: ['human', 'machine'],
+      question: 'Which network ports are used by multiple Helm charts? List each port and the charts that expose it.',
+      expected_answer: answer,
+      answer,
+      answerType: 'list',
+      source: 'template port scanning',
+      source_entity: 'shared ports'
+    });
+  }
+
+  // ─── Category 6: Resource Types ───
+
+  const kindCounts = Object.create(null);
+  for (const c of charts) {
+    for (const r of (c.templates?.resources || [])) {
+      kindCounts[r.kind] = (kindCounts[r.kind] || 0) + 1;
+    }
+  }
+  const topKinds = Object.entries(kindCounts).sort((a, b) => b[1] - a[1]).slice(0, 10);
+  if (topKinds.length > 0) {
+    const answer = topKinds.map(([kind, count]) => `${kind}: ${count}`).join('\n');
+    questions.push({
+      id: qid(), category: 'resource', difficulty: 'medium',
+      audience: ['human', 'machine'],
+      question: 'What are the most common Kubernetes resource types generated across all Helm charts?',
+      expected_answer: answer,
+      answer,
+      answerType: 'ranked-list',
+      source: 'template resource scanning',
+      source_entity: 'all charts'
+    });
+  }
+
+  // Per-chart resource breakdown for top 3 charts by resource count
+  for (const chart of topByResources.slice(0, 3)) {
+    const kinds = Object.create(null);
+    for (const r of chart.templates.resources) {
+      kinds[r.kind] = (kinds[r.kind] || 0) + 1;
+    }
+    const answer = Object.entries(kinds).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k}: ${v}`).join('\n');
+    questions.push({
+      id: qid(), category: 'resource', difficulty: 'medium',
+      audience: ['human', 'machine'],
+      question: `What Kubernetes resource types does the ${chart.chart.name} chart generate? List each type and count.`,
+      expected_answer: answer,
+      answer,
+      answerType: 'list',
+      source: `${chart.dir}/templates/`,
+      source_entity: chart.chart.name
+    });
+  }
+
+  // ─── Category 7: Cross-Subsystem ───
+
+  if (subs.crossCutting && subs.crossCutting.length > 0) {
+    const answer = subs.crossCutting.join(', ');
+    questions.push({
+      id: qid(), category: 'cross-subsystem', difficulty: 'easy',
+      audience: ['human'],
+      question: 'Which subsystems are identified as cross-cutting concerns?',
+      expected_answer: answer,
+      answer,
+      answerType: 'list',
+      source: 'subsystem aggregation',
+      source_entity: answer
+    });
+  }
+
+  // Dependency matrix questions (matrix keys have the form "source→target")
+  if (subs.dependencyMatrix) {
+    const weight = v => v.calls + v.imports;
+    const heavyDeps = Object.entries(subs.dependencyMatrix)
+      .filter(([, v]) => weight(v) > 3)
+      .sort((a, b) => weight(b[1]) - weight(a[1]))
+      .slice(0, 5);
+    if (heavyDeps.length > 0) {
+      // Ask about the target of the single heaviest edge, using only the top-5 edges.
+      const targetSub = heavyDeps[0][0].split('→')[1];
+      const depsForTarget = heavyDeps.filter(([k]) => k.endsWith(`→${targetSub}`));
+      if (depsForTarget.length > 0) {
+        const answer = depsForTarget.map(([k, v]) => `${k.split('→')[0]}: ${weight(v)}`).join('\n');
+        questions.push({
+          id: qid(), category: 'cross-subsystem', difficulty: 'hard',
+          audience: ['human', 'machine'],
+          question: `Which subsystems depend on ${targetSub}, and how heavily (by call+import count)?`,
+          expected_answer: answer,
+          answer,
+          answerType: 'list',
+          source: 'dependency matrix',
+          source_entity: targetSub
+        });
+      }
+    }
+  }
+
+  // ─── Category 8: Architectural ───
+
+  // Empty subsystems (Helm-only)
+  const emptySubs = subs.subsystems.filter(s => s.entities.functions === 0 && s.entities.modules === 0);
+  if (emptySubs.length > 0) {
+    const emptyNames = emptySubs.map(s => s.name).join(', ');
+    questions.push({
+      id: qid(), category: 'architectural', difficulty: 'hard',
+      audience: ['human'],
+      question: `The following subsystems have 0 detected functions and 0 modules: ${emptyNames}. Why might this be the case, and what do they actually contain?`,
+      expected_answer: 'These subsystems primarily contain Helm charts with Go-templated YAML, Terraform HCL, and/or Crossplane compositions. The code analysis pipeline detects functions/modules from Python, Go, TypeScript, and shell scripts — but Helm templates use Go template syntax which does not produce traditional function/module entities. Their content is captured through the Helm chart extraction phase instead.',
+      answer: 'These subsystems primarily contain Helm charts, Terraform, or Crossplane compositions rather than traditional code.',
+      answerType: 'explanation',
+      source: 'architectural analysis',
+      source_entity: emptyNames
+    });
+  }
+
+  // Chart version for top chart
+  if (charts.length > 0) {
+    const topChart = topByResources[0] || charts[0];
+    // appVersion may be absent; avoid emitting the literal text "undefined" as ground truth.
+    const answer = `version: ${topChart.chart.version}, appVersion: ${topChart.chart.appVersion ?? 'not set'}`;
+    questions.push({
+      id: qid(), category: 'architectural', difficulty: 'easy',
+      audience: ['human', 'machine'],
+      question: `What is the current version and appVersion of the ${topChart.chart.name} Helm chart?`,
+      expected_answer: answer,
+      answer,
+      answerType: 'exact',
+      source: `${topChart.dir}/Chart.yaml`,
+      source_entity: topChart.chart.name
+    });
+  }
+
+  // ─── Category 9: Scenario-Based ───
+
+  // Secret rotation scenario (uses the first shared secret found)
+  if (sharedSecrets.length > 0) {
+    const [secretName, secretUsers] = sharedSecrets[0];
+    const answer = secretUsers.join(', ');
+    questions.push({
+      id: qid(), category: 'scenario', difficulty: 'hard',
+      audience: ['human'],
+      question: `If you need to rotate the "${secretName}" shared secret, which Helm charts would be affected and need redeployment?`,
+      expected_answer: answer,
+      answer,
+      answerType: 'list',
+      source: 'template interaction scanning',
+      source_entity: secretName
+    });
+  }
+
+  // Deployment scenario for top chart
+  if (topByResources.length > 0) {
+    const chart = topByResources[0];
+    const valueKeys = chart.values?.keys || [];
+    const keyNames = valueKeys.slice(0, 10).map(k => k.name).join(', ');
+    const moreKeys = valueKeys.length > 10 ? ` (+${valueKeys.length - 10} more)` : '';
+    const interactions = (chart.interactions || []).map(i => `${i.type}: ${i.target}`).join(', ') || 'none detected';
+    const headline = `Chart: ${chart.chart.name} (${chart.dir}), Version: ${chart.chart.version}`;
+    questions.push({
+      id: qid(), category: 'scenario', difficulty: 'hard',
+      audience: ['human'],
+      question: `A new engineer needs to deploy the ${chart.chart.name} application. What charts, configuration values, and external dependencies should they understand first?`,
+      expected_answer: [
+        headline,
+        `Key values: ${keyNames}${moreKeys}`,
+        `Resources generated: ${resourceCount(chart)} K8s resources`,
+        `Interactions: ${interactions}`
+      ].join('\n'),
+      answer: headline,
+      answerType: 'explanation',
+      source: `${chart.dir}`,
+      source_entity: chart.chart.name
+    });
+  }
+
+  return questions;
+}
+
+module.exports = { generateQuestions };
+
+// CLI entry point: runs only when this file is executed directly, not when it is required.
+if (require.main === module) {
+  const argv = process.argv.slice(2);
+  // Arguments starting with '-' are flags; everything else is positional.
+  const dryRun = argv.includes('--dry-run');
+  const args = argv.filter(a => !a.startsWith('-'));
+  const snapshotPath = args[0];
+  const srcRoot = args[1];
+  const outFile = args[2] || null;
+
+  if (!snapshotPath || !srcRoot) {
+    console.error('Usage: node eval-generator.js <snapshot.json> <repo-root> [output.json] [--dry-run]');
+    process.exit(1);
+  }
+
+  if (!fs.existsSync(snapshotPath)) {
+    console.error(`Snapshot not found: ${snapshotPath}`);
+    process.exit(1);
+  }
+
+  const questions = generateQuestions(snapshotPath, srcRoot);
+
+  // Summary: question count per category, largest category first
+  const cats = {};
+  for (const q of questions) {
+    cats[q.category] = (cats[q.category] || 0) + 1;
+  }
+
+  console.log(`Generated ${questions.length} questions:`);
+  for (const [cat, count] of Object.entries(cats).sort((a, b) => b[1] - a[1])) {
+    console.log(`  ${cat}: ${count}`);
+  }
+  const countOf = level => questions.filter(q => q.difficulty === level).length;
+  console.log(`Difficulty: easy=${countOf('easy')}, medium=${countOf('medium')}, hard=${countOf('hard')}`);
+
+  const result = { generated: new Date().toISOString(), count: questions.length, questions };
+  const json = JSON.stringify(result, null, 2);
+
+  if (outFile && !dryRun) {
+    fs.writeFileSync(outFile, json);
+    console.log(`\nWritten to ${outFile}`);
+  } else if (outFile) {
+    // --dry-run: report the summary above but leave the output file untouched.
+    console.log(`\nDry run: ${outFile} not written`);
+  } else {
+    console.log(json);
+  }
+}