Phase 9b: structural documentation improvements\n\n- sysdoc.js: Added Summary Statistics, Top Charts, and K8s Resource Types to architecture doc\n- Addresses ratchet failures where system-wide rollups were missing from generated prose\n- Eval v2 shows minor improvement, though RAG context window still limits wide scatter-gather queries

2026-03-09 23:40:07 +00:00
parent b99341e8bc
commit 0cc4abcb0f
5 changed files with 1452 additions and 2 deletions
--- a/ratchet.js
+++ b/ratchet.js
@@ -0,0 +1,396 @@
+/**
+ * Quality Ratchet: Automated Doc Improvement Loop
+ * 
+ * BMad-inspired workflow:
+ *   1. EVAL: Run question bank against docs, get scores
+ *   2. DIAGNOSE: Identify weakest categories and specific failures
+ *   3. FIX: LLM proposes concrete code changes to the doc generator
+ *   4. APPLY: Apply fixes, regenerate docs
+ *   5. RE-EVAL: Score again
+ *   6. RATCHET: If passing threshold, add harder questions
+ *   7. REPEAT until target score or max iterations
+ * 
+ * Usage: node ratchet.js <foxtrot-root> <snapshot> <docs-dir> [--target=80] [--max-iter=5]
+ */
+
+const fs = require('fs');
+const path = require('path');
+const { execSync } = require('child_process');
+const { callLLM } = require('./prose.js');
+const { runEval } = require('./eval.js');
+const { generateQuestions } = require('./eval-questions.js');
+
+const PROJECT_DIR = __dirname; // Directory of this script; generator sources, question files and the history file are resolved against it.
+
+/** Parse CLI arguments: three positional paths plus optional --target= / --max-iter= flags in any position. */
+function parseArgs() {
+  const args = process.argv.slice(2);
+  const [srcRoot, snapshot, docsDir] = args.filter((a) => !a.startsWith('--')); // flags never count as positionals
+  const opts = {
+    srcRoot, snapshot, docsDir,
+    target: 80, maxIter: 5, // defaults, overridable by the flags below
+    model: process.env.LLM_MODEL || 'claude-haiku-4.5',
+    fixModel: process.env.FIX_MODEL || 'claude-sonnet-4.6',
+  };
+  for (const a of args) {
+    const value = Number(a.split('=')[1]); // NaN when the value is not numeric
+    if (a.startsWith('--target=') && !Number.isNaN(value)) opts.target = value; // a bad value keeps the default
+    if (a.startsWith('--max-iter=') && !Number.isNaN(value)) opts.maxIter = value;
+  }
+  return opts;
+}
+
+/**
+ * Diagnose failures and produce a fix plan from the worst results (composite below 60, at most 10 of them).
+ * @param {object} report - Eval report; reads results, overallScore, byCategory and notFoundRate.
+ * @param {object} opts - Run options; only opts.fixModel is read here.
+ * @returns {Promise<object>} Object with `diagnosis` and a `fixes` array (empty when nothing usable came back).
+ */
+async function diagnose(report, opts) {
+  const failures = report.results
+    .filter(r => r.composite < 60)
+    .sort((a, b) => a.composite - b.composite)
+    .slice(0, 10);
+  if (failures.length === 0) return { fixes: [], diagnosis: 'All questions above 60%. No critical failures.' };
+
+  const clip = (text) => String(text ?? '').substring(0, 300); // a missing field renders as empty text, not a crash
+  const failureDetails = failures.map(f =>
+    `[${f.id}] Score: ${f.composite}% (A:${f.score.accuracy} C:${f.score.completeness} P:${f.score.precision})
+  Q: ${f.question}
+  Expected: ${clip(f.groundTruth)}
+  Got: ${clip(f.llmAnswer)}
+  Judge notes: ${f.score.notes}
+  NOT_FOUND: ${f.notFound}`
+  ).join('\n\n');
+
+  const prompt = `You are a documentation pipeline engineer. Analyze these evaluation failures and propose CONCRETE fixes to the doc generator code.
+
+## Current Pipeline
+The doc generator (sysdoc.js) produces Markdown documentation from:
+1. Code analysis graph (subsystems, functions, modules)
+2. Helm chart extraction (Chart.yaml, values.yaml, templates)
+3. LLM prose generation
+
+## Failures (sorted worst-first)
+${failureDetails}
+
+## Score Summary
+Overall: ${report.overallScore}%
+By category: ${JSON.stringify(report.byCategory)}
+NOT_FOUND rate: ${report.notFoundRate}
+
+## Key Source Files
+sysdoc.js generates the docs. extract-helm.js extracts Helm data.
+
+## Rules
+1. Each fix must be a SPECIFIC change to sysdoc.js or extract-helm.js
+2. Focus on information that IS extracted but NOT surfaced in the docs
+3. If data is missing from extraction, propose extraction improvements
+4. Prioritize fixes that improve multiple questions at once
+5. Do NOT propose changes to the eval harness or questions
+
+Respond in this JSON format:
+{
+  "diagnosis": "2-3 sentence summary of root causes",
+  "fixes": [
+    {
+      "id": "fix-001",
+      "file": "sysdoc.js",
+      "description": "What to change and why",
+      "impact": ["question-id-1", "question-id-2"],
+      "expectedImprovement": "+15%",
+      "code_hint": "Brief description of the code change needed"
+    }
+  ]
+}`;
+
+  const raw = await callLLM(prompt, { model: opts.fixModel, maxTokens: 2048, temperature: 0.2 });
+  try {
+    const jsonMatch = raw.match(/\{[\s\S]*\}/);
+    const parsed = jsonMatch ? JSON.parse(jsonMatch[0]) : null;
+    // Callers read `.fixes.length`, so a missing or malformed list becomes an empty array.
+    if (parsed) return { ...parsed, fixes: Array.isArray(parsed.fixes) ? parsed.fixes : [] };
+  } catch (err) {
+    console.log(`  ⚠ Could not parse diagnosis JSON: ${err.message}`);
+  }
+  return { diagnosis: raw.substring(0, 500), fixes: [] };
+}
+
+/**
+ * Apply a fix by having the LLM generate the actual code change as SEARCH/REPLACE blocks.
+ * @param {object} fix - Proposed fix; reads id, file, description and code_hint.
+ * @param {object} opts - Run options; only opts.fixModel is read here.
+ * @returns {Promise<boolean>} true when at least one block matched and the file was rewritten.
+ */
+async function applyFix(fix, opts) {
+  // fix.file comes from model output: allow only the two generator sources (the ones the revert step restores).
+  if (!['sysdoc.js', 'extract-helm.js'].includes(fix.file)) {
+    console.log(`  ⚠ Refusing to modify ${fix.file} for fix ${fix.id}`);
+    return false;
+  }
+  const filePath = path.join(PROJECT_DIR, fix.file);
+  const source = fs.readFileSync(filePath, 'utf8');
+
+  const prompt = `You are modifying ${fix.file} to improve documentation quality.
+
+CHANGE NEEDED: ${fix.description}
+CODE HINT: ${fix.code_hint}
+
+CURRENT SOURCE (${fix.file}):
+${source}
+
+Generate ONLY the specific code change. Output in this format:
+SEARCH:
+\`\`\`
+exact lines to find
+\`\`\`
+REPLACE:
+\`\`\`
+replacement lines
+\`\`\`
+
+If multiple changes are needed, repeat the SEARCH/REPLACE blocks.
+Be precise — the SEARCH text must match the source exactly.`;
+
+  const raw = await callLLM(prompt, { model: opts.fixModel, maxTokens: 4096, temperature: 0.1 });
+  const blockRegex = /SEARCH:\s*```[^\n]*\n([\s\S]*?)```\s*REPLACE:\s*```[^\n]*\n([\s\S]*?)```/g;
+  const changes = Array.from(raw.matchAll(blockRegex), (m) => ({ search: m[1].trimEnd(), replace: m[2].trimEnd() }));
+  if (changes.length === 0) {
+    console.log(`  ⚠ No parseable changes from LLM for fix ${fix.id}`);
+    return false;
+  }
+
+  let modified = source;
+  let applied = 0;
+  for (const change of changes) {
+    if (!change.search || !modified.includes(change.search)) { // an empty SEARCH would match at position 0
+      console.log(`  ⚠ SEARCH block not found in ${fix.file} for fix ${fix.id}`);
+      continue;
+    }
+    // Function replacer: a plain string would have `$&`, `$1`, `$$` in the generated code expanded as patterns.
+    modified = modified.replace(change.search, () => change.replace);
+    applied++;
+  }
+  if (applied === 0) return false;
+  fs.writeFileSync(filePath, modified);
+  console.log(`  ✓ Applied ${applied}/${changes.length} changes to ${fix.file}`);
+  return true;
+}
+
+/** Regenerate docs: delete opts.docsDir, then rerun sysdoc.js without a shell so paths are passed literally. */
+function regenerateDocs(opts) {
+  console.log('  Regenerating docs...');
+  fs.rmSync(path.resolve(PROJECT_DIR, opts.docsDir), { recursive: true, force: true });
+  const args = ['sysdoc.js', opts.snapshot, opts.srcRoot, opts.docsDir, '--prose'];
+  require('child_process').execFileSync('node', args, { cwd: PROJECT_DIR, env: { ...process.env, LLM_MODEL: opts.model }, timeout: 600000, stdio: 'pipe' });
+}
+
+/**
+ * Generate harder questions based on current performance: every category averaging above 90% gets extra
+ * chart-derived "hard" questions. Ids already in the bank are skipped, so a repeat call adds no duplicates.
+ * @param {object[]} currentQuestions - Existing question bank; not mutated.
+ * @param {object} report - Eval report; only the `avg` of each report.byCategory entry is read.
+ * @param {string} srcRoot - Directory handed to discoverCharts to locate Helm charts.
+ * @param {string} snapshotPath - Not used by this function.
+ * @returns {object[]} currentQuestions itself when no category qualifies, otherwise a new array.
+ */
+function ratchetQuestions(currentQuestions, report, srcRoot, snapshotPath) {
+  const easyCategories = Object.entries(report.byCategory)
+    .filter(([, s]) => Number(s.avg) > 90)
+    .map(([cat]) => cat);
+  if (easyCategories.length === 0) return currentQuestions;
+  console.log(`  Ratcheting: categories scoring >90%: ${easyCategories.join(', ')}`);
+  const { discoverCharts } = require('./extract-helm.js');
+  const charts = discoverCharts(srcRoot, new Set(['node_modules', '.git', 'venv', '__pycache__', '.terraform', '_bmad', '_bmad-output', '.codex', '.claude']));
+  const candidates = [];
+
+  if (easyCategories.includes('configuration')) { // nested value questions (harder than top-level)
+    for (const chart of charts.filter(c => c.values.keys.length > 20).slice(0, 3)) {
+      const objectKeys = chart.values.keys.filter(k => k.type === 'object');
+      if (objectKeys.length === 0) continue;
+      candidates.push({
+        id: `ratchet-config-${chart.chart.name}-nested`,
+        category: 'configuration',
+        difficulty: 'hard',
+        audience: ['human', 'machine'],
+        question: `In the ${chart.chart.name} chart, which configuration keys are complex objects (not simple values)? List them.`,
+        answer: objectKeys.map(k => k.name).join(', '),
+        answerType: 'list',
+        source: `${chart.dir}/values.yaml`,
+      });
+    }
+  }
+
+  if (easyCategories.includes('resource')) { // cross-chart resource comparison questions
+    const deployers = charts.filter(c => c.templates.resources.some(r => r.kind === 'Deployment'));
+    if (deployers.length > 0) {
+      candidates.push({
+        id: 'ratchet-resource-deployments',
+        category: 'resource',
+        difficulty: 'hard',
+        audience: ['human', 'machine'],
+        question: 'Which Helm charts create Kubernetes Deployments? List all of them.',
+        answer: deployers.map(c => `${c.chart.name} (${c.dir})`).join('\n'),
+        answerType: 'list',
+        source: 'template scanning',
+      });
+    }
+  }
+
+  if (easyCategories.includes('dependency')) { // transitive dependency questions
+    const withDeps = charts.filter(c => c.chart.dependencies.length > 2);
+    for (const chart of withDeps.slice(0, 2)) {
+      candidates.push({
+        id: `ratchet-dep-${chart.chart.name}-conditions`,
+        category: 'dependency',
+        difficulty: 'hard',
+        audience: ['machine'],
+        question: `What are the enable conditions for each dependency of the "${chart.chart.name}" chart?`,
+        answer: chart.chart.dependencies.map(d => `${d.name}: ${d.condition || 'always enabled'}`).join('\n'),
+        answerType: 'list',
+        source: `${chart.dir}/Chart.yaml`,
+      });
+    }
+  }
+
+  const existingIds = new Set(currentQuestions.map(q => q.id));
+  const fresh = candidates.filter(q => !existingIds.has(q.id)); // ids are deterministic: drop ones added by an earlier call
+  if (fresh.length > 0) console.log(`  Added ${fresh.length} harder questions`);
+  return [...currentQuestions, ...fresh];
+}
+
+/**
+ * Main ratchet loop: eval, then either ratchet the questions (target met) or diagnose, fix and regenerate; up to opts.maxIter rounds.
+ * NOTE(review): fixes applied in the last iteration are never re-evaluated, so the reported final score is the
+ * score measured before them.
+ * @param {object} opts - Options as built by parseArgs (srcRoot, snapshot, docsDir, target, maxIter, model, fixModel).
+ * @returns {Promise<object>} Resolves to { history, finalScore, passed }; also writes ratchet-history.json into PROJECT_DIR.
+ */
+async function ratchetLoop(opts) {
+  console.log('═'.repeat(60));
+  console.log('QUALITY RATCHET — BMad Improvement Loop');
+  console.log('═'.repeat(60));
+  console.log(`Target: ${opts.target}%  Max iterations: ${opts.maxIter}`);
+  console.log(`Eval model: ${opts.model}  Fix model: ${opts.fixModel}`);
+  console.log();
+  // Load initial questions
+  const questionsPath = path.join(PROJECT_DIR, 'eval-questions.json');
+  let questions = JSON.parse(fs.readFileSync(questionsPath, 'utf8')).questions;
+  const history = [];
+
+  for (let iter = 1; iter <= opts.maxIter; iter++) {
+    console.log(`\n${'─'.repeat(60)}`);
+    console.log(`ITERATION ${iter}/${opts.maxIter}`);
+    console.log('─'.repeat(60));
+
+    // Write current questions
+    const iterQuestionsPath = path.join(PROJECT_DIR, `eval-questions-iter${iter}.json`);
+    fs.writeFileSync(iterQuestionsPath, JSON.stringify({ generated: new Date().toISOString(), count: questions.length, questions }, null, 2));
+
+    // Step 1: Eval
+    console.log('\n📊 EVAL');
+    const report = await runEval(opts.docsDir, iterQuestionsPath, { model: opts.model });
+    console.log(`  Score: ${report.overallScore}% (A:${report.avgAccuracy} C:${report.avgCompleteness} P:${report.avgPrecision})`);
+    console.log(`  NOT_FOUND: ${report.notFoundRate}`);
+
+    history.push({
+      iteration: iter,
+      score: report.overallScore,
+      questions: questions.length,
+      notFoundRate: report.notFoundRate,
+    });
+
+    // Target hit: ratchet (make the bank harder) or stop when nothing harder can be added
+    if (report.overallScore >= opts.target) {
+      console.log(`\n🎯 TARGET REACHED: ${report.overallScore}% >= ${opts.target}%`);
+      const harderQuestions = ratchetQuestions(questions, report, opts.srcRoot, opts.snapshot);
+      if (harderQuestions.length > questions.length) {
+        console.log(`  Ratcheting up: ${questions.length} → ${harderQuestions.length} questions`);
+        questions = harderQuestions;
+        continue; // Don't break — run another iteration with harder questions
+      }
+      console.log('  No harder questions to add. Pipeline is solid.');
+      break;
+    }
+
+    // Step 2: Diagnose
+    console.log('\n🔍 DIAGNOSE');
+    const diagnosis = await diagnose(report, opts);
+    // The plan is model-generated JSON: treat a missing or non-array `fixes` as "nothing to do".
+    const fixes = Array.isArray(diagnosis.fixes) ? diagnosis.fixes : [];
+    console.log(`  ${diagnosis.diagnosis}`);
+    console.log(`  Proposed fixes: ${fixes.length}`);
+
+    if (fixes.length === 0) {
+      console.log('  No actionable fixes proposed. Stopping.');
+      break;
+    }
+
+    // Step 3: Apply fixes
+    console.log('\n🔧 FIX');
+    let anyApplied = false;
+    for (const fix of fixes.slice(0, 3)) { // Max 3 fixes per iteration
+      console.log(`  [${fix.id}] ${fix.description}`);
+      const impact = [].concat(fix.impact ?? []).join(', '); // `impact` may be absent or a bare string
+      console.log(`    Impact: ${impact} | Expected: ${fix.expectedImprovement}`);
+      const applied = await applyFix(fix, opts);
+      if (applied) anyApplied = true;
+    }
+
+    if (!anyApplied) {
+      console.log('  No fixes could be applied. Stopping.');
+      break;
+    }
+
+    // Step 4: Regenerate docs
+    console.log('\n📝 REGENERATE');
+    try {
+      regenerateDocs(opts);
+    } catch (err) {
+      console.log(`  ⚠ Doc generation failed: ${err.message}`);
+      console.log('  Reverting changes...');
+      execSync(`git checkout -- sysdoc.js extract-helm.js`, { cwd: PROJECT_DIR });
+      break;
+    }
+  }
+
+  // Final summary
+  console.log('\n' + '═'.repeat(60));
+  console.log('RATCHET SUMMARY');
+  console.log('═'.repeat(60));
+  for (const h of history) {
+    console.log(`  Iter ${h.iteration}: ${h.score}% (${h.questions} questions, NOT_FOUND: ${h.notFoundRate})`);
+  }
+
+  const finalScore = history[history.length - 1]?.score || 0;
+  console.log(`\nFinal: ${finalScore}% ${finalScore >= opts.target ? '✅ PASS' : '❌ BELOW TARGET'}`);
+
+  // Save history
+  const historyPath = path.join(PROJECT_DIR, 'ratchet-history.json');
+  fs.writeFileSync(historyPath, JSON.stringify({
+    timestamp: new Date().toISOString(),
+    target: opts.target,
+    history,
+    finalScore,
+    passed: finalScore >= opts.target,
+  }, null, 2));
+  console.log(`History: ${historyPath}`);
+  return { history, finalScore, passed: finalScore >= opts.target };
+}
+
+// CLI entry point (skipped when this file is require()d); the exit status is 0 only when the target was met.
+if (require.main === module) {
+  const cliOptions = parseArgs();
+  if (!(cliOptions.srcRoot && cliOptions.snapshot && cliOptions.docsDir)) {
+    console.error('Usage: node ratchet.js <foxtrot-root> <snapshot> <docs-dir> [--target=80] [--max-iter=5]');
+    process.exit(1);
+  }
+  ratchetLoop(cliOptions)
+    .then(({ passed }) => process.exit(passed ? 0 : 1))
+    .catch((failure) => {
+      console.error('Ratchet failed:', failure);
+      process.exit(1);
+    });
+}
+
+module.exports = { ratchetLoop };