/**
 * Quality Ratchet: Automated Doc Improvement Loop
 *
 * BMad-inspired workflow:
 * 1. EVAL: Run question bank against docs, get scores
 * 2. DIAGNOSE: Identify weakest categories and specific failures
 * 3. FIX: LLM proposes concrete code changes to the doc generator
 * 4. APPLY: Apply fixes, regenerate docs
 * 5. RE-EVAL: Score again
 * 6. RATCHET: If passing threshold, add harder questions
 * 7. REPEAT until target score or max iterations
 *
 * Usage: node ratchet.js <srcRoot> <snapshot> <docsDir> [--target=80] [--max-iter=5]
 */

const fs = require('fs');
const path = require('path');
const { execSync, execFileSync } = require('child_process');
const { callLLM } = require('./prose.js');
const { runEval } = require('./eval.js');
const { generateQuestions } = require('./eval-questions.js');

const PROJECT_DIR = __dirname;

const USAGE = 'Usage: node ratchet.js <srcRoot> <snapshot> <docsDir> [--target=80] [--max-iter=5]';

// The only files the fix step may rewrite. The diagnose prompt restricts fixes
// to these two files and the failure-revert in ratchetLoop only restores them.
const EDITABLE_FILES = new Set(['sysdoc.js', 'extract-helm.js']);

/**
 * Read the numeric value of a `--name=value` flag.
 *
 * @param {string} arg - The raw CLI argument, e.g. `--target=85`.
 * @param {number} fallback - Value to keep when the flag value is empty or not a finite number.
 * @returns {number} The parsed number, or `fallback`.
 */
function numericFlag(arg, fallback) {
  const rawValue = arg.slice(arg.indexOf('=') + 1);
  if (rawValue === '') return fallback;
  const value = Number(rawValue);
  // NaN would make every `score >= target` comparison false, so never accept it.
  return Number.isFinite(value) ? value : fallback;
}

/**
 * Parse CLI arguments and environment into the options object used by the loop.
 *
 * Positional arguments (in order): srcRoot, snapshot, docsDir. Flags
 * (`--target=`, `--max-iter=`) may appear anywhere. Models come from the
 * LLM_MODEL / FIX_MODEL environment variables.
 *
 * @returns {{srcRoot: string|undefined, snapshot: string|undefined, docsDir: string|undefined,
 *   target: number, maxIter: number, model: string, fixModel: string}}
 */
function parseArgs() {
  const args = process.argv.slice(2);
  // Flags must not be mistaken for positionals when they are given first.
  const positional = args.filter((a) => !a.startsWith('--'));
  const opts = {
    srcRoot: positional[0],
    snapshot: positional[1],
    docsDir: positional[2],
    target: 80,
    maxIter: 5,
    model: process.env.LLM_MODEL || 'claude-haiku-4.5',
    fixModel: process.env.FIX_MODEL || 'claude-sonnet-4.6',
  };
  for (const a of args) {
    if (a.startsWith('--target=')) opts.target = numericFlag(a, opts.target);
    if (a.startsWith('--max-iter=')) opts.maxIter = numericFlag(a, opts.maxIter);
  }
  return opts;
}

/**
 * Truncate a value for inclusion in a prompt, tolerating missing fields.
 *
 * @param {*} value - Anything; null/undefined become an empty string.
 * @param {number} max - Maximum number of characters to keep.
 * @returns {string}
 */
function clip(value, max) {
  return String(value ?? '').substring(0, max);
}

/**
 * Diagnose failures and produce a fix plan.
 *
 * Takes the (up to) ten lowest-scoring questions with composite < 60 from the
 * eval report, asks the fix model for a JSON fix plan, and parses the reply.
 *
 * @param {object} report - Eval report; reads `results`, `overallScore`, `byCategory`, `notFoundRate`.
 * @param {object} opts - Options from parseArgs; reads `fixModel`.
 * @returns {Promise<{diagnosis: string, fixes: object[]}>} `fixes` is always an array.
 */
async function diagnose(report, opts) {
  // Collect the worst-performing questions with full context
  const failures = report.results
    .filter((r) => r.composite < 60)
    .sort((a, b) => a.composite - b.composite)
    .slice(0, 10);

  if (failures.length === 0) {
    return { fixes: [], diagnosis: 'All questions above 60%. No critical failures.' };
  }

  const failureDetails = failures
    .map((f) =>
      [
        `[${f.id}] Score: ${f.composite}% (A:${f.score.accuracy} C:${f.score.completeness} P:${f.score.precision})`,
        `Q: ${f.question}`,
        `Expected: ${clip(f.groundTruth, 300)}`,
        `Got: ${clip(f.llmAnswer, 300)}`,
        `Judge notes: ${f.score.notes}`,
        `NOT_FOUND: ${f.notFound}`,
      ].join('\n'),
    )
    .join('\n\n');

  const prompt = [
    'You are a documentation pipeline engineer. Analyze these evaluation failures and propose CONCRETE fixes to the doc generator code.',
    '',
    '## Current Pipeline',
    'The doc generator (sysdoc.js) produces Markdown documentation from:',
    '1. Code analysis graph (subsystems, functions, modules)',
    '2. Helm chart extraction (Chart.yaml, values.yaml, templates)',
    '3. LLM prose generation',
    '',
    '## Failures (sorted worst-first)',
    failureDetails,
    '',
    '## Score Summary',
    `Overall: ${report.overallScore}%`,
    `By category: ${JSON.stringify(report.byCategory)}`,
    `NOT_FOUND rate: ${report.notFoundRate}`,
    '',
    '## Key Source Files',
    'sysdoc.js generates the docs. extract-helm.js extracts Helm data.',
    '',
    '## Rules',
    '1. Each fix must be a SPECIFIC change to sysdoc.js or extract-helm.js',
    '2. Focus on information that IS extracted but NOT surfaced in the docs',
    '3. If data is missing from extraction, propose extraction improvements',
    '4. Prioritize fixes that improve multiple questions at once',
    '5. Do NOT propose changes to the eval harness or questions',
    '',
    'Respond in this JSON format:',
    '{',
    '  "diagnosis": "2-3 sentence summary of root causes",',
    '  "fixes": [',
    '    {',
    '      "id": "fix-001",',
    '      "file": "sysdoc.js",',
    '      "description": "What to change and why",',
    '      "impact": ["question-id-1", "question-id-2"],',
    '      "expectedImprovement": "+15%",',
    '      "code_hint": "Brief description of the code change needed"',
    '    }',
    '  ]',
    '}',
  ].join('\n');

  const raw = await callLLM(prompt, { model: opts.fixModel, maxTokens: 2048, temperature: 0.2 });

  // The model may wrap the JSON in prose; grab the outermost {...} span.
  const jsonMatch = raw.match(/\{[\s\S]*\}/);
  if (jsonMatch) {
    try {
      const parsed = JSON.parse(jsonMatch[0]);
      // Normalise so callers can rely on `diagnosis` being a string and `fixes` an array.
      return {
        ...parsed,
        diagnosis: typeof parsed.diagnosis === 'string' ? parsed.diagnosis : raw.substring(0, 500),
        fixes: Array.isArray(parsed.fixes) ? parsed.fixes : [],
      };
    } catch (err) {
      console.log(` ⚠ Could not parse diagnosis JSON: ${err.message}`);
    }
  }
  // Best effort: surface the raw reply as the diagnosis with no actionable fixes.
  return { diagnosis: raw.substring(0, 500), fixes: [] };
}

/**
 * Apply a fix by having the LLM generate the actual code change.
 *
 * Asks the fix model for SEARCH/REPLACE blocks against the target file,
 * applies every block whose SEARCH text is found verbatim, syntax-checks the
 * result with `node --check`, and restores the original content if the check
 * fails.
 *
 * @param {object} fix - One entry of the fix plan; reads `id`, `file`, `description`, `code_hint`.
 * @param {object} opts - Options from parseArgs; reads `fixModel`.
 * @returns {Promise<boolean>} true when at least one change was written and passed the syntax check.
 */
async function applyFix(fix, opts) {
  // `fix.file` is LLM output: never let it point outside the generator files.
  if (!EDITABLE_FILES.has(fix.file)) {
    console.log(` ⚠ Fix ${fix.id} targets ${fix.file}, which is not an editable generator file; skipping`);
    return false;
  }

  const filePath = path.join(PROJECT_DIR, fix.file);
  const source = fs.readFileSync(filePath, 'utf8');

  const prompt = [
    `You are modifying ${fix.file} to improve documentation quality.`,
    '',
    `CHANGE NEEDED: ${fix.description}`,
    `CODE HINT: ${fix.code_hint}`,
    '',
    `CURRENT SOURCE (${fix.file}):`,
    source,
    '',
    'Generate ONLY the specific code change. Output in this format:',
    '',
    'SEARCH:',
    '```',
    'exact lines to find',
    '```',
    '',
    'REPLACE:',
    '```',
    'replacement lines',
    '```',
    '',
    'If multiple changes are needed, repeat the SEARCH/REPLACE blocks.',
    'Be precise — the SEARCH text must match the source exactly.',
  ].join('\n');

  const raw = await callLLM(prompt, { model: opts.fixModel, maxTokens: 4096, temperature: 0.1 });

  // Parse SEARCH/REPLACE blocks
  const blockRegex = /SEARCH:\s*```[^\n]*\n([\s\S]*?)```\s*REPLACE:\s*```[^\n]*\n([\s\S]*?)```/g;
  const changes = [...raw.matchAll(blockRegex)].map((m) => ({
    search: m[1].trimEnd(),
    replace: m[2].trimEnd(),
  }));

  if (changes.length === 0) {
    console.log(` ⚠ No parseable changes from LLM for fix ${fix.id}`);
    return false;
  }

  // Apply changes
  let modified = source;
  let applied = 0;
  for (const change of changes) {
    // An empty SEARCH would "match" everywhere and prepend text to the file.
    if (change.search !== '' && modified.includes(change.search)) {
      // Function replacer: a string replacement would expand `$&`, `$$`, `$'`
      // etc., corrupting generated code that happens to contain them.
      modified = modified.replace(change.search, () => change.replace);
      applied++;
    } else {
      console.log(` ⚠ SEARCH block not found in ${fix.file} for fix ${fix.id}`);
    }
  }

  if (applied === 0) return false;

  fs.writeFileSync(filePath, modified);
  try {
    // Argument array (no shell) so paths containing spaces or metacharacters are safe.
    execFileSync(process.execPath, ['--check', filePath], { encoding: 'utf8' });
  } catch (e) {
    console.error(`Syntax check failed for ${filePath}, reverting`);
    fs.writeFileSync(filePath, source);
    return false;
  }
  console.log(` ✓ Applied ${applied}/${changes.length} changes to ${fix.file}`);
  return true;
}

/**
 * Regenerate docs: delete the docs directory and re-run sysdoc.js with prose enabled.
 *
 * Runs synchronously with a 10-minute timeout; throws if the generator exits
 * non-zero or times out.
 *
 * @param {object} opts - Options from parseArgs; reads `docsDir`, `snapshot`, `srcRoot`, `model`.
 */
function regenerateDocs(opts) {
  console.log(' Regenerating docs...');
  // Resolve against PROJECT_DIR because the generator below runs with that cwd.
  fs.rmSync(path.resolve(PROJECT_DIR, opts.docsDir), { recursive: true, force: true });
  execFileSync(
    process.execPath,
    ['sysdoc.js', opts.snapshot, opts.srcRoot, opts.docsDir, '--prose'],
    {
      cwd: PROJECT_DIR,
      timeout: 600000,
      stdio: 'pipe',
      env: { ...process.env, LLM_MODEL: opts.model },
    },
  );
}

/**
 * Generate harder questions based on current performance.
 *
 * For every category whose average score is above 90%, derives additional
 * "hard" questions from the Helm charts discovered under `srcRoot`. Questions
 * whose id is already present are not added again, so repeated ratcheting
 * converges instead of accumulating duplicates.
 *
 * @param {object[]} currentQuestions - The current question bank (not mutated).
 * @param {object} report - Eval report; reads `byCategory[cat].avg`.
 * @param {string} srcRoot - Source tree to scan for Helm charts.
 * @param {string} snapshotPath - Unused; kept for interface compatibility.
 * @returns {object[]} `currentQuestions` itself when no category is above 90%, otherwise a new array.
 */
function ratchetQuestions(currentQuestions, report, srcRoot, snapshotPath) {
  // Find categories scoring > 90% — make them harder
  const easyCategories = Object.entries(report.byCategory)
    .filter(([, s]) => Number(s.avg) > 90)
    .map(([cat]) => cat);

  if (easyCategories.length === 0) return currentQuestions;

  console.log(` Ratcheting: categories scoring >90%: ${easyCategories.join(', ')}`);

  // Add more specific questions for high-scoring categories
  const newQuestions = [...currentQuestions];
  const knownIds = new Set(currentQuestions.map((q) => q.id));
  const addQuestion = (question) => {
    if (knownIds.has(question.id)) return;
    knownIds.add(question.id);
    newQuestions.push(question);
  };

  const { discoverCharts } = require('./extract-helm.js');
  const charts = discoverCharts(
    srcRoot,
    new Set(['node_modules', '.git', 'venv', '__pycache__', '.terraform', '_bmad', '_bmad-output', '.codex', '.claude']),
  );

  if (easyCategories.includes('configuration')) {
    // Add nested value questions (harder than top-level)
    for (const chart of charts.filter((c) => c.values.keys.length > 20).slice(0, 3)) {
      const objectKeys = chart.values.keys.filter((k) => k.type === 'object');
      if (objectKeys.length > 0) {
        addQuestion({
          id: `ratchet-config-${chart.chart.name}-nested`,
          category: 'configuration',
          difficulty: 'hard',
          audience: ['human', 'machine'],
          question: `In the ${chart.chart.name} chart, which configuration keys are complex objects (not simple values)? List them.`,
          answer: objectKeys.map((k) => k.name).join(', '),
          answerType: 'list',
          source: `${chart.dir}/values.yaml`,
        });
      }
    }
  }

  if (easyCategories.includes('resource')) {
    // Add cross-chart resource comparison questions
    const deployers = charts.filter((c) => c.templates.resources.some((r) => r.kind === 'Deployment'));
    if (deployers.length > 0) {
      addQuestion({
        id: 'ratchet-resource-deployments',
        category: 'resource',
        difficulty: 'hard',
        audience: ['human', 'machine'],
        question: 'Which Helm charts create Kubernetes Deployments? List all of them.',
        answer: deployers.map((c) => `${c.chart.name} (${c.dir})`).join('\n'),
        answerType: 'list',
        source: 'template scanning',
      });
    }
  }

  if (easyCategories.includes('dependency')) {
    // Add transitive dependency questions
    const withDeps = charts.filter((c) => c.chart.dependencies.length > 2);
    for (const chart of withDeps.slice(0, 2)) {
      addQuestion({
        id: `ratchet-dep-${chart.chart.name}-conditions`,
        category: 'dependency',
        difficulty: 'hard',
        audience: ['machine'],
        question: `What are the enable conditions for each dependency of the "${chart.chart.name}" chart?`,
        answer: chart.chart.dependencies.map((d) => `${d.name}: ${d.condition || 'always enabled'}`).join('\n'),
        answerType: 'list',
        source: `${chart.dir}/Chart.yaml`,
      });
    }
  }

  const added = newQuestions.length - currentQuestions.length;
  if (added > 0) console.log(` Added ${added} harder questions`);
  return newQuestions;
}

/**
 * Main ratchet loop.
 *
 * Each iteration writes the current question bank to
 * `eval-questions-iter<N>.json`, evaluates the docs, and then either ratchets
 * the questions (target reached) or diagnoses, applies up to three fixes and
 * regenerates the docs. Writes `ratchet-history.json` when done.
 *
 * @param {object} opts - Options from parseArgs.
 * @returns {Promise<{history: object[], finalScore: number, passed: boolean}>}
 */
async function ratchetLoop(opts) {
  console.log('═'.repeat(60));
  console.log('QUALITY RATCHET — BMad Improvement Loop');
  console.log('═'.repeat(60));
  console.log(`Target: ${opts.target}% Max iterations: ${opts.maxIter}`);
  console.log(`Eval model: ${opts.model} Fix model: ${opts.fixModel}`);
  console.log();

  // Load initial questions
  const questionsPath = path.join(PROJECT_DIR, 'eval-questions.json');
  const questionsData = JSON.parse(fs.readFileSync(questionsPath, 'utf8'));
  let questions = questionsData.questions;

  const history = [];

  // NOTE(review): fixes applied during the final iteration are never
  // re-evaluated, so the reported final score predates them.
  for (let iter = 1; iter <= opts.maxIter; iter++) {
    console.log(`\n${'─'.repeat(60)}`);
    console.log(`ITERATION ${iter}/${opts.maxIter}`);
    console.log('─'.repeat(60));

    // Write current questions
    const iterQuestionsPath = path.join(PROJECT_DIR, `eval-questions-iter${iter}.json`);
    fs.writeFileSync(
      iterQuestionsPath,
      JSON.stringify({ generated: new Date().toISOString(), count: questions.length, questions }, null, 2),
    );

    // Step 1: Eval
    console.log('\n📊 EVAL');
    const report = await runEval(opts.docsDir, iterQuestionsPath, { model: opts.model });
    console.log(` Score: ${report.overallScore}% (A:${report.avgAccuracy} C:${report.avgCompleteness} P:${report.avgPrecision})`);
    console.log(` NOT_FOUND: ${report.notFoundRate}`);

    history.push({
      iteration: iter,
      score: report.overallScore,
      questions: questions.length,
      notFoundRate: report.notFoundRate,
    });

    // Check if we've hit the target
    if (report.overallScore >= opts.target) {
      console.log(`\n🎯 TARGET REACHED: ${report.overallScore}% >= ${opts.target}%`);

      // Ratchet: make it harder
      const harderQuestions = ratchetQuestions(questions, report, opts.srcRoot, opts.snapshot);
      if (harderQuestions.length > questions.length) {
        console.log(` Ratcheting up: ${questions.length} → ${harderQuestions.length} questions`);
        questions = harderQuestions;
        // Don't break — run another iteration with harder questions
        continue;
      }
      console.log(' No harder questions to add. Pipeline is solid.');
      break;
    }

    // Step 2: Diagnose
    console.log('\n🔍 DIAGNOSE');
    const diagnosis = await diagnose(report, opts);
    const fixes = Array.isArray(diagnosis.fixes) ? diagnosis.fixes : [];
    console.log(` ${diagnosis.diagnosis}`);
    console.log(` Proposed fixes: ${fixes.length}`);

    if (fixes.length === 0) {
      console.log(' No actionable fixes proposed. Stopping.');
      break;
    }

    // Step 3: Apply fixes
    console.log('\n🔧 FIX');
    let anyApplied = false;
    // Max 3 fixes per iteration
    for (const fix of fixes.slice(0, 3)) {
      // The plan is LLM output, so optional fields may be missing.
      const impact = Array.isArray(fix.impact) ? fix.impact.join(', ') : 'n/a';
      console.log(` [${fix.id}] ${fix.description}`);
      console.log(` Impact: ${impact} | Expected: ${fix.expectedImprovement}`);
      const applied = await applyFix(fix, opts);
      if (applied) anyApplied = true;
    }

    if (!anyApplied) {
      console.log(' No fixes could be applied. Stopping.');
      break;
    }

    // Step 4: Regenerate docs
    console.log('\n📝 REGENERATE');
    try {
      regenerateDocs(opts);
    } catch (err) {
      console.log(` ⚠ Doc generation failed: ${err.message}`);
      console.log(' Reverting changes...');
      execFileSync('git', ['checkout', '--', 'sysdoc.js', 'extract-helm.js'], { cwd: PROJECT_DIR });
      break;
    }
  }

  // Final summary
  console.log('\n' + '═'.repeat(60));
  console.log('RATCHET SUMMARY');
  console.log('═'.repeat(60));
  for (const h of history) {
    console.log(` Iter ${h.iteration}: ${h.score}% (${h.questions} questions, NOT_FOUND: ${h.notFoundRate})`);
  }

  const finalScore = history[history.length - 1]?.score || 0;
  const passed = finalScore >= opts.target;
  console.log(`\nFinal: ${finalScore}% ${passed ? '✅ PASS' : '❌ BELOW TARGET'}`);

  // Save history
  const historyPath = path.join(PROJECT_DIR, 'ratchet-history.json');
  fs.writeFileSync(
    historyPath,
    JSON.stringify(
      {
        timestamp: new Date().toISOString(),
        target: opts.target,
        history,
        finalScore,
        passed,
      },
      null,
      2,
    ),
  );
  console.log(`History: ${historyPath}`);

  return { history, finalScore, passed };
}

if (require.main === module) {
  const opts = parseArgs();
  if (!opts.srcRoot || !opts.snapshot || !opts.docsDir) {
    console.error(USAGE);
    process.exit(1);
  }
  ratchetLoop(opts)
    .then((result) => {
      process.exit(result.passed ? 0 : 1);
    })
    .catch((err) => {
      console.error('Ratchet failed:', err);
      process.exit(1);
    });
}

module.exports = { ratchetLoop };