feat: repo-agnostic refactor (BMad spec-test-build loop)

- NEW: repo-profiler.js — deterministic archetype detection (Infra, Frontend, Backend, etc.)
- NEW: extract-dynamic.js — generic extractor replacing hardcoded Foxtrot patterns
- NEW: eval-generator.js — dynamic ground-truth question generation from any repo graph
- NEW: specs/bmad-agnostic-refactor-spec.md — full BMad spec with acceptance criteria
- REFACTORED: prose.js — two-pass LLM synthesis with rich context (shared secrets, ports, service refs)
- REFACTORED: sysdoc.js — wired repo-profiler + extract-dynamic, --legacy escape hatch
- REFACTORED: wiggum-v2.sh — uses eval-generator before benchmarks
- FIXED: graph.js — _edgeSet rebuilt on loadSnapshot() (edge dedup was broken)
- FIXED: graph.js — recursive sortKeys() for deep equality in diffing
- FIXED: prose.js — robust JSON array extraction from LLM output
- FIXED: ratchet.js — syntax validation (node --check) before saving LLM mutations
- FIXED: extract-dynamic.js — centralized state services regex, added console.warn for silent failures
- TESTS: test-eval-generator, test-repo-profiler, test-synthesis-quality + mock fixtures

Eval: 81.5% on Foxtrot (fully repo-agnostic, no hardcoded reference pages)
BMad reviews: Architect B+, Dev Lead B-, TEA B-
2026-03-11 14:40:31 +00:00
parent 15fb1a753b
commit b8403be96c
26 changed files with 4653 additions and 1037 deletions
--- a/prose.js
+++ b/prose.js
@@ -94,6 +94,10 @@ function callLLM(prompt, opts = {}) {
      res.on('data', c => data += c);
      res.on('end', () => {
        try {
+          if (res.statusCode >= 400) {
+            console.error('LLM API Error:', res.statusCode, data);
+            return resolve('');
+          }
          const parsed = JSON.parse(data);
          resolve(parsed.choices?.[0]?.message?.content || '');
        } catch (e) {
@@ -284,4 +288,199 @@ Write ONLY the overview paragraph, no heading. Focus on explaining the architect
  return callLLM(prompt, { ...llmOpts, maxTokens: 1536 });
 }

-module.exports = { callLLM, describeSubsystem, describeFlow, describeContract, describeArchitecture, detectAnomalies, loadConfluenceContext, findRelevantContext };
+/**
+ * Synthesize generic reference pages using the extracted facts.
+ */
+async function synthesizeReferencePages(agentKB, deepData, outDir, archetype, llmOpts) {
+  const fs = require('fs');
+  const path = require('path');
+  console.log(`Synthesizing dynamic reference pages via LLM for archetype: ${archetype}...`);
+
+  const refDir = path.join(outDir, 'reference');
+  if (!fs.existsSync(refDir)) fs.mkdirSync(refDir, { recursive: true });
+
+  // Build rich context from agentKB and deepData for synthesis
+  const kb = agentKB || {};
+  const dd = deepData || {};
+
+  // Extract helm interaction details from agentKB structure
+  const rawCharts = (kb.reference && kb.reference.helm && kb.reference.helm.charts) || kb.charts || [];
+  const helmCharts = rawCharts.map(c => ({
+    name: c.name, dir: c.path || c.dir, version: c.version, appVersion: c.appVersion,
+    deps: c.dependencies || [],
+    resourceCount: c.resourceCount || 0,
+    valuesCount: (c.valuesKeys || []).length || c.valuesCount || 0,
+    interactions: c.interactions || []
+  }));
+
+  // Shared secrets/configmaps
+  const configUsers = {};
+  for (const c of helmCharts) {
+    for (const i of c.interactions) {
+      if (i.type === 'config-ref') {
+        if (!configUsers[i.target]) configUsers[i.target] = [];
+        configUsers[i.target].push(c.name);
+      }
+    }
+  }
+  const sharedSecrets = Object.entries(configUsers)
+    .filter(([, users]) => users.length > 1)
+    .map(([name, users]) => `${name}: ${[...new Set(users)].join(', ')}`);
+
+  // Service-to-service refs
+  const svcRefs = [];
+  for (const c of helmCharts) {
+    for (const i of c.interactions) {
+      if (i.type === 'k8s-service') svcRefs.push(`${c.name} → ${i.target}`);
+    }
+  }
+
+  // Shared ports
+  const portMap = {};
+  for (const c of helmCharts) {
+    for (const i of c.interactions) {
+      if (i.type === 'port' && i.target !== '0') {
+        if (!portMap[i.target]) portMap[i.target] = [];
+        if (!portMap[i.target].includes(c.name)) portMap[i.target].push(c.name);
+      }
+    }
+  }
+  const sharedPorts = Object.entries(portMap)
+    .filter(([, users]) => users.length > 1)
+    .map(([port, users]) => `Port ${port}: ${users.join(', ')}`);
+
+  // Resource type breakdown
+  const kindCounts = {};
+  for (const c of helmCharts) {
+    for (const i of c.interactions) {
+      if (i.type === 'resource-kind') {
+        kindCounts[i.target] = (kindCounts[i.target] || 0) + 1;
+      }
+    }
+  }
+
+  // Subsystem summary from agentKB structure
+  const rawSubs = (kb.reference && kb.reference.subsystems) || kb.subsystems || [];
+  const subsystems = rawSubs.map(s => ({
+    name: s.name,
+    files: Array.isArray(s.files) ? s.files.length : (s.fileCount || s.files || 0),
+    functions: (s.entities && s.entities.functions) || s.functions || 0,
+    modules: (s.entities && s.entities.modules) || s.modules || 0
+  }));
+
+  const contextStr = `
+EXTRACTED SYSTEM FACTS:
+
+## Subsystems (${subsystems.length} total)
+${subsystems.map(s => `- ${s.name}: ${s.files} files, ${s.functions} functions, ${s.modules} modules`).join('\n')}
+
+## Helm Charts (${helmCharts.length} total)
+${helmCharts.slice(0, 30).map(c => `- ${c.name} (${c.dir}): v${c.version}, appVersion=${c.appVersion}, ${c.resourceCount} K8s resources, ${c.valuesCount} config keys, deps=[${c.deps.join(',')}]`).join('\n')}
+${helmCharts.length > 30 ? `... and ${helmCharts.length - 30} more charts` : ''}
+
+## Shared Secrets & ConfigMaps (used by multiple charts)
+${sharedSecrets.length > 0 ? sharedSecrets.join('\n') : 'None detected'}
+
+## Service-to-Service References
+${svcRefs.length > 0 ? svcRefs.join('\n') : 'None detected'}
+
+## Shared Network Ports (used by multiple charts)
+${sharedPorts.length > 0 ? sharedPorts.join('\n') : 'None detected'}
+
+## K8s Resource Types
+${Object.entries(kindCounts).sort((a,b) => b[1]-a[1]).slice(0,15).map(([k,v]) => `- ${k}: ${v}`).join('\n') || 'See individual chart docs'}
+
+## Deep Extraction Data
+${JSON.stringify(dd).substring(0, 4000)}
+  `;
+
+  const pagePrompt = `You are a Senior Technical Writer analyzing a repository with the archetype: "${archetype}".
+Given these extracted facts and this repo archetype, what 5 reference pages should be created?
+
+IMPORTANT: You MUST include pages that cover ALL of the following topics (spread across the 5 pages):
+- Shared secrets/ConfigMaps and which charts use them
+- Service-to-service references between charts
+- Network ports used by charts (especially shared ports)
+- Kubernetes resource types generated across charts
+- Chart dependencies and versions
+- Subsystem architecture and cross-cutting concerns
+
+${contextStr}
+
+Respond with ONLY a valid JSON array of objects. Each object must have:
+- "title": The human-readable title of the page
+- "filename": The markdown filename (e.g. "network-architecture.md")
+- "focus": A brief description of what to focus on in this page.
+
+Example for Infrastructure:
+[
+  { "title": "Service Contracts & Interactions", "filename": "service-contracts.md", "focus": "Shared secrets, ConfigMaps, service-to-service references, and network ports across charts" },
+  { "title": "Helm Charts & Dependencies", "filename": "helm-charts-dependencies.md", "focus": "Chart versions, dependencies, and configuration surface" }
+]
+`;
+
+  let pagesJson = '[]';
+  try {
+    pagesJson = await module.exports.callLLM(pagePrompt, { ...llmOpts, maxTokens: 1000 });
+    // basic cleanup in case the LLM returned markdown blocks
+    const match = pagesJson.match(/\[[\s\S]*\]/);
+    if (match) pagesJson = match[0];
+  } catch (e) {
+    console.error('Failed to get page definitions from LLM:', e);
+  }
+
+  let pages = [];
+  try {
+    pages = JSON.parse(pagesJson);
+  } catch (e) {
+    console.error('Failed to parse pages JSON:', pagesJson);
+    pages = [
+      { title: 'System Overview', filename: 'overview.md', focus: 'General facts' }
+    ];
+  }
+
+  const generatedFiles = [];
+  for (const page of pages) {
+    const pagePrompt = `You are a Senior Technical Writer. Generate a "${page.title}" reference page in Markdown for a "${archetype}" repository.
+Focus on: ${page.focus}
+
+CRITICAL INSTRUCTIONS:
+- Include ALL specific data points from the extracted facts below. Do not summarize or omit details.
+- List every shared secret/ConfigMap with the exact chart names that use it.
+- List every service-to-service reference with source and target.
+- List every shared network port with the exact chart names.
+- List Kubernetes resource types with counts.
+- List chart versions and appVersions.
+- Use tables and bullet lists for data-dense sections.
+- Do NOT invent facts. Only use what is in the extracted data below.
+
+${contextStr}
+
+Respond with ONLY the Markdown content. Use # ${page.title} as the main title.`;
+    
+    const content = await module.exports.callLLM(pagePrompt, { ...llmOpts, maxTokens: 4000, title: page.title });
+    const filename = page.filename.endsWith('.md') ? page.filename : `${page.filename}.md`;
+    fs.writeFileSync(path.join(refDir, filename), content);
+    generatedFiles.push({ filename, title: page.title, focus: page.focus });
+  }
+
+  // Generate Index
+  const indexPrompt = `You are a Senior Technical Writer. Create a "reference/index.md" routing table.
+I have generated the following files for this ${archetype} repository:
+${generatedFiles.map(f => `- \`reference/${f.filename}\` (${f.focus})`).join('\n')}
+
+Create a markdown page with two sections:
+## Quick Lookup by Topic
+(A table mapping specific topics/keywords to the exact file path)
+## File Descriptions
+(A table describing what is in each file)
+
+Respond with ONLY the Markdown content.`;
+
+  const indexMd = await module.exports.callLLM(indexPrompt, { ...llmOpts, maxTokens: 1500 });
+  fs.writeFileSync(path.join(refDir, 'index.md'), indexMd);
+  
+  console.log(`Dynamic reference pages and index synthesized for ${archetype}.`);
+}
+
+module.exports = { callLLM, describeSubsystem, describeFlow, describeContract, describeArchitecture, detectAnomalies, loadConfluenceContext, findRelevantContext, synthesizeReferencePages };