feat: repo-agnostic refactor (BMad spec-test-build loop)

- NEW: repo-profiler.js — deterministic archetype detection (Infra, Frontend, Backend, etc.)
- NEW: extract-dynamic.js — generic extractor replacing hardcoded Foxtrot patterns
- NEW: eval-generator.js — dynamic ground-truth question generation from any repo graph
- NEW: specs/bmad-agnostic-refactor-spec.md — full BMad spec with acceptance criteria
- REFACTORED: prose.js — two-pass LLM synthesis with rich context (shared secrets, ports, service refs)
- REFACTORED: sysdoc.js — wired repo-profiler + extract-dynamic, --legacy escape hatch
- REFACTORED: wiggum-v2.sh — uses eval-generator before benchmarks
- FIXED: graph.js — _edgeSet rebuilt on loadSnapshot() (edge dedup was broken)
- FIXED: graph.js — recursive sortKeys() for deep equality in diffing
- FIXED: prose.js — robust JSON array extraction from LLM output
- FIXED: ratchet.js — syntax validation (node --check) before saving LLM mutations
- FIXED: extract-dynamic.js — centralized state services regex, added console.warn for silent failures
- TESTS: test-eval-generator, test-repo-profiler, test-synthesis-quality + mock fixtures

Eval: 81.5% on Foxtrot (fully repo-agnostic, no hardcoded reference pages)
BMad reviews: Architect B+, Dev Lead B-, TEA B-
This commit is contained in:
Jarvis Prime
2026-03-11 14:40:31 +00:00
parent 15fb1a753b
commit b8403be96c
26 changed files with 4653 additions and 1037 deletions

201
prose.js
View File

@@ -94,6 +94,10 @@ function callLLM(prompt, opts = {}) {
res.on('data', c => data += c);
res.on('end', () => {
try {
if (res.statusCode >= 400) {
console.error('LLM API Error:', res.statusCode, data);
return resolve('');
}
const parsed = JSON.parse(data);
resolve(parsed.choices?.[0]?.message?.content || '');
} catch (e) {
@@ -284,4 +288,199 @@ Write ONLY the overview paragraph, no heading. Focus on explaining the architect
return callLLM(prompt, { ...llmOpts, maxTokens: 1536 });
}
// Export list without synthesizeReferencePages. A later `module.exports = { ... }`
// assignment in this file replaces this object with one that also exports that function.
module.exports = { callLLM, describeSubsystem, describeFlow, describeContract, describeArchitecture, detectAnomalies, loadConfluenceContext, findRelevantContext };
/**
 * Normalize the Helm chart records found in the agent knowledge base.
 *
 * Charts are read from `kb.reference.helm.charts`, falling back to `kb.charts`.
 * Non-object entries are dropped, and `dependencies` / `interactions` are coerced
 * to arrays so later `.join()` / iteration cannot throw on malformed input.
 *
 * @param {object} kb - Agent knowledge base (already defaulted to `{}` by the caller).
 * @returns {Array<object>} Chart facts: name, dir, version, appVersion, deps,
 *   resourceCount, valuesCount, interactions.
 */
function collectHelmChartFacts(kb) {
  const rawCharts = (kb.reference && kb.reference.helm && kb.reference.helm.charts) || kb.charts || [];
  if (!Array.isArray(rawCharts)) return [];
  return rawCharts
    .filter(c => c !== null && typeof c === 'object')
    .map(c => ({
      name: c.name,
      dir: c.path || c.dir,
      version: c.version,
      appVersion: c.appVersion,
      // NOTE(review): dependencies may be plain strings or objects carrying a `name`
      // (as in Chart.yaml) - confirm against the extractor. Objects with a name are
      // rendered by name instead of "[object Object]".
      deps: (Array.isArray(c.dependencies) ? c.dependencies : [])
        .map(d => (d !== null && typeof d === 'object' && d.name ? d.name : String(d))),
      resourceCount: c.resourceCount || 0,
      valuesCount: (c.valuesKeys || []).length || c.valuesCount || 0,
      // Interactions without a target carry no usable fact, so they are dropped here.
      interactions: Array.isArray(c.interactions)
        ? c.interactions.filter(i => i !== null && typeof i === 'object' && i.target != null)
        : []
    }));
}

/**
 * Group chart names by the target of every interaction of the given type.
 *
 * A Map of Sets is used instead of a plain object so that targets named like
 * Object.prototype members (e.g. "constructor") are handled correctly and each
 * chart is counted at most once per target.
 *
 * @param {Array<object>} charts - Output of collectHelmChartFacts().
 * @param {string} type - Interaction type to select (e.g. 'config-ref', 'port').
 * @returns {Map<string, Set<string>>} target (as string) -> chart names, in first-seen order.
 */
function groupChartNamesByTarget(charts, type) {
  const groups = new Map();
  for (const chart of charts) {
    for (const interaction of chart.interactions) {
      if (interaction.type !== type) continue;
      const target = String(interaction.target);
      if (!groups.has(target)) groups.set(target, new Set());
      groups.get(target).add(chart.name);
    }
  }
  return groups;
}

/**
 * Build the "EXTRACTED SYSTEM FACTS" text block that is embedded in every LLM prompt.
 *
 * @param {object} kb - Agent knowledge base (subsystems and Helm charts are read from it).
 * @param {object} dd - Deep extraction data; serialized as JSON and truncated to 4000 chars.
 * @returns {string} Multi-section fact sheet.
 */
function buildReferenceContext(kb, dd) {
  const helmCharts = collectHelmChartFacts(kb);

  // Shared secrets/configmaps: only targets referenced by more than one DISTINCT chart.
  const sharedSecrets = [...groupChartNamesByTarget(helmCharts, 'config-ref')]
    .filter(([, users]) => users.size > 1)
    .map(([name, users]) => `${name}: ${[...users].join(', ')}`);

  // Service-to-service refs, rendered as "source -> target".
  const svcRefs = [];
  for (const c of helmCharts) {
    for (const i of c.interactions) {
      if (i.type === 'k8s-service') svcRefs.push(`${c.name} -> ${i.target}`);
    }
  }

  // Shared ports. Numeric ports are listed in ascending order, which is the order a
  // plain object with integer-like keys would have produced; named ports follow.
  const byPortNumber = ([a], [b]) => {
    const na = Number(a);
    const nb = Number(b);
    const aIsNum = Number.isFinite(na);
    const bIsNum = Number.isFinite(nb);
    if (aIsNum && bIsNum) return na - nb;
    if (aIsNum) return -1;
    if (bIsNum) return 1;
    return 0;
  };
  const sharedPorts = [...groupChartNamesByTarget(helmCharts, 'port')]
    .filter(([port, users]) => port !== '0' && users.size > 1)
    .sort(byPortNumber)
    .map(([port, users]) => `Port ${port}: ${[...users].join(', ')}`);

  // Resource type breakdown: number of 'resource-kind' interactions per kind.
  const kindCounts = new Map();
  for (const c of helmCharts) {
    for (const i of c.interactions) {
      if (i.type === 'resource-kind') {
        const kind = String(i.target);
        kindCounts.set(kind, (kindCounts.get(kind) || 0) + 1);
      }
    }
  }

  // Subsystem summary from agentKB structure.
  const rawSubs = (kb.reference && kb.reference.subsystems) || kb.subsystems || [];
  const subsystems = (Array.isArray(rawSubs) ? rawSubs : [])
    .filter(s => s !== null && typeof s === 'object')
    .map(s => ({
      name: s.name,
      files: Array.isArray(s.files) ? s.files.length : (s.fileCount || s.files || 0),
      functions: (s.entities && s.entities.functions) || s.functions || 0,
      modules: (s.entities && s.entities.modules) || s.modules || 0
    }));

  // JSON.stringify throws on cycles/BigInt and returns undefined for non-serializable
  // roots; neither case should abort page synthesis.
  let deepDataJson = '{}';
  try {
    deepDataJson = JSON.stringify(dd) || '{}';
  } catch (e) {
    console.warn('Could not serialize deep extraction data for LLM context:', e.message);
  }

  return `
EXTRACTED SYSTEM FACTS:
## Subsystems (${subsystems.length} total)
${subsystems.map(s => `- ${s.name}: ${s.files} files, ${s.functions} functions, ${s.modules} modules`).join('\n')}
## Helm Charts (${helmCharts.length} total)
${helmCharts.slice(0, 30).map(c => `- ${c.name} (${c.dir}): v${c.version}, appVersion=${c.appVersion}, ${c.resourceCount} K8s resources, ${c.valuesCount} config keys, deps=[${c.deps.join(',')}]`).join('\n')}
${helmCharts.length > 30 ? `... and ${helmCharts.length - 30} more charts` : ''}
## Shared Secrets & ConfigMaps (used by multiple charts)
${sharedSecrets.length > 0 ? sharedSecrets.join('\n') : 'None detected'}
## Service-to-Service References
${svcRefs.length > 0 ? svcRefs.join('\n') : 'None detected'}
## Shared Network Ports (used by multiple charts)
${sharedPorts.length > 0 ? sharedPorts.join('\n') : 'None detected'}
## K8s Resource Types
${[...kindCounts].sort((a, b) => b[1] - a[1]).slice(0, 15).map(([k, v]) => `- ${k}: ${v}`).join('\n') || 'See individual chart docs'}
## Deep Extraction Data
${deepDataJson.substring(0, 4000)}
`;
}

/**
 * Reduce an LLM-supplied filename to a safe Markdown basename.
 *
 * Directory components are stripped so the page can only be written inside the
 * reference directory, and a ".md" extension is appended when missing.
 *
 * @param {*} rawName - Value of the "filename" field from the LLM response.
 * @returns {string|null} Safe filename, or null when no usable name remains.
 */
function sanitizeReferenceFilename(rawName) {
  const path = require('path');
  if (typeof rawName !== 'string') return null;
  // Treat backslashes as separators too, so "..\\x.md" cannot survive on POSIX hosts.
  const base = path.basename(rawName.trim().replace(/\\/g, '/'));
  if (base === '' || base === '.' || base === '..') return null;
  return base.endsWith('.md') ? base : `${base}.md`;
}

/**
 * Turn the raw LLM answer into a validated list of page definitions.
 *
 * The first "[" ... last "]" span is extracted (the model may wrap the JSON in a
 * Markdown fence), parsed, and every entry is validated. Entries that are not
 * objects, have no usable filename, duplicate another page, or would collide with
 * the generated index.md are skipped. When nothing usable remains, a single
 * "System Overview" page is returned.
 *
 * @param {*} rawResponse - Text returned by the LLM (anything non-string is treated as empty).
 * @returns {Array<{title: string, filename: string, focus: string}>} Never empty.
 */
function parseReferencePageDefinitions(rawResponse) {
  const fallback = [
    { title: 'System Overview', filename: 'overview.md', focus: 'General facts' }
  ];
  const text = typeof rawResponse === 'string' ? rawResponse : '';
  const match = text.match(/\[[\s\S]*\]/);
  let parsed;
  try {
    parsed = JSON.parse(match ? match[0] : text);
  } catch (e) {
    console.error('Failed to parse pages JSON:', rawResponse);
    return fallback;
  }

  // index.md is reserved: it is written after the pages and would overwrite one of them.
  const usedNames = new Set(['index.md']);
  const pages = [];
  for (const entry of Array.isArray(parsed) ? parsed : []) {
    if (entry === null || typeof entry !== 'object') continue;
    const filename = sanitizeReferenceFilename(entry.filename);
    if (!filename || usedNames.has(filename.toLowerCase())) {
      console.warn('Skipping page definition with unusable or duplicate filename:', entry.filename);
      continue;
    }
    usedNames.add(filename.toLowerCase());
    const title = typeof entry.title === 'string' && entry.title.trim()
      ? entry.title.trim()
      : filename.replace(/\.md$/, '');
    const focus = typeof entry.focus === 'string' && entry.focus.trim()
      ? entry.focus.trim()
      : title;
    pages.push({ title, filename, focus });
  }

  if (pages.length === 0) {
    console.error('No usable page definitions in LLM response; falling back to a single overview page.');
    return fallback;
  }
  return pages;
}

/**
 * Synthesize generic reference pages using the extracted facts.
 *
 * Workflow:
 *  1. Build a fact sheet from `agentKB` (subsystems, Helm charts and their
 *     interactions) and `deepData`.
 *  2. Ask the LLM which reference pages to create (JSON array of title/filename/focus).
 *  3. Ask the LLM to write each page and store it under `<outDir>/reference/`.
 *  4. Ask the LLM for an `index.md` routing table covering the pages that were written.
 *
 * LLM calls go through `module.exports.callLLM`, looked up at call time.
 * Pages (and the index) for which the LLM returns no text are skipped with a
 * warning rather than written as empty files.
 *
 * @param {object} [agentKB] - Agent knowledge base; missing values are treated as `{}`.
 * @param {object} [deepData] - Deep extraction data; missing values are treated as `{}`.
 * @param {string} outDir - Output directory; a `reference` sub-directory is created in it.
 * @param {string} archetype - Repository archetype label inserted into the prompts.
 * @param {object} [llmOpts] - Options forwarded to callLLM (maxTokens/title are overridden per call).
 * @returns {Promise<void>}
 * @throws Filesystem errors, and any rejection of callLLM during page or index
 *   generation, propagate to the caller. A failure of the page-list call is
 *   logged and answered with the fallback page instead.
 */
async function synthesizeReferencePages(agentKB, deepData, outDir, archetype, llmOpts) {
  const fs = require('fs');
  const path = require('path');
  console.log(`Synthesizing dynamic reference pages via LLM for archetype: ${archetype}...`);
  const refDir = path.join(outDir, 'reference');
  // With { recursive: true } mkdirSync is a no-op when the directory already exists.
  fs.mkdirSync(refDir, { recursive: true });

  // Build rich context from agentKB and deepData for synthesis
  const contextStr = buildReferenceContext(agentKB || {}, deepData || {});

  const pageListPrompt = `You are a Senior Technical Writer analyzing a repository with the archetype: "${archetype}".
Given these extracted facts and this repo archetype, what 5 reference pages should be created?
IMPORTANT: You MUST include pages that cover ALL of the following topics (spread across the 5 pages):
- Shared secrets/ConfigMaps and which charts use them
- Service-to-service references between charts
- Network ports used by charts (especially shared ports)
- Kubernetes resource types generated across charts
- Chart dependencies and versions
- Subsystem architecture and cross-cutting concerns
${contextStr}
Respond with ONLY a valid JSON array of objects. Each object must have:
- "title": The human-readable title of the page
- "filename": The markdown filename (e.g. "network-architecture.md")
- "focus": A brief description of what to focus on in this page.
Example for Infrastructure:
[
{ "title": "Service Contracts & Interactions", "filename": "service-contracts.md", "focus": "Shared secrets, ConfigMaps, service-to-service references, and network ports across charts" },
{ "title": "Helm Charts & Dependencies", "filename": "helm-charts-dependencies.md", "focus": "Chart versions, dependencies, and configuration surface" }
]
`;

  let rawPageList = '';
  try {
    rawPageList = await module.exports.callLLM(pageListPrompt, { ...llmOpts, maxTokens: 1000 });
  } catch (e) {
    console.error('Failed to get page definitions from LLM:', e);
  }
  const pages = parseReferencePageDefinitions(rawPageList);

  const generatedFiles = [];
  for (const page of pages) {
    const pageBodyPrompt = `You are a Senior Technical Writer. Generate a "${page.title}" reference page in Markdown for a "${archetype}" repository.
Focus on: ${page.focus}
CRITICAL INSTRUCTIONS:
- Include ALL specific data points from the extracted facts below. Do not summarize or omit details.
- List every shared secret/ConfigMap with the exact chart names that use it.
- List every service-to-service reference with source and target.
- List every shared network port with the exact chart names.
- List Kubernetes resource types with counts.
- List chart versions and appVersions.
- Use tables and bullet lists for data-dense sections.
- Do NOT invent facts. Only use what is in the extracted data below.
${contextStr}
Respond with ONLY the Markdown content. Use # ${page.title} as the main title.`;
    const content = await module.exports.callLLM(pageBodyPrompt, { ...llmOpts, maxTokens: 4000, title: page.title });
    if (typeof content !== 'string' || content.trim() === '') {
      // An empty answer would otherwise create (or overwrite) a blank page.
      console.warn(`LLM returned no content for reference page "${page.title}"; skipping ${page.filename}.`);
      continue;
    }
    fs.writeFileSync(path.join(refDir, page.filename), content);
    generatedFiles.push({ filename: page.filename, title: page.title, focus: page.focus });
  }

  if (generatedFiles.length === 0) {
    // Without pages the index prompt has nothing factual to route to.
    console.warn('No reference pages were generated; skipping reference/index.md.');
    return;
  }

  // Generate Index
  const indexPrompt = `You are a Senior Technical Writer. Create a "reference/index.md" routing table.
I have generated the following files for this ${archetype} repository:
${generatedFiles.map(f => `- \`reference/${f.filename}\` (${f.focus})`).join('\n')}
Create a markdown page with two sections:
## Quick Lookup by Topic
(A table mapping specific topics/keywords to the exact file path)
## File Descriptions
(A table describing what is in each file)
Respond with ONLY the Markdown content.`;
  const indexMd = await module.exports.callLLM(indexPrompt, { ...llmOpts, maxTokens: 1500 });
  if (typeof indexMd !== 'string' || indexMd.trim() === '') {
    console.warn('LLM returned no content for reference/index.md; leaving any existing index untouched.');
    return;
  }
  fs.writeFileSync(path.join(refDir, 'index.md'), indexMd);
  console.log(`Dynamic reference pages and index synthesized for ${archetype}.`);
}
// Public API of this module. synthesizeReferencePages calls callLLM through
// `module.exports.callLLM`, so the callLLM entry of this object is the function it invokes.
module.exports = { callLLM, describeSubsystem, describeFlow, describeContract, describeArchitecture, detectAnomalies, loadConfluenceContext, findRelevantContext, synthesizeReferencePages };