2026-03-09 18:44:19 +00:00
const http = require ( 'http' ) ;
const https = require ( 'https' ) ;
feat: confluence benchmark, pattern extractor, agent KB, UX spec
- extract-patterns.js: mines layered arch, ArgoCD appsets, cloud regions,
CIDR allocations, naming conventions, sync waves, tech stack from code
- agent-kb.js: token-efficient JSON rendering of same doc tree
- eval-confluence-ref-questions.json: 32 reference-only benchmark questions
- wiggum-v2.sh: Ralph Wiggum loop targeting confluence baseline (77.8%)
- docs/human-ux-spec.md: BMad UX designer spec for human doc structure
- Eval results: V2 at 28.7% vs confluence 77.8% baseline
- Hub/spoke ownership now correctly extracted (95% on that question)
- Naming conventions, regions, CIDRs surfaced in system-architecture.md
2026-03-10 14:20:35 +00:00
const fs = require ( 'fs' ) ;
const path = require ( 'path' ) ;
2026-03-09 18:44:19 +00:00
/**
 * Phase 6 + 7: LLM Prose Generator
 * Generates human-readable prose for system documentation using Claude Sonnet.
 * All structural analysis is deterministic — LLM is ONLY for prose formatting.
 */

// Connection defaults. Each one can be overridden through the environment
// (LLM_URL, LLM_MODEL, LLM_API_KEY) or per call via the options object that
// is passed to callLLM().
const DEFAULT_URL = process.env.LLM_URL || 'http://192.168.86.11:8000/v1';
const DEFAULT_MODEL = process.env.LLM_MODEL || 'claude-sonnet-4.6';
// NOTE(review): this is a hard-coded fallback credential committed to source.
// It is kept so existing setups keep working; prefer supplying LLM_API_KEY and
// consider removing the literal once every caller sets it.
const DEFAULT_API_KEY = process.env.LLM_API_KEY || 'my-super-secret-password-123';
feat: confluence benchmark, pattern extractor, agent KB, UX spec
- extract-patterns.js: mines layered arch, ArgoCD appsets, cloud regions,
CIDR allocations, naming conventions, sync waves, tech stack from code
- agent-kb.js: token-efficient JSON rendering of same doc tree
- eval-confluence-ref-questions.json: 32 reference-only benchmark questions
- wiggum-v2.sh: Ralph Wiggum loop targeting confluence baseline (77.8%)
- docs/human-ux-spec.md: BMad UX designer spec for human doc structure
- Eval results: V2 at 28.7% vs confluence 77.8% baseline
- Hub/spoke ownership now correctly extracted (95% on that question)
- Naming conventions, regions, CIDRs surfaced in system-architecture.md
2026-03-10 14:20:35 +00:00
/**
 * Load Confluence reference + explanation docs as seed context.
 *
 * Reads every regular `*.md` file directly inside `<confluenceDir>/reference`
 * and `<confluenceDir>/explanation`. Files whose trimmed content is empty are
 * skipped. When both sections contain a file with the same name, the
 * `explanation` copy wins because it is read last.
 *
 * @param {string} confluenceDir - Root directory holding the two sections.
 * @returns {Object<string, string>} Map of { topic → content }, where topic is
 *   the file name without its trailing `.md`. Empty when the directory is
 *   missing or not provided.
 */
function loadConfluenceContext(confluenceDir) {
  if (!confluenceDir || !fs.existsSync(confluenceDir)) return {};

  const ctx = {};
  for (const section of ['reference', 'explanation']) {
    const dir = path.join(confluenceDir, section);
    if (!fs.existsSync(dir)) continue;

    for (const file of fs.readdirSync(dir)) {
      if (!file.endsWith('.md')) continue;

      const fullPath = path.join(dir, file);
      // A directory can also be named "*.md"; reading it would throw EISDIR.
      if (!fs.statSync(fullPath).isFile()) continue;

      // Strip only the trailing extension. String.replace('.md', '') would
      // remove the first occurrence instead ("a.md.notes.md" → "a.notes.md").
      const key = file.slice(0, -'.md'.length);
      const content = fs.readFileSync(fullPath, 'utf8').trim();
      if (content.length > 0) ctx[key] = content;
    }
  }
  return ctx;
}
/**
 * Find relevant confluence docs for a given topic by keyword matching.
 *
 * Scoring per keyword (case-insensitive): +10 when the doc key contains the
 * keyword, plus the number of literal occurrences within the first 2000
 * characters of the content (at most 5 per keyword). Docs scoring 0 are
 * dropped; the rest are appended in descending score order.
 *
 * The cap is applied to document content: the first document that does not
 * fit is truncated (only if more than 200 characters of budget remain) and
 * selection stops. The `--- key ---` separators are added on top of that
 * budget, so the returned string can exceed `maxChars` by the separator text.
 *
 * @param {Object<string, string>} confluenceCtx - Map of { topic → content }.
 * @param {Array<string>} keywords - Search terms; non-string and empty entries
 *   are ignored.
 * @param {number} [maxChars=12000] - Content budget for the result.
 * @returns {string} Concatenated matching docs, or '' when nothing matches.
 */
function findRelevantContext(confluenceCtx, keywords, maxChars = 12000) {
  if (!confluenceCtx || Object.keys(confluenceCtx).length === 0) return '';

  // Compile each keyword once. Keywords are matched literally: regex
  // metacharacters are escaped so terms such as "c++" or "v1.2" neither throw
  // nor behave as wildcards.
  const matchers = (keywords || [])
    .filter((kw) => typeof kw === 'string' && kw.length > 0)
    .map((kw) => {
      const needle = kw.toLowerCase();
      const escaped = needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      return { needle, pattern: new RegExp(escaped, 'g') };
    });

  const ranked = Object.entries(confluenceCtx)
    .map(([key, content]) => {
      const lowerKey = key.toLowerCase();
      const head = content.toLowerCase().substring(0, 2000);
      let score = 0;
      for (const { needle, pattern } of matchers) {
        if (lowerKey.includes(needle)) score += 10;
        score += Math.min((head.match(pattern) || []).length, 5);
      }
      return { key, content, score };
    })
    .filter((doc) => doc.score > 0)
    .sort((a, b) => b.score - a.score);

  let result = '';
  for (const doc of ranked) {
    if (result.length + doc.content.length > maxChars) {
      const remaining = maxChars - result.length;
      // Only include a truncated tail when it is long enough to be useful.
      if (remaining > 200) {
        result += `\n\n--- ${doc.key} ---\n${doc.content.substring(0, remaining)}...\n`;
      }
      break;
    }
    result += `\n\n--- ${doc.key} ---\n${doc.content}\n`;
  }
  return result;
}
2026-03-09 18:44:19 +00:00
/**
 * Call an OpenAI-compatible chat completions API.
 *
 * Sends a fixed system message plus `prompt` as the user message to
 * `<base>/v1/chat/completions`, where `<base>` is the configured URL with any
 * trailing `/v1` removed.
 *
 * @param {string} prompt - User message content.
 * @param {Object} [opts]
 * @param {string} [opts.url] - Base URL; falls back to DEFAULT_URL.
 * @param {string} [opts.model] - Model name; falls back to DEFAULT_MODEL.
 * @param {string} [opts.apiKey] - Bearer token; falls back to DEFAULT_API_KEY.
 * @param {number} [opts.maxTokens=1024] - Sent as `max_tokens`.
 * @param {number} [opts.temperature=0.3] - Sampling temperature; an explicit
 *   0 is honoured.
 * @returns {Promise<string>} The first choice's message content. Resolves to
 *   '' when the server answers with HTTP status >= 400 (the error is logged)
 *   or when the response carries no content.
 * @throws Rejects on network errors, on a 120 s timeout, and when the
 *   response body is not valid JSON.
 */
function callLLM(prompt, opts = {}) {
  const baseUrl = opts.url || DEFAULT_URL;
  const model = opts.model || DEFAULT_MODEL;
  const apiKey = opts.apiKey || DEFAULT_API_KEY;
  const maxTokens = opts.maxTokens || 1024;
  // `??` rather than `||`: temperature 0 is a valid, deliberate setting.
  const temperature = opts.temperature ?? 0.3;

  return new Promise((resolve, reject) => {
    const url = new URL('/v1/chat/completions', baseUrl.replace(/\/v1\/?$/, ''));
    const body = JSON.stringify({
      model,
      messages: [
        { role: 'system', content: 'You are a senior software architect writing concise, precise technical documentation. Write in present tense. Be specific about domain logic, not syntax. No filler.' },
        { role: 'user', content: prompt },
      ],
      max_tokens: maxTokens,
      temperature,
    });

    const client = url.protocol === 'https:' ? https : http;
    const req = client.request(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` },
    }, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character split
      // across two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('end', () => {
        try {
          if (res.statusCode >= 400) {
            // Best-effort: callers receive an empty string rather than a rejection.
            console.error('LLM API Error:', res.statusCode, data);
            return resolve('');
          }
          const parsed = JSON.parse(data);
          resolve(parsed.choices?.[0]?.message?.content || '');
        } catch (e) {
          reject(new Error(`LLM parse error: ${e.message} — raw: ${data.substring(0, 200)}`));
        }
      });
    });

    req.on('error', reject);
    req.setTimeout(120000, () => {
      const timeoutError = new Error('LLM timeout (120s)');
      req.destroy(timeoutError);
      // Reject explicitly as well: the request may already be past the point
      // where destroying it would surface an 'error' event on `req`.
      reject(timeoutError);
    });
    req.write(body);
    req.end();
  });
}
/**
 * Detect structural anomalies in a subsystem.
 *
 * Checks, in order: zero functions across more than 5 files; more than 50
 * functions with zero classes; fan-out above 5; fan-in above 5; and complete
 * isolation (no edges at all) for subsystems with more than 3 files.
 *
 * @param {Object} sub - Subsystem with `name`, `files` (array) and
 *   `entities.functions` / `entities.classes` counts.
 * @param {Object} [deps] - Dependency map keyed by "source→target"; only the
 *   keys are inspected. A missing map is treated as having no edges.
 * @returns {string[]} Human-readable anomaly descriptions (possibly empty).
 */
function detectAnomalies(sub, deps) {
  const anomalies = [];
  const fileCount = sub.files.length;
  const { functions, classes } = sub.entities;

  if (functions === 0 && fileCount > 5) {
    anomalies.push(`Zero functions despite ${fileCount} files — likely a configuration-only or IaC subsystem`);
  }
  if (classes === 0 && functions > 50) {
    anomalies.push(`${functions} functions with no classes — procedural/script-heavy architecture`);
  }

  // Fan-in/fan-out analysis: edges are identified purely by their key.
  const edgeKeys = Object.keys(deps ?? {});
  const outgoingCount = edgeKeys.filter((key) => key.startsWith(`${sub.name}→`)).length;
  const incomingCount = edgeKeys.filter((key) => key.endsWith(`→${sub.name}`)).length;

  if (outgoingCount > 5) {
    anomalies.push(`High fan-out: depends on ${outgoingCount} other subsystems — potential orchestrator or integration layer`);
  }
  if (incomingCount > 5) {
    anomalies.push(`High fan-in: ${incomingCount} subsystems depend on this — likely a shared library or core service`);
  }
  if (outgoingCount === 0 && incomingCount === 0 && fileCount > 3) {
    anomalies.push('Isolated subsystem with no cross-subsystem dependencies — may be self-contained tooling or unused');
  }
  return anomalies;
}
/**
 * Generate an explanatory prose overview for a subsystem.
 * Includes dependency rationale and anomaly explanations.
 *
 * Builds a prompt from the subsystem's counts, its first 15 public exports,
 * its incoming/outgoing dependency edges, any detected anomalies and, when
 * available, matching Confluence seed content (up to 8000 characters), then
 * sends it through callLLM().
 *
 * @param {Object} sub - Subsystem with `name`, `kind`, `files`, `entities`
 *   ({ functions, classes, modules }) and `publicExports`.
 * @param {Object} deps - Dependency map keyed by "source→target" whose values
 *   carry `calls` and `imports`.
 * @param {Object} [llmOpts] - Options forwarded unchanged to callLLM(); an
 *   optional `confluenceCtx` map is used to look up seed content.
 * @returns {Promise<string>} The generated overview paragraph.
 */
async function describeSubsystem(sub, deps, llmOpts) {
  const edges = Object.entries(deps);
  const outgoing = edges
    .filter(([key]) => key.startsWith(`${sub.name}→`))
    .map(([key, edge]) => ({ target: key.split('→')[1], calls: edge.calls, imports: edge.imports }));
  const incoming = edges
    .filter(([key]) => key.endsWith(`→${sub.name}`))
    .map(([key, edge]) => ({ source: key.split('→')[0], calls: edge.calls, imports: edge.imports }));

  const anomalies = detectAnomalies(sub, deps);

  const depContext = [];
  if (outgoing.length > 0) {
    depContext.push(`Depends on: ${outgoing.map((d) => `${d.target} (${d.calls} calls, ${d.imports} imports)`).join(', ')}`);
  }
  if (incoming.length > 0) {
    depContext.push(`Depended on by: ${incoming.map((d) => `${d.source} (${d.calls} calls, ${d.imports} imports)`).join(', ')}`);
  }

  // Confluence seed context. `kind` may be absent, so unusable keywords are
  // dropped before the lookup.
  const confluenceCtx = llmOpts?.confluenceCtx || {};
  const keywords = [sub.name, sub.name.replace(/-/g, ' '), sub.kind].filter(Boolean);
  const seedContent = findRelevantContext(confluenceCtx, keywords, 8000);

  const publicExports = sub.publicExports ?? [];
  const exportsPreview = publicExports.slice(0, 15).join(', ');
  const exportsOverflow = publicExports.length > 15 ? ` (+${publicExports.length - 15} more)` : '';

  const prompt = `Write a 3-5 sentence technical overview of the "${sub.name}" subsystem. You MUST explain WHY it depends on its upstream subsystems and WHY downstream subsystems depend on it. If there are structural anomalies, explain their architectural rationale.
${seedContent ? `\nREFERENCE DOCUMENTATION (use this as authoritative context — incorporate key architectural details, naming conventions, deployment patterns, and design rationale from this content):\n${seedContent}\n` : ''}
Facts:
- Kind: ${sub.kind}
- Files: ${sub.files.length}
- Functions: ${sub.entities.functions}, Classes: ${sub.entities.classes}, Modules: ${sub.entities.modules}
- Public exports: ${exportsPreview}${exportsOverflow}
${depContext.length > 0 ? `- Dependency matrix:\n ${depContext.join('\n ')}` : '- No cross-subsystem dependencies (explain why this subsystem is self-contained)'}
${anomalies.length > 0 ? `- Structural anomalies:\n ${anomalies.join('\n ')}` : ''}
Write ONLY the overview paragraph, no heading. Focus on architectural rationale, not just listing components.`;

  return callLLM(prompt, llmOpts);
}
/**
 * Generate a prose narrative for a data flow trace.
 *
 * Only the first 20 steps are spelled out; the number of remaining steps is
 * noted. Up to 5 excluded nodes and the cycle count are mentioned when
 * present.
 *
 * @param {Object} flowResult - Trace with `entryPoint`, `subsystemSequence`
 *   (array), `flow` (array of { subsystem, entity, crossedVia? }) and
 *   optionally `excludedNodes` / `cyclesDetected` arrays.
 * @param {Object} [llmOpts] - Options forwarded unchanged to callLLM().
 * @returns {Promise<string>} The generated narrative paragraph.
 */
async function describeFlow(flowResult, llmOpts) {
  const { flow } = flowResult;
  const excludedNodes = flowResult.excludedNodes ?? [];
  const cyclesDetected = flowResult.cyclesDetected ?? [];

  const steps = flow
    .slice(0, 20)
    .map((step, i) => {
      const crossing = step.crossedVia ? ` (crosses via ${step.crossedVia})` : '';
      return `${i + 1}. [${step.subsystem}] ${step.entity}${crossing}`;
    })
    .join('\n');
  const overflow = flow.length > 20 ? `\n... (+${flow.length - 20} more steps)` : '';

  const prompt = `Write a 3-5 sentence narrative describing this data flow through the system.
Entry point: ${flowResult.entryPoint}
Subsystem sequence: ${flowResult.subsystemSequence.join(' → ')}
${excludedNodes.length > 0 ? `Excluded (high fan-in): ${excludedNodes.slice(0, 5).join(', ')}` : ''}
${cyclesDetected.length > 0 ? `Cycles detected: ${cyclesDetected.length}` : ''}
Steps:
${steps}${overflow}
Write ONLY the narrative paragraph, no heading. Explain what happens when this entry point is triggered and how data moves across subsystem boundaries.`;

  return callLLM(prompt, llmOpts);
}
/**
 * Generate a prose description for a contract (interface / type / enum).
 *
 * Detail lines depend on `contract.type`: interfaces list their fields and
 * any `extends`; enums list their members; types starting with "Helm" list
 * up to 20 fields and note how many were omitted. The request is sent with
 * `maxTokens` forced to 256.
 *
 * @param {Object} contract - Contract with `name`, `type`, `id`, `visibility`
 *   and optionally `fields` ([{ name, type }]), `extends`, `members`.
 * @param {Object} [xref] - Cross-reference map; `xref[name].usedBy` lists the
 *   subsystems that use the contract.
 * @param {Object} [llmOpts] - Base options forwarded to callLLM().
 * @returns {Promise<string>} The generated description.
 */
async function describeContract(contract, xref, llmOpts) {
  const usedBy = xref?.[contract.name]?.usedBy || [];
  const isHelm = contract.type.startsWith('Helm');
  const formatField = (field) => `${field.name}: ${field.type}`;

  let details = '';
  if (contract.type === 'Interface' && contract.fields) {
    details = `Fields: ${contract.fields.map(formatField).join(', ')}`;
    if (contract.extends) details += `\nExtends: ${contract.extends.join(', ')}`;
  } else if (contract.type === 'Enum' && contract.members) {
    details = `Members: ${contract.members.join(', ')}`;
  } else if (isHelm && contract.fields) {
    // Helm contract types can be very wide; cap the listing at 20 fields.
    details = `Fields: ${contract.fields.slice(0, 20).map(formatField).join(', ')}`;
    if (contract.fields.length > 20) details += ` (+${contract.fields.length - 20} more)`;
  }

  const typeLabel = isHelm
    ? `Helm ${contract.type.replace('Helm', '').toLowerCase()} contract`
    : `TypeScript ${contract.type.toLowerCase()}`;

  const prompt = `Write a 1-2 sentence description of this ${typeLabel}.
Name: ${contract.name}
Type: ${contract.type}
Defined in: ${contract.id}
Visibility: ${contract.visibility}
${details}
${usedBy.length > 0 ? `Used by subsystems: ${usedBy.join(', ')}` : 'Not referenced cross-subsystem'}
Write ONLY the description, no heading. Do not ask for more information.`;

  return callLLM(prompt, { ...llmOpts, maxTokens: 256 });
}
/**
 * Generate a system-level architecture overview with cross-cutting explanations.
 *
 * The prompt lists the first 20 subsystems (with their outgoing dependencies),
 * the cross-cutting concerns, the first anomaly of up to 5 anomalous
 * subsystems and, when available, up to 15000 characters of Confluence seed
 * content. The request is sent with `maxTokens` forced to 1536.
 *
 * @param {Array<Object>} subsystems - Subsystems with `name`, `kind`, `files`
 *   and `entities.functions`.
 * @param {string[]} crossCutting - Names of cross-cutting concerns.
 * @param {*} stats - Accepted for interface compatibility; not used here.
 * @param {Object} [llmOpts] - Base options forwarded to callLLM(); may carry
 *   `confluenceCtx`.
 * @param {Object} [opts]
 * @param {Object} [opts.deps] - Dependency map keyed by "source→target".
 * @param {Object} [opts.confluenceCtx] - Seed docs, used when `llmOpts` has none.
 * @returns {Promise<string>} The generated overview paragraph.
 */
async function describeArchitecture(subsystems, crossCutting, stats, llmOpts, opts = {}) {
  const deps = opts.deps || {};
  const confluenceCtx = llmOpts?.confluenceCtx || opts.confluenceCtx || {};

  // Pull in the most relevant architecture docs
  const seedContent = findRelevantContext(confluenceCtx, [
    'system-architecture', 'architecture', 'platform-concepts', 'design-decisions',
    'technology-choices', 'multi-cloud', 'hub', 'spoke', 'layered', 'argocd',
    'repository-structure', 'naming-conventions', 'release-process',
  ], 15000);

  const depKeys = Object.keys(deps);
  const subList = subsystems
    .slice(0, 20)
    .map((s) => {
      const outDeps = depKeys
        .filter((key) => key.startsWith(`${s.name}→`))
        .map((key) => key.split('→')[1]);
      const depSuffix = outDeps.length > 0 ? `, depends on: ${outDeps.join(', ')}` : '';
      return `- ${s.name} (${s.kind}): ${s.entities.functions} functions, ${s.files.length} files${depSuffix}`;
    })
    .join('\n');

  // One line per anomalous subsystem (its first anomaly only), at most five.
  const anomalySummary = subsystems
    .map((s) => {
      const found = detectAnomalies(s, deps);
      return found.length > 0 ? `- ${s.name}: ${found[0]}` : null;
    })
    .filter(Boolean)
    .slice(0, 5)
    .join('\n');

  const prompt = `Write a 5-8 sentence architecture overview for this software system. Explain the architectural rationale: WHY the system is organized this way, WHY certain subsystems are cross-cutting, and WHY some subsystems have unusual structures.
${seedContent ? `\nREFERENCE DOCUMENTATION (use this as authoritative context — incorporate the layered architecture model, hub/spoke deployment pattern, multi-cloud strategy, naming conventions, CIDR allocation, ArgoCD ownership model, release patterns, and any other architectural details from this content):\n${seedContent}\n` : ''}
Total subsystems: ${subsystems.length}
Cross-cutting concerns: ${crossCutting.join(', ') || 'none detected'}
Subsystems:
${subList}
${anomalySummary ? `Structural anomalies:\n${anomalySummary}` : ''}
Write ONLY the overview paragraph, no heading. Focus on explaining the architecture, not just listing components.`;

  return callLLM(prompt, { ...llmOpts, maxTokens: 1536 });
}
feat: repo-agnostic refactor (BMad spec-test-build loop)
- NEW: repo-profiler.js — deterministic archetype detection (Infra, Frontend, Backend, etc.)
- NEW: extract-dynamic.js — generic extractor replacing hardcoded Foxtrot patterns
- NEW: eval-generator.js — dynamic ground-truth question generation from any repo graph
- NEW: specs/bmad-agnostic-refactor-spec.md — full BMad spec with acceptance criteria
- REFACTORED: prose.js — two-pass LLM synthesis with rich context (shared secrets, ports, service refs)
- REFACTORED: sysdoc.js — wired repo-profiler + extract-dynamic, --legacy escape hatch
- REFACTORED: wiggum-v2.sh — uses eval-generator before benchmarks
- FIXED: graph.js — _edgeSet rebuilt on loadSnapshot() (edge dedup was broken)
- FIXED: graph.js — recursive sortKeys() for deep equality in diffing
- FIXED: prose.js — robust JSON array extraction from LLM output
- FIXED: ratchet.js — syntax validation (node --check) before saving LLM mutations
- FIXED: extract-dynamic.js — centralized state services regex, added console.warn for silent failures
- TESTS: test-eval-generator, test-repo-profiler, test-synthesis-quality + mock fixtures
Eval: 81.5% on Foxtrot (fully repo-agnostic, no hardcoded reference pages)
BMad reviews: Architect B+, Dev Lead B-, TEA B-
2026-03-11 14:40:31 +00:00
/**
 * Reduce an LLM-proposed file name to a Markdown file name that stays inside
 * the target directory: directory components are discarded and `.md` is
 * appended when missing.
 */
function toSafeMarkdownFilename(rawName, fallbackStem) {
  const base = path.basename(String(rawName ?? '').trim());
  if (!base || base === '.' || base === '..') return `${fallbackStem}.md`;
  return base.endsWith('.md') ? base : `${base}.md`;
}

/**
 * Turn the page-planning LLM response into a list of { title, filename, focus }
 * entries. Falls back to a single overview page when the response is not a
 * JSON array; entries without a string title and filename are dropped.
 */
function parsePageDefinitions(rawResponse) {
  const fallbackPages = [
    { title: 'System Overview', filename: 'overview.md', focus: 'General facts' },
  ];
  const text = typeof rawResponse === 'string' ? rawResponse : '';

  // Basic cleanup in case the LLM wrapped the array in prose or a code fence.
  const match = text.match(/\[[\s\S]*\]/);
  const candidate = match ? match[0] : text;

  let parsed;
  try {
    parsed = JSON.parse(candidate);
  } catch (e) {
    console.error('Failed to parse pages JSON:', candidate);
    return fallbackPages;
  }
  if (!Array.isArray(parsed)) {
    console.error('Failed to parse pages JSON:', candidate);
    return fallbackPages;
  }

  return parsed
    .filter((page) => page && typeof page.title === 'string' && typeof page.filename === 'string')
    .map((page) => ({
      title: page.title,
      filename: page.filename,
      focus: typeof page.focus === 'string' ? page.focus : page.title,
    }));
}

/**
 * Render the extracted facts (subsystems, Helm charts and their interactions,
 * plus a 4000-character excerpt of the deep extraction data) as the text
 * block that is embedded in every synthesis prompt.
 */
function buildSynthesisContext(agentKB, deepData) {
  const kb = agentKB || {};
  const dd = deepData || {};

  // Extract helm interaction details from agentKB structure
  const rawCharts = kb.reference?.helm?.charts || kb.charts || [];
  const helmCharts = rawCharts.map((c) => ({
    name: c.name,
    dir: c.path || c.dir,
    version: c.version,
    appVersion: c.appVersion,
    deps: c.dependencies || [],
    resourceCount: c.resourceCount || 0,
    valuesCount: (c.valuesKeys || []).length || c.valuesCount || 0,
    interactions: c.interactions || [],
  }));

  // Single pass over all interactions. Maps/Sets are used because the keys
  // come from repository content and must not collide with Object.prototype,
  // and so that "shared" means used by more than one distinct chart.
  const configUsers = new Map();
  const portUsers = new Map();
  const kindCounts = new Map();
  const svcRefs = [];
  const addUser = (index, key, chartName) => {
    if (!index.has(key)) index.set(key, new Set());
    index.get(key).add(chartName);
  };
  for (const chart of helmCharts) {
    for (const interaction of chart.interactions) {
      switch (interaction.type) {
        case 'config-ref':
          addUser(configUsers, interaction.target, chart.name);
          break;
        case 'k8s-service':
          svcRefs.push(`${chart.name} → ${interaction.target}`);
          break;
        case 'port':
          if (interaction.target !== '0') addUser(portUsers, interaction.target, chart.name);
          break;
        case 'resource-kind':
          kindCounts.set(interaction.target, (kindCounts.get(interaction.target) || 0) + 1);
          break;
        default:
          break;
      }
    }
  }

  const sharedSecrets = [...configUsers]
    .filter(([, users]) => users.size > 1)
    .map(([name, users]) => `${name}: ${[...users].join(', ')}`);
  const sharedPorts = [...portUsers]
    .filter(([, users]) => users.size > 1)
    .map(([port, users]) => `Port ${port}: ${[...users].join(', ')}`);
  const kindLines = [...kindCounts]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 15)
    .map(([kind, count]) => `- ${kind}: ${count}`)
    .join('\n');

  // Subsystem summary from agentKB structure
  const rawSubs = kb.reference?.subsystems || kb.subsystems || [];
  const subsystems = rawSubs.map((s) => ({
    name: s.name,
    files: Array.isArray(s.files) ? s.files.length : (s.fileCount || s.files || 0),
    functions: (s.entities && s.entities.functions) || s.functions || 0,
    modules: (s.entities && s.entities.modules) || s.modules || 0,
  }));

  // Dependencies may be plain names or objects; show a readable name either way.
  const depLabel = (dep) => (dep && typeof dep === 'object' ? dep.name ?? JSON.stringify(dep) : dep);
  const chartLines = helmCharts
    .slice(0, 30)
    .map((c) => `- ${c.name} (${c.dir}): v${c.version}, appVersion=${c.appVersion}, ${c.resourceCount} K8s resources, ${c.valuesCount} config keys, deps=[${c.deps.map(depLabel).join(',')}]`)
    .join('\n');

  return [
    '',
    'EXTRACTED SYSTEM FACTS:',
    `## Subsystems (${subsystems.length} total)`,
    subsystems.map((s) => `- ${s.name}: ${s.files} files, ${s.functions} functions, ${s.modules} modules`).join('\n'),
    `## Helm Charts (${helmCharts.length} total)`,
    chartLines,
    helmCharts.length > 30 ? `... and ${helmCharts.length - 30} more charts` : '',
    '## Shared Secrets & ConfigMaps (used by multiple charts)',
    sharedSecrets.length > 0 ? sharedSecrets.join('\n') : 'None detected',
    '## Service-to-Service References',
    svcRefs.length > 0 ? svcRefs.join('\n') : 'None detected',
    '## Shared Network Ports (used by multiple charts)',
    sharedPorts.length > 0 ? sharedPorts.join('\n') : 'None detected',
    '## K8s Resource Types',
    kindLines || 'See individual chart docs',
    '## Deep Extraction Data',
    JSON.stringify(dd).substring(0, 4000),
    '',
  ].join('\n');
}

/**
 * Synthesize generic reference pages using the extracted facts.
 *
 * Two-pass synthesis: the LLM is first asked which reference pages to create,
 * then once per page for the Markdown content, and finally for an `index.md`
 * routing table. All files are written to `<outDir>/reference`, which is
 * created when missing. LLM calls go through `module.exports.callLLM`, so the
 * exported function can be substituted (e.g. in tests).
 *
 * File names proposed by the LLM are reduced to their base name so a response
 * cannot write outside the reference directory.
 *
 * @param {Object} agentKB - Knowledge base; charts are read from
 *   `reference.helm.charts` or `charts`, subsystems from
 *   `reference.subsystems` or `subsystems`.
 * @param {Object} deepData - Extra extraction data; a JSON excerpt is embedded
 *   in the prompts.
 * @param {string} outDir - Output root directory.
 * @param {string} archetype - Repository archetype label used in the prompts.
 * @param {Object} [llmOpts] - Base options forwarded to callLLM().
 * @returns {Promise<void>}
 * @throws Rejects when a page or index LLM call rejects, or on file-system
 *   errors. A failing page-planning call is logged and yields no pages.
 */
async function synthesizeReferencePages(agentKB, deepData, outDir, archetype, llmOpts) {
  console.log(`Synthesizing dynamic reference pages via LLM for archetype: ${archetype}...`);

  const refDir = path.join(outDir, 'reference');
  fs.mkdirSync(refDir, { recursive: true });

  // Build rich context from agentKB and deepData for synthesis
  const contextStr = buildSynthesisContext(agentKB, deepData);

  const planPrompt = `You are a Senior Technical Writer analyzing a repository with the archetype: "${archetype}".
Given these extracted facts and this repo archetype, what 5 reference pages should be created?
IMPORTANT: You MUST include pages that cover ALL of the following topics (spread across the 5 pages):
- Shared secrets/ConfigMaps and which charts use them
- Service-to-service references between charts
- Network ports used by charts (especially shared ports)
- Kubernetes resource types generated across charts
- Chart dependencies and versions
- Subsystem architecture and cross-cutting concerns
${contextStr}
Respond with ONLY a valid JSON array of objects. Each object must have:
- "title": The human-readable title of the page
- "filename": The markdown filename (e.g. "network-architecture.md")
- "focus": A brief description of what to focus on in this page.
Example for Infrastructure:
[
  {"title": "Service Contracts & Interactions", "filename": "service-contracts.md", "focus": "Shared secrets, ConfigMaps, service-to-service references, and network ports across charts"},
  {"title": "Helm Charts & Dependencies", "filename": "helm-charts-dependencies.md", "focus": "Chart versions, dependencies, and configuration surface"}
]
`;

  let planResponse = '[]';
  try {
    planResponse = await module.exports.callLLM(planPrompt, { ...llmOpts, maxTokens: 1000 });
  } catch (e) {
    console.error('Failed to get page definitions from LLM:', e);
  }
  const pages = parsePageDefinitions(planResponse);

  const generatedFiles = [];
  for (const [index, page] of pages.entries()) {
    const pagePrompt = `You are a Senior Technical Writer. Generate a "${page.title}" reference page in Markdown for a "${archetype}" repository.
Focus on: ${page.focus}
CRITICAL INSTRUCTIONS:
- Include ALL specific data points from the extracted facts below. Do not summarize or omit details.
- List every shared secret/ConfigMap with the exact chart names that use it.
- List every service-to-service reference with source and target.
- List every shared network port with the exact chart names.
- List Kubernetes resource types with counts.
- List chart versions and appVersions.
- Use tables and bullet lists for data-dense sections.
- Do NOT invent facts. Only use what is in the extracted data below.
${contextStr}
Respond with ONLY the Markdown content. Use # ${page.title} as the main title.`;

    const content = await module.exports.callLLM(pagePrompt, { ...llmOpts, maxTokens: 4000, title: page.title });
    // The file name is LLM output: keep only its base name.
    const filename = toSafeMarkdownFilename(page.filename, `page-${index + 1}`);
    fs.writeFileSync(path.join(refDir, filename), String(content ?? ''));
    generatedFiles.push({ filename, title: page.title, focus: page.focus });
  }

  // Generate Index
  const indexPrompt = `You are a Senior Technical Writer. Create a "reference/index.md" routing table.
I have generated the following files for this ${archetype} repository:
${generatedFiles.map((f) => `- \`reference/${f.filename}\` (${f.focus})`).join('\n')}
Create a markdown page with two sections:
## Quick Lookup by Topic
(A table mapping specific topics/keywords to the exact file path)
## File Descriptions
(A table describing what is in each file)
Respond with ONLY the Markdown content.`;

  const indexMd = await module.exports.callLLM(indexPrompt, { ...llmOpts, maxTokens: 1500 });
  fs.writeFileSync(path.join(refDir, 'index.md'), String(indexMd ?? ''));

  console.log(`Dynamic reference pages and index synthesized for ${archetype}.`);
}
module . exports = { callLLM , describeSubsystem , describeFlow , describeContract , describeArchitecture , detectAnomalies , loadConfluenceContext , findRelevantContext , synthesizeReferencePages } ;