const http = require('http'); const https = require('https'); const fs = require('fs'); const path = require('path'); /** * Phase 6+7: LLM Prose Generator * Generates human-readable prose for system documentation using Claude Sonnet. * All structural analysis is deterministic — LLM is ONLY for prose formatting. */ const DEFAULT_URL = process.env.LLM_URL || 'http://192.168.86.11:8000/v1'; const DEFAULT_MODEL = process.env.LLM_MODEL || 'claude-sonnet-4.6'; const DEFAULT_API_KEY = process.env.LLM_API_KEY || 'my-super-secret-password-123'; /** * Load Confluence reference + explanation docs as seed context. * Returns a map of { topic → content } for injection into LLM prompts. */ function loadConfluenceContext(confluenceDir) { if (!confluenceDir || !fs.existsSync(confluenceDir)) return {}; const ctx = {}; for (const section of ['reference', 'explanation']) { const dir = path.join(confluenceDir, section); if (!fs.existsSync(dir)) continue; for (const f of fs.readdirSync(dir).filter(f => f.endsWith('.md'))) { const key = f.replace('.md', ''); const content = fs.readFileSync(path.join(dir, f), 'utf8').trim(); if (content.length > 0) ctx[key] = content; } } return ctx; } /** * Find relevant confluence docs for a given topic by keyword matching. * Returns concatenated content, capped at maxChars. */ function findRelevantContext(confluenceCtx, keywords, maxChars = 12000) { if (!confluenceCtx || Object.keys(confluenceCtx).length === 0) return ''; const scored = Object.entries(confluenceCtx).map(([key, content]) => { let score = 0; const lowerKey = key.toLowerCase(); const lowerContent = content.toLowerCase().substring(0, 2000); for (const kw of keywords) { const lkw = kw.toLowerCase(); if (lowerKey.includes(lkw)) score += 10; const matches = (lowerContent.match(new RegExp(lkw, 'g')) || []).length; score += Math.min(matches, 5); } return { key, content, score }; }).filter(s => s.score > 0).sort((a, b) => b.score - a.score); let result = ''; for (const s of scored) { if (result.length + s.content.length > maxChars) { const remaining = maxChars - result.length; if (remaining > 200) result += `\n\n--- ${s.key} ---\n${s.content.substring(0, remaining)}...\n`; break; } result += `\n\n--- ${s.key} ---\n${s.content}\n`; } return result; } /** * Call an OpenAI-compatible chat completions API. */ function callLLM(prompt, opts = {}) { const baseUrl = opts.url || DEFAULT_URL; const model = opts.model || DEFAULT_MODEL; const apiKey = opts.apiKey || DEFAULT_API_KEY; const maxTokens = opts.maxTokens || 1024; const temperature = opts.temperature || 0.3; return new Promise((resolve, reject) => { const url = new URL('/v1/chat/completions', baseUrl.replace(/\/v1\/?$/, '')); const body = JSON.stringify({ model, messages: [ { role: 'system', content: 'You are a senior software architect writing concise, precise technical documentation. Write in present tense. Be specific about domain logic, not syntax. No filler.' }, { role: 'user', content: prompt }, ], max_tokens: maxTokens, temperature, }); const client = url.protocol === 'https:' ? https : http; const req = client.request(url, { method: 'POST', headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` }, }, (res) => { let data = ''; res.on('data', c => data += c); res.on('end', () => { try { if (res.statusCode >= 400) { console.error('LLM API Error:', res.statusCode, data); return resolve(''); } const parsed = JSON.parse(data); resolve(parsed.choices?.[0]?.message?.content || ''); } catch (e) { reject(new Error(`LLM parse error: ${e.message} — raw: ${data.substring(0, 200)}`)); } }); }); req.on('error', reject); req.setTimeout(120000, () => { req.destroy(); reject(new Error('LLM timeout (120s)')); }); req.write(body); req.end(); }); } /** * Detect structural anomalies in a subsystem. */ function detectAnomalies(sub, deps) { const anomalies = []; if (sub.entities.functions === 0 && sub.files.length > 5) { anomalies.push(`Zero functions despite ${sub.files.length} files — likely a configuration-only or IaC subsystem`); } if (sub.entities.classes === 0 && sub.entities.functions > 50) { anomalies.push(`${sub.entities.functions} functions with no classes — procedural/script-heavy architecture`); } // Fan-in/fan-out analysis const outgoing = Object.entries(deps).filter(([k]) => k.startsWith(sub.name + '→')); const incoming = Object.entries(deps).filter(([k]) => k.endsWith('→' + sub.name)); if (outgoing.length > 5) { anomalies.push(`High fan-out: depends on ${outgoing.length} other subsystems — potential orchestrator or integration layer`); } if (incoming.length > 5) { anomalies.push(`High fan-in: ${incoming.length} subsystems depend on this — likely a shared library or core service`); } if (outgoing.length === 0 && incoming.length === 0 && sub.files.length > 3) { anomalies.push(`Isolated subsystem with no cross-subsystem dependencies — may be self-contained tooling or unused`); } return anomalies; } /** * Generate an explanatory prose overview for a subsystem. * Includes dependency rationale and anomaly explanations. */ async function describeSubsystem(sub, deps, llmOpts) { const outgoing = Object.entries(deps) .filter(([k]) => k.startsWith(sub.name + '→')) .map(([k, v]) => ({ target: k.split('→')[1], calls: v.calls, imports: v.imports })); const incoming = Object.entries(deps) .filter(([k]) => k.endsWith('→' + sub.name)) .map(([k, v]) => ({ source: k.split('→')[0], calls: v.calls, imports: v.imports })); const anomalies = detectAnomalies(sub, deps); const depContext = []; if (outgoing.length > 0) { depContext.push(`Depends on: ${outgoing.map(d => `${d.target} (${d.calls} calls, ${d.imports} imports)`).join(', ')}`); } if (incoming.length > 0) { depContext.push(`Depended on by: ${incoming.map(d => `${d.source} (${d.calls} calls, ${d.imports} imports)`).join(', ')}`); } // Confluence seed context const confluenceCtx = llmOpts.confluenceCtx || {}; const seedContent = findRelevantContext(confluenceCtx, [sub.name, sub.name.replace(/-/g, ' '), sub.kind], 8000); const prompt = `Write a 3-5 sentence technical overview of the "${sub.name}" subsystem. You MUST explain WHY it depends on its upstream subsystems and WHY downstream subsystems depend on it. If there are structural anomalies, explain their architectural rationale. ${seedContent ? `\nREFERENCE DOCUMENTATION (use this as authoritative context — incorporate key architectural details, naming conventions, deployment patterns, and design rationale from this content):\n${seedContent}\n` : ''} Facts: - Kind: ${sub.kind} - Files: ${sub.files.length} - Functions: ${sub.entities.functions}, Classes: ${sub.entities.classes}, Modules: ${sub.entities.modules} - Public exports: ${sub.publicExports.slice(0, 15).join(', ')}${sub.publicExports.length > 15 ? ` (+${sub.publicExports.length - 15} more)` : ''} ${depContext.length > 0 ? `- Dependency matrix:\n ${depContext.join('\n ')}` : '- No cross-subsystem dependencies (explain why this subsystem is self-contained)'} ${anomalies.length > 0 ? `- Structural anomalies:\n ${anomalies.join('\n ')}` : ''} Write ONLY the overview paragraph, no heading. Focus on architectural rationale, not just listing components.`; return callLLM(prompt, llmOpts); } /** * Generate a prose narrative for a data flow trace. */ async function describeFlow(flowResult, llmOpts) { const steps = flowResult.flow.slice(0, 20).map((s, i) => `${i + 1}. [${s.subsystem}] ${s.entity}${s.crossedVia ? ` (crosses via ${s.crossedVia})` : ''}` ).join('\n'); const prompt = `Write a 3-5 sentence narrative describing this data flow through the system. Entry point: ${flowResult.entryPoint} Subsystem sequence: ${flowResult.subsystemSequence.join(' → ')} ${flowResult.excludedNodes.length > 0 ? `Excluded (high fan-in): ${flowResult.excludedNodes.slice(0, 5).join(', ')}` : ''} ${flowResult.cyclesDetected.length > 0 ? `Cycles detected: ${flowResult.cyclesDetected.length}` : ''} Steps: ${steps}${flowResult.flow.length > 20 ? `\n... (+${flowResult.flow.length - 20} more steps)` : ''} Write ONLY the narrative paragraph, no heading. Explain what happens when this entry point is triggered and how data moves across subsystem boundaries.`; return callLLM(prompt, llmOpts); } /** * Generate a prose description for a contract (interface/type/enum). */ async function describeContract(contract, xref, llmOpts) { const usedBy = xref?.[contract.name]?.usedBy || []; let details = ''; if (contract.type === 'Interface' && contract.fields) { details = `Fields: ${contract.fields.map(f => `${f.name}: ${f.type}`).join(', ')}`; if (contract.extends) details += `\nExtends: ${contract.extends.join(', ')}`; } else if (contract.type === 'Enum' && contract.members) { details = `Members: ${contract.members.join(', ')}`; } else if (contract.type.startsWith('Helm')) { // Helm contract types if (contract.fields) { details = `Fields: ${contract.fields.slice(0, 20).map(f => `${f.name}: ${f.type}`).join(', ')}`; if (contract.fields.length > 20) details += ` (+${contract.fields.length - 20} more)`; } } const typeLabel = contract.type.startsWith('Helm') ? `Helm ${contract.type.replace('Helm', '').toLowerCase()} contract` : `TypeScript ${contract.type.toLowerCase()}`; const prompt = `Write a 1-2 sentence description of this ${typeLabel}. Name: ${contract.name} Type: ${contract.type} Defined in: ${contract.id} Visibility: ${contract.visibility} ${details} ${usedBy.length > 0 ? `Used by subsystems: ${usedBy.join(', ')}` : 'Not referenced cross-subsystem'} Write ONLY the description, no heading. Do not ask for more information.`; return callLLM(prompt, { ...llmOpts, maxTokens: 256 }); } /** * Generate a system-level architecture overview with cross-cutting explanations. */ async function describeArchitecture(subsystems, crossCutting, stats, llmOpts, opts = {}) { const deps = opts.deps || {}; const confluenceCtx = llmOpts.confluenceCtx || opts.confluenceCtx || {}; // Pull in the most relevant architecture docs const seedContent = findRelevantContext(confluenceCtx, [ 'system-architecture', 'architecture', 'platform-concepts', 'design-decisions', 'technology-choices', 'multi-cloud', 'hub', 'spoke', 'layered', 'argocd', 'repository-structure', 'naming-conventions', 'release-process' ], 15000); const subList = subsystems.slice(0, 20).map(s => { const outDeps = Object.entries(deps) .filter(([k]) => k.startsWith(s.name + '→')) .map(([k]) => k.split('→')[1]); return `- ${s.name} (${s.kind}): ${s.entities.functions} functions, ${s.files.length} files${outDeps.length > 0 ? `, depends on: ${outDeps.join(', ')}` : ''}`; }).join('\n'); const anomalySummary = subsystems .map(s => { const a = detectAnomalies(s, deps); return a.length > 0 ? `- ${s.name}: ${a[0]}` : null; }) .filter(Boolean) .slice(0, 5) .join('\n'); const prompt = `Write a 5-8 sentence architecture overview for this software system. Explain the architectural rationale: WHY the system is organized this way, WHY certain subsystems are cross-cutting, and WHY some subsystems have unusual structures. ${seedContent ? `\nREFERENCE DOCUMENTATION (use this as authoritative context — incorporate the layered architecture model, hub/spoke deployment pattern, multi-cloud strategy, naming conventions, CIDR allocation, ArgoCD ownership model, release patterns, and any other architectural details from this content):\n${seedContent}\n` : ''} Total subsystems: ${subsystems.length} Cross-cutting concerns: ${crossCutting.join(', ') || 'none detected'} Subsystems: ${subList} ${anomalySummary ? `Structural anomalies:\n${anomalySummary}` : ''} Write ONLY the overview paragraph, no heading. Focus on explaining the architecture, not just listing components.`; return callLLM(prompt, { ...llmOpts, maxTokens: 1536 }); } /** * Synthesize generic reference pages using the extracted facts. */ async function synthesizeReferencePages(agentKB, deepData, outDir, archetype, llmOpts) { const fs = require('fs'); const path = require('path'); console.log(`Synthesizing dynamic reference pages via LLM for archetype: ${archetype}...`); const refDir = path.join(outDir, 'reference'); if (!fs.existsSync(refDir)) fs.mkdirSync(refDir, { recursive: true }); // Build rich context from agentKB and deepData for synthesis const kb = agentKB || {}; const dd = deepData || {}; // Extract helm interaction details from agentKB structure const rawCharts = (kb.reference && kb.reference.helm && kb.reference.helm.charts) || kb.charts || []; const helmCharts = rawCharts.map(c => ({ name: c.name, dir: c.path || c.dir, version: c.version, appVersion: c.appVersion, deps: c.dependencies || [], resourceCount: c.resourceCount || 0, valuesCount: (c.valuesKeys || []).length || c.valuesCount || 0, interactions: c.interactions || [] })); // Shared secrets/configmaps const configUsers = {}; for (const c of helmCharts) { for (const i of c.interactions) { if (i.type === 'config-ref') { if (!configUsers[i.target]) configUsers[i.target] = []; configUsers[i.target].push(c.name); } } } const sharedSecrets = Object.entries(configUsers) .filter(([, users]) => users.length > 1) .map(([name, users]) => `${name}: ${[...new Set(users)].join(', ')}`); // Service-to-service refs const svcRefs = []; for (const c of helmCharts) { for (const i of c.interactions) { if (i.type === 'k8s-service') svcRefs.push(`${c.name} → ${i.target}`); } } // Shared ports const portMap = {}; for (const c of helmCharts) { for (const i of c.interactions) { if (i.type === 'port' && i.target !== '0') { if (!portMap[i.target]) portMap[i.target] = []; if (!portMap[i.target].includes(c.name)) portMap[i.target].push(c.name); } } } const sharedPorts = Object.entries(portMap) .filter(([, users]) => users.length > 1) .map(([port, users]) => `Port ${port}: ${users.join(', ')}`); // Resource type breakdown const kindCounts = {}; for (const c of helmCharts) { for (const i of c.interactions) { if (i.type === 'resource-kind') { kindCounts[i.target] = (kindCounts[i.target] || 0) + 1; } } } // Subsystem summary from agentKB structure const rawSubs = (kb.reference && kb.reference.subsystems) || kb.subsystems || []; const subsystems = rawSubs.map(s => ({ name: s.name, files: Array.isArray(s.files) ? s.files.length : (s.fileCount || s.files || 0), functions: (s.entities && s.entities.functions) || s.functions || 0, modules: (s.entities && s.entities.modules) || s.modules || 0 })); const contextStr = ` EXTRACTED SYSTEM FACTS: ## Subsystems (${subsystems.length} total) ${subsystems.map(s => `- ${s.name}: ${s.files} files, ${s.functions} functions, ${s.modules} modules`).join('\n')} ## Helm Charts (${helmCharts.length} total) ${helmCharts.slice(0, 30).map(c => `- ${c.name} (${c.dir}): v${c.version}, appVersion=${c.appVersion}, ${c.resourceCount} K8s resources, ${c.valuesCount} config keys, deps=[${c.deps.join(',')}]`).join('\n')} ${helmCharts.length > 30 ? `... and ${helmCharts.length - 30} more charts` : ''} ## Shared Secrets & ConfigMaps (used by multiple charts) ${sharedSecrets.length > 0 ? sharedSecrets.join('\n') : 'None detected'} ## Service-to-Service References ${svcRefs.length > 0 ? svcRefs.join('\n') : 'None detected'} ## Shared Network Ports (used by multiple charts) ${sharedPorts.length > 0 ? sharedPorts.join('\n') : 'None detected'} ## K8s Resource Types ${Object.entries(kindCounts).sort((a,b) => b[1]-a[1]).slice(0,15).map(([k,v]) => `- ${k}: ${v}`).join('\n') || 'See individual chart docs'} ## Deep Extraction Data ${JSON.stringify(dd).substring(0, 4000)} `; const pagePrompt = `You are a Senior Technical Writer analyzing a repository with the archetype: "${archetype}". Given these extracted facts and this repo archetype, what 5 reference pages should be created? IMPORTANT: You MUST include pages that cover ALL of the following topics (spread across the 5 pages): - Shared secrets/ConfigMaps and which charts use them - Service-to-service references between charts - Network ports used by charts (especially shared ports) - Kubernetes resource types generated across charts - Chart dependencies and versions - Subsystem architecture and cross-cutting concerns ${contextStr} Respond with ONLY a valid JSON array of objects. Each object must have: - "title": The human-readable title of the page - "filename": The markdown filename (e.g. "network-architecture.md") - "focus": A brief description of what to focus on in this page. Example for Infrastructure: [ { "title": "Service Contracts & Interactions", "filename": "service-contracts.md", "focus": "Shared secrets, ConfigMaps, service-to-service references, and network ports across charts" }, { "title": "Helm Charts & Dependencies", "filename": "helm-charts-dependencies.md", "focus": "Chart versions, dependencies, and configuration surface" } ] `; let pagesJson = '[]'; try { pagesJson = await module.exports.callLLM(pagePrompt, { ...llmOpts, maxTokens: 1000 }); // basic cleanup in case the LLM returned markdown blocks const match = pagesJson.match(/\[[\s\S]*\]/); if (match) pagesJson = match[0]; } catch (e) { console.error('Failed to get page definitions from LLM:', e); } let pages = []; try { pages = JSON.parse(pagesJson); } catch (e) { console.error('Failed to parse pages JSON:', pagesJson); pages = [ { title: 'System Overview', filename: 'overview.md', focus: 'General facts' } ]; } const generatedFiles = []; for (const page of pages) { const pagePrompt = `You are a Senior Technical Writer. Generate a "${page.title}" reference page in Markdown for a "${archetype}" repository. Focus on: ${page.focus} CRITICAL INSTRUCTIONS: - Include ALL specific data points from the extracted facts below. Do not summarize or omit details. - List every shared secret/ConfigMap with the exact chart names that use it. - List every service-to-service reference with source and target. - List every shared network port with the exact chart names. - List Kubernetes resource types with counts. - List chart versions and appVersions. - Use tables and bullet lists for data-dense sections. - Do NOT invent facts. Only use what is in the extracted data below. ${contextStr} Respond with ONLY the Markdown content. Use # ${page.title} as the main title.`; const content = await module.exports.callLLM(pagePrompt, { ...llmOpts, maxTokens: 4000, title: page.title }); const filename = page.filename.endsWith('.md') ? page.filename : `${page.filename}.md`; fs.writeFileSync(path.join(refDir, filename), content); generatedFiles.push({ filename, title: page.title, focus: page.focus }); } // Generate Index const indexPrompt = `You are a Senior Technical Writer. Create a "reference/index.md" routing table. I have generated the following files for this ${archetype} repository: ${generatedFiles.map(f => `- \`reference/${f.filename}\` (${f.focus})`).join('\n')} Create a markdown page with two sections: ## Quick Lookup by Topic (A table mapping specific topics/keywords to the exact file path) ## File Descriptions (A table describing what is in each file) Respond with ONLY the Markdown content.`; const indexMd = await module.exports.callLLM(indexPrompt, { ...llmOpts, maxTokens: 1500 }); fs.writeFileSync(path.join(refDir, 'index.md'), indexMd); console.log(`Dynamic reference pages and index synthesized for ${archetype}.`); } module.exports = { callLLM, describeSubsystem, describeFlow, describeContract, describeArchitecture, detectAnomalies, loadConfluenceContext, findRelevantContext, synthesizeReferencePages };