const http = require('http'); const https = require('https'); const fs = require('fs'); const path = require('path'); /** * Phase 6+7: LLM Prose Generator * Generates human-readable prose for system documentation using Claude Sonnet. * All structural analysis is deterministic — LLM is ONLY for prose formatting. */ const DEFAULT_URL = process.env.LLM_URL || 'http://192.168.86.11:8000/v1'; const DEFAULT_MODEL = process.env.LLM_MODEL || 'claude-sonnet-4.6'; const DEFAULT_API_KEY = process.env.LLM_API_KEY || 'my-super-secret-password-123'; /** * Load Confluence reference + explanation docs as seed context. * Returns a map of { topic → content } for injection into LLM prompts. */ function loadConfluenceContext(confluenceDir) { if (!confluenceDir || !fs.existsSync(confluenceDir)) return {}; const ctx = {}; for (const section of ['reference', 'explanation']) { const dir = path.join(confluenceDir, section); if (!fs.existsSync(dir)) continue; for (const f of fs.readdirSync(dir).filter(f => f.endsWith('.md'))) { const key = f.replace('.md', ''); const content = fs.readFileSync(path.join(dir, f), 'utf8').trim(); if (content.length > 0) ctx[key] = content; } } return ctx; } /** * Find relevant confluence docs for a given topic by keyword matching. * Returns concatenated content, capped at maxChars. */ function findRelevantContext(confluenceCtx, keywords, maxChars = 12000) { if (!confluenceCtx || Object.keys(confluenceCtx).length === 0) return ''; const scored = Object.entries(confluenceCtx).map(([key, content]) => { let score = 0; const lowerKey = key.toLowerCase(); const lowerContent = content.toLowerCase().substring(0, 2000); for (const kw of keywords) { const lkw = kw.toLowerCase(); if (lowerKey.includes(lkw)) score += 10; const matches = (lowerContent.match(new RegExp(lkw, 'g')) || []).length; score += Math.min(matches, 5); } return { key, content, score }; }).filter(s => s.score > 0).sort((a, b) => b.score - a.score); let result = ''; for (const s of scored) { if (result.length + s.content.length > maxChars) { const remaining = maxChars - result.length; if (remaining > 200) result += `\n\n--- ${s.key} ---\n${s.content.substring(0, remaining)}...\n`; break; } result += `\n\n--- ${s.key} ---\n${s.content}\n`; } return result; } /** * Call an OpenAI-compatible chat completions API. */ function callLLM(prompt, opts = {}) { const baseUrl = opts.url || DEFAULT_URL; const model = opts.model || DEFAULT_MODEL; const apiKey = opts.apiKey || DEFAULT_API_KEY; const maxTokens = opts.maxTokens || 1024; const temperature = opts.temperature || 0.3; return new Promise((resolve, reject) => { const url = new URL('/v1/chat/completions', baseUrl.replace(/\/v1\/?$/, '')); const body = JSON.stringify({ model, messages: [ { role: 'system', content: 'You are a senior software architect writing concise, precise technical documentation. Write in present tense. Be specific about domain logic, not syntax. No filler.' }, { role: 'user', content: prompt }, ], max_tokens: maxTokens, temperature, }); const client = url.protocol === 'https:' ? https : http; const req = client.request(url, { method: 'POST', headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` }, }, (res) => { let data = ''; res.on('data', c => data += c); res.on('end', () => { try { const parsed = JSON.parse(data); resolve(parsed.choices?.[0]?.message?.content || ''); } catch (e) { reject(new Error(`LLM parse error: ${e.message} — raw: ${data.substring(0, 200)}`)); } }); }); req.on('error', reject); req.setTimeout(120000, () => { req.destroy(); reject(new Error('LLM timeout (120s)')); }); req.write(body); req.end(); }); } /** * Detect structural anomalies in a subsystem. */ function detectAnomalies(sub, deps) { const anomalies = []; if (sub.entities.functions === 0 && sub.files.length > 5) { anomalies.push(`Zero functions despite ${sub.files.length} files — likely a configuration-only or IaC subsystem`); } if (sub.entities.classes === 0 && sub.entities.functions > 50) { anomalies.push(`${sub.entities.functions} functions with no classes — procedural/script-heavy architecture`); } // Fan-in/fan-out analysis const outgoing = Object.entries(deps).filter(([k]) => k.startsWith(sub.name + '→')); const incoming = Object.entries(deps).filter(([k]) => k.endsWith('→' + sub.name)); if (outgoing.length > 5) { anomalies.push(`High fan-out: depends on ${outgoing.length} other subsystems — potential orchestrator or integration layer`); } if (incoming.length > 5) { anomalies.push(`High fan-in: ${incoming.length} subsystems depend on this — likely a shared library or core service`); } if (outgoing.length === 0 && incoming.length === 0 && sub.files.length > 3) { anomalies.push(`Isolated subsystem with no cross-subsystem dependencies — may be self-contained tooling or unused`); } return anomalies; } /** * Generate an explanatory prose overview for a subsystem. * Includes dependency rationale and anomaly explanations. */ async function describeSubsystem(sub, deps, llmOpts) { const outgoing = Object.entries(deps) .filter(([k]) => k.startsWith(sub.name + '→')) .map(([k, v]) => ({ target: k.split('→')[1], calls: v.calls, imports: v.imports })); const incoming = Object.entries(deps) .filter(([k]) => k.endsWith('→' + sub.name)) .map(([k, v]) => ({ source: k.split('→')[0], calls: v.calls, imports: v.imports })); const anomalies = detectAnomalies(sub, deps); const depContext = []; if (outgoing.length > 0) { depContext.push(`Depends on: ${outgoing.map(d => `${d.target} (${d.calls} calls, ${d.imports} imports)`).join(', ')}`); } if (incoming.length > 0) { depContext.push(`Depended on by: ${incoming.map(d => `${d.source} (${d.calls} calls, ${d.imports} imports)`).join(', ')}`); } // Confluence seed context const confluenceCtx = llmOpts.confluenceCtx || {}; const seedContent = findRelevantContext(confluenceCtx, [sub.name, sub.name.replace(/-/g, ' '), sub.kind], 8000); const prompt = `Write a 3-5 sentence technical overview of the "${sub.name}" subsystem. You MUST explain WHY it depends on its upstream subsystems and WHY downstream subsystems depend on it. If there are structural anomalies, explain their architectural rationale. ${seedContent ? `\nREFERENCE DOCUMENTATION (use this as authoritative context — incorporate key architectural details, naming conventions, deployment patterns, and design rationale from this content):\n${seedContent}\n` : ''} Facts: - Kind: ${sub.kind} - Files: ${sub.files.length} - Functions: ${sub.entities.functions}, Classes: ${sub.entities.classes}, Modules: ${sub.entities.modules} - Public exports: ${sub.publicExports.slice(0, 15).join(', ')}${sub.publicExports.length > 15 ? ` (+${sub.publicExports.length - 15} more)` : ''} ${depContext.length > 0 ? `- Dependency matrix:\n ${depContext.join('\n ')}` : '- No cross-subsystem dependencies (explain why this subsystem is self-contained)'} ${anomalies.length > 0 ? `- Structural anomalies:\n ${anomalies.join('\n ')}` : ''} Write ONLY the overview paragraph, no heading. Focus on architectural rationale, not just listing components.`; return callLLM(prompt, llmOpts); } /** * Generate a prose narrative for a data flow trace. */ async function describeFlow(flowResult, llmOpts) { const steps = flowResult.flow.slice(0, 20).map((s, i) => `${i + 1}. [${s.subsystem}] ${s.entity}${s.crossedVia ? ` (crosses via ${s.crossedVia})` : ''}` ).join('\n'); const prompt = `Write a 3-5 sentence narrative describing this data flow through the system. Entry point: ${flowResult.entryPoint} Subsystem sequence: ${flowResult.subsystemSequence.join(' → ')} ${flowResult.excludedNodes.length > 0 ? `Excluded (high fan-in): ${flowResult.excludedNodes.slice(0, 5).join(', ')}` : ''} ${flowResult.cyclesDetected.length > 0 ? `Cycles detected: ${flowResult.cyclesDetected.length}` : ''} Steps: ${steps}${flowResult.flow.length > 20 ? `\n... (+${flowResult.flow.length - 20} more steps)` : ''} Write ONLY the narrative paragraph, no heading. Explain what happens when this entry point is triggered and how data moves across subsystem boundaries.`; return callLLM(prompt, llmOpts); } /** * Generate a prose description for a contract (interface/type/enum). */ async function describeContract(contract, xref, llmOpts) { const usedBy = xref?.[contract.name]?.usedBy || []; let details = ''; if (contract.type === 'Interface' && contract.fields) { details = `Fields: ${contract.fields.map(f => `${f.name}: ${f.type}`).join(', ')}`; if (contract.extends) details += `\nExtends: ${contract.extends.join(', ')}`; } else if (contract.type === 'Enum' && contract.members) { details = `Members: ${contract.members.join(', ')}`; } else if (contract.type.startsWith('Helm')) { // Helm contract types if (contract.fields) { details = `Fields: ${contract.fields.slice(0, 20).map(f => `${f.name}: ${f.type}`).join(', ')}`; if (contract.fields.length > 20) details += ` (+${contract.fields.length - 20} more)`; } } const typeLabel = contract.type.startsWith('Helm') ? `Helm ${contract.type.replace('Helm', '').toLowerCase()} contract` : `TypeScript ${contract.type.toLowerCase()}`; const prompt = `Write a 1-2 sentence description of this ${typeLabel}. Name: ${contract.name} Type: ${contract.type} Defined in: ${contract.id} Visibility: ${contract.visibility} ${details} ${usedBy.length > 0 ? `Used by subsystems: ${usedBy.join(', ')}` : 'Not referenced cross-subsystem'} Write ONLY the description, no heading. Do not ask for more information.`; return callLLM(prompt, { ...llmOpts, maxTokens: 256 }); } /** * Generate a system-level architecture overview with cross-cutting explanations. */ async function describeArchitecture(subsystems, crossCutting, stats, llmOpts, opts = {}) { const deps = opts.deps || {}; const confluenceCtx = llmOpts.confluenceCtx || opts.confluenceCtx || {}; // Pull in the most relevant architecture docs const seedContent = findRelevantContext(confluenceCtx, [ 'system-architecture', 'architecture', 'platform-concepts', 'design-decisions', 'technology-choices', 'multi-cloud', 'hub', 'spoke', 'layered', 'argocd', 'repository-structure', 'naming-conventions', 'release-process' ], 15000); const subList = subsystems.slice(0, 20).map(s => { const outDeps = Object.entries(deps) .filter(([k]) => k.startsWith(s.name + '→')) .map(([k]) => k.split('→')[1]); return `- ${s.name} (${s.kind}): ${s.entities.functions} functions, ${s.files.length} files${outDeps.length > 0 ? `, depends on: ${outDeps.join(', ')}` : ''}`; }).join('\n'); const anomalySummary = subsystems .map(s => { const a = detectAnomalies(s, deps); return a.length > 0 ? `- ${s.name}: ${a[0]}` : null; }) .filter(Boolean) .slice(0, 5) .join('\n'); const prompt = `Write a 5-8 sentence architecture overview for this software system. Explain the architectural rationale: WHY the system is organized this way, WHY certain subsystems are cross-cutting, and WHY some subsystems have unusual structures. ${seedContent ? `\nREFERENCE DOCUMENTATION (use this as authoritative context — incorporate the layered architecture model, hub/spoke deployment pattern, multi-cloud strategy, naming conventions, CIDR allocation, ArgoCD ownership model, release patterns, and any other architectural details from this content):\n${seedContent}\n` : ''} Total subsystems: ${subsystems.length} Cross-cutting concerns: ${crossCutting.join(', ') || 'none detected'} Subsystems: ${subList} ${anomalySummary ? `Structural anomalies:\n${anomalySummary}` : ''} Write ONLY the overview paragraph, no heading. Focus on explaining the architecture, not just listing components.`; return callLLM(prompt, { ...llmOpts, maxTokens: 1536 }); } module.exports = { callLLM, describeSubsystem, describeFlow, describeContract, describeArchitecture, detectAnomalies, loadConfluenceContext, findRelevantContext };