Phase 6+7D: Sonnet prose generation integration

2026-03-09 18:44:19 +00:00
parent 1869fcb5b2
commit d19cee36d7
2 changed files with 224 additions and 12 deletions
--- a/prose.js
+++ b/prose.js
@@ -0,0 +1,153 @@
+const http = require('http');
+const https = require('https');
+
+/**
+ * Phase 6+7: LLM Prose Generator
+ * Generates human-readable prose for system documentation using Claude Sonnet.
+ * All structural analysis is deterministic — LLM is ONLY for prose formatting.
+ */
+
+// Connection defaults for the LLM endpoint. Each value is read from the
+// environment when the variable is set to a non-empty string; otherwise the
+// literal on the right-hand side is used.
+
+// Base URL of the OpenAI-compatible server; may end in "/v1".
+const DEFAULT_URL = process.env.LLM_URL || 'http://192.168.86.11:8000/v1';
+// Model identifier placed in the request body.
+const DEFAULT_MODEL = process.env.LLM_MODEL || 'claude-sonnet-4.6';
+// NOTE(review): the fallback bearer token below is a literal committed to
+// source control. Confirm it is only a placeholder for a local gateway; real
+// credentials should be supplied through LLM_API_KEY.
+const DEFAULT_API_KEY = process.env.LLM_API_KEY || 'my-super-secret-password-123';
+
+/**
+ * Call an OpenAI-compatible chat completions API.
+ *
+ * Sends a fixed system prompt plus `prompt` as the user message and resolves
+ * with the text of the first returned choice.
+ *
+ * @param {string} prompt - User message content.
+ * @param {object} [opts] - Per-call overrides; omitted fields use the module defaults.
+ * @param {string} [opts.url] - Base URL of the server, with or without a trailing "/v1".
+ * @param {string} [opts.model] - Model identifier.
+ * @param {string} [opts.apiKey] - Bearer token for the Authorization header.
+ * @param {number} [opts.maxTokens=1024] - Completion token limit.
+ * @param {number} [opts.temperature=0.3] - Sampling temperature; 0 is honoured.
+ * @returns {Promise<string>} Content of the first choice, or '' when the
+ *   response carries none. Rejects on network errors, a 120s socket timeout,
+ *   a non-2xx HTTP status, or a body that is not valid JSON.
+ */
+function callLLM(prompt, opts = {}) {
+  const baseUrl = opts.url || DEFAULT_URL;
+  const model = opts.model || DEFAULT_MODEL;
+  const apiKey = opts.apiKey || DEFAULT_API_KEY;
+  const maxTokens = opts.maxTokens || 1024;
+  // `??` instead of `||`: 0 is a legitimate temperature and must not be
+  // replaced by the default.
+  const temperature = opts.temperature ?? 0.3;
+
+  return new Promise((resolve, reject) => {
+    // Keep any path prefix of the base URL (e.g. a reverse-proxy mount point)
+    // and normalise it so the final path ends in exactly one
+    // "/v1/chat/completions". Query string and fragment of the base are dropped.
+    const url = new URL(baseUrl);
+    const prefix = url.pathname.replace(/\/+$/, '').replace(/\/v1$/, '');
+    url.pathname = `${prefix}/v1/chat/completions`;
+    url.search = '';
+    url.hash = '';
+
+    const body = JSON.stringify({
+      model,
+      messages: [
+        { role: 'system', content: 'You are a senior software architect writing concise, precise technical documentation. Write in present tense. Be specific about domain logic, not syntax. No filler.' },
+        { role: 'user', content: prompt },
+      ],
+      max_tokens: maxTokens,
+      temperature,
+    });
+
+    const client = url.protocol === 'https:' ? https : http;
+    const req = client.request(url, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` },
+    }, (res) => {
+      // Decode at the stream level so a multi-byte character split across two
+      // chunks is not corrupted by per-chunk Buffer-to-string conversion.
+      res.setEncoding('utf8');
+      let data = '';
+      res.on('data', (chunk) => { data += chunk; });
+      res.on('error', reject);
+      res.on('end', () => {
+        // An error status with a JSON body would otherwise parse fine and
+        // resolve to '' — surface it instead of emitting empty prose.
+        if (res.statusCode < 200 || res.statusCode >= 300) {
+          reject(new Error(`LLM HTTP ${res.statusCode} — raw: ${data.substring(0, 200)}`));
+          return;
+        }
+        try {
+          const parsed = JSON.parse(data);
+          resolve(parsed.choices?.[0]?.message?.content || '');
+        } catch (e) {
+          reject(new Error(`LLM parse error: ${e.message} — raw: ${data.substring(0, 200)}`));
+        }
+      });
+    });
+    req.on('error', reject);
+    // Destroying with an explicit error routes the timeout through the
+    // 'error' handler above, so there is a single rejection path.
+    req.setTimeout(120000, () => { req.destroy(new Error('LLM timeout (120s)')); });
+    req.write(body);
+    req.end();
+  });
+}
+
+/**
+ * Ask the LLM for a short overview paragraph about one subsystem.
+ *
+ * The prompt lists the subsystem's kind, file count, entity counts, up to 15
+ * public exports, and up to 10 dependency entries whose key starts with
+ * "<name>→" or ends with "→<name>".
+ *
+ * @param {object} sub - Subsystem record; reads `name`, `kind`, `files`,
+ *   `entities.functions|classes|modules` and `publicExports`.
+ * @param {object} deps - Map from edge key to an object with `calls` and `imports`.
+ * @param {object} [llmOpts] - Options forwarded unchanged to callLLM.
+ * @returns {Promise<string>} Whatever callLLM resolves with.
+ */
+async function describeSubsystem(sub, deps, llmOpts) {
+  // Collect one line per dependency edge that touches this subsystem.
+  const edgeLines = [];
+  for (const [edge, weight] of Object.entries(deps)) {
+    const startsHere = edge.startsWith(sub.name + '→');
+    if (startsHere || edge.endsWith('→' + sub.name)) {
+      edgeLines.push(`${edge}: ${weight.calls} calls, ${weight.imports} imports`);
+    }
+  }
+  const topEdges = edgeLines.slice(0, 10);
+  const dependencyBlock = topEdges.length > 0
+    ? `- Dependencies:\n  ${topEdges.join('\n  ')}`
+    : '- No cross-subsystem dependencies';
+
+  // Only the first 15 exports are spelled out; the rest are summarised.
+  const exportNames = sub.publicExports;
+  const hiddenExports = exportNames.length - 15;
+  const exportSuffix = hiddenExports > 0 ? ` (+${hiddenExports} more)` : '';
+
+  const prompt = [
+    `Write a 2-3 sentence technical overview of the "${sub.name}" subsystem.`,
+    '',
+    'Facts:',
+    `- Kind: ${sub.kind}`,
+    `- Files: ${sub.files.length}`,
+    `- Functions: ${sub.entities.functions}, Classes: ${sub.entities.classes}, Modules: ${sub.entities.modules}`,
+    `- Public exports: ${exportNames.slice(0, 15).join(', ')}${exportSuffix}`,
+    dependencyBlock,
+    '',
+    'Write ONLY the overview paragraph, no heading.',
+  ].join('\n');
+
+  return callLLM(prompt, llmOpts);
+}
+
+/**
+ * Ask the LLM for a narrative paragraph describing one traced data flow.
+ *
+ * At most the first 20 steps are listed individually; any remainder is
+ * summarised as a count. Excluded nodes (first 5) and the number of detected
+ * cycles are mentioned only when non-empty.
+ *
+ * @param {object} flowResult - Trace result; reads `flow`, `entryPoint`,
+ *   `subsystemSequence`, `excludedNodes` and `cyclesDetected`.
+ * @param {object} [llmOpts] - Options forwarded unchanged to callLLM.
+ * @returns {Promise<string>} Whatever callLLM resolves with.
+ */
+async function describeFlow(flowResult, llmOpts) {
+  const stepLines = [];
+  flowResult.flow.slice(0, 20).forEach((step, index) => {
+    const crossing = step.crossedVia ? ` (crosses via ${step.crossedVia})` : '';
+    stepLines.push(`${index + 1}. [${step.subsystem}] ${step.entity}${crossing}`);
+  });
+  const steps = stepLines.join('\n');
+
+  const sequenceLine = `Subsystem sequence: ${flowResult.subsystemSequence.join(' → ')}`;
+
+  // These two lines collapse to empty strings (leaving a blank line in the
+  // prompt) when there is nothing to report.
+  let excludedLine = '';
+  if (flowResult.excludedNodes.length > 0) {
+    excludedLine = `Excluded (high fan-in): ${flowResult.excludedNodes.slice(0, 5).join(', ')}`;
+  }
+  let cyclesLine = '';
+  if (flowResult.cyclesDetected.length > 0) {
+    cyclesLine = `Cycles detected: ${flowResult.cyclesDetected.length}`;
+  }
+
+  let truncationNote = '';
+  if (flowResult.flow.length > 20) {
+    truncationNote = `\n... (+${flowResult.flow.length - 20} more steps)`;
+  }
+
+  const prompt = [
+    'Write a 3-5 sentence narrative describing this data flow through the system.',
+    '',
+    `Entry point: ${flowResult.entryPoint}`,
+    sequenceLine,
+    excludedLine,
+    cyclesLine,
+    '',
+    'Steps:',
+    `${steps}${truncationNote}`,
+    '',
+    'Write ONLY the narrative paragraph, no heading. Explain what happens when this entry point is triggered and how data moves across subsystem boundaries.',
+  ].join('\n');
+
+  return callLLM(prompt, llmOpts);
+}
+
+/**
+ * Ask the LLM for a one-to-two sentence description of a contract.
+ *
+ * Interfaces contribute their fields (and `extends` list when present), enums
+ * contribute their members; any other contract type adds no detail line.
+ * The request is always sent with `maxTokens: 256`, overriding any value in
+ * `llmOpts`.
+ *
+ * @param {object} contract - Reads `name`, `type`, `id`, `visibility` and,
+ *   depending on type, `fields`, `extends` or `members`.
+ * @param {object} [xref] - Optional lookup keyed by contract name whose
+ *   entries may carry a `usedBy` array.
+ * @param {object} [llmOpts] - Base options for callLLM.
+ * @returns {Promise<string>} Whatever callLLM resolves with.
+ */
+async function describeContract(contract, xref, llmOpts) {
+  const consumers = xref?.[contract.name]?.usedBy || [];
+
+  // Build the type-specific detail lines, joined with newlines below.
+  const detailLines = [];
+  if (contract.type === 'Interface' && contract.fields) {
+    const fieldList = contract.fields.map(f => `${f.name}: ${f.type}`).join(', ');
+    detailLines.push(`Fields: ${fieldList}`);
+    if (contract.extends) {
+      detailLines.push(`Extends: ${contract.extends.join(', ')}`);
+    }
+  } else if (contract.type === 'Enum' && contract.members) {
+    detailLines.push(`Members: ${contract.members.join(', ')}`);
+  }
+  const details = detailLines.join('\n');
+
+  const usageLine = consumers.length > 0
+    ? `Used by subsystems: ${consumers.join(', ')}`
+    : 'Not referenced cross-subsystem';
+
+  const prompt = [
+    `Write a 1-2 sentence description of this TypeScript ${contract.type.toLowerCase()}.`,
+    '',
+    `Name: ${contract.name}`,
+    `Type: ${contract.type}`,
+    `Defined in: ${contract.id}`,
+    `Visibility: ${contract.visibility}`,
+    details,
+    usageLine,
+    '',
+    'Write ONLY the description, no heading.',
+  ].join('\n');
+
+  const cappedOpts = { ...llmOpts, maxTokens: 256 };
+  return callLLM(prompt, cappedOpts);
+}
+
+/**
+ * Ask the LLM for a system-level architecture overview paragraph.
+ *
+ * Lists the first 20 entries of `subsystems` in the order given (no sorting
+ * is done here) together with the cross-cutting concern names. The request
+ * is always sent with `maxTokens: 512`, overriding any value in `llmOpts`.
+ *
+ * @param {object[]} subsystems - Each entry is read for `name`, `kind`,
+ *   `entities.functions` and `files`.
+ * @param {string[]} crossCutting - Names joined into one comma-separated line.
+ * @param {object} stats - Accepted for interface compatibility; not read.
+ * @param {object} [llmOpts] - Base options for callLLM.
+ * @returns {Promise<string>} Whatever callLLM resolves with.
+ */
+async function describeArchitecture(subsystems, crossCutting, stats, llmOpts) {
+  const bulletLines = [];
+  for (const entry of subsystems.slice(0, 20)) {
+    bulletLines.push(
+      `- ${entry.name} (${entry.kind}): ${entry.entities.functions} functions, ${entry.files.length} files`
+    );
+  }
+  const subList = bulletLines.join('\n');
+
+  // An empty join result is falsy, so an empty list falls back to the label.
+  const concerns = crossCutting.join(', ') || 'none detected';
+
+  const prompt = [
+    'Write a 4-6 sentence architecture overview for this software system.',
+    '',
+    `Total subsystems: ${subsystems.length}`,
+    `Cross-cutting concerns: ${concerns}`,
+    '',
+    'Largest subsystems:',
+    subList,
+    '',
+    'Write ONLY the overview paragraph, no heading. Describe the high-level architecture, the role of cross-cutting concerns, and the overall system organization.',
+  ].join('\n');
+
+  const cappedOpts = { ...llmOpts, maxTokens: 512 };
+  return callLLM(prompt, cappedOpts);
+}
+
+// Public API: the low-level LLM call plus one prompt builder per document section.
+module.exports = { callLLM, describeSubsystem, describeFlow, describeContract, describeArchitecture };
--- a/sysdoc.js
+++ b/sysdoc.js
@@ -2,7 +2,7 @@ const fs = require('fs');
 const path = require('path');
 const GraphStore = require('./graph.js');
 const { buildSubsystems } = require('./subsystem.js');
-const { extractAllContracts } = require('./contracts.js');
+const { extractAllContracts, buildContractXref } = require('./contracts.js');
 const { buildFlowIndex, traceFlow } = require('./flow.js');
 const { generateDependencyDiagram, generateFlowDiagram, generateContractDiagram } = require('./diagrams.js');

@@ -11,8 +11,20 @@ const { generateDependencyDiagram, generateFlowDiagram, generateContractDiagram
 * Orchestrates 7A, 7B, 7C, and 7E to generate a Divio-structured documentation site.
 */

-function generateDocs(graph, srcRoot, outDir, opts = {}) {
+async function generateDocs(graph, srcRoot, outDir, opts = {}) {
  const entryPoints = opts.entryPoints || [];
+  const useProse = opts.prose === true;
+  
+  // Optional LLM module for prose enrichment
+  let proseMod = null;
+  if (useProse) {
+    try {
+      proseMod = require('./prose.js');
+      console.log('Prose generation enabled (LLM pass active)');
+    } catch (err) {
+      console.warn('Prose generation requested but prose.js not available. Skipping LLM pass.');
+    }
+  }
  
  // 1. Build Subsystems (7A)
  const subs = buildSubsystems(graph, {
@@ -23,6 +35,7 @@ function generateDocs(graph, srcRoot, outDir, opts = {}) {

  // 2. Extract Contracts (7B)
  const contractsResult = extractAllContracts(subs, srcRoot);
+  const xref = buildContractXref(contractsResult.contracts, graph, (p) => p.replace(/^\/?src\//, ''));
  
  // 3. Trace Flows (7C)
  const flowIndex = buildFlowIndex(graph, subs);
@@ -49,8 +62,15 @@ function generateDocs(graph, srcRoot, outDir, opts = {}) {
  const depDiagPath = 'diagrams/system-deps.mmd';
  fs.writeFileSync(path.join(outDir, depDiagPath), depDiag);
  
-  const sysArchContent = `# System Architecture
+  let archProse = '';
+  if (proseMod) {
+    console.log('Generating architecture overview...');
+    archProse = await proseMod.describeArchitecture(subs.subsystems, subs.crossCutting, {}, {});
+    archProse = `\n${archProse.trim()}\n\n`;
+  }
  
+  const sysArchContent = `# System Architecture
+${archProse}
 ## Subsystems
 ${subs.subsystems.map(s => `- **${s.name}** (${s.kind}): ${s.entities.modules} modules, ${s.entities.functions} functions`).join('\n')}

@@ -76,8 +96,15 @@ ${depDiag}
      contractSection = `\n## Contracts\n\`\`\`mermaid\n${contractDiag}\n\`\`\`\n`;
    }

-    const subContent = `# Subsystem: ${sub.name}
+    let subProse = '';
+    if (proseMod) {
+      console.log(`Generating prose for subsystem: ${sub.name}...`);
+      subProse = await proseMod.describeSubsystem(sub, subs.dependencyMatrix, {});
+      subProse = `\n${subProse.trim()}\n\n`;
+    }

+    const subContent = `# Subsystem: ${sub.name}
+${subProse}
 **Kind:** ${sub.kind}
 **Files:** ${sub.files.length}

@@ -94,7 +121,22 @@ ${sub.files.map(f => `- \`${f}\``).join('\n')}
  const contractDocPath = path.join(outDir, 'reference/contracts/index.md');
  const allContractsDiag = generateContractDiagram(contractsResult.contracts);
  fs.writeFileSync(path.join(outDir, 'diagrams/all-contracts.mmd'), allContractsDiag);
-  fs.writeFileSync(contractDocPath, `# System Contracts\n\n\`\`\`mermaid\n${allContractsDiag}\n\`\`\`\n`);
+  
+  let contractProseList = '';
+  if (proseMod && contractsResult.contracts.length > 0) {
+    console.log(`Generating prose for ${contractsResult.contracts.length} contracts...`);
+    // Batch processing to avoid overloading the API
+    const batchSize = 10;
+    const contractDocs = [];
+    for (let i = 0; i < contractsResult.contracts.length; i += batchSize) {
+      const batch = contractsResult.contracts.slice(i, i + batchSize);
+      const docs = await Promise.all(batch.map(c => proseMod.describeContract(c, xref, {})));
+      contractDocs.push(...docs);
+    }
+    contractProseList = contractsResult.contracts.map((c, i) => `### ${c.name}\n${contractDocs[i].trim()}\n`).join('\n');
+  }
+
+  fs.writeFileSync(contractDocPath, `# System Contracts\n\n\`\`\`mermaid\n${allContractsDiag}\n\`\`\`\n\n${contractProseList}`);

  // Generate Explanation: Data Flows
  const flowsPath = path.join(outDir, 'explanation/data-flows.md');
@@ -110,7 +152,15 @@ ${sub.files.map(f => `- \`${f}\``).join('\n')}
    const diagName = `flow-${i}.mmd`;
    fs.writeFileSync(path.join(outDir, `diagrams/${diagName}`), flowDiag);
    
+    let flowProse = '';
+    if (proseMod) {
+      console.log(`Generating prose for flow: ${fr.entryPoint}...`);
+      flowProse = await proseMod.describeFlow(fr, {});
+      flowProse = `${flowProse.trim()}\n\n`;
+    }
+    
    flowsContent += `## Flow: ${fr.entryPoint}\n`;
+    flowsContent += flowProse;
    flowsContent += `**Subsystem Sequence:** ${fr.subsystemSequence.join(' → ')}\n\n`;
    flowsContent += `\`\`\`mermaid\n${flowDiag}\n\`\`\`\n\n`;
  }
@@ -128,19 +178,28 @@ if (require.main === module) {
  const snapshotPath = process.argv[2];
  const srcRoot = process.argv[3];
  const outDir = process.argv[4];
-  const entryPoints = process.argv.slice(5);
+  const useProse = process.argv.includes('--prose');
+  const entryPoints = process.argv.slice(5).filter(a => a !== '--prose');

  if (!snapshotPath || !srcRoot || !outDir) {
-    console.error('Usage: node sysdoc.js <snapshot.json> <srcRoot> <outDir> [entryPoint1] [entryPoint2] ...');
+    console.error('Usage: node sysdoc.js <snapshot.json> <srcRoot> <outDir> [--prose] [entryPoint1] ...');
    process.exit(1);
  }

  const graph = GraphStore.loadSnapshot(snapshotPath);
-  const result = generateDocs(graph, srcRoot, outDir, { entryPoints });
-  console.log(`Generated docs in ${result.outDir}`);
-  console.log(`- ${result.subsystems} subsystems`);
-  console.log(`- ${result.contracts} contracts`);
-  console.log(`- ${result.flows} flows`);
+  // Using an IIFE to support top-level await
+  (async () => {
+    try {
+      const result = await generateDocs(graph, srcRoot, outDir, { entryPoints, prose: useProse });
+      console.log(`Generated docs in ${result.outDir}`);
+      console.log(`- ${result.subsystems} subsystems`);
+      console.log(`- ${result.contracts} contracts`);
+      console.log(`- ${result.flows} flows`);
+    } catch (err) {
+      console.error('Error generating docs:', err);
+      process.exit(1);
+    }
+  })();
 }

 module.exports = { generateDocs };