feat: confluence benchmark, pattern extractor, agent KB, UX spec

- extract-patterns.js: mines layered architecture, ArgoCD ApplicationSets, cloud regions, CIDR allocations, naming conventions, sync waves, and tech stack from code
- agent-kb.js: token-efficient JSON rendering of the same doc tree
- eval-confluence-ref-questions.json: 32 reference-only benchmark questions
- wiggum-v2.sh: Ralph Wiggum loop targeting the Confluence baseline (77.8%)
- docs/human-ux-spec.md: BMad UX designer spec for the human doc structure
- Eval results: V2 at 28.7% vs. the Confluence baseline of 77.8%
- Hub/spoke ownership now correctly extracted (95% on that question)
- Naming conventions, regions, and CIDRs surfaced in system-architecture.md
2026-03-10 14:20:35 +00:00
parent 049609a358
commit 0265ec7a60
844 changed files with 2129910 additions and 30 deletions
--- a/sysdoc.js
+++ b/sysdoc.js
@@ -3,9 +3,12 @@ const path = require('path');
 const GraphStore = require('./graph.js');
 const { buildSubsystems } = require('./subsystem.js');
 const { extractAllContracts, buildContractXref, extractHelmContracts } = require('./contracts.js');
-const { buildFlowIndex, traceFlow } = require('./flow.js');
+const { buildFlowIndex, traceFlow, detectEntryPoints } = require('./flow.js');
 const { generateDependencyDiagram, generateFlowDiagram, generateContractDiagram } = require('./diagrams.js');
 const { discoverCharts, chartsToGraph, generateHelmDiagram } = require('./extract-helm.js');
+const { queryImpact, formatImpactMarkdown } = require('./impact.js');
+const { extractAllPatterns } = require('./extract-patterns.js');
+const { buildAgentKB } = require('./agent-kb.js');

 /**
 * Phase 7D: Hierarchical Doc Generator
@@ -14,14 +17,21 @@ const { discoverCharts, chartsToGraph, generateHelmDiagram } = require('./extrac

 async function generateDocs(graph, srcRoot, outDir, opts = {}) {
  const entryPoints = opts.entryPoints || [];
+  const autoDetect = opts.autoDetectEntryPoints !== false; // default true
  const useProse = opts.prose === true;
+  const confluenceDir = opts.confluenceDir || null;
  
  // Optional LLM module for prose enrichment
  let proseMod = null;
+  let confluenceCtx = {};
  if (useProse) {
    try {
      proseMod = require('./prose.js');
      console.log('Prose generation enabled (LLM pass active)');
+      if (confluenceDir) {
+        confluenceCtx = proseMod.loadConfluenceContext(confluenceDir);
+        console.log(`Confluence context loaded: ${Object.keys(confluenceCtx).length} docs`);
+      }
    } catch (err) {
      console.warn('Prose generation requested but prose.js not available. Skipping LLM pass.');
    }
@@ -36,6 +46,9 @@ async function generateDocs(graph, srcRoot, outDir, opts = {}) {
  const helmGraph = chartsToGraph(helmCharts, srcRoot);
  console.log(`Helm: ${helmCharts.length} charts, ${helmGraph.entities.length} entities, ${helmGraph.relationships.length} relationships`);

+  // 4b. Extract architectural patterns from code artifacts
+  const patterns = extractAllPatterns(srcRoot);
+
  // Merge Helm into main graph so Subsystem Aggregator sees it
  for (const e of helmGraph.entities) {
    const fakePath = e.dir ? path.join(srcRoot, e.dir, 'Chart.yaml') : path.join(srcRoot, 'Chart.yaml');
@@ -67,9 +80,38 @@ async function generateDocs(graph, srcRoot, outDir, opts = {}) {
  
  const xref = buildContractXref(contractsResult.contracts, graph, (p) => p.replace(/^\/?src\//, ''));
  
-  // 3. Trace Flows (7C)
+  // 3. Trace Flows (7C) — merge auto-detected entry points with any provided ones (unless auto-detection is disabled)
  const flowIndex = buildFlowIndex(graph, subs);
-  const flowResults = entryPoints.map(ep => traceFlow(ep, flowIndex));
+  let allEntryPoints = [...entryPoints];
+  if (autoDetect) {
+    const detected = detectEntryPoints(graph);
+    console.log(`Auto-detected ${detected.length} entry points`);
+    for (const ep of detected) {
+      if (!allEntryPoints.includes(ep.id)) allEntryPoints.push(ep.id);
+    }
+  }
+  const flowResults = allEntryPoints.map(ep => traceFlow(ep, flowIndex));
+  const validFlows = flowResults.filter(f => !f.error && f.flow.length > 1);
+  console.log(`Flow traces: ${validFlows.length} valid out of ${flowResults.length} attempted`);
+
+  // 3b. Change Impact Analysis — pick high-value nodes
+  const impactTargets = [];
+  // Collect Terraform modules and resources as impact targets
+  for (const [id, node] of graph.nodes) {
+    if (node.kind === 'terraform-module' || node.kind === 'terraform-resource') {
+      impactTargets.push(id);
+    }
+  }
+  // Also pick Helm charts with more than 3 interactions that are present in the graph
+  for (const c of helmCharts) {
+    if (c.interactions.length > 3) {
+      const chartId = `helm:${c.dir}:${c.chart.name}`;
+      if (graph.nodes.has(chartId)) impactTargets.push(chartId);
+    }
+  }
+  const impactResults = impactTargets.slice(0, 20).map(t => queryImpact(graph, t, 5))
+    .filter(r => r.impactedCount > 0);
+  console.log(`Impact analysis: ${impactResults.length} nodes with downstream dependents`);

  // Initialize output directory structure (Divio)
  const dirs = [
@@ -97,7 +139,7 @@ async function generateDocs(graph, srcRoot, outDir, opts = {}) {
  let archProse = '';
  if (proseMod) {
    console.log('Generating architecture overview...');
-    archProse = await proseMod.describeArchitecture(subs.subsystems, subs.crossCutting, {}, {});
+    archProse = await proseMod.describeArchitecture(subs.subsystems, subs.crossCutting, {}, { confluenceCtx }, { deps: subs.dependencyMatrix, confluenceCtx });
    archProse = `\n${archProse.trim()}\n\n`;
  }
  
@@ -109,6 +151,37 @@ ${archProse}
 - **Total Contracts:** ${contractsResult.contracts.length}
 - **Cross-Cutting Concerns:** ${subs.crossCutting.join(', ') || 'none'}

+## Platform Architecture Patterns
+
+### Layered Architecture
+The system is organized into the following logical layers (top to bottom):
+${patterns.layers.map(l => `- **${l.layer}** (${l.repos.join(', ')})`).join('\n')}
+
+### Deployment Topology (Hub & Spoke)
+ArgoCD ApplicationSets define the following ownership model:
+**Hub (Infrastructure/Control Plane):**
+${patterns.appsets.filter(a => a.location === 'hub').map(a => `- \`${a.name}\` manages \`${a.repoName}\``).join('\n')}
+**Spoke (Applications/Runtime):**
+${patterns.appsets.filter(a => a.location === 'spoke').map(a => `- \`${a.name}\` manages \`${a.repoName}\``).join('\n')}
+
+### Cloud Regions Supported
+- **AWS:** ${patterns.regions.aws.join(', ')}
+- **GCP:** ${patterns.regions.gcp.join(', ')}
+- **Azure:** ${patterns.regions.azure.join(', ')}
+
+### Network CIDR Allocations
+| CIDR Block | Context | File |
+|---|---|---|
+${patterns.cidrs.slice(0, 15).map(c => `| \`${c.cidr}\` | ${c.refs[0].context} | \`${c.refs[0].file}\` |`).join('\n')}
+
+### Naming Conventions
+The following resource naming patterns are enforced:
+${patterns.naming.slice(0, 15).map(n => `- \`${n.pattern}\` (via \`${n.file}\`)`).join('\n')}
+
+### Tech Stack & Dependencies
+**Core Images:**
+${patterns.techStack.containerImages.slice(0, 20).map(i => `- \`${i}\``).join('\n')}
+
 ## Subsystems

 | Subsystem | Kind | Files | Modules | Functions |
@@ -158,7 +231,7 @@ ${depDiag}
    let subProse = '';
    if (proseMod) {
      console.log(`Generating prose for subsystem: ${sub.name}...`);
-      subProse = await proseMod.describeSubsystem(sub, subs.dependencyMatrix, {});
+      subProse = await proseMod.describeSubsystem(sub, subs.dependencyMatrix, { confluenceCtx });
      subProse = `\n${subProse.trim()}\n\n`;
    }

@@ -201,6 +274,12 @@ ${sub.files.map(f => `- \`${f}\``).join('\n')}
  const helmIndexPath = path.join(outDir, 'reference/helm/index.md');
  let helmIndexContent = '# Helm Charts\n\n';
  
+  helmIndexContent += '## Helm Sync Waves (Bootstrapping Order)\n\n| Wave | Count | Resources |\n|---|---|---|\n';
+  helmIndexContent += patterns.syncWaves.map(w => `| ${w.wave} | ${w.resources.length} | ${w.resources.slice(0, 5).map(r => r.kind + ':' + r.name).join(', ')}${w.resources.length > 5 ? '...' : ''} |`).join('\n') + '\n\n';
+  helmIndexContent += patterns.syncWaves.map(w => `| ${w.wave} | ${w.resources.length} | ${w.resources.slice(0, 5).map(r => r.kind + ':' + r.name).join(', ')}${w.resources.length > 5 ? '...' : ''} |`).join('\n') + '\n\n';
+
+
+
  // Name-to-file lookup for agent navigation
  helmIndexContent += '## Quick Lookup (by chart name)\n\n';
  const nameGroups = {};
@@ -368,6 +447,33 @@ ${sub.files.map(f => `- \`${f}\``).join('\n')}
  }
  fs.writeFileSync(flowsPath, flowsContent);

+  // Generate Explanation: Change Impact Analysis
+  if (impactResults.length > 0) {
+    const impactPath = path.join(outDir, 'explanation/change-impact.md');
+    let impactContent = '# Change Impact Analysis\n\n';
+    impactContent += 'This section documents the blast radius of modifying key infrastructure components.\n\n';
+    
+    for (const result of impactResults) {
+      impactContent += formatImpactMarkdown(result);
+      impactContent += '\n\n---\n\n';
+    }
+    
+    // Summary table
+    impactContent += '## Impact Summary\n\n';
+    impactContent += '| Component | Kind | Downstream Count | Max Depth |\n|---|---|---|---|\n';
+    for (const r of impactResults.sort((a, b) => b.impactedCount - a.impactedCount)) {
+      const node = r.targetNode || {};
+      impactContent += `| \`${r.target}\` | ${node.kind || 'unknown'} | ${r.impactedCount} | ${Math.max(...r.impacted.map(i => i.depth), 0)} |\n`;
+    }
+    
+    fs.writeFileSync(impactPath, impactContent);
+  }
+
+  // Generate Agent Knowledge Base (JSON)
+  const agentKB = buildAgentKB(graph, srcRoot, helmCharts, subs, contractsResult, patterns, impactResults);
+  fs.writeFileSync(path.join(outDir, 'agent-kb.json'), JSON.stringify(agentKB, null, 2));
+  console.log(`Agent KB: ${agentKB.facts.length} facts indexed`);
+
  return {
    subsystems: subs.subsystems.length,
    contracts: contractsResult.contracts.length,
@@ -381,10 +487,12 @@ if (require.main === module) {
  const srcRoot = process.argv[3];
  const outDir = process.argv[4];
  const useProse = process.argv.includes('--prose');
-  const entryPoints = process.argv.slice(5).filter(a => a !== '--prose');
+  const confluenceArg = process.argv.find(a => a.startsWith('--confluence='));
+  const confluenceDir = confluenceArg ? confluenceArg.split('=')[1] : null;
+  const entryPoints = process.argv.slice(5).filter(a => a !== '--prose' && !a.startsWith('--confluence='));

  if (!snapshotPath || !srcRoot || !outDir) {
-    console.error('Usage: node sysdoc.js <snapshot.json> <srcRoot> <outDir> [--prose] [entryPoint1] ...');
+    console.error('Usage: node sysdoc.js <snapshot.json> <srcRoot> <outDir> [--prose] [--confluence=<dir>] [entryPoint1] ...');
    process.exit(1);
  }

@@ -395,7 +503,8 @@ if (require.main === module) {
      const result = await generateDocs(graph, srcRoot, outDir, { 
        srcDir: srcRoot.endsWith('/') ? srcRoot : srcRoot + '/',
        entryPoints, 
-        prose: useProse 
+        prose: useProse,
+        confluenceDir
      });
      console.log(`Generated docs in ${result.outDir}`);
      console.log(`- ${result.subsystems} subsystems`);