// prose.js

const http = require('http');
const https = require('https');
const fs = require('fs');
const path = require('path');

/**
 * Phase 6+7: LLM Prose Generator
 * Generates human-readable prose for system documentation using Claude Sonnet.
 * All structural analysis is deterministic — LLM is ONLY for prose formatting.
 */

// Connection defaults for the LLM endpoint. Each one can be overridden through
// the named environment variable; because `||` is used, a variable that is set
// to an empty string also falls back to the literal on the right.
const DEFAULT_URL = process.env.LLM_URL || 'http://192.168.86.11:8000/v1';
const DEFAULT_MODEL = process.env.LLM_MODEL || 'claude-sonnet-4.6';
// NOTE(review): this fallback is a credential literal committed to source.
// Consider requiring LLM_API_KEY to be set instead — confirm with whoever
// operates the endpoint before changing it.
const DEFAULT_API_KEY = process.env.LLM_API_KEY || 'my-super-secret-password-123';

/**
 * Load Confluence reference + explanation docs as seed context.
 *
 * Reads every `*.md` file directly inside `<confluenceDir>/reference` and
 * `<confluenceDir>/explanation` (in that order). Files that are empty after
 * trimming are skipped. If both sections contain a file with the same name,
 * the `explanation` content wins because it is read last.
 *
 * @param {string} [confluenceDir] - Root directory of the exported docs.
 * @returns {Object<string, string>} Map of { topic → content } where the topic
 *   is the file name without its trailing `.md`. Empty when the directory is
 *   not given or does not exist.
 */
function loadConfluenceContext(confluenceDir) {
  if (!confluenceDir || !fs.existsSync(confluenceDir)) return {};
  const ctx = {};
  for (const section of ['reference', 'explanation']) {
    const dir = path.join(confluenceDir, section);
    if (!fs.existsSync(dir)) continue;
    for (const file of fs.readdirSync(dir)) {
      if (!file.endsWith('.md')) continue;
      // Strip only the trailing extension. String#replace('.md', '') would
      // remove the FIRST ".md" in the name (e.g. "api.md-notes.md").
      const key = file.slice(0, -'.md'.length);
      const content = fs.readFileSync(path.join(dir, file), 'utf8').trim();
      if (content.length > 0) ctx[key] = content;
    }
  }
  return ctx;
}

/**
 * Find relevant confluence docs for a given topic by keyword matching.
 *
 * Scoring per document and keyword (case-insensitive, literal matching):
 *   +10 when the document key contains the keyword, plus the number of
 *   occurrences in the first 2000 characters of the content, capped at 5.
 * Documents with a score of 0 are dropped; the rest are concatenated in
 * descending score order, each under a `--- key ---` header.
 *
 * @param {Object<string, string>} confluenceCtx - Map of topic → content.
 * @param {Array<string>} keywords - Search terms. Entries that are not
 *   non-empty strings are ignored.
 * @param {number} [maxChars=12000] - Budget for the concatenated content. The
 *   first document that would exceed the budget is truncated to the remaining
 *   budget (only if more than 200 characters remain) and ends the output, so
 *   the result can exceed `maxChars` by the size of that last header/ellipsis.
 * @returns {string} Concatenated context, or '' when nothing matches.
 */
function findRelevantContext(confluenceCtx, keywords, maxChars = 12000) {
  if (!confluenceCtx || Object.keys(confluenceCtx).length === 0) return '';

  // Keywords are matched literally, so regex metacharacters ("c++", "a.b")
  // need no escaping; empty or non-string keywords would match everything or crash.
  const needles = (keywords || [])
    .filter((kw) => typeof kw === 'string' && kw.length > 0)
    .map((kw) => kw.toLowerCase());
  if (needles.length === 0) return '';

  const scored = Object.entries(confluenceCtx)
    .map(([key, content]) => {
      const lowerKey = key.toLowerCase();
      const lowerContent = content.toLowerCase().substring(0, 2000);
      let score = 0;
      for (const needle of needles) {
        if (lowerKey.includes(needle)) score += 10;
        // split() yields one more piece than there are non-overlapping matches.
        const occurrences = lowerContent.split(needle).length - 1;
        score += Math.min(occurrences, 5);
      }
      return { key, content, score };
    })
    .filter((s) => s.score > 0)
    .sort((a, b) => b.score - a.score);

  let result = '';
  for (const s of scored) {
    if (result.length + s.content.length > maxChars) {
      const remaining = maxChars - result.length;
      if (remaining > 200) result += `\n\n--- ${s.key} ---\n${s.content.substring(0, remaining)}...\n`;
      break;
    }
    result += `\n\n--- ${s.key} ---\n${s.content}\n`;
  }
  return result;
}

/**
 * Call an OpenAI-compatible chat completions API.
 *
 * Sends a fixed system message plus `prompt` as the user message to
 * `<origin>/v1/chat/completions`. A trailing `/v1` on the base URL is stripped
 * first; because the endpoint path is absolute, any other path prefix on the
 * base URL is discarded by URL resolution.
 *
 * @param {string} prompt - User message content.
 * @param {object} [opts]
 * @param {string} [opts.url] - Base URL; defaults to DEFAULT_URL.
 * @param {string} [opts.model] - Model name; defaults to DEFAULT_MODEL.
 * @param {string} [opts.apiKey] - Bearer token; defaults to DEFAULT_API_KEY.
 * @param {number} [opts.maxTokens=1024] - Completion token limit.
 * @param {number} [opts.temperature=0.3] - Sampling temperature; 0 is honoured.
 * @returns {Promise<string>} The first choice's message content. Resolves to ''
 *   when the server answers with status >= 400 (the error is logged) or when
 *   the response has no content. Rejects on network errors, on a 120 s
 *   timeout, and when the response body is not valid JSON.
 */
function callLLM(prompt, opts = {}) {
  const baseUrl = opts.url || DEFAULT_URL;
  const model = opts.model || DEFAULT_MODEL;
  const apiKey = opts.apiKey || DEFAULT_API_KEY;
  const maxTokens = opts.maxTokens || 1024;
  // `??` instead of `||`: a temperature of 0 is a legitimate request and must
  // not be replaced by the default.
  const temperature = opts.temperature ?? 0.3;

  return new Promise((resolve, reject) => {
    const url = new URL('/v1/chat/completions', baseUrl.replace(/\/v1\/?$/, ''));
    const body = JSON.stringify({
      model,
      messages: [
        { role: 'system', content: 'You are a senior software architect writing concise, precise technical documentation. Write in present tense. Be specific about domain logic, not syntax. No filler.' },
        { role: 'user', content: prompt },
      ],
      max_tokens: maxTokens,
      temperature,
    });

    const client = url.protocol === 'https:' ? https : http;
    const req = client.request(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` },
    }, (res) => {
      // Decode as a stream so a multi-byte UTF-8 character split across two
      // chunks is reassembled instead of turning into replacement characters.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        try {
          if (res.statusCode >= 400) {
            // Best-effort: callers treat '' as "no prose available".
            console.error('LLM API Error:', res.statusCode, data);
            return resolve('');
          }
          const parsed = JSON.parse(data);
          resolve(parsed.choices?.[0]?.message?.content || '');
        } catch (e) {
          reject(new Error(`LLM parse error: ${e.message} — raw: ${data.substring(0, 200)}`));
        }
      });
    });
    req.on('error', reject);
    // The later 'error' event caused by destroy() is a no-op: the promise is
    // already settled by the explicit timeout rejection.
    req.setTimeout(120000, () => { req.destroy(); reject(new Error('LLM timeout (120s)')); });
    req.write(body);
    req.end();
  });
}

/**
 * Detect structural anomalies in a subsystem.
 *
 * Checks entity counts (no functions in a many-file subsystem, many functions
 * without classes) and the subsystem's position in the dependency map, whose
 * keys have the form "source→target" (high fan-out, high fan-in, isolation).
 *
 * @param {object} sub - Subsystem with `name`, `files` and `entities`.
 * @param {object} deps - Dependency map keyed by "source→target".
 * @returns {string[]} Human-readable anomaly descriptions, possibly empty.
 */
function detectAnomalies(sub, deps) {
  const found = [];
  const { functions: fnCount, classes: classCount } = sub.entities;

  if (fnCount === 0 && sub.files.length > 5) {
    found.push(`Zero functions despite ${sub.files.length} files — likely a configuration-only or IaC subsystem`);
  }
  if (classCount === 0 && fnCount > 50) {
    found.push(`${fnCount} functions with no classes — procedural/script-heavy architecture`);
  }

  // Count edges leaving and entering this subsystem in a single pass.
  const outPrefix = sub.name + '→';
  const inSuffix = '→' + sub.name;
  let fanOut = 0;
  let fanIn = 0;
  for (const edge of Object.keys(deps)) {
    if (edge.startsWith(outPrefix)) fanOut += 1;
    if (edge.endsWith(inSuffix)) fanIn += 1;
  }

  if (fanOut > 5) {
    found.push(`High fan-out: depends on ${fanOut} other subsystems — potential orchestrator or integration layer`);
  }
  if (fanIn > 5) {
    found.push(`High fan-in: ${fanIn} subsystems depend on this — likely a shared library or core service`);
  }
  const hasNoEdges = fanOut === 0 && fanIn === 0;
  if (hasNoEdges && sub.files.length > 3) {
    found.push(`Isolated subsystem with no cross-subsystem dependencies — may be self-contained tooling or unused`);
  }

  return found;
}

/**
 * Generate an explanatory prose overview for a subsystem.
 * Includes dependency rationale and anomaly explanations.
 *
 * @param {object} sub - Subsystem with `name`, `kind`, `files`, `entities`
 *   and optionally `publicExports`.
 * @param {object} [deps] - Dependency map keyed by "source→target" whose values
 *   carry `calls` and `imports`. Treated as empty when omitted.
 * @param {object} [llmOpts] - Options forwarded to callLLM; `confluenceCtx`
 *   (topic → content map) is additionally used to seed the prompt.
 * @returns {Promise<string>} Whatever callLLM resolves to for the built prompt.
 */
async function describeSubsystem(sub, deps, llmOpts) {
  // Tolerate omitted arguments the same way describeFlow/describeContract do.
  const depMap = deps || {};

  const outgoing = [];
  const incoming = [];
  for (const [edge, weight] of Object.entries(depMap)) {
    const [source, target] = edge.split('→');
    if (edge.startsWith(sub.name + '→')) {
      outgoing.push({ target, calls: weight.calls, imports: weight.imports });
    }
    if (edge.endsWith('→' + sub.name)) {
      incoming.push({ source, calls: weight.calls, imports: weight.imports });
    }
  }

  const anomalies = detectAnomalies(sub, depMap);

  const depContext = [];
  if (outgoing.length > 0) {
    depContext.push(`Depends on: ${outgoing.map(d => `${d.target} (${d.calls} calls, ${d.imports} imports)`).join(', ')}`);
  }
  if (incoming.length > 0) {
    depContext.push(`Depended on by: ${incoming.map(d => `${d.source} (${d.calls} calls, ${d.imports} imports)`).join(', ')}`);
  }

  // Confluence seed context. Drop missing keywords (e.g. a subsystem without
  // a kind) so the matcher only ever receives real search terms.
  const confluenceCtx = llmOpts?.confluenceCtx || {};
  const keywords = [sub.name, sub.name.replace(/-/g, ' '), sub.kind]
    .filter((kw) => typeof kw === 'string' && kw.length > 0);
  const seedContent = findRelevantContext(confluenceCtx, keywords, 8000);

  const publicExports = sub.publicExports || [];

  const prompt = `Write a 3-5 sentence technical overview of the "${sub.name}" subsystem. You MUST explain WHY it depends on its upstream subsystems and WHY downstream subsystems depend on it. If there are structural anomalies, explain their architectural rationale.
${seedContent ? `\nREFERENCE DOCUMENTATION (use this as authoritative context — incorporate key architectural details, naming conventions, deployment patterns, and design rationale from this content):\n${seedContent}\n` : ''}
Facts:
- Kind: ${sub.kind ?? 'unknown'}
- Files: ${sub.files.length}
- Functions: ${sub.entities.functions}, Classes: ${sub.entities.classes}, Modules: ${sub.entities.modules}
- Public exports: ${publicExports.slice(0, 15).join(', ')}${publicExports.length > 15 ? ` (+${publicExports.length - 15} more)` : ''}
${depContext.length > 0 ? `- Dependency matrix:\n  ${depContext.join('\n  ')}` : '- No cross-subsystem dependencies (explain why this subsystem is self-contained)'}
${anomalies.length > 0 ? `- Structural anomalies:\n  ${anomalies.join('\n  ')}` : ''}

Write ONLY the overview paragraph, no heading. Focus on architectural rationale, not just listing components.`;

  return callLLM(prompt, llmOpts);
}

/**
 * Generate a prose narrative for a data flow trace.
 *
 * Only the first 20 steps are listed in the prompt; the remainder is
 * summarised as a "+N more steps" line.
 *
 * @param {object} flowResult - Trace with `flow`, `entryPoint`,
 *   `subsystemSequence`, `excludedNodes` and `cyclesDetected`.
 * @param {object} [llmOpts] - Options forwarded to callLLM.
 * @returns {Promise<string>} Whatever callLLM resolves to for the built prompt.
 */
async function describeFlow(flowResult, llmOpts) {
  const stepLines = [];
  flowResult.flow.slice(0, 20).forEach((step, idx) => {
    const crossing = step.crossedVia ? ` (crosses via ${step.crossedVia})` : '';
    stepLines.push(`${idx + 1}. [${step.subsystem}] ${step.entity}${crossing}`);
  });

  // Every element is a string, so join('\n') reproduces the prompt line by line.
  const promptLines = [
    'Write a 3-5 sentence narrative describing this data flow through the system.',
    '',
    `Entry point: ${flowResult.entryPoint}`,
    `Subsystem sequence: ${flowResult.subsystemSequence.join(' → ')}`,
    flowResult.excludedNodes.length > 0
      ? `Excluded (high fan-in): ${flowResult.excludedNodes.slice(0, 5).join(', ')}`
      : '',
    flowResult.cyclesDetected.length > 0
      ? `Cycles detected: ${flowResult.cyclesDetected.length}`
      : '',
    '',
    'Steps:',
    `${stepLines.join('\n')}${flowResult.flow.length > 20 ? `\n... (+${flowResult.flow.length - 20} more steps)` : ''}`,
    '',
    'Write ONLY the narrative paragraph, no heading. Explain what happens when this entry point is triggered and how data moves across subsystem boundaries.',
  ];

  return callLLM(promptLines.join('\n'), llmOpts);
}

/**
 * Generate a prose description for a contract (interface/type/enum).
 *
 * Detail lines depend on the contract type: interfaces list fields (and
 * `extends`), enums list members, and types starting with "Helm" list up to
 * 20 fields. Any other type gets no detail line.
 *
 * @param {object} contract - Contract with `name`, `type`, `id`, `visibility`
 *   and optionally `fields`, `members`, `extends`.
 * @param {object} [xref] - Map of contract name → `{ usedBy: string[] }`.
 * @param {object} [llmOpts] - Options forwarded to callLLM; `maxTokens` is
 *   always overridden to 256.
 * @returns {Promise<string>} Whatever callLLM resolves to for the built prompt.
 */
async function describeContract(contract, xref, llmOpts) {
  const consumers = xref?.[contract.name]?.usedBy || [];
  const renderField = (field) => `${field.name}: ${field.type}`;

  const buildDetails = () => {
    const { type, fields, members } = contract;
    if (type === 'Interface' && fields) {
      const fieldText = `Fields: ${fields.map((f) => renderField(f)).join(', ')}`;
      return contract.extends ? `${fieldText}\nExtends: ${contract.extends.join(', ')}` : fieldText;
    }
    if (type === 'Enum' && members) {
      return `Members: ${members.join(', ')}`;
    }
    // Helm contract types
    if (!type.startsWith('Helm') || !fields) return '';
    const shown = fields.slice(0, 20).map((f) => renderField(f)).join(', ');
    const overflow = fields.length > 20 ? ` (+${fields.length - 20} more)` : '';
    return `Fields: ${shown}${overflow}`;
  };
  const details = buildDetails();

  let typeLabel;
  if (contract.type.startsWith('Helm')) {
    typeLabel = `Helm ${contract.type.replace('Helm', '').toLowerCase()} contract`;
  } else {
    typeLabel = `TypeScript ${contract.type.toLowerCase()}`;
  }

  // Every element is a string, so join('\n') reproduces the prompt line by line.
  const prompt = [
    `Write a 1-2 sentence description of this ${typeLabel}.`,
    '',
    `Name: ${contract.name}`,
    `Type: ${contract.type}`,
    `Defined in: ${contract.id}`,
    `Visibility: ${contract.visibility}`,
    details,
    consumers.length > 0 ? `Used by subsystems: ${consumers.join(', ')}` : 'Not referenced cross-subsystem',
    '',
    'Write ONLY the description, no heading. Do not ask for more information.',
  ].join('\n');

  return callLLM(prompt, { ...llmOpts, maxTokens: 256 });
}

/**
 * Generate a system-level architecture overview with cross-cutting explanations.
 *
 * @param {Array<object>} subsystems - Subsystems with `name`, `kind`, `files`
 *   and `entities`; only the first 20 are listed in the prompt.
 * @param {string[]} crossCutting - Names of cross-cutting concerns.
 * @param {*} stats - Currently unused; kept for call-site compatibility.
 * @param {object} [llmOpts] - Options forwarded to callLLM (with `maxTokens`
 *   overridden to 1536); may carry `confluenceCtx`.
 * @param {object} [opts]
 * @param {object} [opts.deps] - Dependency map keyed by "source→target".
 * @param {object} [opts.confluenceCtx] - Used when `llmOpts` has no
 *   `confluenceCtx`.
 * @returns {Promise<string>} Whatever callLLM resolves to for the built prompt.
 */
async function describeArchitecture(subsystems, crossCutting, stats, llmOpts, opts = {}) {
  // Optional chaining so an omitted/null llmOpts (or null opts) still lets the
  // opts.confluenceCtx fallback apply instead of throwing.
  const deps = opts?.deps || {};
  const confluenceCtx = llmOpts?.confluenceCtx || opts?.confluenceCtx || {};

  // Pull in the most relevant architecture docs
  const seedContent = findRelevantContext(confluenceCtx, [
    'system-architecture', 'architecture', 'platform-concepts', 'design-decisions',
    'technology-choices', 'multi-cloud', 'hub', 'spoke', 'layered', 'argocd',
    'repository-structure', 'naming-conventions', 'release-process'
  ], 15000);

  const subList = subsystems.slice(0, 20).map(s => {
    const outDeps = Object.keys(deps)
      .filter(k => k.startsWith(s.name + '→'))
      .map(k => k.split('→')[1]);
    return `- ${s.name} (${s.kind}): ${s.entities.functions} functions, ${s.files.length} files${outDeps.length > 0 ? `, depends on: ${outDeps.join(', ')}` : ''}`;
  }).join('\n');

  // One line per anomalous subsystem (its first anomaly only), at most five.
  const anomalySummary = subsystems
    .map(s => {
      const a = detectAnomalies(s, deps);
      return a.length > 0 ? `- ${s.name}: ${a[0]}` : null;
    })
    .filter(Boolean)
    .slice(0, 5)
    .join('\n');

  const prompt = `Write a 5-8 sentence architecture overview for this software system. Explain the architectural rationale: WHY the system is organized this way, WHY certain subsystems are cross-cutting, and WHY some subsystems have unusual structures.
${seedContent ? `\nREFERENCE DOCUMENTATION (use this as authoritative context — incorporate the layered architecture model, hub/spoke deployment pattern, multi-cloud strategy, naming conventions, CIDR allocation, ArgoCD ownership model, release patterns, and any other architectural details from this content):\n${seedContent}\n` : ''}
Total subsystems: ${subsystems.length}
Cross-cutting concerns: ${crossCutting.join(', ') || 'none detected'}

Subsystems:
${subList}

${anomalySummary ? `Structural anomalies:\n${anomalySummary}` : ''}

Write ONLY the overview paragraph, no heading. Focus on explaining the architecture, not just listing components.`;

  return callLLM(prompt, { ...llmOpts, maxTokens: 1536 });
}

/**
 * Build the "EXTRACTED SYSTEM FACTS" text block that is embedded in the
 * page-planning prompt and in every page-generation prompt.
 */
function buildReferenceContext(agentKB, deepData) {
  const kb = agentKB || {};
  const dd = deepData || {};

  // Extract helm interaction details from agentKB structure
  const rawCharts = (kb.reference && kb.reference.helm && kb.reference.helm.charts) || kb.charts || [];
  const helmCharts = rawCharts.map(c => ({
    name: c.name, dir: c.path || c.dir, version: c.version, appVersion: c.appVersion,
    deps: c.dependencies || [],
    resourceCount: c.resourceCount || 0,
    valuesCount: (c.valuesKeys || []).length || c.valuesCount || 0,
    interactions: c.interactions || []
  }));

  // Null-prototype dictionaries: interaction targets are arbitrary strings, so
  // a target named "constructor" or "__proto__" must not hit Object.prototype.
  const configUsers = Object.create(null);
  const portMap = Object.create(null);
  const kindCounts = Object.create(null);
  const svcRefs = [];

  const addUser = (dict, target, chartName) => {
    if (!dict[target]) dict[target] = [];
    if (!dict[target].includes(chartName)) dict[target].push(chartName);
  };

  for (const c of helmCharts) {
    for (const i of c.interactions) {
      switch (i.type) {
        case 'config-ref':
          // De-duplicated per chart so "shared" really means several charts.
          addUser(configUsers, i.target, c.name);
          break;
        case 'k8s-service':
          svcRefs.push(`${c.name} → ${i.target}`);
          break;
        case 'port':
          if (i.target !== '0') addUser(portMap, i.target, c.name);
          break;
        case 'resource-kind':
          kindCounts[i.target] = (kindCounts[i.target] || 0) + 1;
          break;
        default:
          break;
      }
    }
  }

  const sharedSecrets = Object.entries(configUsers)
    .filter(([, users]) => users.length > 1)
    .map(([name, users]) => `${name}: ${users.join(', ')}`);
  const sharedPorts = Object.entries(portMap)
    .filter(([, users]) => users.length > 1)
    .map(([port, users]) => `Port ${port}: ${users.join(', ')}`);

  // Subsystem summary from agentKB structure
  const rawSubs = (kb.reference && kb.reference.subsystems) || kb.subsystems || [];
  const subsystems = rawSubs.map(s => ({
    name: s.name,
    files: Array.isArray(s.files) ? s.files.length : (s.fileCount || s.files || 0),
    functions: (s.entities && s.entities.functions) || s.functions || 0,
    modules: (s.entities && s.entities.modules) || s.modules || 0
  }));

  return `
EXTRACTED SYSTEM FACTS:

## Subsystems (${subsystems.length} total)
${subsystems.map(s => `- ${s.name}: ${s.files} files, ${s.functions} functions, ${s.modules} modules`).join('\n')}

## Helm Charts (${helmCharts.length} total)
${helmCharts.slice(0, 30).map(c => `- ${c.name} (${c.dir}): v${c.version}, appVersion=${c.appVersion}, ${c.resourceCount} K8s resources, ${c.valuesCount} config keys, deps=[${c.deps.join(',')}]`).join('\n')}
${helmCharts.length > 30 ? `... and ${helmCharts.length - 30} more charts` : ''}

## Shared Secrets & ConfigMaps (used by multiple charts)
${sharedSecrets.length > 0 ? sharedSecrets.join('\n') : 'None detected'}

## Service-to-Service References
${svcRefs.length > 0 ? svcRefs.join('\n') : 'None detected'}

## Shared Network Ports (used by multiple charts)
${sharedPorts.length > 0 ? sharedPorts.join('\n') : 'None detected'}

## K8s Resource Types
${Object.entries(kindCounts).sort((a,b) => b[1]-a[1]).slice(0,15).map(([k,v]) => `- ${k}: ${v}`).join('\n') || 'See individual chart docs'}

## Deep Extraction Data
${JSON.stringify(dd).substring(0, 4000)}
  `;
}

/**
 * Reduce an LLM-proposed file name to a bare `*.md` name with no directory
 * part, falling back to a slug of the page title when nothing usable remains.
 */
function toSafeMarkdownFilename(candidate, title) {
  const raw = typeof candidate === 'string' ? candidate.trim() : '';
  // Normalise backslashes first so "..\\x" is also reduced on POSIX hosts.
  let base = path.basename(raw.replace(/\\/g, '/'));
  if (base === '' || base === '.' || base === '..') {
    base = title.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '') || 'page';
  }
  return base.endsWith('.md') ? base : `${base}.md`;
}

/**
 * Validate the parsed page list returned by the LLM: keep only object entries
 * that have a title or filename, and give each a safe filename and a focus.
 */
function normalizePageDefinitions(parsed) {
  if (!Array.isArray(parsed)) return [];
  const pages = [];
  for (const entry of parsed) {
    if (!entry || typeof entry !== 'object') continue;
    const title = String(entry.title || entry.filename || '').trim();
    if (!title) continue;
    pages.push({
      title,
      filename: toSafeMarkdownFilename(entry.filename, title),
      focus: String(entry.focus || title),
    });
  }
  return pages;
}

/**
 * Synthesize generic reference pages using the extracted facts.
 *
 * Asks the LLM which pages to create, generates each page, and finally
 * generates `index.md`; everything is written into `<outDir>/reference`.
 * LLM calls go through `module.exports.callLLM`, so the exported function can
 * be replaced. File names proposed by the LLM are reduced to bare names so
 * nothing is written outside the reference directory. When the LLM reply
 * yields no usable page definitions, a single "System Overview" page is
 * generated instead.
 *
 * @param {object} agentKB - Knowledge base; charts are read from
 *   `reference.helm.charts` or `charts`, subsystems from
 *   `reference.subsystems` or `subsystems`.
 * @param {object} deepData - Extra extraction data; its JSON form is embedded
 *   in the prompts, truncated to 4000 characters.
 * @param {string} outDir - Output root; `reference/` is created inside it.
 * @param {string} archetype - Repository archetype named in the prompts.
 * @param {object} [llmOpts] - Options forwarded to callLLM.
 * @returns {Promise<void>} Rejects if page or index generation fails.
 */
async function synthesizeReferencePages(agentKB, deepData, outDir, archetype, llmOpts) {
  console.log(`Synthesizing dynamic reference pages via LLM for archetype: ${archetype}...`);

  const refDir = path.join(outDir, 'reference');
  fs.mkdirSync(refDir, { recursive: true });

  // Build rich context from agentKB and deepData for synthesis
  const contextStr = buildReferenceContext(agentKB, deepData);

  const pagePrompt = `You are a Senior Technical Writer analyzing a repository with the archetype: "${archetype}".
Given these extracted facts and this repo archetype, what 5 reference pages should be created?

IMPORTANT: You MUST include pages that cover ALL of the following topics (spread across the 5 pages):
- Shared secrets/ConfigMaps and which charts use them
- Service-to-service references between charts
- Network ports used by charts (especially shared ports)
- Kubernetes resource types generated across charts
- Chart dependencies and versions
- Subsystem architecture and cross-cutting concerns

${contextStr}

Respond with ONLY a valid JSON array of objects. Each object must have:
- "title": The human-readable title of the page
- "filename": The markdown filename (e.g. "network-architecture.md")
- "focus": A brief description of what to focus on in this page.

Example for Infrastructure:
[
  { "title": "Service Contracts & Interactions", "filename": "service-contracts.md", "focus": "Shared secrets, ConfigMaps, service-to-service references, and network ports across charts" },
  { "title": "Helm Charts & Dependencies", "filename": "helm-charts-dependencies.md", "focus": "Chart versions, dependencies, and configuration surface" }
]
`;

  let pagesJson = '[]';
  try {
    pagesJson = await module.exports.callLLM(pagePrompt, { ...llmOpts, maxTokens: 1000 });
    // basic cleanup in case the LLM returned markdown blocks
    const match = pagesJson.match(/\[[\s\S]*\]/);
    if (match) pagesJson = match[0];
  } catch (e) {
    console.error('Failed to get page definitions from LLM:', e);
  }

  let pages = [];
  try {
    pages = normalizePageDefinitions(JSON.parse(pagesJson));
  } catch (e) {
    console.error('Failed to parse pages JSON:', pagesJson);
  }
  if (pages.length === 0) {
    pages = [
      { title: 'System Overview', filename: 'overview.md', focus: 'General facts' }
    ];
  }

  const generatedFiles = [];
  for (const page of pages) {
    const contentPrompt = `You are a Senior Technical Writer. Generate a "${page.title}" reference page in Markdown for a "${archetype}" repository.
Focus on: ${page.focus}

CRITICAL INSTRUCTIONS:
- Include ALL specific data points from the extracted facts below. Do not summarize or omit details.
- List every shared secret/ConfigMap with the exact chart names that use it.
- List every service-to-service reference with source and target.
- List every shared network port with the exact chart names.
- List Kubernetes resource types with counts.
- List chart versions and appVersions.
- Use tables and bullet lists for data-dense sections.
- Do NOT invent facts. Only use what is in the extracted data below.

${contextStr}

Respond with ONLY the Markdown content. Use # ${page.title} as the main title.`;

    const content = await module.exports.callLLM(contentPrompt, { ...llmOpts, maxTokens: 4000, title: page.title });
    fs.writeFileSync(path.join(refDir, page.filename), content);
    generatedFiles.push({ filename: page.filename, title: page.title, focus: page.focus });
  }

  // Generate Index
  const indexPrompt = `You are a Senior Technical Writer. Create a "reference/index.md" routing table.
I have generated the following files for this ${archetype} repository:
${generatedFiles.map(f => `- \`reference/${f.filename}\` (${f.focus})`).join('\n')}

Create a markdown page with two sections:
## Quick Lookup by Topic
(A table mapping specific topics/keywords to the exact file path)
## File Descriptions
(A table describing what is in each file)

Respond with ONLY the Markdown content.`;

  const indexMd = await module.exports.callLLM(indexPrompt, { ...llmOpts, maxTokens: 1500 });
  fs.writeFileSync(path.join(refDir, 'index.md'), indexMd);

  console.log(`Dynamic reference pages and index synthesized for ${archetype}.`);
}

module.exports = { callLLM, describeSubsystem, describeFlow, describeContract, describeArchitecture, detectAnomalies, loadConfluenceContext, findRelevantContext, synthesizeReferencePages };