/**
 * Eval Generator: Repo-Agnostic Question Bank
 * 
 * Generates ground-truth Q&A pairs from graph + Helm data.
 * Questions target what the docs actually cover: subsystems, charts,
 * dependencies, interactions, contracts, resource types.
 * 
 * Usage: node eval-generator.js <snapshot.json> <repo-root> [output.json] [--dry-run]
 */

const fs = require('fs');
const path = require('path');
const GraphStore = require('./graph.js');
const { discoverCharts, chartsToGraph } = require('./extract-helm.js');
const { buildSubsystems } = require('./subsystem.js');

// Names handed to discoverCharts() as its ignore set.
// NOTE(review): presumably matched against directory names during the chart
// walk — confirm against extract-helm.js.
const HELM_IGNORE = new Set([
  // Dependency, VCS and environment directories
  'node_modules',
  '.git',
  'venv',
  '__pycache__',
  '.terraform',
  // Tool/agent working directories (these look like assistant tooling folders)
  '_bmad',
  '_bmad-output',
  '.codex',
  '.claude',
  '.cursor',
  '.gemini',
  '.kiro',
  '.agents'
]);

/**
 * Builds the ground-truth question bank for one repository.
 *
 * Steps: load the graph snapshot, discover Helm charts under `srcRoot`, merge
 * the chart entities/relationships into the graph, aggregate subsystems, then
 * emit questions in nine categories (structural, dependency, contract,
 * configuration, interaction, resource, cross-subsystem, architectural,
 * scenario). A question is only emitted when the data backing it is present.
 *
 * @param {string} snapshotPath - Path passed to `GraphStore.loadSnapshot`.
 * @param {string} srcRoot - Repository root passed to `discoverCharts` /
 *   `chartsToGraph` and used to build the synthetic `Chart.yaml` node paths.
 * @returns {Array<Object>} Question records. Each has, in this key order:
 *   `id`, `category`, `difficulty`, `audience`, `question`, `expected_answer`,
 *   `answer`, `answerType`, `source`, `source_entity`. Ids are sequential
 *   (`q-001`, `q-002`, ...) in emission order.
 */
function generateQuestions(snapshotPath, srcRoot) {
  const questions = [];
  let qIdx = 1;
  const qid = () => `q-${String(qIdx++).padStart(3, '0')}`;

  // Single place where records are assembled, so the key order of the emitted
  // JSON stays fixed and each answer string is computed exactly once.
  // `answer` falls back to `expected` when no shorter form is supplied.
  const addQuestion = ({
    category, difficulty, audience, question,
    expected, answer = expected,
    answerType, source, sourceEntity
  }) => {
    questions.push({
      id: qid(),
      category,
      difficulty,
      audience,
      question,
      expected_answer: expected,
      answer,
      answerType,
      source,
      source_entity: sourceEntity
    });
  };

  // Load graph
  const graph = GraphStore.loadSnapshot(snapshotPath);

  // Load Helm data
  const charts = discoverCharts(srcRoot, HELM_IGNORE);

  // Merge Helm into graph (same as sysdoc.js)
  if (charts.length > 0) {
    const helmGraph = chartsToGraph(charts, srcRoot);
    for (const e of helmGraph.entities) {
      // Helm entities have no source file of their own; index them under the
      // chart's Chart.yaml so file-based lookups can find them.
      const fakePath = e.dir ? path.join(srcRoot, e.dir, 'Chart.yaml') : path.join(srcRoot, 'Chart.yaml');
      graph.nodes.set(e.id, { ...e, type: e.type || 'Module', _file: fakePath });
      if (!graph.fileIndex.has(fakePath)) graph.fileIndex.set(fakePath, new Set());
      graph.fileIndex.get(fakePath).add(e.id);
    }
    for (const r of helmGraph.relationships) {
      graph.edges.push(r);
    }
  }

  // Build subsystems (srcDir is always passed with a trailing slash)
  const rootDir = srcRoot || '';
  const subs = buildSubsystems(graph, {
    srcDir: rootDir.endsWith('/') ? rootDir : rootDir + '/',
    minTraffic: 3,
    crossCuttingThreshold: 0.6
  });

  // ─── Category 1: Structural ───

  // Total chart count
  if (charts.length > 0) {
    addQuestion({
      category: 'structural', difficulty: 'easy',
      audience: ['human', 'machine'],
      question: 'How many Helm charts are defined across this repository?',
      expected: String(charts.length),
      answerType: 'exact',
      source: 'Chart.yaml discovery',
      sourceEntity: 'all charts'
    });
  }

  // Subsystem count
  if (subs.subsystems.length > 0) {
    addQuestion({
      category: 'structural', difficulty: 'easy',
      audience: ['human', 'machine'],
      question: 'How many subsystems does this codebase contain?',
      expected: String(subs.subsystems.length),
      answerType: 'exact',
      source: 'subsystem aggregation',
      sourceEntity: 'all subsystems'
    });
  }

  // Top charts by K8s resources
  const topByResources = [...charts]
    .filter(c => c.templates && c.templates.resources)
    .sort((a, b) => b.templates.resources.length - a.templates.resources.length)
    .slice(0, 5);
  if (topByResources.length > 0 && topByResources[0].templates.resources.length > 0) {
    addQuestion({
      category: 'structural', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which 5 Helm charts produce the most Kubernetes resources? List them with their resource counts.',
      expected: topByResources.map(c => `${c.chart.name} (${c.dir}): ${c.templates.resources.length}`).join('\n'),
      answerType: 'ranked-list',
      source: 'template scanning',
      sourceEntity: topByResources.map(c => c.chart.name).join(', ')
    });
  }

  // Largest subsystem
  const sortedSubs = [...subs.subsystems].sort((a, b) => b.files.length - a.files.length);
  if (sortedSubs.length > 0) {
    const largest = sortedSubs[0];
    addQuestion({
      category: 'structural', difficulty: 'easy',
      audience: ['human'],
      question: 'Which subsystem contains the most files, and how many?',
      expected: `${largest.name}: ${largest.files.length} files`,
      answerType: 'exact',
      source: 'subsystem aggregation',
      sourceEntity: largest.name
    });
  }

  // ─── Category 2: Dependencies ───

  const chartsWithDeps = charts.filter(c => c.chart.dependencies && c.chart.dependencies.length > 0);
  for (const c of chartsWithDeps.slice(0, 5)) {
    addQuestion({
      category: 'dependency', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: `What are the dependencies of the "${c.chart.name}" chart (at ${c.dir})?`,
      expected: c.chart.dependencies.map(d => `${d.name} (${d.version})${d.condition ? ` [condition: ${d.condition}]` : ''}`).join('\n'),
      answerType: 'list',
      source: `${c.dir}/Chart.yaml`,
      sourceEntity: c.chart.name
    });
  }

  // ─── Category 3: Contracts (shared secrets/configs) ───

  // Keys come from scanned templates, so use a prototype-less object: a target
  // named e.g. "constructor" must not resolve to an inherited property.
  const configUsers = Object.create(null);
  for (const c of charts) {
    for (const i of (c.interactions || [])) {
      if (i.type !== 'config-ref') continue;
      if (!configUsers[i.target]) configUsers[i.target] = new Set();
      // A Set, so a chart referencing the same target several times counts once.
      configUsers[i.target].add(c.chart.name);
    }
  }
  // "Shared" means referenced by more than one DISTINCT chart.
  const sharedSecrets = Object.entries(configUsers)
    .map(([name, users]) => [name, [...users]])
    .filter(([, users]) => users.length > 1);
  if (sharedSecrets.length > 0) {
    addQuestion({
      category: 'contract', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which secrets or ConfigMaps are shared across multiple Helm charts? List each with the charts that use it.',
      expected: sharedSecrets.map(([name, users]) => `${name}: ${users.join(', ')}`).join('\n'),
      answerType: 'list',
      source: 'template interaction scanning',
      sourceEntity: sharedSecrets.map(([n]) => n).join(', ')
    });
  }

  // ─── Category 4: Configuration Surface ───

  // Top charts by config surface (most values.yaml keys)
  const chartsByValues = [...charts]
    .filter(c => c.values && c.values.keys && c.values.keys.length > 5)
    .sort((a, b) => b.values.keys.length - a.values.keys.length);

  for (const chart of chartsByValues.slice(0, 5)) {
    addQuestion({
      category: 'configuration', difficulty: 'easy',
      audience: ['machine'],
      question: `How many top-level configuration keys does the ${chart.chart.name} chart (at ${chart.dir}) expose in its values.yaml?`,
      expected: String(chart.values.keys.length),
      answerType: 'exact',
      source: `${chart.dir}/values.yaml`,
      sourceEntity: chart.chart.name
    });
  }

  // ─── Category 5: Interactions (service-to-service) ───

  const svcRefs = [];
  for (const c of charts) {
    for (const i of (c.interactions || [])) {
      if (i.type === 'k8s-service') {
        svcRefs.push({ from: c.chart.name, dir: c.dir, to: i.target });
      }
    }
  }
  if (svcRefs.length > 0) {
    addQuestion({
      category: 'interaction', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'Which Helm charts reference Kubernetes services from other components? List each chart and the service it calls.',
      expected: svcRefs.map(r => `${r.from} (${r.dir}) → ${r.to}`).join('\n'),
      answerType: 'list',
      source: 'template interaction scanning',
      sourceEntity: svcRefs.map(r => r.from).join(', ')
    });
  }

  // Shared ports (port '0' is excluded)
  const portMap = Object.create(null);
  for (const c of charts) {
    for (const i of (c.interactions || [])) {
      if (i.type === 'port' && i.target !== '0') {
        if (!portMap[i.target]) portMap[i.target] = new Set();
        portMap[i.target].add(c.chart.name);
      }
    }
  }
  const sharedPorts = Object.entries(portMap)
    .map(([port, users]) => [port, [...users]])
    .filter(([, users]) => users.length > 1)
    .sort((a, b) => Number(a[0]) - Number(b[0]));
  if (sharedPorts.length > 0) {
    addQuestion({
      category: 'interaction', difficulty: 'hard',
      audience: ['human', 'machine'],
      question: 'Which network ports are used by multiple Helm charts? List each port and the charts that expose it.',
      expected: sharedPorts.map(([port, users]) => `Port ${port}: ${users.join(', ')}`).join('\n'),
      answerType: 'list',
      source: 'template port scanning',
      sourceEntity: 'shared ports'
    });
  }

  // ─── Category 6: Resource Types ───

  const kindCounts = Object.create(null);
  for (const c of charts) {
    for (const r of (c.templates?.resources || [])) {
      kindCounts[r.kind] = (kindCounts[r.kind] || 0) + 1;
    }
  }
  const topKinds = Object.entries(kindCounts).sort((a, b) => b[1] - a[1]).slice(0, 10);
  if (topKinds.length > 0) {
    addQuestion({
      category: 'resource', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: 'What are the most common Kubernetes resource types generated across all Helm charts?',
      expected: topKinds.map(([kind, count]) => `${kind}: ${count}`).join('\n'),
      answerType: 'ranked-list',
      source: 'template resource scanning',
      sourceEntity: 'all charts'
    });
  }

  // Per-chart resource breakdown for top 3 charts by resource count
  for (const chart of topByResources.slice(0, 3)) {
    const kinds = Object.create(null);
    for (const r of chart.templates.resources) {
      kinds[r.kind] = (kinds[r.kind] || 0) + 1;
    }
    addQuestion({
      category: 'resource', difficulty: 'medium',
      audience: ['human', 'machine'],
      question: `What Kubernetes resource types does the ${chart.chart.name} chart generate? List each type and count.`,
      expected: Object.entries(kinds).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k}: ${v}`).join('\n'),
      answerType: 'list',
      source: `${chart.dir}/templates/`,
      sourceEntity: chart.chart.name
    });
  }

  // ─── Category 7: Cross-Subsystem ───

  if (subs.crossCutting && subs.crossCutting.length > 0) {
    const crossCuttingList = subs.crossCutting.join(', ');
    addQuestion({
      category: 'cross-subsystem', difficulty: 'easy',
      audience: ['human'],
      question: 'Which subsystems are identified as cross-cutting concerns?',
      expected: crossCuttingList,
      answerType: 'list',
      source: 'subsystem aggregation',
      sourceEntity: crossCuttingList
    });
  }

  // Dependency matrix questions (keys are read as "source→target")
  if (subs.dependencyMatrix) {
    const weight = (v) => v.calls + v.imports;
    const heavyDeps = Object.entries(subs.dependencyMatrix)
      .filter(([, v]) => weight(v) > 3)
      .sort((a, b) => weight(b[1]) - weight(a[1]));
    if (heavyDeps.length > 0) {
      // The target of the single heaviest edge becomes the question's subject.
      const targetSub = heavyDeps[0][0].split('→')[1];
      // Filter by target over ALL heavy edges. Truncating to a global top-N
      // first would drop real dependents of the target from the ground truth.
      const depsForTarget = heavyDeps.filter(([k]) => k.endsWith(`→${targetSub}`));
      if (depsForTarget.length > 0) {
        addQuestion({
          category: 'cross-subsystem', difficulty: 'hard',
          audience: ['human', 'machine'],
          question: `Which subsystems depend on ${targetSub}, and how heavily (by call+import count)?`,
          expected: depsForTarget.map(([k, v]) => `${k.split('→')[0]}: ${weight(v)}`).join('\n'),
          answerType: 'list',
          source: 'dependency matrix',
          sourceEntity: targetSub
        });
      }
    }
  }

  // ─── Category 8: Architectural ───

  // Empty subsystems (Helm-only)
  const emptySubs = subs.subsystems.filter(s => s.entities.functions === 0 && s.entities.modules === 0);
  if (emptySubs.length > 0) {
    const emptyNames = emptySubs.map(s => s.name).join(', ');
    addQuestion({
      category: 'architectural', difficulty: 'hard',
      audience: ['human'],
      question: `The following subsystems have 0 detected functions and 0 modules: ${emptyNames}. Why might this be the case, and what do they actually contain?`,
      expected: 'These subsystems primarily contain Helm charts with Go-templated YAML, Terraform HCL, and/or Crossplane compositions. The code analysis pipeline detects functions/modules from Python, Go, TypeScript, and shell scripts — but Helm templates use Go template syntax which does not produce traditional function/module entities. Their content is captured through the Helm chart extraction phase instead.',
      answer: 'These subsystems primarily contain Helm charts, Terraform, or Crossplane compositions rather than traditional code.',
      answerType: 'explanation',
      source: 'architectural analysis',
      sourceEntity: emptyNames
    });
  }

  // Chart version for top chart
  if (charts.length > 0) {
    const topChart = topByResources[0] || charts[0];
    addQuestion({
      category: 'architectural', difficulty: 'easy',
      audience: ['human', 'machine'],
      question: `What is the current version and appVersion of the ${topChart.chart.name} Helm chart?`,
      expected: `version: ${topChart.chart.version}, appVersion: ${topChart.chart.appVersion}`,
      answerType: 'exact',
      source: `${topChart.dir}/Chart.yaml`,
      sourceEntity: topChart.chart.name
    });
  }

  // ─── Category 9: Scenario-Based ───

  // Secret rotation scenario (uses the first genuinely shared target)
  if (sharedSecrets.length > 0) {
    const [secretName, secretUsers] = sharedSecrets[0];
    addQuestion({
      category: 'scenario', difficulty: 'hard',
      audience: ['human'],
      question: `If you need to rotate the "${secretName}" shared secret, which Helm charts would be affected and need redeployment?`,
      expected: secretUsers.join(', '),
      answerType: 'list',
      source: 'template interaction scanning',
      sourceEntity: secretName
    });
  }

  // Deployment scenario for top chart
  if (topByResources.length > 0) {
    const chart = topByResources[0];
    const headline = `Chart: ${chart.chart.name} (${chart.dir}), Version: ${chart.chart.version}`;
    const valueKeys = chart.values?.keys || [];
    const shownKeys = valueKeys.slice(0, 10).map(k => k.name).join(', ');
    const moreKeys = chart.values?.keys?.length > 10 ? ` (+${chart.values.keys.length - 10} more)` : '';
    const interactionList = (chart.interactions || []).map(i => `${i.type}: ${i.target}`).join(', ') || 'none detected';
    addQuestion({
      category: 'scenario', difficulty: 'hard',
      audience: ['human'],
      question: `A new engineer needs to deploy the ${chart.chart.name} application. What charts, configuration values, and external dependencies should they understand first?`,
      expected: [
        headline,
        `Key values: ${shownKeys}${moreKeys}`,
        `Resources generated: ${chart.templates.resources.length} K8s resources`,
        `Interactions: ${interactionList}`
      ].join('\n'),
      answer: headline,
      answerType: 'explanation',
      source: `${chart.dir}`,
      sourceEntity: chart.chart.name
    });
  }

  return questions;
}

module.exports = { generateQuestions };

// CLI entry point: runs only when this file is executed directly.
//   node eval-generator.js <snapshot.json> <repo-root> [output.json] [--dry-run]
// Prints a per-category / per-difficulty summary, then either writes the
// question bank to [output.json] or dumps it as JSON on stdout.
// With --dry-run only the summary is printed: no file is written and no JSON
// is dumped.
if (require.main === module) {
  const argv = process.argv.slice(2);
  // The flag is advertised in the usage text, so honor it instead of
  // discarding it together with every other dash-prefixed argument.
  const dryRun = argv.includes('--dry-run');
  const positional = argv.filter(a => !a.startsWith('-'));
  const snapshotPath = positional[0];
  const srcRoot = positional[1];
  const outFile = positional[2] || null;

  if (!snapshotPath || !srcRoot) {
    console.error('Usage: node eval-generator.js <snapshot.json> <repo-root> [output.json] [--dry-run]');
    process.exit(1);
  }

  if (!fs.existsSync(snapshotPath)) {
    console.error(`Snapshot not found: ${snapshotPath}`);
    process.exit(1);
  }

  const questions = generateQuestions(snapshotPath, srcRoot);

  // Summary: tally categories and difficulties in a single pass.
  const cats = {};
  const difficulty = { easy: 0, medium: 0, hard: 0 };
  for (const q of questions) {
    cats[q.category] = (cats[q.category] || 0) + 1;
    if (difficulty[q.difficulty] !== undefined) difficulty[q.difficulty] += 1;
  }

  console.log(`Generated ${questions.length} questions:`);
  for (const [cat, count] of Object.entries(cats).sort((a, b) => b[1] - a[1])) {
    console.log(`  ${cat}: ${count}`);
  }
  console.log(`Difficulty: easy=${difficulty.easy}, medium=${difficulty.medium}, hard=${difficulty.hard}`);

  if (dryRun) {
    console.log(outFile ? `\nDry run: skipped writing ${outFile}` : '\nDry run: no output emitted');
  } else {
    const result = { generated: new Date().toISOString(), count: questions.length, questions };
    const json = JSON.stringify(result, null, 2);

    if (outFile) {
      fs.writeFileSync(outFile, json);
      console.log(`\nWritten to ${outFile}`);
    } else {
      console.log(json);
    }
  }
}