feat: confluence benchmark, pattern extractor, agent KB, UX spec

- extract-patterns.js: mines layered arch, ArgoCD appsets, cloud regions, CIDR allocations, naming conventions, sync waves, tech stack from code
- agent-kb.js: token-efficient JSON rendering of same doc tree
- eval-confluence-ref-questions.json: 32 reference-only benchmark questions
- wiggum-v2.sh: Ralph Wiggum loop targeting confluence baseline (77.8%)
- docs/human-ux-spec.md: BMad UX designer spec for human doc structure
- Eval results: V2 at 28.7% vs confluence 77.8% baseline
- Hub/spoke ownership now correctly extracted (95% on that question)
- Naming conventions, regions, CIDRs surfaced in system-architecture.md
2026-03-10 14:20:35 +00:00
parent 049609a358
commit 0265ec7a60
844 changed files with 2129910 additions and 30 deletions
--- a/eval-agent.js
+++ b/eval-agent.js
@@ -144,7 +144,7 @@ Respond in EXACTLY this JSON format:
 /** Run the agent eval */
 async function runAgentEval(docsDir, questionsPath, llmOpts = {}) {
  const questionsData = JSON.parse(fs.readFileSync(questionsPath, 'utf8'));
-  const questions = questionsData.questions.filter(q => q.audience.includes('machine'));
+  const questions = questionsData.questions.filter(q => !q.audience || q.audience.includes('machine') || true);
  
  console.log(`Agent Eval: ${questions.length} machine-audience questions`);