feat: confluence benchmark, pattern extractor, agent KB, UX spec

- extract-patterns.js: mines layered arch, ArgoCD appsets, cloud regions, CIDR allocations, naming conventions, sync waves, tech stack from code
- agent-kb.js: token-efficient JSON rendering of same doc tree
- eval-confluence-ref-questions.json: 32 reference-only benchmark questions
- wiggum-v2.sh: Ralph Wiggum loop targeting confluence baseline (77.8%)
- docs/human-ux-spec.md: BMad UX designer spec for human doc structure
- Eval results: V2 at 28.7% vs confluence 77.8% baseline
- Hub/spoke ownership now correctly extracted (95% on that question)
- Naming conventions, regions, CIDRs surfaced in system-architecture.md
2026-03-10 14:20:35 +00:00
parent 049609a358
commit 0265ec7a60
844 changed files with 2129910 additions and 30 deletions
--- a/wiggum-v2.sh
+++ b/wiggum-v2.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# Dev Intel V2 — Ralph Wiggum Loop (Confluence Benchmark)
+# Run pipeline → eval → check threshold → iterate
+#
+# Usage: wiggum-v2.sh [MAX_ITERATIONS] [THRESHOLD] [REPO_ROOT] [CONFLUENCE_DIR]
+#   MAX_ITERATIONS  upper bound on loop passes (default 3)
+#   THRESHOLD       whole-number target score in percent (default 77)
+#   REPO_ROOT       path handed to sysdoc.js as its second argument
+#   CONFLUENCE_DIR  accepted as the 4th argument; not read by this script
+#
+# Exit status: 0 when the target is met and also when the run stops below
+# target; 2 when MAX_ITERATIONS or THRESHOLD is not a whole number. A failing
+# pipeline or eval command aborts the script via `set -e` / `pipefail`.
+
+set -euo pipefail
+
+MAX_ITERATIONS=${1:-3}
+THRESHOLD=${2:-77} # Target is Confluence baseline score (77.8%)
+REPO_ROOT="${3:-/home/node/.openclaw/workspace/agents/max/foxtrot/}"
+# Not read anywhere in this script; kept so the 4th positional argument stays accepted.
+# shellcheck disable=SC2034
+CONFLUENCE_DIR="${4:-/home/node/.openclaw/workspace/agents/max/foxtrot/docs/confluence}"
+SNAPSHOT="./snapshots/foxtrot-clean.json"
+OUT_DIR="./foxtrot-docs"
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+QUESTIONS="$SCRIPT_DIR/eval-confluence-ref-questions.json"
+
+export PATH="/home/node/.local/bin:$PATH"
+
+# The loop bound and the `-ge` comparison below are integer-only; a value such
+# as "77.8" would make the comparison error out and read as "below threshold"
+# on every pass, so reject it up front.
+for numeric_arg in MAX_ITERATIONS THRESHOLD; do
+  if ! [[ "${!numeric_arg}" =~ ^[0-9]+$ ]]; then
+    echo "$numeric_arg must be a whole number, got: ${!numeric_arg}" >&2
+    exit 2
+  fi
+done
+
+echo "🔁 Ralph Wiggum Loop (V2) — max $MAX_ITERATIONS iterations, target $THRESHOLD%"
+echo "Benchmark: Confluence Gold Standard ($QUESTIONS)"
+echo ""
+
+# 10# forces base 10 so a zero-padded count like "08" is not parsed as octal.
+for ((i = 1; i <= 10#$MAX_ITERATIONS; i++)); do
+  echo "=== Iteration $i/$MAX_ITERATIONS ==="
+
+  # 1. Generate Docs
+  echo "📝 Running V2 pipeline..."
+  node "$SCRIPT_DIR/sysdoc.js" "$SNAPSHOT" "$REPO_ROOT" "$OUT_DIR" --prose 2>&1 | tail -n 15
+
+  # 2. Evaluate
+  echo "📊 Running agent file-browsing eval against Confluence questions..."
+  EVAL_OUT="$SCRIPT_DIR/eval-wiggum-v2-iter-$i.json"
+
+  # Run the eval (haiku for speed)
+  LLM_MODEL="claude-haiku-4.5" node "$SCRIPT_DIR/eval-agent.js" "$OUT_DIR" "$QUESTIONS" "$EVAL_OUT"
+
+  # 3. Check score
+  if [ -f "$EVAL_OUT" ]; then
+    # The path reaches node through the environment rather than being spliced
+    # into the JS source, so quotes or backslashes in it cannot break the snippet.
+    # Any node failure (unreadable or invalid JSON) falls back to a score of 0.
+    SCORE=$(WIGGUM_EVAL_OUT="$EVAL_OUT" node -e '
+      const r = require(process.env.WIGGUM_EVAL_OUT);
+      console.log(Math.round(r.overallScore || 0));
+    ' 2>/dev/null || echo "0")
+  else
+    SCORE=0
+  fi
+  # Non-integer output (e.g. "NaN" from a non-numeric overallScore) counts as 0.
+  [[ "$SCORE" =~ ^-?[0-9]+$ ]] || SCORE=0
+
+  echo ""
+  echo "🏁 Iteration $i Score: ${SCORE}% (Target: ${THRESHOLD}%)"
+
+  if [ "$SCORE" -ge "$THRESHOLD" ]; then
+    echo "✅ Target met or exceeded Confluence baseline! Exiting loop."
+    exit 0
+  fi
+
+  echo "❌ Below threshold. To iterate, we need a diagnosis and code fix step here."
+
+  # Placeholder for the fix step (disabled):
+  # echo "🤖 Diagnosing failures and generating fix..."
+  # node "$SCRIPT_DIR/wiggum-fix.js" "$EVAL_OUT" "$SCRIPT_DIR/sysdoc.js"
+
+  # if [ $? -ne 0 ]; then
+  #   echo "⚠️  Fix failed to apply. Breaking loop."
+  #   break
+  # fi
+
+  # echo "✅ Fix applied! Proceeding to next iteration..."
+  # echo ""
+
+  # With no fix step, another pass would just repeat the same run, so stop.
+  break
+done