feat: confluence benchmark, pattern extractor, agent KB, UX spec
- extract-patterns.js: mines layered arch, ArgoCD appsets, cloud regions, CIDR allocations, naming conventions, sync waves, tech stack from code
- agent-kb.js: token-efficient JSON rendering of same doc tree
- eval-confluence-ref-questions.json: 32 reference-only benchmark questions
- wiggum-v2.sh: Ralph Wiggum loop targeting confluence baseline (77.8%)
- docs/human-ux-spec.md: BMad UX designer spec for human doc structure
- Eval results: V2 at 28.7% vs confluence 77.8% baseline
- Hub/spoke ownership now correctly extracted (95% on that question)
- Naming conventions, regions, CIDRs surfaced in system-architecture.md
This commit is contained in:
67
wiggum-v2.sh
Executable file
67
wiggum-v2.sh
Executable file
@@ -0,0 +1,67 @@
|
||||
#!/bin/bash
# Dev Intel V2 — Ralph Wiggum Loop (Confluence Benchmark)
# Run pipeline → eval → check threshold → iterate
#
# Usage: wiggum-v2.sh [MAX_ITERATIONS] [THRESHOLD] [REPO_ROOT] [CONFLUENCE_DIR]
#   MAX_ITERATIONS  non-negative integer, default 3
#   THRESHOLD       non-negative integer percentage, default 77
#   REPO_ROOT       repository passed to sysdoc.js
#   CONFLUENCE_DIR  accepted for compatibility; not read by this script
#
# Exit status: 0 when the threshold is met; 2 on invalid numeric arguments.
# NOTE(review): a run that ends below the threshold also exits 0 (the loop
# just breaks). Confirm whether callers need a non-zero status for that case.

set -euo pipefail

MAX_ITERATIONS=${1:-3}
THRESHOLD=${2:-77} # Target is Confluence baseline score (77.8%)
REPO_ROOT="${3:-/home/node/.openclaw/workspace/agents/max/foxtrot/}"
# Positional argument 4 is kept so existing invocations stay valid, but nothing
# below reads it. NOTE(review): presumably reserved for a later step — confirm.
# shellcheck disable=SC2034
CONFLUENCE_DIR="${4:-/home/node/.openclaw/workspace/agents/max/foxtrot/docs/confluence}"
SNAPSHOT="./snapshots/foxtrot-clean.json"
OUT_DIR="./foxtrot-docs"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
QUESTIONS="$SCRIPT_DIR/eval-confluence-ref-questions.json"

export PATH="/home/node/.local/bin:$PATH"

# Fail fast on non-numeric limits instead of letting the loop silently run
# zero times or the final comparison emit "integer expression expected".
for numeric_arg in "$MAX_ITERATIONS" "$THRESHOLD"; do
  if ! [[ "$numeric_arg" =~ ^[0-9]+$ ]]; then
    echo "error: MAX_ITERATIONS and THRESHOLD must be non-negative integers (got '$numeric_arg')" >&2
    exit 2
  fi
done
# Force base 10 so a value such as "08" is not parsed as invalid octal.
MAX_ITERATIONS=$((10#$MAX_ITERATIONS))
THRESHOLD=$((10#$THRESHOLD))

echo "🔁 Ralph Wiggum Loop (V2) — max $MAX_ITERATIONS iterations, target $THRESHOLD%"
echo "Benchmark: Confluence Gold Standard ($QUESTIONS)"
echo ""

for ((i = 1; i <= MAX_ITERATIONS; i++)); do
  echo "=== Iteration $i/$MAX_ITERATIONS ==="

  # 1. Generate Docs
  echo "📝 Running V2 pipeline..."
  # Only the last 15 lines of pipeline output are shown; with pipefail a
  # failing sysdoc.js still aborts the script.
  node "$SCRIPT_DIR/sysdoc.js" "$SNAPSHOT" "$REPO_ROOT" "$OUT_DIR" --prose 2>&1 | tail -n 15

  # 2. Evaluate
  echo "📊 Running agent file-browsing eval against Confluence questions..."
  EVAL_OUT="$SCRIPT_DIR/eval-wiggum-v2-iter-$i.json"

  # Run the eval (haiku for speed)
  LLM_MODEL="claude-haiku-4.5" node "$SCRIPT_DIR/eval-agent.js" "$OUT_DIR" "$QUESTIONS" "$EVAL_OUT"

  # 3. Check score
  if [[ -f "$EVAL_OUT" ]]; then
    # The result path is handed over as an argument rather than spliced into
    # the JavaScript source, so quotes or backslashes in it cannot break or
    # alter the program. Unreadable/invalid results deliberately score 0.
    SCORE=$(node -e '
      const fs = require("fs");
      const result = JSON.parse(fs.readFileSync(process.argv[1], "utf8"));
      const rounded = Math.round(Number(result.overallScore) || 0);
      console.log(Number.isFinite(rounded) ? rounded : 0);
    ' "$EVAL_OUT" 2>/dev/null) || SCORE=0
  else
    SCORE=0
  fi
  # Last line of defence: the comparison below needs a plain integer.
  [[ "$SCORE" =~ ^-?[0-9]+$ ]] || SCORE=0

  echo ""
  echo "🏁 Iteration $i Score: ${SCORE}% (Target: ${THRESHOLD}%)"

  if ((SCORE >= THRESHOLD)); then
    echo "✅ Target met or exceeded Confluence baseline! Exiting loop."
    exit 0
  fi

  echo "❌ Below threshold. To iterate, we need a diagnosis and code fix step here."

  # Placeholder for the automated fix step; until it exists the loop stops
  # after the first below-threshold iteration (see the break below).
  # echo "🤖 Diagnosing failures and generating fix..."
  # node "$SCRIPT_DIR/wiggum-fix.js" "$EVAL_OUT" "$SCRIPT_DIR/sysdoc.js"

  # if [ $? -ne 0 ]; then
  #   echo "⚠️ Fix failed to apply. Breaking loop."
  #   break
  # fi

  # echo "✅ Fix applied! Proceeding to next iteration..."
  # echo ""
  break
done
|
||||
Reference in New Issue
Block a user