#!/bin/bash
# Dev Intel V2 — Ralph Wiggum Loop (Confluence Benchmark)
# Run pipeline → eval → check threshold → iterate
#
# Usage: <this-script> [MAX_ITERATIONS] [THRESHOLD] [REPO_ROOT] [CONFLUENCE_DIR]
#   MAX_ITERATIONS  upper bound on loop iterations (default: 3)
#   THRESHOLD       integer percentage the rounded score must reach (default: 77)
#   REPO_ROOT       repository path handed to sysdoc.js and eval-generator.js
#   CONFLUENCE_DIR  accepted for positional compatibility; not referenced below
#
# Exit status:
#   0  the score reached THRESHOLD, or the loop stopped below it (the
#      "diagnose and fix" step is not implemented, so the loop breaks after
#      the first failing iteration without signalling an error)
#   2  MAX_ITERATIONS or THRESHOLD is not a non-negative integer
#   *  any pipeline step failing aborts the script (set -euo pipefail)

set -euo pipefail

MAX_ITERATIONS=${1:-3}
THRESHOLD=${2:-77} # Target is Confluence baseline score (77.8%)
REPO_ROOT="${3:-/home/node/.openclaw/workspace/agents/max/foxtrot/}"
# shellcheck disable=SC2034 # kept so the 4th positional argument stays accepted
CONFLUENCE_DIR="${4:-/home/node/.openclaw/workspace/agents/max/foxtrot/docs/confluence}"

# SNAPSHOT and OUT_DIR are relative paths, so they resolve against the
# caller's working directory; the helper scripts resolve against SCRIPT_DIR.
SNAPSHOT="./snapshots/foxtrot-clean.json"
OUT_DIR="./foxtrot-docs"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
QUESTIONS="$SCRIPT_DIR/eval-generated-questions.json"

export PATH="/home/node/.local/bin:$PATH"

# Print a message to stderr and exit with the usage-error status.
die() {
  printf '%s\n' "$*" >&2
  exit 2
}

# Succeed when $1 is a non-negative decimal integer.
is_uint() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

# Print the rounded overallScore stored in the JSON file $1.
# Prints 0 when the file is missing, unreadable, or yields a non-integer.
read_score() {
  local eval_file=$1
  local score

  if [[ ! -f "$eval_file" ]]; then
    printf '0\n'
    return 0
  fi

  # The path travels as an argument (process.argv[1] under `node -e`) rather
  # than being spliced into the JavaScript source, so quotes or backslashes in
  # the path cannot corrupt the program text.
  if ! score=$(node -e '
    const fs = require("fs");
    const r = JSON.parse(fs.readFileSync(process.argv[1], "utf8"));
    console.log(Math.round(r.overallScore || 0));
  ' "$eval_file"); then
    printf 'warning: could not read score from %s; using 0\n' "$eval_file" >&2
    score=0
  fi

  # Math.round prints "NaN" for a non-numeric overallScore; guard the
  # integer comparison performed by the caller.
  if [[ ! "$score" =~ ^-?[0-9]+$ ]]; then
    printf 'warning: non-integer score "%s" in %s; using 0\n' \
      "$score" "$eval_file" >&2
    score=0
  fi

  printf '%s\n' "$score"
}

# Reject bad numeric arguments before any expensive pipeline step runs.
is_uint "$MAX_ITERATIONS" \
  || die "MAX_ITERATIONS must be a non-negative integer, got: $MAX_ITERATIONS"
is_uint "$THRESHOLD" \
  || die "THRESHOLD must be a non-negative integer, got: $THRESHOLD"

# Force base 10 so a leading zero (e.g. "08") is not parsed as octal.
max_iter=$((10#$MAX_ITERATIONS))

echo "🔁 Ralph Wiggum Loop (V2) — max $MAX_ITERATIONS iterations, target $THRESHOLD%"
echo "Benchmark: Generated Questions ($QUESTIONS)"
echo ""

for ((i = 1; i <= max_iter; i++)); do
  echo "=== Iteration $i/$MAX_ITERATIONS ==="

  # 1. Generate Docs
  echo "📝 Running V2 pipeline..."
  # Only the last 25 lines of output are shown; pipefail still propagates a
  # non-zero exit from node through the pipe.
  LLM_MODEL="claude-haiku-4.5" node "$SCRIPT_DIR/sysdoc.js" \
    "$SNAPSHOT" "$REPO_ROOT" "$OUT_DIR" --prose 2>&1 | tail -n 25

  # 1.5 Generate Questions for Eval
  echo "🤖 Generating ground truth questions for eval..."
  node "$SCRIPT_DIR/eval-generator.js" "$SNAPSHOT" "$REPO_ROOT" "$QUESTIONS"

  # 2. Evaluate
  echo "📊 Running agent file-browsing eval against generated questions..."
  EVAL_OUT="$SCRIPT_DIR/eval-wiggum-v2-iter-$i.json"

  # Run the eval (haiku for speed)
  LLM_MODEL="claude-haiku-4.5" node "$SCRIPT_DIR/eval-agent.js" \
    "$OUT_DIR" "$QUESTIONS" "$EVAL_OUT"

  # 3. Check score
  SCORE=$(read_score "$EVAL_OUT")

  echo ""
  echo "🏁 Iteration $i Score: ${SCORE}% (Target: ${THRESHOLD}%)"

  # `[ ... -ge ... ]` compares as decimal integers, leading zeros included.
  if [ "$SCORE" -ge "$THRESHOLD" ]; then
    echo "✅ Target met or exceeded baseline! Exiting loop."
    exit 0
  fi

  # No automated diagnosis/fix step exists yet, so re-running the identical
  # pipeline would be pointless: stop after the first failing iteration.
  echo "❌ Below threshold. To iterate, we need a diagnosis and code fix step here."
  break
done