feat: confluence benchmark, pattern extractor, agent KB, UX spec
- extract-patterns.js: mines layered arch, ArgoCD appsets, cloud regions, CIDR allocations, naming conventions, sync waves, tech stack from code
- agent-kb.js: token-efficient JSON rendering of same doc tree
- eval-confluence-ref-questions.json: 32 reference-only benchmark questions
- wiggum-v2.sh: Ralph Wiggum loop targeting confluence baseline (77.8%)
- docs/human-ux-spec.md: BMad UX designer spec for human doc structure
- Eval results: V2 at 28.7% vs confluence 77.8% baseline
- Hub/spoke ownership now correctly extracted (95% on that question)
- Naming conventions, regions, CIDRs surfaced in system-architecture.md
This commit is contained in:
63
wiggum.sh
Executable file
63
wiggum.sh
Executable file
@@ -0,0 +1,63 @@
|
||||
#!/bin/bash
# Dev Intel V3 — Ralph Wiggum Loop
# Run pipeline → eval → check threshold → iterate
# Named after the well-known agentic pattern: loop until objective criteria met.
#
# Usage: wiggum.sh [MAX_ITERATIONS] [THRESHOLD] [REPO_ROOT]
#   MAX_ITERATIONS  maximum number of generate/eval rounds (default: 3)
#   THRESHOLD       integer pass percentage required to stop (default: 90)
#   REPO_ROOT       repository path handed to pipeline-v3.js
#
# Exit status:
#   0  a round reached THRESHOLD
#   1  MAX_ITERATIONS rounds finished below THRESHOLD
#   2  MAX_ITERATIONS or THRESHOLD is not a non-negative integer
#   A failing pipeline-v3.js run aborts the script (set -e + pipefail).

set -euo pipefail

MAX_ITERATIONS=${1:-3}
THRESHOLD=${2:-90}
REPO_ROOT="${3:-/home/node/.openclaw/workspace/agents/max/foxtrot/}"
OUT_DIR="./foxtrot-docs-v3"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

export PATH="/home/node/.local/bin:$PATH"

# Reject non-numeric arguments up front instead of failing later inside the
# loop header or the integer comparison.
if ! [[ "$MAX_ITERATIONS" =~ ^[0-9]+$ && "$THRESHOLD" =~ ^[0-9]+$ ]]; then
  echo "Usage: ${0##*/} [MAX_ITERATIONS] [THRESHOLD] [REPO_ROOT]" >&2
  echo "MAX_ITERATIONS and THRESHOLD must be non-negative integers." >&2
  exit 2
fi
# Force base 10 so values such as "08" are not parsed as invalid octal.
MAX_ITERATIONS=$((10#$MAX_ITERATIONS))
THRESHOLD=$((10#$THRESHOLD))

echo "🔁 Ralph Wiggum Loop — max $MAX_ITERATIONS iterations, target $THRESHOLD%"
echo ""

for ((i = 1; i <= MAX_ITERATIONS; i++)); do
  echo "=== Iteration $i/$MAX_ITERATIONS ==="

  # 1. Generate
  # With pipefail, a failing pipeline run stops the script here.
  echo "📝 Running V3 pipeline..."
  node "$SCRIPT_DIR/pipeline-v3.js" "$REPO_ROOT" "$OUT_DIR" 2>&1 | tail -10

  # 2. Evaluate
  echo "📊 Running promptfoo eval..."
  EVAL_OUT="$SCRIPT_DIR/eval-wiggum-$i.json"
  # Drop any result file left by an earlier run so it cannot be scored as if
  # it belonged to this iteration.
  rm -f -- "$EVAL_OUT"
  # promptfoo is documented to exit non-zero when any test case fails. Under
  # `set -e` + pipefail that would kill the script on the first sub-threshold
  # round, so capture the status instead of letting it propagate.
  eval_status=0
  npx --yes promptfoo@latest eval \
    -c "$SCRIPT_DIR/promptfoo.yaml" \
    -o "$EVAL_OUT" \
    --no-progress-bar 2>&1 | tail -5 || eval_status=$?

  # 3. Check score
  if [ -f "$EVAL_OUT" ]; then
    # The path is passed through the environment rather than spliced into the
    # JavaScript source, so quotes or backslashes in it cannot break (or
    # inject into) the program. stderr is discarded on purpose: any failure
    # to read or parse the file is scored as 0.
    SCORE=$(EVAL_OUT="$EVAL_OUT" node -e '
      const r = require(process.env.EVAL_OUT);
      const results = r.results || r;
      const s = results.stats;
      const total = s ? s.successes + s.failures : 0;
      // total is 0 or NaN when nothing was evaluated: report 0, not NaN.
      console.log(total > 0 ? Math.round((s.successes / total) * 100) : 0);
    ' 2>/dev/null || echo "0")
  else
    echo "promptfoo produced no output file (exit status $eval_status)." >&2
    SCORE=0
  fi
  # Guard the integer comparison below against any non-numeric output.
  [[ "$SCORE" =~ ^[0-9]+$ ]] || SCORE=0

  echo "Score: ${SCORE}% (target: ${THRESHOLD}%)"

  if [ "$SCORE" -ge "$THRESHOLD" ]; then
    echo "✅ Target met! Exiting."
    exit 0
  fi

  echo "❌ Below threshold. Iterating..."
  echo ""
done

echo "⚠️ Max iterations reached without hitting ${THRESHOLD}%."
exit 1
|
||||
Reference in New Issue
Block a user