feat: repo-agnostic refactor (BMad spec-test-build loop)
- NEW: repo-profiler.js — deterministic archetype detection (Infra, Frontend, Backend, etc.)
- NEW: extract-dynamic.js — generic extractor replacing hardcoded Foxtrot patterns
- NEW: eval-generator.js — dynamic ground-truth question generation from any repo graph
- NEW: specs/bmad-agnostic-refactor-spec.md — full BMad spec with acceptance criteria
- REFACTORED: prose.js — two-pass LLM synthesis with rich context (shared secrets, ports, service refs)
- REFACTORED: sysdoc.js — wired repo-profiler + extract-dynamic, --legacy escape hatch
- REFACTORED: wiggum-v2.sh — uses eval-generator before benchmarks
- FIXED: graph.js — _edgeSet rebuilt on loadSnapshot() (edge dedup was broken)
- FIXED: graph.js — recursive sortKeys() for deep equality in diffing
- FIXED: prose.js — robust JSON array extraction from LLM output
- FIXED: ratchet.js — syntax validation (node --check) before saving LLM mutations
- FIXED: extract-dynamic.js — centralized state services regex, added console.warn for silent failures
- TESTS: test-eval-generator, test-repo-profiler, test-synthesis-quality + mock fixtures

Eval: 81.5% on Foxtrot (fully repo-agnostic, no hardcoded reference pages)
BMad reviews: Architect B+, Dev Lead B-, TEA B-
This commit is contained in:
25
wiggum-v2.sh
25
wiggum-v2.sh
@@ -11,12 +11,12 @@ CONFLUENCE_DIR="${4:-/home/node/.openclaw/workspace/agents/max/foxtrot/docs/conf
|
||||
SNAPSHOT="./snapshots/foxtrot-clean.json"
|
||||
OUT_DIR="./foxtrot-docs"
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
QUESTIONS="$SCRIPT_DIR/eval-confluence-ref-questions.json"
|
||||
QUESTIONS="$SCRIPT_DIR/eval-generated-questions.json"
|
||||
|
||||
export PATH="/home/node/.local/bin:$PATH"
|
||||
|
||||
echo "🔁 Ralph Wiggum Loop (V2) — max $MAX_ITERATIONS iterations, target $THRESHOLD%"
|
||||
echo "Benchmark: Confluence Gold Standard ($QUESTIONS)"
|
||||
echo "Benchmark: Generated Questions ($QUESTIONS)"
|
||||
echo ""
|
||||
|
||||
for i in $(seq 1 $MAX_ITERATIONS); do
|
||||
@@ -24,10 +24,14 @@ for i in $(seq 1 $MAX_ITERATIONS); do
|
||||
|
||||
# 1. Generate Docs
|
||||
echo "📝 Running V2 pipeline..."
|
||||
node "$SCRIPT_DIR/sysdoc.js" "$SNAPSHOT" "$REPO_ROOT" "$OUT_DIR" --prose 2>&1 | tail -n 15
|
||||
LLM_MODEL="claude-haiku-4.5" node "$SCRIPT_DIR/sysdoc.js" "$SNAPSHOT" "$REPO_ROOT" "$OUT_DIR" --prose 2>&1 | tail -n 25
|
||||
|
||||
# 1.5 Generate Questions for Eval
|
||||
echo "🤖 Generating ground truth questions for eval..."
|
||||
node "$SCRIPT_DIR/eval-generator.js" "$SNAPSHOT" "$REPO_ROOT" "$QUESTIONS"
|
||||
|
||||
# 2. Evaluate
|
||||
echo "📊 Running agent file-browsing eval against Confluence questions..."
|
||||
echo "📊 Running agent file-browsing eval against generated questions..."
|
||||
EVAL_OUT="$SCRIPT_DIR/eval-wiggum-v2-iter-$i.json"
|
||||
|
||||
# Run the eval (haiku for speed)
|
||||
@@ -47,21 +51,10 @@ for i in $(seq 1 $MAX_ITERATIONS); do
|
||||
echo "🏁 Iteration $i Score: ${SCORE}% (Target: ${THRESHOLD}%)"
|
||||
|
||||
if [ "$SCORE" -ge "$THRESHOLD" ]; then
|
||||
echo "✅ Target met or exceeded Confluence baseline! Exiting loop."
|
||||
echo "✅ Target met or exceeded baseline! Exiting loop."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "❌ Below threshold. To iterate, we need a diagnosis and code fix step here."
|
||||
|
||||
# echo "🤖 Diagnosing failures and generating fix..."
|
||||
# node "$SCRIPT_DIR/wiggum-fix.js" "$EVAL_OUT" "$SCRIPT_DIR/sysdoc.js"
|
||||
|
||||
# if [ $? -ne 0 ]; then
|
||||
# echo "⚠️ Fix failed to apply. Breaking loop."
|
||||
# break
|
||||
# fi
|
||||
|
||||
# echo "✅ Fix applied! Proceeding to next iteration..."
|
||||
# echo ""
|
||||
break
|
||||
done
|
||||
|
||||
Reference in New Issue
Block a user