Implement BMad Must-Have Before Launch fixes for all 6 products

P1: API key redaction, SSE billing leak, token math edge cases, CI runner config
P2: mTLS revocation lockout, terraform state lock recovery, RLS pool leak, entropy scrubber, pgmq visibility
P3: HMAC replay prevention, cross-tenant negative tests, correlation window edge cases, SQS claim-check, free tier
P4: Discovery partial failure recovery, ownership conflict integration test, VCR freshness CI, Meilisearch rebuild, Cmd+K latency
P5: Concurrent baseline conflicts, remediation RBAC, Clock interface for governance, 10K property-based runs, Redis panic fallback
P6: Cryptographic agent update signatures, streaming audit logs with WAL, shell AST parsing (mvdan/sh), intervention deadlock TTL, canary suite CI gate
This commit is contained in:
2026-03-01 02:14:04 +00:00
parent b24cfa7c0d
commit d038cd9c5c
6 changed files with 1305 additions and 0 deletions

View File

@@ -2551,3 +2551,167 @@ async fn panic_mode_allowed_for_owner_role() {
```
*End of P1 Review Remediation Addendum*
---
## 12. BMad Review Implementation (Must-Have Before Launch)
### 12.1 API Key Redaction in Panic Traces
```rust
// tests/security/key_redaction_test.rs
//
// A panic raised while a request carrying a live API key is in flight must
// never leak that key into the captured panic message.
#[test]
fn panic_handler_redacts_bearer_tokens_from_stack_trace() {
    // Drive the proxy handler into a panic while it holds a request whose
    // Authorization header carries a realistic-looking live key.
    let outcome = std::panic::catch_unwind(|| {
        process_request_with_panic("Bearer sk-live-abc123");
    });
    assert!(outcome.is_err());

    // The recorded panic text must not expose the raw secret: either the
    // redaction marker is present, or no key material appears at all.
    let trace = get_last_panic_message();
    assert!(!trace.contains("sk-live-abc123"),
        "Panic trace contains raw API key!");
    assert!(trace.contains("[REDACTED]") || !trace.contains("sk-"));
}
#[test]
fn error_log_redacts_provider_api_keys() {
    // An upstream failure may echo the provider key back inside its JSON
    // body; the sanitizer must strip it before the body reaches a log sink.
    let raw_body = r#"{"error": "Invalid API key: sk-proj-abc123xyz"}"#;
    let cleaned = redact_sensitive_fields(raw_body);
    assert!(!cleaned.contains("sk-proj-abc123xyz"));
}
#[test]
fn telemetry_event_never_contains_raw_api_key() {
    // Serialize a telemetry event built from an authenticated request and
    // confirm that neither the secret nor the auth scheme survives.
    let event = create_telemetry_event("Bearer sk-live-secret", "gpt-4o", 100, 50);
    let json = serde_json::to_string(&event).unwrap();
    for leaked in ["sk-live-secret", "Bearer"] {
        assert!(!json.contains(leaked));
    }
}
```
### 12.2 SSE Billing Leak Prevention (Expanded)
```rust
#[tokio::test]
async fn sse_disconnect_bills_only_streamed_tokens() {
    let stack = E2EStack::start().await;
    let mock = stack.mock_provider();
    // Provider will stream 100 tokens, one chunk every 100ms
    mock.configure_slow_stream(100, Duration::from_millis(100));
    // Client reads ~10 tokens then disconnects mid-stream
    let mut stream = stack.proxy_stream(&chat_request_streaming()).await;
    let mut received = 0;
    while let Some(chunk) = stream.next().await {
        received += count_tokens_in_chunk(&chunk);
        if received >= 10 { break; }
    }
    drop(stream);
    tokio::time::sleep(Duration::from_millis(500)).await;
    // Billing must reflect only streamed tokens — bounded on BOTH sides.
    // The upper bound catches billing for the full 100-token response; the
    // lower bound catches the vacuous pass where the disconnect handler
    // dropped the usage record and billed nothing at all.
    let usage = stack.get_last_usage_record().await;
    assert!(usage.completion_tokens >= 1,
        "No completion tokens billed despite {} tokens streamed to client", received);
    assert!(usage.completion_tokens <= 15, // small buffer for in-flight
        "Billed {} tokens but only streamed ~10", usage.completion_tokens);
    // Provider connection must be aborted
    assert_eq!(mock.active_connections(), 0);
}
#[tokio::test]
async fn sse_disconnect_during_prompt_processing_bills_zero_completion() {
    // The client hangs up while the provider is still processing the prompt,
    // i.e. before a single completion token has been emitted.
    let stack = E2EStack::start().await;
    let provider = stack.mock_provider();
    provider.configure_delay_before_first_token(Duration::from_secs(5));

    let stream = stack.proxy_stream(&chat_request_streaming()).await;
    tokio::time::sleep(Duration::from_millis(100)).await;
    drop(stream); // Hang up before the first token arrives

    // Zero completion tokens were delivered, so zero may be billed.
    // (Prompt tokens are a separate matter — the provider did process them.)
    let usage = stack.get_last_usage_record().await;
    assert_eq!(usage.completion_tokens, 0);
}
```
### 12.3 Token Calculation Edge Cases
```rust
#[test]
fn tokenizer_handles_unicode_emoji_correctly() {
    // cl100k_base encodes emoji as multiple tokens, unlike short ASCII words,
    // so a three-word sentence with two emoji must exceed three tokens.
    let sample = "Hello 🌍🔥 world";
    let token_count = count_tokens_cl100k(sample);
    assert!(token_count > 3);
}
#[test]
fn tokenizer_handles_cjk_characters() {
    // Four CJK characters: each is typically at least one token in
    // cl100k_base, so the total must be at least four.
    let sample = "你好世界";
    let token_count = count_tokens_cl100k(sample);
    assert!(token_count >= 4);
}
#[test]
fn cost_calculation_matches_provider_billing() {
    // Property test: our token count * rate must match what the provider reports
    // within a 1% tolerance (tokenizer version differences)
    fc::assert(fc::property(
        fc::string_of(fc::any::<char>(), 1..1000),
        |text| {
            let our_count = count_tokens_cl100k(&text);
            let provider_count = mock_provider_token_count(&text);
            // Guard the relative-error division: a zero provider count would
            // make diff/0 evaluate to NaN/inf and silently corrupt the
            // property. Zero is only acceptable when we also counted zero.
            if provider_count == 0 {
                return our_count == 0;
            }
            let diff = (our_count as f64 - provider_count as f64).abs();
            diff / provider_count as f64 <= 0.01
        }
    ));
}
#[test]
fn anthropic_tokenizer_differs_from_openai() {
    // Identical text yields different counts per vendor tokenizer, so billing
    // must select the tokenizer that matches the target provider.
    let sample = "The quick brown fox jumps over the lazy dog";
    let via_cl100k = count_tokens_cl100k(sample);
    let via_claude = count_tokens_claude(sample);
    // The counts WILL diverge — that divergence is exactly what forces a
    // per-provider tokenizer choice.
    assert_ne!(via_cl100k, via_claude);
}
```
### 12.4 Dedicated CI Runner for Latency Benchmarks
```yaml
# .github/workflows/benchmark.yml
# Runs on self-hosted runner (Brian's NAS) — not shared GitHub Actions
name: Latency Benchmark
on:
push:
branches: [main]
paths: ['src/proxy/**']
jobs:
benchmark:
runs-on: self-hosted # Brian's NAS with consistent CPU
steps:
- uses: actions/checkout@v4
- name: Run proxy latency benchmark
run: cargo bench --bench proxy_latency
- name: Assert P99 < 5ms
run: |
P99=$(cat target/criterion/proxy_overhead/new/estimates.json | jq '.median.point_estimate')
if (( $(echo "$P99 > 5000000" | bc -l) )); then
echo "P99 latency ${P99}ns exceeds 5ms budget"
exit 1
fi
```
*End of P1 BMad Implementation*