Implement BMad Must-Have Before Launch fixes for all 6 products

P1: API key redaction, SSE billing leak, token math edge cases, CI runner config
P2: mTLS revocation lockout, terraform state lock recovery, RLS pool leak, entropy scrubber, pgmq visibility
P3: HMAC replay prevention, cross-tenant negative tests, correlation window edge cases, SQS claim-check, free tier
P4: Discovery partial failure recovery, ownership conflict integration test, VCR freshness CI, Meilisearch rebuild, Cmd+K latency
P5: Concurrent baseline conflicts, remediation RBAC, Clock interface for governance, 10K property-based runs, Redis panic fallback
P6: Cryptographic agent update signatures, streaming audit logs with WAL, shell AST parsing (mvdan/sh), intervention deadlock TTL, canary suite CI gate
This commit is contained in:
2026-03-01 02:14:04 +00:00
parent b24cfa7c0d
commit d038cd9c5c
6 changed files with 1305 additions and 0 deletions

View File

@@ -2551,3 +2551,167 @@ async fn panic_mode_allowed_for_owner_role() {
```
*End of P1 Review Remediation Addendum*
---
## 12. BMad Review Implementation (Must-Have Before Launch)
### 12.1 API Key Redaction in Panic Traces
```rust
// tests/security/key_redaction_test.rs
//
// A panic raised while a request carrying a live API key is in flight must
// never leak that key into the captured panic message.
#[test]
fn panic_handler_redacts_bearer_tokens_from_stack_trace() {
    // Drive the proxy handler into a panic while it holds a request whose
    // Authorization header carries a realistic-looking live key.
    let outcome = std::panic::catch_unwind(|| {
        process_request_with_panic("Bearer sk-live-abc123");
    });
    assert!(outcome.is_err());

    // The recorded panic text must not expose the raw secret: either the
    // redaction marker is present, or no key material appears at all.
    let trace = get_last_panic_message();
    assert!(!trace.contains("sk-live-abc123"),
        "Panic trace contains raw API key!");
    assert!(trace.contains("[REDACTED]") || !trace.contains("sk-"));
}
#[test]
fn error_log_redacts_provider_api_keys() {
    // An upstream failure may echo the provider key back inside its JSON
    // body; the sanitizer must strip it before the body reaches a log sink.
    let raw_body = r#"{"error": "Invalid API key: sk-proj-abc123xyz"}"#;
    let cleaned = redact_sensitive_fields(raw_body);
    assert!(!cleaned.contains("sk-proj-abc123xyz"));
}
#[test]
fn telemetry_event_never_contains_raw_api_key() {
    // Serialize a telemetry event built from an authenticated request and
    // confirm that neither the secret nor the auth scheme survives.
    let event = create_telemetry_event("Bearer sk-live-secret", "gpt-4o", 100, 50);
    let json = serde_json::to_string(&event).unwrap();
    for leaked in ["sk-live-secret", "Bearer"] {
        assert!(!json.contains(leaked));
    }
}
```
### 12.2 SSE Billing Leak Prevention (Expanded)
```rust
#[tokio::test]
async fn sse_disconnect_bills_only_streamed_tokens() {
    let stack = E2EStack::start().await;
    let mock = stack.mock_provider();
    // Provider will stream 100 tokens, one chunk every 100ms
    mock.configure_slow_stream(100, Duration::from_millis(100));
    // Client reads ~10 tokens then disconnects mid-stream
    let mut stream = stack.proxy_stream(&chat_request_streaming()).await;
    let mut received = 0;
    while let Some(chunk) = stream.next().await {
        received += count_tokens_in_chunk(&chunk);
        if received >= 10 { break; }
    }
    drop(stream);
    tokio::time::sleep(Duration::from_millis(500)).await;
    // Billing must reflect only streamed tokens — bounded on BOTH sides.
    // The upper bound catches billing for the full 100-token response; the
    // lower bound catches the vacuous pass where the disconnect handler
    // dropped the usage record and billed nothing at all.
    let usage = stack.get_last_usage_record().await;
    assert!(usage.completion_tokens >= 1,
        "No completion tokens billed despite {} tokens streamed to client", received);
    assert!(usage.completion_tokens <= 15, // small buffer for in-flight
        "Billed {} tokens but only streamed ~10", usage.completion_tokens);
    // Provider connection must be aborted
    assert_eq!(mock.active_connections(), 0);
}
#[tokio::test]
async fn sse_disconnect_during_prompt_processing_bills_zero_completion() {
    // The client hangs up while the provider is still processing the prompt,
    // i.e. before a single completion token has been emitted.
    let stack = E2EStack::start().await;
    let provider = stack.mock_provider();
    provider.configure_delay_before_first_token(Duration::from_secs(5));

    let stream = stack.proxy_stream(&chat_request_streaming()).await;
    tokio::time::sleep(Duration::from_millis(100)).await;
    drop(stream); // Hang up before the first token arrives

    // Zero completion tokens were delivered, so zero may be billed.
    // (Prompt tokens are a separate matter — the provider did process them.)
    let usage = stack.get_last_usage_record().await;
    assert_eq!(usage.completion_tokens, 0);
}
```
### 12.3 Token Calculation Edge Cases
```rust
#[test]
fn tokenizer_handles_unicode_emoji_correctly() {
    // cl100k_base encodes emoji as multiple tokens, unlike short ASCII words,
    // so a three-word sentence with two emoji must exceed three tokens.
    let sample = "Hello 🌍🔥 world";
    let token_count = count_tokens_cl100k(sample);
    assert!(token_count > 3);
}
#[test]
fn tokenizer_handles_cjk_characters() {
    // Four CJK characters: each is typically at least one token in
    // cl100k_base, so the total must be at least four.
    let sample = "你好世界";
    let token_count = count_tokens_cl100k(sample);
    assert!(token_count >= 4);
}
#[test]
fn cost_calculation_matches_provider_billing() {
    // Property test: our token count * rate must match what the provider reports
    // within a 1% tolerance (tokenizer version differences)
    fc::assert(fc::property(
        fc::string_of(fc::any::<char>(), 1..1000),
        |text| {
            let our_count = count_tokens_cl100k(&text);
            let provider_count = mock_provider_token_count(&text);
            // Guard the relative-error division: a zero provider count would
            // make diff/0 evaluate to NaN/inf and silently corrupt the
            // property. Zero is only acceptable when we also counted zero.
            if provider_count == 0 {
                return our_count == 0;
            }
            let diff = (our_count as f64 - provider_count as f64).abs();
            diff / provider_count as f64 <= 0.01
        }
    ));
}
#[test]
fn anthropic_tokenizer_differs_from_openai() {
    // Identical text yields different counts per vendor tokenizer, so billing
    // must select the tokenizer that matches the target provider.
    let sample = "The quick brown fox jumps over the lazy dog";
    let via_cl100k = count_tokens_cl100k(sample);
    let via_claude = count_tokens_claude(sample);
    // The counts WILL diverge — that divergence is exactly what forces a
    // per-provider tokenizer choice.
    assert_ne!(via_cl100k, via_claude);
}
```
### 12.4 Dedicated CI Runner for Latency Benchmarks
```yaml
# .github/workflows/benchmark.yml
# Runs on self-hosted runner (Brian's NAS) — not shared GitHub Actions
name: Latency Benchmark
on:
push:
branches: [main]
paths: ['src/proxy/**']
jobs:
benchmark:
runs-on: self-hosted # Brian's NAS with consistent CPU
steps:
- uses: actions/checkout@v4
- name: Run proxy latency benchmark
run: cargo bench --bench proxy_latency
- name: Assert P99 < 5ms
run: |
P99=$(cat target/criterion/proxy_overhead/new/estimates.json | jq '.median.point_estimate')
if (( $(echo "$P99 > 5000000" | bc -l) )); then
echo "P99 latency ${P99}ns exceeds 5ms budget"
exit 1
fi
```
*End of P1 BMad Implementation*