Implement BMad Must-Have Before Launch fixes for all 6 products

P1: API key redaction, SSE billing leak, token math edge cases, CI runner config
P2: mTLS revocation lockout, terraform state lock recovery, RLS pool leak, entropy scrubber, pgmq visibility
P3: HMAC replay prevention, cross-tenant negative tests, correlation window edge cases, SQS claim-check, free tier
P4: Discovery partial failure recovery, ownership conflict integration test, VCR freshness CI, Meilisearch rebuild, Cmd+K latency
P5: Concurrent baseline conflicts, remediation RBAC, Clock interface for governance, 10K property-based runs, Redis panic fallback
P6: Cryptographic agent update signatures, streaming audit logs with WAL, shell AST parsing (mvdan/sh), intervention deadlock TTL, canary suite CI gate
This commit is contained in:
@@ -2551,3 +2551,167 @@ async fn panic_mode_allowed_for_owner_role() {
|
||||
```
|
||||
|
||||
*End of P1 Review Remediation Addendum*
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 API Key Redaction in Panic Traces
|
||||
|
||||
```rust
|
||||
// tests/security/key_redaction_test.rs
|
||||
|
||||
#[test]
fn panic_handler_redacts_bearer_tokens_from_stack_trace() {
    // Trigger a panic inside the proxy handler while a request carrying
    // `Authorization: Bearer sk-live-abc123` is being processed, then verify
    // the captured panic message never exposes the raw credential.
    let outcome = std::panic::catch_unwind(|| {
        process_request_with_panic("Bearer sk-live-abc123");
    });
    assert!(outcome.is_err());

    // Inspect what the panic hook recorded.
    let trace = get_last_panic_message();
    assert!(
        !trace.contains("sk-live-abc123"),
        "Panic trace contains raw API key!"
    );
    assert!(trace.contains("[REDACTED]") || !trace.contains("sk-"));
}
|
||||
|
||||
#[test]
fn error_log_redacts_provider_api_keys() {
    // An upstream error body that echoes the provider key back at us must
    // leave the redaction pass with the key stripped out.
    let raw = r#"{"error": "Invalid API key: sk-proj-abc123xyz"}"#;
    let scrubbed = redact_sensitive_fields(raw);
    assert!(!scrubbed.contains("sk-proj-abc123xyz"));
}
|
||||
|
||||
#[test]
fn telemetry_event_never_contains_raw_api_key() {
    // Build a telemetry event from a raw bearer credential and confirm that
    // neither the key nor the auth scheme survives JSON serialization.
    let event = create_telemetry_event("Bearer sk-live-secret", "gpt-4o", 100, 50);
    let json = serde_json::to_string(&event).unwrap();
    for forbidden in ["sk-live-secret", "Bearer"] {
        assert!(!json.contains(forbidden));
    }
}
|
||||
```
|
||||
|
||||
### 12.2 SSE Billing Leak Prevention (Expanded)
|
||||
|
||||
```rust
|
||||
#[tokio::test]
|
||||
async fn sse_disconnect_bills_only_streamed_tokens() {
|
||||
let stack = E2EStack::start().await;
|
||||
let mock = stack.mock_provider();
|
||||
|
||||
// Provider will stream 100 tokens at 1/sec
|
||||
mock.configure_slow_stream(100, Duration::from_millis(100));
|
||||
|
||||
// Client reads 10 tokens then disconnects
|
||||
let mut stream = stack.proxy_stream(&chat_request_streaming()).await;
|
||||
let mut received = 0;
|
||||
while let Some(chunk) = stream.next().await {
|
||||
received += count_tokens_in_chunk(&chunk);
|
||||
if received >= 10 { break; }
|
||||
}
|
||||
drop(stream);
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
|
||||
// Billing must reflect only streamed tokens
|
||||
let usage = stack.get_last_usage_record().await;
|
||||
assert!(usage.completion_tokens <= 15, // small buffer for in-flight
|
||||
"Billed {} tokens but only streamed ~10", usage.completion_tokens);
|
||||
|
||||
// Provider connection must be aborted
|
||||
assert_eq!(mock.active_connections(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sse_disconnect_during_prompt_processing_bills_zero_completion() {
|
||||
// Client disconnects before any completion tokens are generated
|
||||
// (provider is still processing the prompt)
|
||||
let stack = E2EStack::start().await;
|
||||
let mock = stack.mock_provider();
|
||||
mock.configure_delay_before_first_token(Duration::from_secs(5));
|
||||
|
||||
let stream = stack.proxy_stream(&chat_request_streaming()).await;
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
drop(stream); // Disconnect before first token
|
||||
|
||||
let usage = stack.get_last_usage_record().await;
|
||||
assert_eq!(usage.completion_tokens, 0);
|
||||
// Prompt tokens may still be billed (provider processed them)
|
||||
}
|
||||
```
|
||||
|
||||
### 12.3 Token Calculation Edge Cases
|
||||
|
||||
```rust
|
||||
#[test]
fn tokenizer_handles_unicode_emoji_correctly() {
    // Emoji are multi-byte and typically split into several cl100k_base
    // tokens, unlike plain ASCII words.
    let sample = "Hello 🌍🔥 world";
    assert!(count_tokens_cl100k(sample) > 3);
}
|
||||
|
||||
#[test]
fn tokenizer_handles_cjk_characters() {
    // Four CJK characters; cl100k_base assigns at least one token per char.
    let sample = "你好世界";
    assert!(count_tokens_cl100k(sample) >= 4);
}
|
||||
|
||||
#[test]
fn cost_calculation_matches_provider_billing() {
    // Property test: our token count * rate must match what the provider
    // reports within a 1% tolerance (tokenizer version differences).
    fc::assert(fc::property(
        fc::string_of(fc::any::<char>(), 1..1000),
        |text| {
            let our_count = count_tokens_cl100k(&text);
            let provider_count = mock_provider_token_count(&text);
            // Guard: a zero provider count would divide by zero below,
            // yielding NaN/inf and a vacuous comparison. If the provider
            // counted nothing, we must have counted nothing too.
            if provider_count == 0 {
                return our_count == 0;
            }
            let diff = (our_count as f64 - provider_count as f64).abs();
            diff / provider_count as f64 <= 0.01
        }
    ));
}
|
||||
|
||||
#[test]
fn anthropic_tokenizer_differs_from_openai() {
    // The same text tokenizes differently under each vendor's scheme, so
    // billing must select the tokenizer that matches the provider in use.
    let sample = "The quick brown fox jumps over the lazy dog";
    let via_cl100k = count_tokens_cl100k(sample);
    let via_claude = count_tokens_claude(sample);
    // The counts WILL differ — verify we use the right one per provider.
    assert_ne!(via_cl100k, via_claude);
}
|
||||
```
|
||||
|
||||
### 12.4 Dedicated CI Runner for Latency Benchmarks
|
||||
|
||||
```yaml
|
||||
# .github/workflows/benchmark.yml
# Runs on self-hosted runner (Brian's NAS) — not shared GitHub Actions
name: Latency Benchmark
on:
  push:
    branches: [main]
    paths: ['src/proxy/**']

jobs:
  benchmark:
    runs-on: self-hosted # Brian's NAS with consistent CPU
    steps:
      - uses: actions/checkout@v4
      - name: Run proxy latency benchmark
        run: cargo bench --bench proxy_latency
      - name: Assert P99 < 5ms
        run: |
          # FIX(review): estimates.json only exposes mean/median/slope, so the
          # old `.median.point_estimate` read gated on the MEDIAN while the
          # step claimed P99. Compute a true 99th percentile (in ns) from
          # Criterion's raw per-iteration samples instead.
          P99=$(jq '[range(.times | length) as $i | .times[$i] / .iters[$i]] | sort | .[(length * 99 / 100 | floor)]' target/criterion/proxy_overhead/new/sample.json)
          if (( $(echo "$P99 > 5000000" | bc -l) )); then
            echo "P99 latency ${P99}ns exceeds 5ms budget"
            exit 1
          fi
|
||||
```
|
||||
|
||||
*End of P1 BMad Implementation*
|
||||
|
||||
Reference in New Issue
Block a user