Implement review remediation + PLG analytics SDK
- All 6 test architectures patched with Section 11 addendums - P5 (cost) fully rewritten from 232 to ~600 lines - PLG brainstorm + party mode advisory board results - Analytics SDK v2 (PostHog Cloud, Zod strict, Lambda-safe) - Analytics tests v2 (safeParse, no timestamp, no PII) - Addresses all Gemini review findings across P1-P6
This commit is contained in:
@@ -2239,3 +2239,315 @@ Before writing any new function, ask:
|
||||
*Test Architecture document generated for dd0c/route V1 MVP.*
|
||||
*Total estimated test count at V1 launch: ~400 tests.*
|
||||
*Target CI runtime: <8 minutes (unit + integration), <15 minutes (full pipeline with E2E).*
|
||||
|
||||
---
|
||||
|
||||
## 11. Review Remediation Addendum (Post-Gemini Review)
|
||||
|
||||
### 11.1 Replace MockKeyCache/MockKeyStore with Testcontainers
|
||||
|
||||
```rust
|
||||
// BEFORE (anti-pattern — mocks hide real latency):
|
||||
// let cache = MockKeyCache::new();
|
||||
// let store = MockKeyStore::new();
|
||||
|
||||
// AFTER: Use Testcontainers for hot-path auth tests
|
||||
#[tokio::test]
|
||||
async fn auth_middleware_validates_key_under_5ms_with_real_redis() {
|
||||
let redis = TestcontainersRedis::start().await;
|
||||
let pg = TestcontainersPostgres::start().await;
|
||||
let cache = RedisKeyCache::new(redis.connection_string());
|
||||
let store = PgKeyStore::new(pg.connection_string());
|
||||
|
||||
let start = Instant::now();
|
||||
let result = auth_middleware(&cache, &store, "sk-valid-key").await;
|
||||
assert!(start.elapsed() < Duration::from_millis(5));
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn auth_middleware_handles_redis_connection_pool_exhaustion() {
|
||||
// Exhaust all connections, verify fallback to PG
|
||||
let redis = TestcontainersRedis::start().await;
|
||||
let cache = RedisKeyCache::with_pool_size(redis.connection_string(), 1);
|
||||
// Hold the single connection
|
||||
let _held = cache.raw_connection().await;
|
||||
// Auth must still work via PG fallback
|
||||
let result = auth_middleware(&cache, &pg_store, "sk-valid-key").await;
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
```
|
||||
|
||||
### 11.2 Fix Encryption Test (Decrypt, Don't Just Assert Non-Plaintext)
|
||||
|
||||
```rust
|
||||
// BEFORE (anti-pattern — passes if stored as random garbage):
|
||||
// assert_ne!(stored.encrypted_key, b"sk-plaintext-key");
|
||||
|
||||
// AFTER: Full round-trip encryption test
|
||||
#[tokio::test]
|
||||
async fn provider_credential_encrypts_and_decrypts_correctly() {
|
||||
let kms = LocalStackKMS::start().await;
|
||||
let key_id = kms.create_key().await;
|
||||
let store = CredentialStore::new(pg.pool(), kms.client(), key_id);
|
||||
|
||||
let original = "sk-live-abc123xyz";
|
||||
store.save_credential("org-1", "openai", original).await.unwrap();
|
||||
|
||||
// Read raw from DB — must NOT be plaintext
|
||||
let raw = pg.query_raw("SELECT encrypted_key FROM credentials LIMIT 1").await;
|
||||
assert!(!String::from_utf8_lossy(&raw).contains(original));
|
||||
|
||||
// Decrypt via the store — must match original
|
||||
let decrypted = store.get_credential("org-1", "openai").await.unwrap();
|
||||
assert_eq!(decrypted, original);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn kms_key_rotation_old_deks_still_decrypt_old_credentials() {
|
||||
let kms = LocalStackKMS::start().await;
|
||||
let key_id = kms.create_key().await;
|
||||
let store = CredentialStore::new(pg.pool(), kms.client(), key_id);
|
||||
|
||||
// Save with original key
|
||||
store.save_credential("org-1", "openai", "sk-old").await.unwrap();
|
||||
|
||||
// Rotate KMS key
|
||||
kms.rotate_key(key_id).await;
|
||||
|
||||
// Old credential must still decrypt
|
||||
let decrypted = store.get_credential("org-1", "openai").await.unwrap();
|
||||
assert_eq!(decrypted, "sk-old");
|
||||
|
||||
// New credential uses new DEK
|
||||
store.save_credential("org-1", "anthropic", "sk-new").await.unwrap();
|
||||
let decrypted_new = store.get_credential("org-1", "anthropic").await.unwrap();
|
||||
assert_eq!(decrypted_new, "sk-new");
|
||||
}
|
||||
```
|
||||
|
||||
### 11.3 Slow Dependency Chaos Test
|
||||
|
||||
```rust
|
||||
#[tokio::test]
|
||||
async fn chaos_slow_db_does_not_block_proxy_hot_path() {
|
||||
let stack = E2EStack::start().await;
|
||||
|
||||
// Inject 5-second network delay on TimescaleDB port via tc netem
|
||||
stack.inject_latency("timescaledb", Duration::from_secs(5)).await;
|
||||
|
||||
// Proxy must still route requests within SLA
|
||||
let start = Instant::now();
|
||||
let resp = stack.proxy()
|
||||
.post("/v1/chat/completions")
|
||||
.header("Authorization", "Bearer sk-valid")
|
||||
.json(&chat_request())
|
||||
.send().await;
|
||||
let latency = start.elapsed();
|
||||
|
||||
assert_eq!(resp.status(), 200);
|
||||
// Telemetry is dropped, but routing works
|
||||
assert!(latency < Duration::from_millis(50),
|
||||
"Proxy blocked by slow DB: {:?}", latency);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn chaos_slow_redis_falls_back_to_pg_for_auth() {
|
||||
let stack = E2EStack::start().await;
|
||||
stack.inject_latency("redis", Duration::from_secs(3)).await;
|
||||
|
||||
let resp = stack.proxy()
|
||||
.post("/v1/chat/completions")
|
||||
.header("Authorization", "Bearer sk-valid")
|
||||
.json(&chat_request())
|
||||
.send().await;
|
||||
assert_eq!(resp.status(), 200);
|
||||
}
|
||||
```
|
||||
|
||||
### 11.4 IDOR / Cross-Tenant Test Suite
|
||||
|
||||
```rust
|
||||
// tests/integration/idor_test.rs
|
||||
|
||||
#[tokio::test]
|
||||
async fn idor_org_a_cannot_read_org_b_routing_rules() {
|
||||
let stack = E2EStack::start().await;
|
||||
let org_a_token = stack.create_org_and_token("org-a").await;
|
||||
let org_b_token = stack.create_org_and_token("org-b").await;
|
||||
|
||||
// Org B creates a routing rule
|
||||
let rule = stack.api()
|
||||
.post("/v1/routing-rules")
|
||||
.bearer_auth(&org_b_token)
|
||||
.json(&json!({ "name": "secret-rule", "model": "gpt-4" }))
|
||||
.send().await.json::<RoutingRule>().await;
|
||||
|
||||
// Org A tries to read it
|
||||
let resp = stack.api()
|
||||
.get(&format!("/v1/routing-rules/{}", rule.id))
|
||||
.bearer_auth(&org_a_token)
|
||||
.send().await;
|
||||
assert_eq!(resp.status(), 404); // Not 403 — don't leak existence
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn idor_org_a_cannot_read_org_b_api_keys() {
|
||||
// Same pattern — create key as org B, attempt read as org A
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn idor_org_a_cannot_read_org_b_telemetry() {}
|
||||
|
||||
#[tokio::test]
|
||||
async fn idor_org_a_cannot_mutate_org_b_routing_rules() {}
|
||||
```
|
||||
|
||||
### 11.5 SSE Connection Drop / Billing Leak Test
|
||||
|
||||
```rust
|
||||
#[tokio::test]
|
||||
async fn sse_client_disconnect_aborts_upstream_provider_request() {
|
||||
let stack = E2EStack::start().await;
|
||||
let mock_provider = stack.mock_provider();
|
||||
|
||||
// Configure provider to stream slowly (1 token/sec for 60 tokens)
|
||||
mock_provider.configure_slow_stream(60, Duration::from_secs(1));
|
||||
|
||||
// Start streaming request
|
||||
let mut stream = stack.proxy()
|
||||
.post("/v1/chat/completions")
|
||||
.json(&json!({ "stream": true, "model": "gpt-4" }))
|
||||
.send().await
|
||||
.bytes_stream();
|
||||
|
||||
// Read 5 tokens then drop the connection
|
||||
for _ in 0..5 {
|
||||
stream.next().await;
|
||||
}
|
||||
drop(stream);
|
||||
|
||||
// Wait briefly for cleanup
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
|
||||
// Provider connection must be aborted — not still streaming
|
||||
assert_eq!(mock_provider.active_connections(), 0);
|
||||
|
||||
// Billing: customer should only be charged for 5 tokens, not 60
|
||||
let usage = stack.get_last_usage_record().await;
|
||||
assert!(usage.completion_tokens <= 10); // Some buffer for in-flight
|
||||
}
|
||||
```
|
||||
|
||||
### 11.6 Concurrent Circuit Breaker Race Condition
|
||||
|
||||
```rust
|
||||
#[tokio::test]
|
||||
async fn circuit_breaker_handles_50_concurrent_failures_cleanly() {
|
||||
let redis = TestcontainersRedis::start().await;
|
||||
let breaker = RedisCircuitBreaker::new(redis.connection_string(), "openai", 10);
|
||||
|
||||
let mut handles = vec![];
|
||||
for _ in 0..50 {
|
||||
let b = breaker.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
b.record_failure().await;
|
||||
}));
|
||||
}
|
||||
futures::future::join_all(handles).await;
|
||||
|
||||
// Breaker must be open — no race condition leaving it closed
|
||||
assert_eq!(breaker.state().await, CircuitState::Open);
|
||||
// Failure count must be exactly 50 (atomic increments)
|
||||
assert_eq!(breaker.failure_count().await, 50);
|
||||
}
|
||||
```
|
||||
|
||||
### 11.7 Trace Context Propagation
|
||||
|
||||
```rust
|
||||
#[tokio::test]
|
||||
async fn otel_trace_propagates_from_client_through_proxy_to_provider() {
|
||||
let stack = E2EStack::start().await;
|
||||
let tracer = stack.in_memory_tracer();
|
||||
|
||||
let resp = stack.proxy()
|
||||
.post("/v1/chat/completions")
|
||||
.header("traceparent", "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01")
|
||||
.json(&chat_request())
|
||||
.send().await;
|
||||
|
||||
let spans = tracer.finished_spans();
|
||||
let proxy_span = spans.iter().find(|s| s.name == "proxy.route").unwrap();
|
||||
|
||||
// Proxy span must be child of the incoming trace
|
||||
assert_eq!(proxy_span.trace_id, "4bf92f3577b34da6a3ce929d0e0e4736");
|
||||
|
||||
// Provider request must carry the same trace_id
|
||||
let provider_req = stack.mock_provider().last_request();
|
||||
assert!(provider_req.headers["traceparent"].contains("4bf92f3577b34da6a3ce929d0e0e4736"));
|
||||
}
|
||||
```
|
||||
|
||||
### 11.8 Flag Provider Fallback Test
|
||||
|
||||
```rust
|
||||
// A missing flag config file must degrade to the caller-supplied default —
// never a panic or an error surfaced to the routing hot path.
#[test]
fn flag_provider_unreachable_falls_back_to_safe_default() {
    // Simulate a missing/corrupt flag config file.
    let provider = JsonFileProvider::new("/nonexistent/flags.json");
    let result = provider.evaluate("enable_new_router", false);
    // Must return the safe default (false), not panic or error.
    // (idiom: assert!(!x) over assert_eq!(x, false))
    assert!(!result);
}
|
||||
|
||||
// Malformed flag JSON must also yield the safe default, not a parse error.
#[test]
fn flag_provider_malformed_json_falls_back_to_safe_default() {
    let provider = JsonFileProvider::from_string("{ invalid json }}}");
    let result = provider.evaluate("enable_new_router", false);
    // (idiom: assert!(!x) over assert_eq!(x, false))
    assert!(!result);
}
|
||||
```
|
||||
|
||||
### 11.9 24-Hour Soak Test Spec
|
||||
|
||||
```rust
|
||||
// tests/soak/long_running_latency.rs
|
||||
// Run manually: cargo test --test soak -- --ignored
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore] // Only run in nightly CI
|
||||
async fn soak_24h_proxy_latency_stays_under_5ms_p99() {
|
||||
// k6 config: 10 RPS sustained for 24 hours
|
||||
// Assert: p99 < 5ms, no memory growth > 50MB, no connection leaks
|
||||
// This catches memory fragmentation and connection pool exhaustion
|
||||
}
|
||||
```
|
||||
|
||||
### 11.10 Panic Mode Authorization
|
||||
|
||||
```rust
|
||||
#[tokio::test]
|
||||
async fn panic_mode_requires_owner_role() {
|
||||
let stack = E2EStack::start().await;
|
||||
let viewer_token = stack.create_token_with_role("org-1", Role::Viewer).await;
|
||||
|
||||
let resp = stack.api()
|
||||
.post("/admin/panic")
|
||||
.bearer_auth(&viewer_token)
|
||||
.send().await;
|
||||
assert_eq!(resp.status(), 403);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn panic_mode_allowed_for_owner_role() {
|
||||
let owner_token = stack.create_token_with_role("org-1", Role::Owner).await;
|
||||
let resp = stack.api()
|
||||
.post("/admin/panic")
|
||||
.bearer_auth(&owner_token)
|
||||
.send().await;
|
||||
assert_eq!(resp.status(), 200);
|
||||
}
|
||||
```
|
||||
|
||||
*End of P1 Review Remediation Addendum*
|
||||
|
||||
Reference in New Issue
Block a user