Implement BMad Must-Have Before Launch fixes for all 6 products
- P1: API key redaction, SSE billing leak, token math edge cases, CI runner config
- P2: mTLS revocation lockout, terraform state lock recovery, RLS pool leak, entropy scrubber, pgmq visibility
- P3: HMAC replay prevention, cross-tenant negative tests, correlation window edge cases, SQS claim-check, free tier
- P4: Discovery partial failure recovery, ownership conflict integration test, VCR freshness CI, Meilisearch rebuild, Cmd+K latency
- P5: Concurrent baseline conflicts, remediation RBAC, Clock interface for governance, 10K property-based runs, Redis panic fallback
- P6: Cryptographic agent update signatures, streaming audit logs with WAL, shell AST parsing (mvdan/sh), intervention deadlock TTL, canary suite CI gate
This commit is contained in:
@@ -1865,3 +1865,227 @@ describe('Slack Notification Circuit Breaker', () => {
|
||||
| E2E | 10% (~20) | 10% (~28) | Dashboard UI (Playwright), onboarding flow |
|
||||
|
||||
*End of P3 Review Remediation Addendum*
|
||||
|
||||
---
|
||||
|
||||
## 12. BMad Review Implementation (Must-Have Before Launch)
|
||||
|
||||
### 12.1 HMAC Timestamp Freshness (Replay Attack Prevention)
|
||||
|
||||
```typescript
|
||||
describe('HMAC Replay Attack Prevention', () => {
|
||||
it('rejects Datadog webhook with timestamp older than 5 minutes', async () => {
|
||||
const payload = makeDatadogPayload();
|
||||
const staleTimestamp = Math.floor(Date.now() / 1000) - 301; // 5min + 1s
|
||||
const sig = computeDatadogHMAC(payload, staleTimestamp);
|
||||
|
||||
const resp = await ingest(payload, {
|
||||
'dd-webhook-timestamp': staleTimestamp.toString(),
|
||||
'dd-webhook-signature': sig,
|
||||
});
|
||||
expect(resp.status).toBe(401);
|
||||
expect(resp.body.error).toContain('stale timestamp');
|
||||
});
|
||||
|
||||
it('rejects PagerDuty webhook with missing timestamp', async () => {
|
||||
const payload = makePagerDutyPayload();
|
||||
const sig = computePagerDutyHMAC(payload);
|
||||
|
||||
const resp = await ingest(payload, {
|
||||
'x-pagerduty-signature': sig,
|
||||
// No timestamp header
|
||||
});
|
||||
expect(resp.status).toBe(401);
|
||||
});
|
||||
|
||||
it('rejects OpsGenie webhook replayed after 5 minutes', async () => {
|
||||
// OpsGenie doesn't always package timestamp cleanly
|
||||
// Must extract from payload body and validate
|
||||
const payload = makeOpsGeniePayload({ timestamp: fiveMinutesAgo() });
|
||||
const sig = computeOpsGenieHMAC(payload);
|
||||
|
||||
const resp = await ingest(payload, { 'x-opsgenie-signature': sig });
|
||||
expect(resp.status).toBe(401);
|
||||
});
|
||||
|
||||
it('accepts fresh webhook within 5-minute window', async () => {
|
||||
const payload = makeDatadogPayload();
|
||||
const freshTimestamp = Math.floor(Date.now() / 1000);
|
||||
const sig = computeDatadogHMAC(payload, freshTimestamp);
|
||||
|
||||
const resp = await ingest(payload, {
|
||||
'dd-webhook-timestamp': freshTimestamp.toString(),
|
||||
'dd-webhook-signature': sig,
|
||||
});
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.2 Cross-Tenant Negative Isolation Tests
|
||||
|
||||
```typescript
|
||||
describe('DynamoDB Tenant Isolation (Negative Tests)', () => {
|
||||
it('Tenant A cannot read Tenant B incidents', async () => {
|
||||
// Seed data for both tenants
|
||||
await createIncident('tenant-a', { title: 'A incident' });
|
||||
await createIncident('tenant-b', { title: 'B incident' });
|
||||
|
||||
// Query as Tenant A
|
||||
const results = await dao.listIncidents('tenant-a');
|
||||
|
||||
// Explicitly assert Tenant B data is absent
|
||||
const tenantIds = results.map(r => r.tenantId);
|
||||
expect(tenantIds).not.toContain('tenant-b');
|
||||
expect(results.every(r => r.tenantId === 'tenant-a')).toBe(true);
|
||||
});
|
||||
|
||||
it('Tenant A cannot read Tenant B analytics', async () => {
|
||||
await seedAnalytics('tenant-a', { alertCount: 100 });
|
||||
await seedAnalytics('tenant-b', { alertCount: 200 });
|
||||
|
||||
const analytics = await dao.getAnalytics('tenant-a');
|
||||
expect(analytics.alertCount).toBe(100); // Not 300 (combined)
|
||||
});
|
||||
|
||||
it('API returns 404 (not 403) for cross-tenant incident access', async () => {
|
||||
const incident = await createIncident('tenant-b', { title: 'secret' });
|
||||
|
||||
const resp = await api.get(`/v1/incidents/${incident.id}`)
|
||||
.set('Authorization', `Bearer ${tenantAToken}`);
|
||||
|
||||
// 404 not 403 — don't leak existence
|
||||
expect(resp.status).toBe(404);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.3 Correlation Window Edge Cases
|
||||
|
||||
```typescript
|
||||
describe('Out-of-Order Alert Delivery', () => {
|
||||
it('late alert attaches to existing incident (not duplicate)', async () => {
|
||||
const clock = new FakeClock();
|
||||
const engine = new CorrelationEngine(new InMemoryWindowStore(), clock);
|
||||
|
||||
// Alert 1 arrives at T=0
|
||||
const alert1 = makeAlert({ service: 'auth', fingerprint: 'cpu-high', timestamp: 0 });
|
||||
const incident1 = await engine.process(alert1);
|
||||
|
||||
// Window closes at T=5min, incident shipped
|
||||
clock.advanceBy(5 * 60 * 1000);
|
||||
await engine.flushWindows();
|
||||
|
||||
// Late alert arrives at T=6min with timestamp T=2min (within original window)
|
||||
const lateAlert = makeAlert({ service: 'auth', fingerprint: 'cpu-high', timestamp: 2 * 60 * 1000 });
|
||||
const result = await engine.process(lateAlert);
|
||||
|
||||
// Must attach to existing incident, not create new one
|
||||
expect(result.incidentId).toBe(incident1.incidentId);
|
||||
expect(result.action).toBe('attached_to_existing');
|
||||
});
|
||||
|
||||
it('very late alert (>2x window) creates new incident', async () => {
|
||||
const clock = new FakeClock();
|
||||
const engine = new CorrelationEngine(new InMemoryWindowStore(), clock);
|
||||
|
||||
const alert1 = makeAlert({ service: 'auth', fingerprint: 'cpu-high' });
|
||||
const incident1 = await engine.process(alert1);
|
||||
|
||||
// 15 minutes later (3x the 5-min window)
|
||||
clock.advanceBy(15 * 60 * 1000);
|
||||
|
||||
const lateAlert = makeAlert({ service: 'auth', fingerprint: 'cpu-high' });
|
||||
const result = await engine.process(lateAlert);
|
||||
|
||||
expect(result.incidentId).not.toBe(incident1.incidentId);
|
||||
expect(result.action).toBe('new_incident');
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.4 SQS Claim-Check Round-Trip
|
||||
|
||||
```typescript
|
||||
describe('SQS 256KB Claim-Check End-to-End', () => {
|
||||
it('large payload round-trips through S3 pointer', async () => {
|
||||
const largePayload = makeLargeAlertPayload(300 * 1024); // 300KB
|
||||
|
||||
// Ingestion compresses and stores in S3
|
||||
const resp = await ingest(largePayload);
|
||||
expect(resp.status).toBe(200);
|
||||
|
||||
// SQS message contains S3 pointer
|
||||
const sqsMsg = await getLastSQSMessage(localstack, 'alert-queue');
|
||||
const body = JSON.parse(sqsMsg.Body);
|
||||
expect(body.s3Pointer).toBeDefined();
|
||||
|
||||
// Correlation engine fetches from S3 and processes
|
||||
const incident = await waitForIncidentCreated(5000);
|
||||
expect(incident).toBeDefined();
|
||||
expect(incident.sourceAlertCount).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('S3 fetch timeout does not crash correlation engine', async () => {
|
||||
// Inject S3 latency (10 second delay)
|
||||
mockS3.setLatency(10000);
|
||||
|
||||
const largePayload = makeLargeAlertPayload(300 * 1024);
|
||||
await ingest(largePayload);
|
||||
|
||||
// Correlation engine should timeout and send to DLQ
|
||||
const dlqMsg = await getDLQMessage(localstack, 'alert-dlq', 15000);
|
||||
expect(dlqMsg).toBeDefined();
|
||||
|
||||
// Engine is still healthy
|
||||
const health = await api.get('/health');
|
||||
expect(health.status).toBe(200);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 12.5 Free Tier Enforcement
|
||||
|
||||
```typescript
|
||||
describe('Free Tier (10K alerts/month, 7-day retention)', () => {
|
||||
it('accepts alert at 9,999 count', async () => {
|
||||
await setAlertCounter('tenant-free', 9999);
|
||||
const resp = await ingestAsTenat('tenant-free', makeAlert());
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
|
||||
it('rejects alert at 10,001 with upgrade prompt', async () => {
|
||||
await setAlertCounter('tenant-free', 10000);
|
||||
const resp = await ingestAsTenant('tenant-free', makeAlert());
|
||||
expect(resp.status).toBe(429);
|
||||
expect(resp.body.upgrade_url).toContain('stripe');
|
||||
});
|
||||
|
||||
it('counter resets on first of month', async () => {
|
||||
await setAlertCounter('tenant-free', 10000);
|
||||
clock.advanceToFirstOfNextMonth();
|
||||
await runMonthlyReset();
|
||||
|
||||
const resp = await ingestAsTenant('tenant-free', makeAlert());
|
||||
expect(resp.status).toBe(200);
|
||||
});
|
||||
|
||||
it('purges data older than 7 days on free tier', async () => {
|
||||
await createIncident('tenant-free', { createdAt: eightDaysAgo() });
|
||||
await runRetentionPurge();
|
||||
|
||||
const incidents = await dao.listIncidents('tenant-free');
|
||||
expect(incidents).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('retains data for 90 days on pro tier', async () => {
|
||||
await createIncident('tenant-pro', { createdAt: thirtyDaysAgo() });
|
||||
await runRetentionPurge();
|
||||
|
||||
const incidents = await dao.listIncidents('tenant-pro');
|
||||
expect(incidents).toHaveLength(1);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
*End of P3 BMad Implementation*
|
||||
|
||||
Reference in New Issue
Block a user