Implement review remediation + PLG analytics SDK
- All 6 test architectures patched with Section 11 addendums - P5 (cost) fully rewritten from 232 to ~600 lines - PLG brainstorm + party mode advisory board results - Analytics SDK v2 (PostHog Cloud, Zod strict, Lambda-safe) - Analytics tests v2 (safeParse, no $set, no timestamp, no PII) - Addresses all Gemini review findings across P1-P6
This commit is contained in:
@@ -1727,3 +1727,370 @@ Before any code ships to production, these tests must be green:
|
||||
---
|
||||
|
||||
*Document complete. Total estimated test count at V1 launch: ~500 tests. Target by month 3: ~1,000 tests.*
|
||||
|
||||
---
|
||||
|
||||
## 11. Review Remediation Addendum (Post-Gemini Review)
|
||||
|
||||
### 11.1 Missing Epic Coverage
|
||||
|
||||
#### Epic 6: Dashboard UI (React Testing Library + Playwright)
|
||||
|
||||
```typescript
|
||||
// tests/ui/components/DiffViewer.test.tsx
|
||||
describe('DiffViewer Component', () => {
|
||||
it('renders added lines in green', () => {});
|
||||
it('renders removed lines in red', () => {});
|
||||
it('renders unchanged lines in default color', () => {});
|
||||
it('collapses large diffs with "Show more" toggle', () => {});
|
||||
it('highlights HCL syntax in diff blocks', () => {});
|
||||
it('shows resource type icon next to each drift item', () => {});
|
||||
});
|
||||
|
||||
describe('StackOverview Component', () => {
|
||||
it('renders drift count badge per stack', () => {});
|
||||
it('sorts stacks by drift severity (critical first)', () => {});
|
||||
it('shows last scan timestamp', () => {});
|
||||
it('shows agent health indicator (green/yellow/red)', () => {});
|
||||
});
|
||||
|
||||
// tests/e2e/ui/dashboard.spec.ts (Playwright)
|
||||
test('OAuth login redirects to Cognito and back', async ({ page }) => {
|
||||
await page.goto('/dashboard');
|
||||
await expect(page).toHaveURL(/cognito/);
|
||||
});
|
||||
|
||||
// Smoke check: the stack list page renders at least one stack card.
test('stack list renders with drift counts', async ({ page }) => {
  await page.goto('/dashboard/stacks');
  // Playwright has no `toHaveCountGreaterThan` matcher. Asserting that the
  // first match is visible auto-waits and fails when zero cards are rendered.
  await expect(page.locator('[data-testid="stack-card"]').first()).toBeVisible();
});
|
||||
|
||||
// Smoke check: the drift detail page shows the diff viewer with at least one
// added line.
test('diff viewer renders inline diff for Terraform resource', async ({ page }) => {
  await page.goto('/dashboard/stacks/stack-1/drifts/drift-1');
  await expect(page.locator('[data-testid="diff-viewer"]')).toBeVisible();
  // Playwright has no `toHaveCountGreaterThan` matcher. Asserting on the first
  // match auto-waits and fails when no added line is rendered.
  await expect(page.locator('.diff-added').first()).toBeVisible();
});
|
||||
|
||||
test('revert button triggers confirmation modal', async ({ page }) => {
|
||||
await page.goto('/dashboard/stacks/stack-1/drifts/drift-1');
|
||||
await page.click('[data-testid="revert-btn"]');
|
||||
await expect(page.locator('[data-testid="confirm-modal"]')).toBeVisible();
|
||||
});
|
||||
```
|
||||
|
||||
#### Epic 9: Onboarding & PLG (Stripe + drift init)
|
||||
|
||||
```go
|
||||
// pkg/onboarding/stripe_test.go
|
||||
|
||||
func TestStripeWebhookCheckoutCompleted_UpgradesTenant(t *testing.T) {}
|
||||
func TestStripeWebhookSubscriptionDeleted_DowngradesTenant(t *testing.T) {}
|
||||
func TestStripeWebhookInvalidSignature_Returns401(t *testing.T) {}
|
||||
func TestStripeWebhookReplayedEvent_IsIdempotent(t *testing.T) {}
|
||||
|
||||
// pkg/agent/init_test.go
|
||||
|
||||
func TestDriftInit_DetectsTerraformInCurrentDir(t *testing.T) {}
|
||||
func TestDriftInit_DetectsCloudFormationInCurrentDir(t *testing.T) {}
|
||||
func TestDriftInit_DetectsPulumiInCurrentDir(t *testing.T) {}
|
||||
func TestDriftInit_GeneratesValidYAMLConfig(t *testing.T) {}
|
||||
func TestDriftInit_HandlesWindowsPaths(t *testing.T) {}
|
||||
func TestDriftInit_HandlesMacPaths(t *testing.T) {}
|
||||
func TestDriftInit_HandlesLinuxPaths(t *testing.T) {}
|
||||
func TestDriftInit_FailsGracefullyOnEmptyDir(t *testing.T) {}
|
||||
```
|
||||
|
||||
#### Epic 8: Infrastructure (Terratest)
|
||||
|
||||
```go
|
||||
// tests/infra/terraform_test.go
|
||||
|
||||
func TestTerraformPlan_CreatesExpectedResources(t *testing.T) {
|
||||
terraformOptions := terraform.WithDefaultRetryableErrors(t, &terraform.Options{
|
||||
TerraformDir: "../../infra/terraform",
|
||||
})
|
||||
defer terraform.Destroy(t, terraformOptions)
|
||||
terraform.InitAndPlan(t, terraformOptions)
|
||||
}
|
||||
|
||||
func TestTerraformApply_SQSFIFOQueueCreated(t *testing.T) {}
|
||||
func TestTerraformApply_RDSInstanceCreated(t *testing.T) {}
|
||||
func TestTerraformApply_IAMRolesHaveLeastPrivilege(t *testing.T) {
|
||||
// Verify no IAM policy has Action: "*"
|
||||
}
|
||||
func TestTerraformApply_VPCSecurityGroupsRestrictIngress(t *testing.T) {}
|
||||
```
|
||||
|
||||
#### Epic 2: mTLS Certificate Lifecycle
|
||||
|
||||
```go
|
||||
// pkg/agent/mtls_test.go
|
||||
|
||||
func TestMTLS_CertificateGeneration_ValidX509(t *testing.T) {}
|
||||
func TestMTLS_CertificateExpiration_AgentRejectsExpiredCert(t *testing.T) {}
|
||||
func TestMTLS_CertificateRotation_NewCertAcceptedMidConnection(t *testing.T) {}
|
||||
func TestMTLS_CertificateRevocation_RevokedCertRejected(t *testing.T) {}
|
||||
func TestMTLS_SelfSignedCert_RejectedBySaaS(t *testing.T) {}
|
||||
func TestMTLS_CertificateChain_IntermediateCAValidated(t *testing.T) {}
|
||||
```
|
||||
|
||||
### 11.2 Add t.Parallel() to Table-Driven Tests
|
||||
|
||||
```go
|
||||
// BEFORE (sequential — wastes CI time):
|
||||
func TestSecretScrubber(t *testing.T) {
|
||||
tests := []struct{ name, input, expected string }{...}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// runs sequentially
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// AFTER (parallel):
|
||||
func TestSecretScrubber(t *testing.T) {
|
||||
t.Parallel()
|
||||
tests := []struct{ name, input, expected string }{...}
|
||||
for _, tt := range tests {
|
||||
tt := tt // capture range variable (required before Go 1.22; redundant from 1.22 on)
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
// runs in parallel
|
||||
})
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 11.3 Dynamic Resource Naming for LocalStack
|
||||
|
||||
```go
|
||||
// BEFORE (shared state — flaky):
|
||||
// bucket := "drift-reports"
|
||||
|
||||
// AFTER (per-test isolation):
|
||||
// uniqueBucket returns an S3 bucket name that is unique to the calling test.
//
// The name has the form "drift-reports-<sanitized test name>-<UnixNano>" and
// is kept valid for S3: at most 63 characters, made up only of lowercase
// letters, digits and hyphens. t.Name() cannot be used verbatim because test
// names contain upper-case letters and underscores, and subtest names add "/"
// separators.
func uniqueBucket(t *testing.T) string {
	t.Helper()

	const (
		prefix = "drift-reports-"
		maxLen = 63 // S3 bucket name length limit
	)
	suffix := fmt.Sprintf("-%d", time.Now().UnixNano())

	// Characters left for the test name once the fixed parts are accounted for.
	budget := maxLen - len(prefix) - len(suffix)
	if budget < 0 {
		budget = 0
	}

	raw := t.Name()
	name := make([]byte, 0, budget)
	for i := 0; i < len(raw) && len(name) < budget; i++ {
		c := raw[i]
		switch {
		case c >= 'a' && c <= 'z', c >= '0' && c <= '9':
			// already valid
		case c >= 'A' && c <= 'Z':
			c += 'a' - 'A'
		default:
			// "/", "_", spaces, non-ASCII bytes, ... are not allowed in bucket names.
			c = '-'
		}
		name = append(name, c)
	}

	return prefix + string(name) + suffix
}
|
||||
|
||||
func TestDriftReportUpload(t *testing.T) {
|
||||
t.Parallel()
|
||||
bucket := uniqueBucket(t)
|
||||
s3Client.CreateBucket(ctx, &s3.CreateBucketInput{Bucket: &bucket})
|
||||
// Test uses isolated bucket — no cross-test contamination
|
||||
}
|
||||
```
|
||||
|
||||
### 11.4 Distributed Tracing Cross-Boundary Tests
|
||||
|
||||
```go
|
||||
// tests/integration/trace_propagation_test.go
|
||||
|
||||
func TestTraceContext_AgentToSaaS_SpanParentChain(t *testing.T) {
|
||||
// Agent generates drift_scan span with trace_id
|
||||
// POST /v1/drift-reports carries traceparent header
|
||||
// SaaS Event Processor creates child span
|
||||
// Verify parent-child relationship across HTTP boundary
|
||||
|
||||
exporter := tracetest.NewInMemoryExporter()
|
||||
|
||||
// Fire drift report with traceparent
|
||||
traceID := "4bf92f3577b34da6a3ce929d0e0e4736"
|
||||
resp := postDriftReport(t, stack, traceID)
|
||||
assert.Equal(t, 200, resp.StatusCode)
|
||||
|
||||
spans := exporter.GetSpans()
|
||||
eventProcessorSpan := findSpan(spans, "drift_report.process")
|
||||
assert.Equal(t, traceID, eventProcessorSpan.SpanContext().TraceID().String())
|
||||
}
|
||||
|
||||
func TestTraceContext_SQSBoundary_PreservesTraceID(t *testing.T) {
|
||||
// Verify SQS message attributes contain traceparent
|
||||
// Verify consumer extracts and continues the trace
|
||||
}
|
||||
|
||||
func TestTraceContext_AgentScan_CreatesParentSpan(t *testing.T) {
|
||||
// Verify agent drift_scan span has correct attributes:
|
||||
// drift.stack_id, drift.resource_count, drift.duration_ms
|
||||
}
|
||||
```
|
||||
|
||||
### 11.5 Backward Compatibility Serialization (Elastic Schema)
|
||||
|
||||
```go
|
||||
// tests/schema/backward_compat_test.go
|
||||
|
||||
func TestOldAgent_ParsesNewDynamoDBItem_WithV2Attributes(t *testing.T) {
|
||||
// Simulate V2 DynamoDB item with new _v2 fields
|
||||
item := map[string]types.AttributeValue{
|
||||
"PK": &types.AttributeValueMemberS{Value: "STACK#123"},
|
||||
"drift_score": &types.AttributeValueMemberN{Value: "85"},
|
||||
"drift_score_v2": &types.AttributeValueMemberN{Value: "92"}, // New field
|
||||
"remediation_v2": &types.AttributeValueMemberS{Value: "auto"}, // New field
|
||||
}
|
||||
|
||||
// V1 parser must ignore unknown fields
|
||||
result, err := ParseDriftItem(item)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 85, result.DriftScore) // Uses V1 field
|
||||
}
|
||||
|
||||
func TestV1Code_ReadsV2Writes_DuringMigrationWindow(t *testing.T) {
|
||||
// V2 writes both drift_score and drift_score_v2
|
||||
// V1 reads drift_score (ignores _v2)
|
||||
// Verify no data loss
|
||||
}
|
||||
```
|
||||
|
||||
### 11.6 Security: RBAC Forgery & Replay Attacks
|
||||
|
||||
```go
|
||||
// tests/integration/security_test.go
|
||||
|
||||
func TestAgentCannotForgeStackID(t *testing.T) {
|
||||
// Agent with API key for org-A sends drift report claiming stack belongs to org-B
|
||||
orgAKey := createAPIKey(t, "org-a")
|
||||
report := makeDriftReport("org-b-stack-id") // Wrong org
|
||||
|
||||
resp := postDriftReportWithKey(t, report, orgAKey)
|
||||
assert.Equal(t, 403, resp.StatusCode)
|
||||
}
|
||||
|
||||
func TestReplayAttack_DuplicateReportID_Rejected(t *testing.T) {
|
||||
report := makeDriftReport("stack-1")
|
||||
resp1 := postDriftReport(t, report)
|
||||
assert.Equal(t, 200, resp1.StatusCode)
|
||||
|
||||
// Replay exact same report
|
||||
resp2 := postDriftReport(t, report)
|
||||
assert.Equal(t, 409, resp2.StatusCode) // Conflict — already processed
|
||||
}
|
||||
|
||||
func TestReplayAttack_OldTimestamp_Rejected(t *testing.T) {
|
||||
report := makeDriftReport("stack-1")
|
||||
report.Timestamp = time.Now().Add(-10 * time.Minute) // 10 min old
|
||||
|
||||
resp := postDriftReport(t, report)
|
||||
assert.Equal(t, 400, resp.StatusCode) // Stale report
|
||||
}
|
||||
```
|
||||
|
||||
### 11.7 Noisy Neighbor & Fair-Share Processing
|
||||
|
||||
```go
|
||||
// tests/integration/fair_share_test.go
|
||||
|
||||
func TestNoisyNeighbor_LargeOrgDoesNotStarveSmallOrg(t *testing.T) {
|
||||
// Org A: 10,000 drifted resources
|
||||
// Org B: 10 drifted resources
|
||||
// Both submit reports simultaneously
|
||||
|
||||
seedDriftReports(t, "org-a", 10000)
|
||||
seedDriftReports(t, "org-b", 10)
|
||||
|
||||
// Org B's reports must be processed within 30 seconds
|
||||
// (not queued behind all 10K of Org A's)
|
||||
start := time.Now()
|
||||
waitForProcessed(t, "org-b", 10, 30*time.Second)
|
||||
assert.Less(t, time.Since(start), 30*time.Second)
|
||||
}
|
||||
```
|
||||
|
||||
### 11.8 Panic Mode Mid-Remediation Race Condition
|
||||
|
||||
```go
|
||||
// tests/integration/panic_remediation_test.go
|
||||
|
||||
func TestPanicMode_AbortsInFlightRemediation(t *testing.T) {
|
||||
// Start a remediation (terraform apply)
|
||||
execID := startRemediation(t, "stack-1", "drift-1")
|
||||
waitForState(t, execID, "applying")
|
||||
|
||||
// Trigger panic mode
|
||||
triggerPanicMode(t)
|
||||
|
||||
// Remediation must be aborted, not completed
|
||||
state := waitForState(t, execID, "aborted")
|
||||
assert.Equal(t, "aborted", state)
|
||||
|
||||
// Verify terraform state is not corrupted
|
||||
// (agent should have run terraform state pull to verify)
|
||||
}
|
||||
|
||||
func TestPanicMode_DoesNotAbortReadOnlyScans(t *testing.T) {
|
||||
// Drift scans (read-only) should continue during panic
|
||||
// Only write operations (remediation) are halted
|
||||
scanID := startDriftScan(t, "stack-1")
|
||||
triggerPanicMode(t)
|
||||
|
||||
state := waitForState(t, scanID, "completed")
|
||||
assert.Equal(t, "completed", state) // Scan finishes normally
|
||||
}
|
||||
```
|
||||
|
||||
### 11.9 Remediation vs. Concurrent Scan Race Condition
|
||||
|
||||
```go
|
||||
func TestConcurrentScanDuringRemediation_DoesNotReportHalfAppliedState(t *testing.T) {
|
||||
// Start remediation (terraform apply — takes ~30s)
|
||||
execID := startRemediation(t, "stack-1", "drift-1")
|
||||
waitForState(t, execID, "applying")
|
||||
|
||||
// Trigger a drift scan while remediation is in progress
|
||||
scanID := startDriftScan(t, "stack-1")
|
||||
|
||||
// Scan must either:
|
||||
// a) Wait for remediation to complete, OR
|
||||
// b) Skip the stack with "remediation in progress" status
|
||||
scanResult := waitForScanComplete(t, scanID)
|
||||
assert.NotEqual(t, "half-applied", scanResult.Status)
|
||||
// Must be either "skipped_remediation_in_progress" or show post-remediation state
|
||||
}
|
||||
```
|
||||
|
||||
### 11.10 SaaS API Memory Profiling
|
||||
|
||||
```go
|
||||
// tests/load/memory_profile_test.go
|
||||
|
||||
func TestEventProcessor_DoesNotOOM_On1MB_DriftReport(t *testing.T) {
|
||||
// Generate a 1MB drift report (1000 resources with large diffs)
|
||||
report := makeLargeDriftReport(1000)
|
||||
assert.Greater(t, len(report), 1024*1024)
|
||||
|
||||
var memBefore, memAfter runtime.MemStats
|
||||
runtime.ReadMemStats(&memBefore)
|
||||
|
||||
processReport(t, report)
|
||||
|
||||
runtime.ReadMemStats(&memAfter)
|
||||
growth := memAfter.Alloc - memBefore.Alloc
|
||||
assert.Less(t, growth, uint64(50*1024*1024)) // <50MB growth
|
||||
}
|
||||
```
|
||||
|
||||
### 11.11 Trim E2E to Smoke Tier
|
||||
|
||||
Per the review recommendation, E2E is capped at 10 critical paths. The remaining 40 tests are demoted to integration:
|
||||
|
||||
| E2E (Keep — 10 max) | Demoted to Integration |
|
||||
|---------------------|----------------------|
|
||||
| Onboarding: init → connect → first scan | Agent heartbeat variations |
|
||||
| First drift detected → Slack alert | Individual parser format tests |
|
||||
| Revert flow: Slack → agent apply → verify | Secret scrubber edge cases |
|
||||
| Panic mode halts remediation | DynamoDB access pattern tests |
|
||||
| Cross-tenant isolation | Individual webhook format tests |
|
||||
| OAuth login → dashboard → view diff | Notification batching |
|
||||
| Free tier limit enforcement | Agent config reload |
|
||||
| Agent disconnect → reconnect → resume | Baseline score calculations |
|
||||
| mTLS cert rotation mid-scan | Individual API endpoint tests |
|
||||
| Stripe upgrade → unlock features | Cache invalidation patterns |
|
||||
|
||||
### 11.12 Updated Test Pyramid (Post-Review)
|
||||
|
||||
| Level | Original | Revised | Rationale |
|
||||
|-------|----------|---------|-----------|
|
||||
| Unit | 70% (~350) | 65% (~350) | Add t.Parallel(), keep count but add UI component tests |
|
||||
| Integration | 20% (~100) | 28% (~150) | Terratest, mTLS, trace propagation, fair-share, security |
|
||||
| E2E/Smoke | 10% (~50) | 7% (~35) | Capped at 10 true E2E + 25 Playwright UI tests |
|
||||
|
||||
*End of P2 Review Remediation Addendum*
|
||||
|
||||
Reference in New Issue
Block a user