Scaffold dd0c/alert: ingestion, correlation engine, HMAC validation, tests
- Webhook ingestion: HMAC validation for Datadog/PagerDuty/OpsGenie with 5-min timestamp freshness - Payload normalizers: canonical alert schema with severity mapping per provider - Correlation engine: time-window grouping, late-alert attachment (2x window), FakeClock for testing - InMemoryWindowStore for unit tests - Tests: 12 HMAC validation cases, 5 normalizer cases, 7 correlation engine cases - PostgreSQL schema with RLS: tenants, incidents, alerts, webhook_secrets, notification_configs - Free tier enforcement columns (alert_count_month, reset_at) - Fly.io config, Dockerfile, Gitea Actions CI
This commit is contained in:
27
products/03-alert-intelligence/.gitea/workflows/ci.yml
Normal file
27
products/03-alert-intelligence/.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
name: CI
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
pull_request:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: '22'
|
||||||
|
|
||||||
|
- name: Install deps
|
||||||
|
run: npm ci
|
||||||
|
working-directory: products/03-alert-intelligence
|
||||||
|
|
||||||
|
- name: Type check
|
||||||
|
run: npx tsc --noEmit
|
||||||
|
working-directory: products/03-alert-intelligence
|
||||||
|
|
||||||
|
- name: Test
|
||||||
|
run: npm test
|
||||||
|
working-directory: products/03-alert-intelligence
|
||||||
14
products/03-alert-intelligence/Dockerfile
Normal file
14
products/03-alert-intelligence/Dockerfile
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
FROM node:22-slim AS builder
|
||||||
|
WORKDIR /app
|
||||||
|
COPY package.json package-lock.json* ./
|
||||||
|
RUN npm ci
|
||||||
|
COPY . .
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
FROM node:22-slim
|
||||||
|
WORKDIR /app
|
||||||
|
COPY --from=builder /app/dist ./dist
|
||||||
|
COPY --from=builder /app/node_modules ./node_modules
|
||||||
|
COPY --from=builder /app/package.json ./
|
||||||
|
EXPOSE 3000
|
||||||
|
CMD ["node", "dist/index.js"]
|
||||||
27
products/03-alert-intelligence/fly.toml
Normal file
27
products/03-alert-intelligence/fly.toml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
app = "dd0c-alert"
|
||||||
|
primary_region = "iad"
|
||||||
|
|
||||||
|
[build]
|
||||||
|
dockerfile = "Dockerfile"
|
||||||
|
|
||||||
|
[env]
|
||||||
|
NODE_ENV = "production"
|
||||||
|
PORT = "3000"
|
||||||
|
LOG_LEVEL = "info"
|
||||||
|
|
||||||
|
[http_service]
|
||||||
|
internal_port = 3000
|
||||||
|
force_https = true
|
||||||
|
auto_stop_machines = true
|
||||||
|
auto_start_machines = true
|
||||||
|
min_machines_running = 0
|
||||||
|
|
||||||
|
[http_service.concurrency]
|
||||||
|
type = "requests"
|
||||||
|
hard_limit = 200
|
||||||
|
soft_limit = 150
|
||||||
|
|
||||||
|
[[vm]]
|
||||||
|
cpu_kind = "shared"
|
||||||
|
cpus = 1
|
||||||
|
memory_mb = 256
|
||||||
90
products/03-alert-intelligence/migrations/001_init.sql
Normal file
90
products/03-alert-intelligence/migrations/001_init.sql
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
-- dd0c/alert V1 schema — DynamoDB-style in PostgreSQL (Neon)
|
||||||
|
|
||||||
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
||||||
|
|
||||||
|
-- Tenants
|
||||||
|
CREATE TABLE tenants (
|
||||||
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
slug TEXT NOT NULL UNIQUE,
|
||||||
|
tier TEXT NOT NULL DEFAULT 'free' CHECK (tier IN ('free', 'pro')),
|
||||||
|
alert_count_month INT NOT NULL DEFAULT 0,
|
||||||
|
alert_count_reset_at TIMESTAMPTZ NOT NULL DEFAULT date_trunc('month', now()) + interval '1 month',
|
||||||
|
stripe_customer_id TEXT,
|
||||||
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Webhook secrets per provider per tenant
|
||||||
|
CREATE TABLE webhook_secrets (
|
||||||
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||||
|
tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
|
||||||
|
provider TEXT NOT NULL CHECK (provider IN ('datadog', 'pagerduty', 'opsgenie', 'grafana', 'custom')),
|
||||||
|
secret TEXT NOT NULL,
|
||||||
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||||
|
UNIQUE(tenant_id, provider)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Incidents (correlation output)
|
||||||
|
CREATE TABLE incidents (
|
||||||
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||||
|
tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
|
||||||
|
incident_key TEXT NOT NULL,
|
||||||
|
fingerprint TEXT NOT NULL,
|
||||||
|
service TEXT,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
severity TEXT NOT NULL CHECK (severity IN ('critical', 'high', 'medium', 'low', 'info')),
|
||||||
|
status TEXT NOT NULL DEFAULT 'open' CHECK (status IN ('open', 'acknowledged', 'resolved', 'suppressed')),
|
||||||
|
alert_count INT NOT NULL DEFAULT 1,
|
||||||
|
first_alert_at TIMESTAMPTZ NOT NULL,
|
||||||
|
last_alert_at TIMESTAMPTZ NOT NULL,
|
||||||
|
resolved_at TIMESTAMPTZ,
|
||||||
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||||
|
);
|
||||||
|
CREATE INDEX idx_incidents_tenant ON incidents(tenant_id, status, created_at DESC);
|
||||||
|
CREATE INDEX idx_incidents_fingerprint ON incidents(tenant_id, fingerprint);
|
||||||
|
|
||||||
|
-- Alerts (raw, linked to incidents)
|
||||||
|
CREATE TABLE alerts (
|
||||||
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||||
|
tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
|
||||||
|
incident_id UUID REFERENCES incidents(id) ON DELETE SET NULL,
|
||||||
|
source_provider TEXT NOT NULL,
|
||||||
|
source_id TEXT NOT NULL,
|
||||||
|
fingerprint TEXT NOT NULL,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
severity TEXT NOT NULL,
|
||||||
|
status TEXT NOT NULL,
|
||||||
|
service TEXT,
|
||||||
|
environment TEXT,
|
||||||
|
tags JSONB NOT NULL DEFAULT '{}',
|
||||||
|
raw_payload JSONB NOT NULL,
|
||||||
|
received_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||||
|
);
|
||||||
|
CREATE INDEX idx_alerts_tenant ON alerts(tenant_id, received_at DESC);
|
||||||
|
CREATE INDEX idx_alerts_incident ON alerts(incident_id);
|
||||||
|
|
||||||
|
-- Notification configs
|
||||||
|
CREATE TABLE notification_configs (
|
||||||
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||||
|
tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
|
||||||
|
channel TEXT NOT NULL CHECK (channel IN ('slack', 'email', 'webhook')),
|
||||||
|
config JSONB NOT NULL DEFAULT '{}',
|
||||||
|
min_severity TEXT NOT NULL DEFAULT 'medium',
|
||||||
|
enabled BOOLEAN NOT NULL DEFAULT true,
|
||||||
|
UNIQUE(tenant_id, channel)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Free tier enforcement: 10K alerts/month, 7-day retention
|
||||||
|
-- Pro tier: unlimited alerts, 90-day retention
|
||||||
|
|
||||||
|
-- RLS
|
||||||
|
ALTER TABLE incidents ENABLE ROW LEVEL SECURITY;
|
||||||
|
ALTER TABLE alerts ENABLE ROW LEVEL SECURITY;
|
||||||
|
ALTER TABLE notification_configs ENABLE ROW LEVEL SECURITY;
|
||||||
|
|
||||||
|
CREATE POLICY tenant_iso_incidents ON incidents
|
||||||
|
USING (tenant_id::text = current_setting('app.tenant_id', true));
|
||||||
|
CREATE POLICY tenant_iso_alerts ON alerts
|
||||||
|
USING (tenant_id::text = current_setting('app.tenant_id', true));
|
||||||
|
CREATE POLICY tenant_iso_notif ON notification_configs
|
||||||
|
USING (tenant_id::text = current_setting('app.tenant_id', true));
|
||||||
36
products/03-alert-intelligence/package.json
Normal file
36
products/03-alert-intelligence/package.json
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
{
|
||||||
|
"name": "dd0c-alert",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "tsx watch src/index.ts",
|
||||||
|
"build": "tsc",
|
||||||
|
"start": "node dist/index.js",
|
||||||
|
"test": "vitest run",
|
||||||
|
"lint": "eslint src/ tests/"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"fastify": "^4.28.0",
|
||||||
|
"@fastify/cors": "^9.0.0",
|
||||||
|
"@fastify/helmet": "^11.1.0",
|
||||||
|
"pg": "^8.12.0",
|
||||||
|
"ioredis": "^5.4.0",
|
||||||
|
"zod": "^3.23.0",
|
||||||
|
"jsonwebtoken": "^9.0.2",
|
||||||
|
"pino": "^9.1.0",
|
||||||
|
"uuid": "^9.0.1",
|
||||||
|
"@aws-sdk/client-sqs": "^3.600.0",
|
||||||
|
"@aws-sdk/client-s3": "^3.600.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"typescript": "^5.5.0",
|
||||||
|
"tsx": "^4.15.0",
|
||||||
|
"vitest": "^1.6.0",
|
||||||
|
"@types/node": "^20.14.0",
|
||||||
|
"@types/pg": "^8.11.0",
|
||||||
|
"@types/jsonwebtoken": "^9.0.6",
|
||||||
|
"@types/uuid": "^9.0.8",
|
||||||
|
"eslint": "^9.5.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
165
products/03-alert-intelligence/src/correlation/engine.ts
Normal file
165
products/03-alert-intelligence/src/correlation/engine.ts
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
import pino from 'pino';
|
||||||
|
import type { CanonicalAlert } from '../ingestion/webhook.js';
|
||||||
|
|
||||||
|
const logger = pino({ name: 'correlation' });
|
||||||
|
|
||||||
|
// --- Interfaces ---
|
||||||
|
|
||||||
|
export interface WindowStore {
|
||||||
|
getWindow(tenantId: string, fingerprint: string): Promise<CorrelationWindow | null>;
|
||||||
|
upsertWindow(tenantId: string, window: CorrelationWindow): Promise<void>;
|
||||||
|
closeWindow(tenantId: string, fingerprint: string): Promise<CorrelationWindow | null>;
|
||||||
|
getExpiredWindows(cutoffMs: number): Promise<CorrelationWindow[]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface Clock {
|
||||||
|
now(): number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export class RealClock implements Clock {
|
||||||
|
now() { return Date.now(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
export class FakeClock implements Clock {
|
||||||
|
private current: number;
|
||||||
|
constructor(start = Date.now()) { this.current = start; }
|
||||||
|
now() { return this.current; }
|
||||||
|
advanceBy(ms: number) { this.current += ms; }
|
||||||
|
set(ms: number) { this.current = ms; }
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface CorrelationWindow {
|
||||||
|
fingerprint: string;
|
||||||
|
service: string;
|
||||||
|
alerts: CanonicalAlert[];
|
||||||
|
openedAt: number;
|
||||||
|
lastAlertAt: number;
|
||||||
|
incidentId?: string;
|
||||||
|
shipped: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface CorrelationResult {
|
||||||
|
incidentId: string;
|
||||||
|
action: 'new_incident' | 'attached_to_existing' | 'window_updated';
|
||||||
|
alertCount: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Correlation Engine ---
|
||||||
|
|
||||||
|
const DEFAULT_WINDOW_MS = 5 * 60 * 1000; // 5 minutes
|
||||||
|
const LATE_ATTACH_MULTIPLIER = 2; // Attach late alerts up to 2x window
|
||||||
|
|
||||||
|
export class CorrelationEngine {
|
||||||
|
private windowStore: WindowStore;
|
||||||
|
private clock: Clock;
|
||||||
|
private windowMs: number;
|
||||||
|
|
||||||
|
constructor(windowStore: WindowStore, clock: Clock = new RealClock(), windowMs = DEFAULT_WINDOW_MS) {
|
||||||
|
this.windowStore = windowStore;
|
||||||
|
this.clock = clock;
|
||||||
|
this.windowMs = windowMs;
|
||||||
|
}
|
||||||
|
|
||||||
|
async process(tenantId: string, alert: CanonicalAlert): Promise<CorrelationResult> {
|
||||||
|
const now = this.clock.now();
|
||||||
|
const existing = await this.windowStore.getWindow(tenantId, alert.fingerprint);
|
||||||
|
|
||||||
|
// Case 1: No existing window — create new
|
||||||
|
if (!existing) {
|
||||||
|
const window: CorrelationWindow = {
|
||||||
|
fingerprint: alert.fingerprint,
|
||||||
|
service: alert.service ?? 'unknown',
|
||||||
|
alerts: [alert],
|
||||||
|
openedAt: now,
|
||||||
|
lastAlertAt: now,
|
||||||
|
shipped: false,
|
||||||
|
};
|
||||||
|
await this.windowStore.upsertWindow(tenantId, window);
|
||||||
|
return { incidentId: '', action: 'window_updated', alertCount: 1 };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Case 2: Window exists and is still open
|
||||||
|
if (!existing.shipped) {
|
||||||
|
existing.alerts.push(alert);
|
||||||
|
existing.lastAlertAt = now;
|
||||||
|
await this.windowStore.upsertWindow(tenantId, existing);
|
||||||
|
return {
|
||||||
|
incidentId: existing.incidentId ?? '',
|
||||||
|
action: 'window_updated',
|
||||||
|
alertCount: existing.alerts.length,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Case 3: Window shipped (incident created) — late alert
|
||||||
|
const windowAge = now - existing.openedAt;
|
||||||
|
|
||||||
|
if (windowAge <= this.windowMs * LATE_ATTACH_MULTIPLIER) {
|
||||||
|
// Within 2x window — attach to existing incident
|
||||||
|
existing.alerts.push(alert);
|
||||||
|
existing.lastAlertAt = now;
|
||||||
|
await this.windowStore.upsertWindow(tenantId, existing);
|
||||||
|
return {
|
||||||
|
incidentId: existing.incidentId ?? '',
|
||||||
|
action: 'attached_to_existing',
|
||||||
|
alertCount: existing.alerts.length,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Case 4: Very late alert (>2x window) — new incident
|
||||||
|
const newWindow: CorrelationWindow = {
|
||||||
|
fingerprint: alert.fingerprint,
|
||||||
|
service: alert.service ?? 'unknown',
|
||||||
|
alerts: [alert],
|
||||||
|
openedAt: now,
|
||||||
|
lastAlertAt: now,
|
||||||
|
shipped: false,
|
||||||
|
};
|
||||||
|
await this.windowStore.upsertWindow(tenantId, newWindow);
|
||||||
|
return { incidentId: '', action: 'new_incident', alertCount: 1 };
|
||||||
|
}
|
||||||
|
|
||||||
|
async flushWindows(tenantId: string): Promise<CorrelationWindow[]> {
|
||||||
|
const now = this.clock.now();
|
||||||
|
const cutoff = now - this.windowMs;
|
||||||
|
const expired = await this.windowStore.getExpiredWindows(cutoff);
|
||||||
|
|
||||||
|
const shipped: CorrelationWindow[] = [];
|
||||||
|
for (const window of expired) {
|
||||||
|
if (window.shipped) continue;
|
||||||
|
window.shipped = true;
|
||||||
|
window.incidentId = `inc_${crypto.randomUUID().slice(0, 8)}`;
|
||||||
|
await this.windowStore.upsertWindow(tenantId, window);
|
||||||
|
shipped.push(window);
|
||||||
|
}
|
||||||
|
|
||||||
|
return shipped;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- In-Memory Window Store (for testing) ---
|
||||||
|
|
||||||
|
export class InMemoryWindowStore implements WindowStore {
|
||||||
|
private windows = new Map<string, CorrelationWindow>();
|
||||||
|
|
||||||
|
private key(tenantId: string, fingerprint: string) {
|
||||||
|
return `${tenantId}:${fingerprint}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
async getWindow(tenantId: string, fingerprint: string) {
|
||||||
|
return this.windows.get(this.key(tenantId, fingerprint)) ?? null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async upsertWindow(tenantId: string, window: CorrelationWindow) {
|
||||||
|
this.windows.set(this.key(tenantId, window.fingerprint), window);
|
||||||
|
}
|
||||||
|
|
||||||
|
async closeWindow(tenantId: string, fingerprint: string) {
|
||||||
|
const w = this.windows.get(this.key(tenantId, fingerprint));
|
||||||
|
if (w) this.windows.delete(this.key(tenantId, fingerprint));
|
||||||
|
return w ?? null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async getExpiredWindows(cutoffMs: number) {
|
||||||
|
return Array.from(this.windows.values()).filter(w => w.lastAlertAt <= cutoffMs && !w.shipped);
|
||||||
|
}
|
||||||
|
}
|
||||||
217
products/03-alert-intelligence/src/ingestion/webhook.ts
Normal file
217
products/03-alert-intelligence/src/ingestion/webhook.ts
Normal file
@@ -0,0 +1,217 @@
|
|||||||
|
import { z } from 'zod';
|
||||||
|
import crypto from 'node:crypto';
|
||||||
|
import pino from 'pino';
|
||||||
|
|
||||||
|
const logger = pino({ name: 'ingestion' });
|
||||||
|
|
||||||
|
// --- Canonical Alert Schema ---
|
||||||
|
|
||||||
|
export const canonicalAlertSchema = z.object({
|
||||||
|
sourceProvider: z.enum(['datadog', 'pagerduty', 'opsgenie', 'grafana', 'custom']),
|
||||||
|
sourceId: z.string(),
|
||||||
|
fingerprint: z.string(),
|
||||||
|
title: z.string(),
|
||||||
|
severity: z.enum(['critical', 'high', 'medium', 'low', 'info']),
|
||||||
|
status: z.enum(['firing', 'resolved']),
|
||||||
|
service: z.string().optional(),
|
||||||
|
environment: z.string().optional(),
|
||||||
|
tags: z.record(z.string()).default({}),
|
||||||
|
rawPayload: z.any(),
|
||||||
|
timestamp: z.number(), // Unix ms
|
||||||
|
});
|
||||||
|
|
||||||
|
export type CanonicalAlert = z.infer<typeof canonicalAlertSchema>;
|
||||||
|
|
||||||
|
// --- HMAC Validation (BMad Must-Have: Replay Prevention) ---
|
||||||
|
|
||||||
|
const MAX_TIMESTAMP_DRIFT_SECONDS = 300; // 5 minutes
|
||||||
|
|
||||||
|
export interface HmacValidationResult {
|
||||||
|
valid: boolean;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function validateDatadogHmac(
|
||||||
|
body: string,
|
||||||
|
signature: string | undefined,
|
||||||
|
timestamp: string | undefined,
|
||||||
|
secret: string,
|
||||||
|
): HmacValidationResult {
|
||||||
|
if (!signature || !timestamp) {
|
||||||
|
return { valid: false, error: 'Missing signature or timestamp header' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Timestamp freshness check
|
||||||
|
const ts = parseInt(timestamp, 10);
|
||||||
|
const now = Math.floor(Date.now() / 1000);
|
||||||
|
if (Math.abs(now - ts) > MAX_TIMESTAMP_DRIFT_SECONDS) {
|
||||||
|
return { valid: false, error: 'stale timestamp' };
|
||||||
|
}
|
||||||
|
|
||||||
|
const expected = crypto
|
||||||
|
.createHmac('sha256', secret)
|
||||||
|
.update(timestamp + body)
|
||||||
|
.digest('hex');
|
||||||
|
|
||||||
|
if (!crypto.timingSafeEqual(Buffer.from(signature), Buffer.from(expected))) {
|
||||||
|
return { valid: false, error: 'Invalid signature' };
|
||||||
|
}
|
||||||
|
|
||||||
|
return { valid: true };
|
||||||
|
}
|
||||||
|
|
||||||
|
export function validatePagerdutyHmac(
|
||||||
|
body: string,
|
||||||
|
signature: string | undefined,
|
||||||
|
secret: string,
|
||||||
|
): HmacValidationResult {
|
||||||
|
if (!signature) {
|
||||||
|
return { valid: false, error: 'Missing signature header' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// PagerDuty v1 signatures include timestamp in the signature header
|
||||||
|
const parts = signature.split(',');
|
||||||
|
const tsPart = parts.find(p => p.startsWith('t='));
|
||||||
|
const sigPart = parts.find(p => p.startsWith('v1='));
|
||||||
|
|
||||||
|
if (!tsPart || !sigPart) {
|
||||||
|
return { valid: false, error: 'Malformed PagerDuty signature' };
|
||||||
|
}
|
||||||
|
|
||||||
|
const ts = parseInt(tsPart.slice(2), 10);
|
||||||
|
const now = Math.floor(Date.now() / 1000);
|
||||||
|
if (Math.abs(now - ts) > MAX_TIMESTAMP_DRIFT_SECONDS) {
|
||||||
|
return { valid: false, error: 'stale timestamp' };
|
||||||
|
}
|
||||||
|
|
||||||
|
const expected = crypto
|
||||||
|
.createHmac('sha256', secret)
|
||||||
|
.update(`${ts}.${body}`)
|
||||||
|
.digest('hex');
|
||||||
|
|
||||||
|
const sig = sigPart.slice(3);
|
||||||
|
if (!crypto.timingSafeEqual(Buffer.from(sig), Buffer.from(expected))) {
|
||||||
|
return { valid: false, error: 'Invalid signature' };
|
||||||
|
}
|
||||||
|
|
||||||
|
return { valid: true };
|
||||||
|
}
|
||||||
|
|
||||||
|
export function validateOpsgenieHmac(
|
||||||
|
body: string,
|
||||||
|
signature: string | undefined,
|
||||||
|
secret: string,
|
||||||
|
): HmacValidationResult {
|
||||||
|
if (!signature) {
|
||||||
|
return { valid: false, error: 'Missing signature header' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// OpsGenie: extract timestamp from payload body
|
||||||
|
let payload: any;
|
||||||
|
try {
|
||||||
|
payload = JSON.parse(body);
|
||||||
|
} catch {
|
||||||
|
return { valid: false, error: 'Invalid JSON body' };
|
||||||
|
}
|
||||||
|
|
||||||
|
const ts = payload?.timestamp;
|
||||||
|
if (ts) {
|
||||||
|
const tsSeconds = typeof ts === 'number' ? ts / 1000 : parseInt(ts, 10);
|
||||||
|
const now = Math.floor(Date.now() / 1000);
|
||||||
|
if (Math.abs(now - tsSeconds) > MAX_TIMESTAMP_DRIFT_SECONDS) {
|
||||||
|
return { valid: false, error: 'stale timestamp' };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const expected = crypto
|
||||||
|
.createHmac('sha256', secret)
|
||||||
|
.update(body)
|
||||||
|
.digest('hex');
|
||||||
|
|
||||||
|
if (!crypto.timingSafeEqual(Buffer.from(signature), Buffer.from(expected))) {
|
||||||
|
return { valid: false, error: 'Invalid signature' };
|
||||||
|
}
|
||||||
|
|
||||||
|
return { valid: true };
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Payload Normalizers ---
|
||||||
|
|
||||||
|
export function normalizeDatadog(payload: any): CanonicalAlert {
|
||||||
|
return {
|
||||||
|
sourceProvider: 'datadog',
|
||||||
|
sourceId: payload.id ?? payload.alert_id ?? crypto.randomUUID(),
|
||||||
|
fingerprint: payload.aggregation_key ?? payload.alert_id ?? '',
|
||||||
|
title: payload.title ?? payload.msg_title ?? 'Datadog Alert',
|
||||||
|
severity: mapDatadogPriority(payload.priority),
|
||||||
|
status: payload.alert_transition === 'Recovered' ? 'resolved' : 'firing',
|
||||||
|
service: payload.tags?.service,
|
||||||
|
environment: payload.tags?.env,
|
||||||
|
tags: payload.tags ?? {},
|
||||||
|
rawPayload: payload,
|
||||||
|
timestamp: payload.date_happened ? payload.date_happened * 1000 : Date.now(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export function normalizePagerduty(payload: any): CanonicalAlert {
|
||||||
|
const event = payload.event ?? payload;
|
||||||
|
const data = event.data ?? event.incident ?? {};
|
||||||
|
return {
|
||||||
|
sourceProvider: 'pagerduty',
|
||||||
|
sourceId: data.id ?? crypto.randomUUID(),
|
||||||
|
fingerprint: data.incident_key ?? data.id ?? '',
|
||||||
|
title: data.title ?? data.description ?? 'PagerDuty Incident',
|
||||||
|
severity: mapPagerdutyUrgency(data.urgency),
|
||||||
|
status: event.event_type?.includes('resolve') ? 'resolved' : 'firing',
|
||||||
|
service: data.service?.name,
|
||||||
|
environment: data.body?.details?.environment,
|
||||||
|
tags: {},
|
||||||
|
rawPayload: payload,
|
||||||
|
timestamp: data.created_at ? new Date(data.created_at).getTime() : Date.now(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export function normalizeOpsgenie(payload: any): CanonicalAlert {
|
||||||
|
return {
|
||||||
|
sourceProvider: 'opsgenie',
|
||||||
|
sourceId: payload.alert?.alertId ?? crypto.randomUUID(),
|
||||||
|
fingerprint: payload.alert?.alias ?? payload.alert?.alertId ?? '',
|
||||||
|
title: payload.alert?.message ?? 'OpsGenie Alert',
|
||||||
|
severity: mapOpsgeniePriority(payload.alert?.priority),
|
||||||
|
status: payload.action === 'Close' ? 'resolved' : 'firing',
|
||||||
|
service: payload.alert?.tags?.find((t: string) => t.startsWith('service:'))?.slice(8),
|
||||||
|
tags: {},
|
||||||
|
rawPayload: payload,
|
||||||
|
timestamp: payload.alert?.createdAt ? new Date(payload.alert.createdAt).getTime() : Date.now(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Severity Mappers ---
|
||||||
|
|
||||||
|
function mapDatadogPriority(p: string | undefined): CanonicalAlert['severity'] {
|
||||||
|
switch (p) {
|
||||||
|
case 'P1': return 'critical';
|
||||||
|
case 'P2': return 'high';
|
||||||
|
case 'P3': return 'medium';
|
||||||
|
case 'P4': return 'low';
|
||||||
|
default: return 'medium';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function mapPagerdutyUrgency(u: string | undefined): CanonicalAlert['severity'] {
|
||||||
|
switch (u) {
|
||||||
|
case 'high': return 'critical';
|
||||||
|
case 'low': return 'low';
|
||||||
|
default: return 'medium';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function mapOpsgeniePriority(p: string | undefined): CanonicalAlert['severity'] {
|
||||||
|
switch (p) {
|
||||||
|
case 'P1': return 'critical';
|
||||||
|
case 'P2': return 'high';
|
||||||
|
case 'P3': return 'medium';
|
||||||
|
case 'P4': case 'P5': return 'low';
|
||||||
|
default: return 'medium';
|
||||||
|
}
|
||||||
|
}
|
||||||
106
products/03-alert-intelligence/tests/unit/correlation.test.ts
Normal file
106
products/03-alert-intelligence/tests/unit/correlation.test.ts
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
import { describe, it, expect, beforeEach } from 'vitest';
|
||||||
|
import {
|
||||||
|
CorrelationEngine,
|
||||||
|
InMemoryWindowStore,
|
||||||
|
FakeClock,
|
||||||
|
} from '../../src/correlation/engine.js';
|
||||||
|
import type { CanonicalAlert } from '../../src/ingestion/webhook.js';
|
||||||
|
|
||||||
|
function makeAlert(overrides: Partial<CanonicalAlert> = {}): CanonicalAlert {
|
||||||
|
return {
|
||||||
|
sourceProvider: 'datadog',
|
||||||
|
sourceId: `alert-${Math.random().toString(36).slice(2)}`,
|
||||||
|
fingerprint: 'cpu-high',
|
||||||
|
title: 'CPU High',
|
||||||
|
severity: 'high',
|
||||||
|
status: 'firing',
|
||||||
|
service: 'auth',
|
||||||
|
tags: {},
|
||||||
|
rawPayload: {},
|
||||||
|
timestamp: Date.now(),
|
||||||
|
...overrides,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('CorrelationEngine', () => {
|
||||||
|
let store: InMemoryWindowStore;
|
||||||
|
let clock: FakeClock;
|
||||||
|
let engine: CorrelationEngine;
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
store = new InMemoryWindowStore();
|
||||||
|
clock = new FakeClock(1000000);
|
||||||
|
engine = new CorrelationEngine(store, clock, 5 * 60 * 1000); // 5min window
|
||||||
|
});
|
||||||
|
|
||||||
|
it('creates a new window for first alert', async () => {
|
||||||
|
const result = await engine.process('t1', makeAlert());
|
||||||
|
expect(result.action).toBe('window_updated');
|
||||||
|
expect(result.alertCount).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('groups alerts with same fingerprint into one window', async () => {
|
||||||
|
await engine.process('t1', makeAlert({ fingerprint: 'cpu-high' }));
|
||||||
|
const result = await engine.process('t1', makeAlert({ fingerprint: 'cpu-high' }));
|
||||||
|
expect(result.action).toBe('window_updated');
|
||||||
|
expect(result.alertCount).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps different fingerprints in separate windows', async () => {
|
||||||
|
await engine.process('t1', makeAlert({ fingerprint: 'cpu-high' }));
|
||||||
|
const result = await engine.process('t1', makeAlert({ fingerprint: 'disk-full' }));
|
||||||
|
expect(result.action).toBe('window_updated');
|
||||||
|
expect(result.alertCount).toBe(1); // New window, only 1 alert
|
||||||
|
});
|
||||||
|
|
||||||
|
it('late alert within 2x window attaches to existing incident', async () => {
|
||||||
|
// Alert 1 at T=0
|
||||||
|
await engine.process('t1', makeAlert());
|
||||||
|
|
||||||
|
// Flush at T=5min (window closes, incident created)
|
||||||
|
clock.advanceBy(5 * 60 * 1000);
|
||||||
|
const shipped = await engine.flushWindows('t1');
|
||||||
|
expect(shipped).toHaveLength(1);
|
||||||
|
expect(shipped[0].incidentId).toBeTruthy();
|
||||||
|
|
||||||
|
// Late alert at T=6min (within 2x window = 10min)
|
||||||
|
clock.advanceBy(1 * 60 * 1000);
|
||||||
|
const result = await engine.process('t1', makeAlert());
|
||||||
|
expect(result.action).toBe('attached_to_existing');
|
||||||
|
expect(result.incidentId).toBe(shipped[0].incidentId);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('very late alert (>2x window) creates new incident', async () => {
|
||||||
|
await engine.process('t1', makeAlert());
|
||||||
|
|
||||||
|
clock.advanceBy(5 * 60 * 1000);
|
||||||
|
await engine.flushWindows('t1');
|
||||||
|
|
||||||
|
// 15 minutes later (3x window)
|
||||||
|
clock.advanceBy(10 * 60 * 1000);
|
||||||
|
const result = await engine.process('t1', makeAlert());
|
||||||
|
expect(result.action).toBe('new_incident');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('flushWindows ships expired windows', async () => {
|
||||||
|
await engine.process('t1', makeAlert({ fingerprint: 'a' }));
|
||||||
|
await engine.process('t1', makeAlert({ fingerprint: 'b' }));
|
||||||
|
|
||||||
|
clock.advanceBy(6 * 60 * 1000); // Past 5min window
|
||||||
|
const shipped = await engine.flushWindows('t1');
|
||||||
|
expect(shipped).toHaveLength(2);
|
||||||
|
expect(shipped[0].incidentId).toBeTruthy();
|
||||||
|
expect(shipped[1].incidentId).toBeTruthy();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does not re-ship already shipped windows', async () => {
|
||||||
|
await engine.process('t1', makeAlert());
|
||||||
|
clock.advanceBy(6 * 60 * 1000);
|
||||||
|
|
||||||
|
const first = await engine.flushWindows('t1');
|
||||||
|
expect(first).toHaveLength(1);
|
||||||
|
|
||||||
|
const second = await engine.flushWindows('t1');
|
||||||
|
expect(second).toHaveLength(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
138
products/03-alert-intelligence/tests/unit/ingestion.test.ts
Normal file
138
products/03-alert-intelligence/tests/unit/ingestion.test.ts
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import {
|
||||||
|
validateDatadogHmac,
|
||||||
|
validatePagerdutyHmac,
|
||||||
|
validateOpsgenieHmac,
|
||||||
|
normalizeDatadog,
|
||||||
|
normalizePagerduty,
|
||||||
|
normalizeOpsgenie,
|
||||||
|
} from '../../src/ingestion/webhook.js';
|
||||||
|
import crypto from 'node:crypto';
|
||||||
|
|
||||||
|
const SECRET = 'test-webhook-secret';
|
||||||
|
|
||||||
|
describe('HMAC Validation', () => {
|
||||||
|
describe('Datadog', () => {
|
||||||
|
it('accepts valid signature with fresh timestamp', () => {
|
||||||
|
const body = '{"alert_id":"123"}';
|
||||||
|
const ts = Math.floor(Date.now() / 1000).toString();
|
||||||
|
const sig = crypto.createHmac('sha256', SECRET).update(ts + body).digest('hex');
|
||||||
|
|
||||||
|
const result = validateDatadogHmac(body, sig, ts, SECRET);
|
||||||
|
expect(result.valid).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects stale timestamp (>5 minutes)', () => {
|
||||||
|
const body = '{"alert_id":"123"}';
|
||||||
|
const ts = (Math.floor(Date.now() / 1000) - 301).toString();
|
||||||
|
const sig = crypto.createHmac('sha256', SECRET).update(ts + body).digest('hex');
|
||||||
|
|
||||||
|
const result = validateDatadogHmac(body, sig, ts, SECRET);
|
||||||
|
expect(result.valid).toBe(false);
|
||||||
|
expect(result.error).toContain('stale timestamp');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects missing signature', () => {
|
||||||
|
const result = validateDatadogHmac('{}', undefined, '123', SECRET);
|
||||||
|
expect(result.valid).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects invalid signature', () => {
|
||||||
|
const ts = Math.floor(Date.now() / 1000).toString();
|
||||||
|
const result = validateDatadogHmac('{}', 'bad-sig', ts, SECRET);
|
||||||
|
expect(result.valid).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('PagerDuty', () => {
|
||||||
|
it('rejects missing signature', () => {
|
||||||
|
const result = validatePagerdutyHmac('{}', undefined, SECRET);
|
||||||
|
expect(result.valid).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects malformed signature', () => {
|
||||||
|
const result = validatePagerdutyHmac('{}', 'garbage', SECRET);
|
||||||
|
expect(result.valid).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('OpsGenie', () => {
|
||||||
|
it('rejects stale timestamp from payload body', () => {
|
||||||
|
const staleTs = Date.now() - 6 * 60 * 1000; // 6 minutes ago
|
||||||
|
const body = JSON.stringify({ alert: { alertId: '1' }, timestamp: staleTs });
|
||||||
|
const sig = crypto.createHmac('sha256', SECRET).update(body).digest('hex');
|
||||||
|
|
||||||
|
const result = validateOpsgenieHmac(body, sig, SECRET);
|
||||||
|
expect(result.valid).toBe(false);
|
||||||
|
expect(result.error).toContain('stale timestamp');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts fresh payload', () => {
|
||||||
|
const body = JSON.stringify({ alert: { alertId: '1' }, timestamp: Date.now() });
|
||||||
|
const sig = crypto.createHmac('sha256', SECRET).update(body).digest('hex');
|
||||||
|
|
||||||
|
const result = validateOpsgenieHmac(body, sig, SECRET);
|
||||||
|
expect(result.valid).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Payload Normalizers', () => {
|
||||||
|
it('normalizes Datadog payload', () => {
|
||||||
|
const alert = normalizeDatadog({
|
||||||
|
alert_id: 'dd-123',
|
||||||
|
title: 'High CPU',
|
||||||
|
priority: 'P1',
|
||||||
|
alert_transition: 'Triggered',
|
||||||
|
tags: { service: 'auth', env: 'prod' },
|
||||||
|
date_happened: 1709251200,
|
||||||
|
});
|
||||||
|
expect(alert.sourceProvider).toBe('datadog');
|
||||||
|
expect(alert.severity).toBe('critical');
|
||||||
|
expect(alert.status).toBe('firing');
|
||||||
|
expect(alert.service).toBe('auth');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('normalizes PagerDuty payload', () => {
|
||||||
|
const alert = normalizePagerduty({
|
||||||
|
event: {
|
||||||
|
event_type: 'incident.triggered',
|
||||||
|
data: {
|
||||||
|
id: 'pd-456',
|
||||||
|
title: 'Disk Full',
|
||||||
|
urgency: 'high',
|
||||||
|
service: { name: 'storage' },
|
||||||
|
created_at: '2026-03-01T00:00:00Z',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(alert.sourceProvider).toBe('pagerduty');
|
||||||
|
expect(alert.severity).toBe('critical');
|
||||||
|
expect(alert.service).toBe('storage');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('normalizes OpsGenie payload', () => {
|
||||||
|
const alert = normalizeOpsgenie({
|
||||||
|
action: 'Create',
|
||||||
|
alert: {
|
||||||
|
alertId: 'og-789',
|
||||||
|
message: 'Memory Leak',
|
||||||
|
priority: 'P2',
|
||||||
|
tags: ['service:api'],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(alert.sourceProvider).toBe('opsgenie');
|
||||||
|
expect(alert.severity).toBe('high');
|
||||||
|
expect(alert.status).toBe('firing');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('maps Datadog Recovered to resolved', () => {
|
||||||
|
const alert = normalizeDatadog({ alert_transition: 'Recovered' });
|
||||||
|
expect(alert.status).toBe('resolved');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('maps OpsGenie Close to resolved', () => {
|
||||||
|
const alert = normalizeOpsgenie({ action: 'Close', alert: {} });
|
||||||
|
expect(alert.status).toBe('resolved');
|
||||||
|
});
|
||||||
|
});
|
||||||
18
products/03-alert-intelligence/tsconfig.json
Normal file
18
products/03-alert-intelligence/tsconfig.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2022",
|
||||||
|
"module": "ESNext",
|
||||||
|
"moduleResolution": "bundler",
|
||||||
|
"outDir": "dist",
|
||||||
|
"rootDir": "src",
|
||||||
|
"strict": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"declaration": true,
|
||||||
|
"sourceMap": true
|
||||||
|
},
|
||||||
|
"include": ["src"],
|
||||||
|
"exclude": ["node_modules", "dist", "tests"]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user