Flesh out dd0c/alert: webhook routes, incident API, notification config, data layer
- Webhook routes: Datadog, PagerDuty, OpsGenie, Grafana with per-tenant HMAC/token auth
- Incident API: list (filtered), detail with alerts, acknowledge/resolve/suppress, dashboard summary
- Notification config: CRUD with upsert, test endpoint, Slack/email/webhook channels
- Grafana normalizer: severity mapping (critical/warning/info)
- Data layer: withTenant() RLS wrapper, Zod config validation
- Fastify server entry point with cors/helmet
2026-03-01 03:04:57 +00:00
|
|
|
import { createHash, timingSafeEqual } from 'node:crypto';

import type { FastifyInstance } from 'fastify';
import pino from 'pino';

import {
  validateDatadogHmac,
  validatePagerdutyHmac,
  validateOpsgenieHmac,
  normalizeDatadog,
  normalizePagerduty,
  normalizeOpsgenie,
  type CanonicalAlert,
} from '../ingestion/webhook.js';
import { withTenant } from '../data/db.js';
|
|
|
|
|
|
|
|
|
|
const logger = pino({ name: 'api-webhooks' });
|
|
|
|
|
|
|
|
|
|
/**
 * Register the inbound alert webhook endpoints on the given Fastify instance.
 *
 * Four POST routes are added, one per provider, each keyed by tenant slug:
 * Datadog, PagerDuty and OpsGenie are authenticated with the provider's HMAC
 * validator; Grafana is authenticated with a bearer token. Every route
 * responds:
 *   - 404 `{ error: 'Unknown tenant' }` when no secret is stored for the
 *     tenant/provider pair,
 *   - 401 when the signature or token check fails,
 *   - 202 `{ status: 'accepted' }` once the normalized alert has been ingested.
 *
 * @param app Fastify instance to attach the routes to.
 */
export function registerWebhookRoutes(app: FastifyInstance) {
  // Datadog webhook
  app.post('/webhooks/datadog/:tenantSlug', async (req, reply) => {
    const { tenantSlug } = req.params as { tenantSlug: string };
    const body = req.body as any;
    const rawBody = rawBodyForSignature(req);

    const secret = await getWebhookSecret(tenantSlug, 'datadog');
    if (!secret) return reply.status(404).send({ error: 'Unknown tenant' });

    const hmac = validateDatadogHmac(
      rawBody,
      req.headers['dd-webhook-signature'] as string,
      req.headers['dd-webhook-timestamp'] as string,
      secret.secret,
    );
    if (!hmac.valid) {
      logger.warn({ tenant: tenantSlug, error: hmac.error }, 'Datadog HMAC failed');
      return reply.status(401).send({ error: hmac.error });
    }

    const alert = normalizeDatadog(body);
    await ingestAlert(secret.tenantId, alert);
    return reply.status(202).send({ status: 'accepted' });
  });

  // PagerDuty webhook
  app.post('/webhooks/pagerduty/:tenantSlug', async (req, reply) => {
    const { tenantSlug } = req.params as { tenantSlug: string };
    const body = req.body as any;
    const rawBody = rawBodyForSignature(req);

    const secret = await getWebhookSecret(tenantSlug, 'pagerduty');
    if (!secret) return reply.status(404).send({ error: 'Unknown tenant' });

    const hmac = validatePagerdutyHmac(
      rawBody,
      req.headers['x-pagerduty-signature'] as string,
      secret.secret,
    );
    if (!hmac.valid) {
      logger.warn({ tenant: tenantSlug, error: hmac.error }, 'PagerDuty HMAC failed');
      return reply.status(401).send({ error: hmac.error });
    }

    const alert = normalizePagerduty(body);
    await ingestAlert(secret.tenantId, alert);
    return reply.status(202).send({ status: 'accepted' });
  });

  // OpsGenie webhook
  app.post('/webhooks/opsgenie/:tenantSlug', async (req, reply) => {
    const { tenantSlug } = req.params as { tenantSlug: string };
    const body = req.body as any;
    const rawBody = rawBodyForSignature(req);

    const secret = await getWebhookSecret(tenantSlug, 'opsgenie');
    if (!secret) return reply.status(404).send({ error: 'Unknown tenant' });

    const hmac = validateOpsgenieHmac(
      rawBody,
      req.headers['x-opsgenie-signature'] as string,
      secret.secret,
    );
    if (!hmac.valid) {
      logger.warn({ tenant: tenantSlug, error: hmac.error }, 'OpsGenie HMAC failed');
      return reply.status(401).send({ error: hmac.error });
    }

    const alert = normalizeOpsgenie(body);
    await ingestAlert(secret.tenantId, alert);
    return reply.status(202).send({ status: 'accepted' });
  });

  // Grafana webhook (token-based auth, no HMAC)
  app.post('/webhooks/grafana/:tenantSlug', async (req, reply) => {
    const { tenantSlug } = req.params as { tenantSlug: string };
    const body = req.body as any;

    const secret = await getWebhookSecret(tenantSlug, 'grafana');
    if (!secret) return reply.status(404).send({ error: 'Unknown tenant' });

    const token = req.headers['authorization']?.replace('Bearer ', '');
    if (!bearerTokenMatches(token, secret.secret)) {
      return reply.status(401).send({ error: 'Invalid token' });
    }

    const alert = normalizeGrafana(body);
    await ingestAlert(secret.tenantId, alert);
    return reply.status(202).send({ status: 'accepted' });
  });
}

/**
 * Return the request body text that a provider signature should be checked against.
 *
 * A signature covers the exact bytes the sender transmitted, and re-serializing
 * the parsed body can differ from them (whitespace, escaping, number format).
 * When something has attached the unparsed payload to the request as `rawBody`
 * (string or Buffer) that is used; otherwise the parsed body is re-serialized,
 * which is the previous behaviour.
 * NOTE(review): nothing in this file registers a raw-body capture hook —
 * confirm one is installed on the server (e.g. a raw-body plugin).
 */
function rawBodyForSignature(req: { body: unknown }): string {
  const captured = (req as { body: unknown; rawBody?: unknown }).rawBody;
  if (typeof captured === 'string') return captured;
  if (Buffer.isBuffer(captured)) return captured.toString('utf8');
  return JSON.stringify(req.body);
}

/**
 * Compare a presented bearer token with the stored secret without leaking,
 * through timing, how many leading characters matched.
 *
 * Both values are hashed first so that `timingSafeEqual` always receives
 * equal-length buffers regardless of the token length supplied by the caller.
 * A missing token never matches.
 */
function bearerTokenMatches(presented: string | undefined, expected: string): boolean {
  if (typeof presented !== 'string' || typeof expected !== 'string') return false;
  const presentedDigest = createHash('sha256').update(presented).digest();
  const expectedDigest = createHash('sha256').update(expected).digest();
  return timingSafeEqual(presentedDigest, expectedDigest);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Convert a Grafana alerting webhook payload into a CanonicalAlert.
 *
 * Only the first entry of `payload.alerts` is normalized; when that array is
 * absent the payload itself is treated as the alert object.
 * NOTE(review): any further entries in `payload.alerts` are ignored here —
 * confirm that dropping them is intended.
 *
 * @param payload Parsed request body as received from Grafana (unvalidated).
 * @returns The canonical alert; `rawPayload` carries `payload` unchanged.
 */
function normalizeGrafana(payload: any): CanonicalAlert {
  // A request with no JSON body arrives as undefined/null; fall back to an
  // empty object so the defaults below apply instead of throwing a TypeError.
  const alert = payload?.alerts?.[0] ?? payload ?? {};

  // An unparseable `startsAt` makes Date#getTime return NaN; treat that the
  // same as a missing value rather than propagating NaN as the timestamp.
  const startedAt = alert.startsAt ? new Date(alert.startsAt).getTime() : Number.NaN;

  return {
    // 'grafana' is cast because the provider type is declared elsewhere.
    sourceProvider: 'grafana' as any,
    sourceId: alert.fingerprint ?? crypto.randomUUID(),
    fingerprint: alert.fingerprint ?? '',
    title: alert.labels?.alertname ?? payload?.title ?? 'Grafana Alert',
    severity: mapGrafanaSeverity(alert.labels?.severity),
    // Anything other than an explicit 'resolved' is considered still firing.
    status: alert.status === 'resolved' ? 'resolved' : 'firing',
    service: alert.labels?.service,
    environment: alert.labels?.env,
    tags: alert.labels ?? {},
    rawPayload: payload,
    timestamp: Number.isNaN(startedAt) ? Date.now() : startedAt,
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Translate a Grafana `severity` label into the canonical severity scale.
 *
 * `critical` and `info` pass through unchanged, `warning` is reported as
 * `high`, and a missing or unrecognised label falls back to `medium`.
 * Matching is exact (case-sensitive).
 */
function mapGrafanaSeverity(s: string | undefined): CanonicalAlert['severity'] {
  if (s === 'critical') return 'critical';
  if (s === 'warning') return 'high';
  if (s === 'info') return 'info';
  return 'medium';
}
|
|
|
|
|
|
|
|
|
|
/**
 * Look up the webhook credential stored for a tenant/provider pair.
 *
 * The query runs directly on the shared pool (not through `withTenant`),
 * joining `webhook_secrets` to `tenants` by the tenant's slug.
 *
 * @param tenantSlug Tenant slug taken from the webhook URL.
 * @param provider   Provider name the secret is registered under.
 * @returns The tenant id and secret of the first matching row, or `null`
 *          when no row matches.
 */
async function getWebhookSecret(tenantSlug: string, provider: string): Promise<{ tenantId: string; secret: string } | null> {
  // Loaded lazily at call time rather than through the static import above.
  const db = await import('../data/db.js');

  const lookupSql = `SELECT ws.secret, t.id as tenant_id
     FROM webhook_secrets ws
     JOIN tenants t ON ws.tenant_id = t.id
     WHERE t.slug = $1 AND ws.provider = $2`;
  const { rows } = await db.pool.query(lookupSql, [tenantSlug, provider]);

  const match = rows[0];
  return match ? { tenantId: match.tenant_id, secret: match.secret } : null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Persist a normalized alert and fold it into the tenant's incidents.
 *
 * All statements run inside a single `withTenant` callback:
 *   1. the alert row is inserted;
 *   2. if an open/acknowledged incident with the same fingerprint exists the
 *      alert is attached to it and its counters are bumped; otherwise a new
 *      incident is created, but only for a `firing` alert;
 *   3. a `resolved` alert closes every open/acknowledged incident sharing its
 *      fingerprint.
 *
 * Incident lookups filter on `tenant_id` explicitly as well as on the
 * fingerprint, so correlation never depends solely on whatever isolation
 * `withTenant` sets up. Fingerprints are provider-supplied and may collide
 * across tenants (the Grafana normalizer can even emit an empty string).
 *
 * @param tenantId Id of the tenant that owns the alert.
 * @param alert    Alert already normalized to the canonical shape.
 */
async function ingestAlert(tenantId: string, alert: CanonicalAlert): Promise<void> {
  await withTenant(tenantId, async (client) => {
    // Persist raw alert
    const alertResult = await client.query(
      `INSERT INTO alerts (tenant_id, source_provider, source_id, fingerprint, title, severity, status, service, environment, tags, raw_payload)
       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
       RETURNING id`,
      [tenantId, alert.sourceProvider, alert.sourceId, alert.fingerprint, alert.title, alert.severity, alert.status, alert.service, alert.environment, JSON.stringify(alert.tags), JSON.stringify(alert.rawPayload)],
    );
    const alertId = alertResult.rows[0].id;

    // Check for existing open incident with same fingerprint (most recent first)
    const existing = await client.query(
      `SELECT id FROM incidents
       WHERE tenant_id = $1 AND fingerprint = $2 AND status IN ('open', 'acknowledged')
       ORDER BY created_at DESC LIMIT 1`,
      [tenantId, alert.fingerprint],
    );

    if (existing.rows[0]) {
      // Attach to existing incident
      await client.query('UPDATE alerts SET incident_id = $1 WHERE id = $2', [existing.rows[0].id, alertId]);
      await client.query(
        `UPDATE incidents SET alert_count = alert_count + 1, last_alert_at = now() WHERE id = $1`,
        [existing.rows[0].id],
      );
    } else if (alert.status === 'firing') {
      // Create new incident; a resolved alert with no open incident stays unattached
      const incident = await client.query(
        `INSERT INTO incidents (tenant_id, incident_key, fingerprint, service, title, severity, alert_count, first_alert_at, last_alert_at)
         VALUES ($1, $2, $3, $4, $5, $6, 1, now(), now())
         RETURNING id`,
        [tenantId, `inc_${crypto.randomUUID().slice(0, 8)}`, alert.fingerprint, alert.service ?? 'unknown', alert.title, alert.severity],
      );
      await client.query('UPDATE alerts SET incident_id = $1 WHERE id = $2', [incident.rows[0].id, alertId]);
    }

    // Auto-resolve if alert status is resolved
    if (alert.status === 'resolved') {
      await client.query(
        `UPDATE incidents SET status = 'resolved', resolved_at = now()
         WHERE tenant_id = $1 AND fingerprint = $2 AND status IN ('open', 'acknowledged')`,
        [tenantId, alert.fingerprint],
      );
    }
  });
}
|