From 23db74b306912168b5ce9dc5f5c9a2dcec3929b6 Mon Sep 17 00:00:00 2001 From: Max Mayfield Date: Sun, 1 Mar 2026 02:51:02 +0000 Subject: [PATCH] Scaffold dd0c/portal: AWS+GitHub discovery, catalog service, ownership resolution - AWS scanner: ECS/Lambda/RDS discovery with partial failure handling - GitHub scanner: CODEOWNERS parsing, commit-based heuristic ownership, rate limit resilience - Catalog service: ownership resolution (config > codeowners > aws-tag > heuristic), staged updates for partial scans - Ownership tests: 6 cases covering full priority chain - PostgreSQL schema with RLS: services, staged_updates, scan_history, free tier (50 services) - Fly.io config, Dockerfile --- products/04-lightweight-idp/Dockerfile | 14 ++ products/04-lightweight-idp/fly.toml | 27 ++++ .../migrations/001_init.sql | 73 +++++++++ products/04-lightweight-idp/package.json | 40 +++++ .../04-lightweight-idp/src/catalog/service.ts | 152 ++++++++++++++++++ .../src/discovery/aws-scanner.ts | 95 +++++++++++ .../src/discovery/github-scanner.ts | 139 ++++++++++++++++ .../tests/unit/ownership.test.ts | 60 +++++++ 8 files changed, 600 insertions(+) create mode 100644 products/04-lightweight-idp/Dockerfile create mode 100644 products/04-lightweight-idp/fly.toml create mode 100644 products/04-lightweight-idp/migrations/001_init.sql create mode 100644 products/04-lightweight-idp/package.json create mode 100644 products/04-lightweight-idp/src/catalog/service.ts create mode 100644 products/04-lightweight-idp/src/discovery/aws-scanner.ts create mode 100644 products/04-lightweight-idp/src/discovery/github-scanner.ts create mode 100644 products/04-lightweight-idp/tests/unit/ownership.test.ts diff --git a/products/04-lightweight-idp/Dockerfile b/products/04-lightweight-idp/Dockerfile new file mode 100644 index 0000000..690069b --- /dev/null +++ b/products/04-lightweight-idp/Dockerfile @@ -0,0 +1,14 @@ +FROM node:22-slim AS builder +WORKDIR /app +COPY package.json package-lock.json* ./ +RUN npm ci 
+COPY . . +RUN npm run build + +FROM node:22-slim +WORKDIR /app +COPY --from=builder /app/dist ./dist +COPY --from=builder /app/node_modules ./node_modules +COPY --from=builder /app/package.json ./ +EXPOSE 3000 +CMD ["node", "dist/index.js"] diff --git a/products/04-lightweight-idp/fly.toml b/products/04-lightweight-idp/fly.toml new file mode 100644 index 0000000..7be5c1e --- /dev/null +++ b/products/04-lightweight-idp/fly.toml @@ -0,0 +1,27 @@ +app = "dd0c-portal" +primary_region = "iad" + +[build] + dockerfile = "Dockerfile" + +[env] + NODE_ENV = "production" + PORT = "3000" + LOG_LEVEL = "info" + +[http_service] + internal_port = 3000 + force_https = true + auto_stop_machines = true + auto_start_machines = true + min_machines_running = 0 + + [http_service.concurrency] + type = "requests" + hard_limit = 100 + soft_limit = 80 + +[[vm]] + cpu_kind = "shared" + cpus = 1 + memory_mb = 256 diff --git a/products/04-lightweight-idp/migrations/001_init.sql b/products/04-lightweight-idp/migrations/001_init.sql new file mode 100644 index 0000000..fd9e85d --- /dev/null +++ b/products/04-lightweight-idp/migrations/001_init.sql @@ -0,0 +1,73 @@ +-- dd0c/portal V1 schema — PostgreSQL with RLS + Meilisearch for search + +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; + +-- Tenants +CREATE TABLE tenants ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + name TEXT NOT NULL, + slug TEXT NOT NULL UNIQUE, + tier TEXT NOT NULL DEFAULT 'free' CHECK (tier IN ('free', 'pro')), + service_count INT NOT NULL DEFAULT 0, + max_services INT NOT NULL DEFAULT 50, -- Free tier: 50 + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Service catalog +CREATE TABLE services ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE, + name TEXT NOT NULL, + type TEXT NOT NULL DEFAULT 'unknown', + owner TEXT NOT NULL DEFAULT 'unknown', + owner_source TEXT NOT NULL DEFAULT 'heuristic' CHECK (owner_source IN ('config', 'codeowners', 
'aws-tag', 'heuristic')), + description TEXT, + tier TEXT DEFAULT 'medium' CHECK (tier IN ('critical', 'high', 'medium', 'low')), + lifecycle TEXT DEFAULT 'active' CHECK (lifecycle IN ('active', 'deprecated', 'decommissioned')), + links JSONB NOT NULL DEFAULT '{}', + tags JSONB NOT NULL DEFAULT '{}', + metadata JSONB NOT NULL DEFAULT '{}', + last_discovered_at TIMESTAMPTZ, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), + UNIQUE(tenant_id, name) +); +CREATE INDEX idx_services_tenant ON services(tenant_id); +CREATE INDEX idx_services_owner ON services(tenant_id, owner); +CREATE INDEX idx_services_type ON services(tenant_id, type); + +-- Staged updates (partial scan results awaiting review) +CREATE TABLE staged_updates ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE, + service_name TEXT NOT NULL, + source TEXT NOT NULL CHECK (source IN ('aws', 'github', 'manual')), + changes JSONB NOT NULL, + status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'applied', 'rejected')), + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX idx_staged_tenant ON staged_updates(tenant_id, status); + +-- Discovery scan history +CREATE TABLE scan_history ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE, + scanner TEXT NOT NULL CHECK (scanner IN ('aws', 'github')), + status TEXT NOT NULL CHECK (status IN ('success', 'partial_failure', 'failed')), + discovered INT NOT NULL DEFAULT 0, + errors TEXT[] NOT NULL DEFAULT '{}', + started_at TIMESTAMPTZ NOT NULL DEFAULT now(), + completed_at TIMESTAMPTZ +); + +-- RLS +ALTER TABLE services ENABLE ROW LEVEL SECURITY; +ALTER TABLE staged_updates ENABLE ROW LEVEL SECURITY; +ALTER TABLE scan_history ENABLE ROW LEVEL SECURITY; + +CREATE POLICY tenant_iso_services ON services + USING (tenant_id::text = current_setting('app.tenant_id', 
true)); +CREATE POLICY tenant_iso_staged ON staged_updates + USING (tenant_id::text = current_setting('app.tenant_id', true)); +CREATE POLICY tenant_iso_scans ON scan_history + USING (tenant_id::text = current_setting('app.tenant_id', true)); diff --git a/products/04-lightweight-idp/package.json b/products/04-lightweight-idp/package.json new file mode 100644 index 0000000..e13be6a --- /dev/null +++ b/products/04-lightweight-idp/package.json @@ -0,0 +1,40 @@ +{ + "name": "dd0c-portal", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "dev": "tsx watch src/index.ts", + "build": "tsc", + "start": "node dist/index.js", + "test": "vitest run", + "lint": "eslint src/ tests/" + }, + "dependencies": { + "fastify": "^4.28.0", + "@fastify/cors": "^9.0.0", + "@fastify/helmet": "^11.1.0", + "pg": "^8.12.0", + "ioredis": "^5.4.0", + "meilisearch": "^0.41.0", + "zod": "^3.23.0", + "jsonwebtoken": "^9.0.2", + "pino": "^9.1.0", + "uuid": "^9.0.1", + "@aws-sdk/client-organizations": "^3.600.0", + "@aws-sdk/client-ecs": "^3.600.0", + "@aws-sdk/client-lambda": "^3.600.0", + "@aws-sdk/client-rds": "^3.600.0", + "@octokit/rest": "^20.1.0" + }, + "devDependencies": { + "typescript": "^5.5.0", + "tsx": "^4.15.0", + "vitest": "^1.6.0", + "@types/node": "^20.14.0", + "@types/pg": "^8.11.0", + "@types/jsonwebtoken": "^9.0.6", + "@types/uuid": "^9.0.8", + "eslint": "^9.5.0" + } +} diff --git a/products/04-lightweight-idp/src/catalog/service.ts b/products/04-lightweight-idp/src/catalog/service.ts new file mode 100644 index 0000000..ee956b6 --- /dev/null +++ b/products/04-lightweight-idp/src/catalog/service.ts @@ -0,0 +1,152 @@ +import pino from 'pino'; +import type { DiscoveredService } from './aws-scanner.js'; +import type { GitHubRepo } from './github-scanner.js'; + +const logger = pino({ name: 'catalog' }); + +// --- Ownership Resolution (BMad must-have: explicit > implicit > heuristic) --- + +export type OwnerSource = 'config' | 'codeowners' | 'aws-tag' | 
/** Origin of an ownership claim, listed from most to least authoritative. */
export type OwnerSource = 'config' | 'codeowners' | 'aws-tag' | 'heuristic';

/** A single claim about who owns a service. */
export interface OwnershipRecord {
  owner: string;
  source: OwnerSource;
  confidence: number; // 0-1
}

/** Rank of each source; the larger number wins during resolution. */
const SOURCE_PRIORITY: Record<OwnerSource, number> = {
  config: 4,
  codeowners: 3,
  'aws-tag': 2,
  heuristic: 1,
};

/**
 * Picks the winning ownership claim from a list of candidates.
 *
 * The claim whose source has the highest priority wins
 * (config > codeowners > aws-tag > heuristic). Between claims from the same
 * source the higher `confidence` wins; if that is equal too, the candidate
 * that appears first in the input is kept.
 *
 * The input array is only read, never reordered, so callers can pass shared
 * or frozen arrays safely.
 *
 * @param candidates - Ownership claims to choose from; may be empty.
 * @returns The winning claim (the same object that was passed in), or an
 *   `unknown` / `heuristic` / confidence-0 placeholder when there are no
 *   candidates.
 */
export function resolveOwnership(candidates: readonly OwnershipRecord[]): OwnershipRecord {
  let best: OwnershipRecord | undefined;

  for (const candidate of candidates) {
    if (best === undefined) {
      best = candidate;
      continue;
    }
    const rankDelta = SOURCE_PRIORITY[candidate.source] - SOURCE_PRIORITY[best.source];
    // Strict comparisons keep the earliest candidate on a complete tie.
    if (rankDelta > 0 || (rankDelta === 0 && candidate.confidence > best.confidence)) {
      best = candidate;
    }
  }

  return best ?? { owner: 'unknown', source: 'heuristic', confidence: 0 };
}

// --- Catalog Service ---

/** One service as stored in the catalog. */
export interface CatalogEntry {
  id: string;
  tenantId: string;
  name: string;
  type: string;
  owner: string;
  ownerSource: OwnerSource;
  description?: string;
  tier?: 'critical' | 'high' | 'medium' | 'low';
  lifecycle?: 'active' | 'deprecated' | 'decommissioned';
  links: Record<string, string>; // repo, dashboard, runbook, etc.
  tags: Record<string, string>;
  metadata: Record<string, unknown>;
  lastDiscoveredAt: Date;
  createdAt: Date;
  updatedAt: Date;
}

/** A proposed change to a catalog entry, held back for review instead of being applied. */
export interface StagedUpdate {
  serviceName: string;
  changes: Partial<CatalogEntry>;
  source: 'aws' | 'github' | 'manual';
}
/**
 * Merges discovery results into the service catalog.
 *
 * Results of a partial scan are never written to the catalog directly: they
 * are handed to the staging step so existing entries are neither deleted nor
 * overwritten (BMad must-have). The persistence helpers at the bottom of the
 * class are still TODO stubs.
 */
export class CatalogService {
  // NOTE(review): presumably a `pg` Pool; left as `any` until the queries are written.
  private pool: any;

  constructor(pool: any) {
    this.pool = pool;
  }

  /**
   * Merges the services found by an AWS scan.
   *
   * @param tenantId - Tenant the scan belongs to.
   * @param services - Services reported by the scanner.
   * @param isPartial - True when the scan did not complete; results are then
   *   staged instead of written.
   * @returns Number of services staged (partial scan) or upserted (full scan).
   */
  async mergeAwsDiscovery(tenantId: string, services: DiscoveredService[], isPartial: boolean): Promise<number> {
    if (isPartial) {
      // Stage results — don't delete or overwrite existing entries
      logger.warn({ tenantId, count: services.length }, 'Partial scan — staging results');
      const staged = services.map((svc): StagedUpdate => ({
        serviceName: svc.name,
        changes: {
          type: svc.type,
          tags: svc.tags,
          metadata: svc.metadata,
          // Only propose an owner when the scan actually found one, so a
          // staged change never carries `owner: undefined`.
          ...(svc.owner != null
            ? { owner: svc.owner, ownerSource: svc.ownerSource ?? 'aws-tag' }
            : {}),
          lastDiscoveredAt: svc.discoveredAt,
        },
        source: 'aws',
      }));
      return this.stageUpdates(tenantId, staged);
    }

    // Full scan — upsert all discovered services
    let upserted = 0;
    for (const svc of services) {
      const hasOwner = svc.owner != null;
      await this.upsertService(tenantId, {
        name: svc.name,
        type: svc.type,
        tags: svc.tags,
        metadata: svc.metadata,
        owner: svc.owner ?? 'unknown',
        // An 'unknown' placeholder must not claim 'aws-tag' authority: that
        // would outrank (and permanently block) a later heuristic owner.
        ownerSource: hasOwner ? (svc.ownerSource ?? 'aws-tag') : 'heuristic',
        lastDiscoveredAt: svc.discoveredAt,
      });
      upserted++;
    }
    return upserted;
  }

  /**
   * Merges the repositories found by a GitHub scan.
   *
   * A full scan only enriches services that already exist in the catalog: the
   * stored owner and the repository's owner are passed through
   * `resolveOwnership`, and the row is updated only when the outcome differs
   * from what is stored. `'unknown'` owners are placeholders, not claims, and
   * take no part in the resolution.
   *
   * @param tenantId - Tenant the scan belongs to.
   * @param repos - Repositories reported by the scanner.
   * @param isPartial - True when the scan did not complete; results are then
   *   staged instead of written.
   * @returns Number of updates staged (partial scan) or owner updates actually
   *   written (full scan).
   */
  async mergeGitHubDiscovery(tenantId: string, repos: GitHubRepo[], isPartial: boolean): Promise<number> {
    if (isPartial) {
      const staged = repos.map((repo): StagedUpdate => ({
        serviceName: repo.name,
        changes: {
          owner: repo.owner,
          ownerSource: repo.ownerSource,
          links: { repo: `https://github.com/${repo.fullName}` },
          tags: { language: repo.language },
        },
        source: 'github',
      }));
      return this.stageUpdates(tenantId, staged);
    }

    let updated = 0;
    for (const repo of repos) {
      const existing = await this.getService(tenantId, repo.name);
      if (!existing) continue; // no catalog row to enrich

      // Stored owner is listed first so it wins a complete tie.
      const claims: OwnershipRecord[] = [
        { owner: existing.owner, source: existing.ownerSource, confidence: 1 },
        { owner: repo.owner, source: repo.ownerSource, confidence: 0.8 },
      ].filter((claim) => claim.owner !== 'unknown');
      if (claims.length === 0) continue; // neither side knows the owner

      const resolved = resolveOwnership(claims);
      if (resolved.owner === existing.owner && resolved.source === existing.ownerSource) {
        continue; // nothing changed — skip the write and don't count it
      }

      await this.updateOwner(tenantId, repo.name, resolved.owner, resolved.source);
      updated++;
    }
    return updated;
  }

  /** Records updates for admin review; currently only logs and returns the count. */
  private async stageUpdates(tenantId: string, updates: StagedUpdate[]): Promise<number> {
    // Write to staging table — admin reviews before committing
    // TODO: INSERT INTO staged_updates
    logger.info({ tenantId, count: updates.length }, 'Staged updates for review');
    return updates.length;
  }

  /** Creates or updates one catalog row (not implemented yet). */
  private async upsertService(tenantId: string, data: Partial<CatalogEntry>): Promise<void> {
    // TODO: INSERT ... ON CONFLICT (tenant_id, name) DO UPDATE
  }

  /** Looks up one catalog row by name; the stub always returns null. */
  private async getService(tenantId: string, name: string): Promise<CatalogEntry | null> {
    // TODO: SELECT from catalog
    return null;
  }

  /** Persists a new owner for one catalog row (not implemented yet). */
  private async updateOwner(tenantId: string, name: string, owner: string, source: OwnerSource): Promise<void> {
    // TODO: UPDATE catalog SET owner, owner_source
  }
}
/**
 * AWS Discovery Scanner.
 * Scans ECS services, Lambda functions and RDS instances for one
 * region/account. A failure in one resource type does not abort the others
 * (BMad must-have: partial failures preserve the existing catalog).
 */
export class AwsDiscoveryScanner {
  private ecsClient: any;
  private lambdaClient: any;
  private rdsClient: any;

  constructor(clients: { ecs: any; lambda: any; rds: any }) {
    this.ecsClient = clients.ecs;
    this.lambdaClient = clients.lambda;
    this.rdsClient = clients.rds;
  }

  /**
   * Runs all resource scanners and combines their results.
   *
   * The status reflects how many scanners failed, not how many services were
   * found: `success` when none failed, `failed` when all failed, and
   * `partial_failure` otherwise — including the case where the scanners that
   * did succeed simply had nothing to report.
   *
   * @param region - AWS region to scan.
   * @param account - AWS account id the results are attributed to.
   * @returns Discovered services (ECS, then Lambda, then RDS), one error
   *   message per failed scanner, and the overall status. Never rejects
   *   because of a scanner error.
   */
  async scan(region: string, account: string): Promise<ScanResult> {
    const scanners: Array<{ label: string; run: () => Promise<DiscoveredService[]> }> = [
      { label: 'ECS', run: () => this.scanEcs(region, account) },
      { label: 'Lambda', run: () => this.scanLambda(region, account) },
      { label: 'RDS', run: () => this.scanRds(region, account) },
    ];

    // The scanners are independent, so run them concurrently; each one
    // converts its own failure into a value so the others still finish.
    const outcomes = await Promise.all(
      scanners.map(async ({ label, run }) => {
        try {
          return { label, found: await run(), failure: null };
        } catch (err) {
          const failure = err instanceof Error ? err.message : String(err);
          return { label, found: [] as DiscoveredService[], failure };
        }
      }),
    );

    const services: DiscoveredService[] = [];
    const errors: string[] = [];
    for (const { label, found, failure } of outcomes) {
      if (failure === null) {
        services.push(...found);
        continue;
      }
      errors.push(`${label} scan failed: ${failure}`);
      logger.warn({ region, error: failure }, `${label} scan failed`);
    }

    let status: ScanResult['status'];
    if (errors.length === 0) {
      status = 'success';
    } else if (errors.length < scanners.length) {
      status = 'partial_failure';
    } else {
      status = 'failed';
    }

    return { status, discovered: services.length, errors, services };
  }

  /** ECS discovery (not implemented yet — returns no services). */
  private async scanEcs(region: string, account: string): Promise<DiscoveredService[]> {
    // TODO: List clusters → list services → describe services → extract tags
    return [];
  }

  /** Lambda discovery (not implemented yet — returns no services). */
  private async scanLambda(region: string, account: string): Promise<DiscoveredService[]> {
    // TODO: List functions → get tags → map to DiscoveredService
    return [];
  }

  /** RDS discovery (not implemented yet — returns no services). */
  private async scanRds(region: string, account: string): Promise<DiscoveredService[]> {
    // TODO: Describe DB instances → extract tags
    return [];
  }
}
/** Page size for listing organisation repositories. */
const REPO_PAGE_SIZE = 100;

/** Hard stop for pagination so an API that never returns a short page cannot loop forever. */
const MAX_REPO_PAGES = 100;

/** Locations GitHub searches for a CODEOWNERS file, in order of precedence. */
const CODEOWNERS_PATHS = ['.github/CODEOWNERS', 'CODEOWNERS', 'docs/CODEOWNERS'] as const;

/** Returns a printable message for anything that can be thrown. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Returns the numeric `status` of an error object, if it has one. */
function httpStatus(err: unknown): number | undefined {
  if (typeof err !== 'object' || err === null) return undefined;
  const status = (err as { status?: unknown }).status;
  return typeof status === 'number' ? status : undefined;
}

/**
 * GitHub Discovery Scanner.
 * Reads repos, CODEOWNERS, and infers ownership from commit history.
 * Problems with individual repositories (rate limits, server errors) are
 * reported as a partial failure instead of being hidden behind an
 * `unknown` owner.
 */
export class GitHubDiscoveryScanner {
  private octokit: any;

  constructor(octokit: any) {
    this.octokit = octokit;
  }

  /**
   * Lists every repository of an organisation and resolves an owner for each.
   *
   * Repositories are fetched page by page until a short page is returned.
   * A repository whose owner lookup fails is left out of `repos` and
   * recorded in `errors`.
   *
   * @param org - GitHub organisation login.
   * @returns `success` when everything was read, `partial_failure` when some
   *   repositories or a later page could not be read (or the API rate limit
   *   was hit), `failed` when listing failed before any repository was
   *   collected. Never rejects because of an API error.
   */
  async scan(org: string): Promise<GitHubScanResult> {
    const repos: GitHubRepo[] = [];
    const errors: string[] = [];

    try {
      for (let page = 1; page <= MAX_REPO_PAGES; page += 1) {
        const { data } = await this.octokit.repos.listForOrg({
          org,
          per_page: REPO_PAGE_SIZE,
          sort: 'pushed',
          page,
        });

        for (const repo of data) {
          try {
            const owner = await this.resolveOwner(org, repo.name, repo.default_branch);
            repos.push({
              name: repo.name,
              fullName: repo.full_name,
              owner: owner.owner,
              ownerSource: owner.source,
              language: repo.language ?? 'unknown',
              defaultBranch: repo.default_branch,
              topics: repo.topics ?? [],
              lastPush: new Date(repo.pushed_at),
            });
          } catch (err) {
            errors.push(`Repo ${repo.name}: ${describeError(err)}`);
          }
        }

        if (data.length < REPO_PAGE_SIZE) break; // short page — nothing left to list
      }
    } catch (err) {
      const msg = describeError(err);
      const rateLimited = /rate limit/i.test(msg);
      if (rateLimited) {
        logger.warn({ org }, 'GitHub rate limited during scan');
      }
      // Keep whatever earlier pages produced instead of discarding it.
      if (rateLimited || repos.length > 0) {
        return { status: 'partial_failure', discovered: repos.length, errors: [...errors, msg], repos };
      }
      return { status: 'failed', discovered: 0, errors: [...errors, msg], repos: [] };
    }

    return {
      status: errors.length > 0 ? 'partial_failure' : 'success',
      discovered: repos.length,
      errors,
      repos,
    };
  }

  /**
   * Determines the owner of one repository.
   *
   * Order of preference: the first CODEOWNERS file found in the locations
   * GitHub supports, then the most frequent commit author of the last
   * 90 days (at most 50 commits), then `unknown`.
   *
   * Only "nothing there" responses are treated as a miss (404 for either
   * lookup, plus 409 for the commit listing — NOTE(review): GitHub appears to
   * return 409 for empty repositories; confirm). Any other error is rethrown
   * so `scan` can report the repository as failed.
   */
  private async resolveOwner(org: string, repo: string, branch: string): Promise<{ owner: string; source: 'codeowners' | 'heuristic' }> {
    // Try CODEOWNERS first (explicit > heuristic)
    for (const path of CODEOWNERS_PATHS) {
      try {
        const { data } = await this.octokit.repos.getContent({
          owner: org,
          repo,
          path,
          ref: branch,
        });
        // A directory listing (array) or a response without inline content is not a usable file.
        if (typeof data?.content !== 'string') continue;

        const content = Buffer.from(data.content, 'base64').toString();
        const owner = parseCodeowners(content);
        if (owner) return { owner, source: 'codeowners' };
        break; // only the first file found counts, even if it names no owner
      } catch (err) {
        if (httpStatus(err) !== 404) throw err;
        // Not at this location — try the next one
      }
    }

    // Heuristic: top committer in last 90 days
    try {
      const { data } = await this.octokit.repos.listCommits({
        owner: org,
        repo,
        per_page: 50,
        since: new Date(Date.now() - 90 * 24 * 60 * 60 * 1000).toISOString(),
      });
      const authors: string[] = data
        .map((c: any) => c.author?.login)
        .filter((login: unknown): login is string => typeof login === 'string' && login !== '');
      const topAuthor = mode(authors);
      if (topAuthor) return { owner: topAuthor, source: 'heuristic' };
    } catch (err) {
      const status = httpStatus(err);
      if (status !== 404 && status !== 409) throw err;
      // No commit history available — can't determine owner
    }

    return { owner: 'unknown', source: 'heuristic' };
  }
}

/**
 * Extracts a single repository-level owner from CODEOWNERS content.
 *
 * Prefers the catch-all rule (`* owner…`); when several exist the last one
 * is used, because later rules take precedence in CODEOWNERS. Without a
 * catch-all rule, the first owner of the first rule that has one is used.
 * Blank lines, comment lines (also when indented) and trailing ` # …`
 * comments are ignored; both LF and CRLF line endings are accepted.
 *
 * @returns The owner without its leading `@` (e-mail owners are returned
 *   unchanged), or null when the content names no owner.
 */
function parseCodeowners(content: string): string | null {
  let fallbackOwner: string | null = null;
  let globalOwner: string | null = null;

  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.replace(/\s#.*$/, '').trim();
    if (line === '' || line.startsWith('#')) continue;

    const [pattern, firstOwner] = line.split(/\s+/);
    if (firstOwner === undefined) continue; // rule without owners

    const owner = firstOwner.replace(/^@/, '');
    if (pattern === '*') {
      globalOwner = owner; // keep overwriting: last catch-all rule wins
    } else if (fallbackOwner === null) {
      fallbackOwner = owner;
    }
  }

  return globalOwner ?? fallbackOwner;
}

/**
 * Returns the most frequent string in `arr`, or null for an empty array.
 * On a tie, the value that was seen first wins.
 */
function mode(arr: string[]): string | null {
  // A Map (not a plain object) so values like "constructor" or "__proto__"
  // are counted like any other string.
  const counts = new Map<string, number>();
  for (const item of arr) {
    counts.set(item, (counts.get(item) ?? 0) + 1);
  }

  let best: string | null = null;
  let bestCount = 0;
  for (const [value, count] of counts) {
    if (count > bestCount) {
      bestCount = count;
      best = value;
    }
  }
  return best;
}
import { describe, it, expect } from 'vitest';
import { resolveOwnership, type OwnershipRecord } from '../../src/catalog/service.js';

/** Builds one ownership claim so each case reads as a single line per candidate. */
const claim = (
  owner: string,
  source: OwnershipRecord['source'],
  confidence: number,
): OwnershipRecord => ({ owner, source, confidence });

// Covers the priority chain config > codeowners > aws-tag > heuristic,
// plus the empty-input fallback.
describe('Ownership Resolution', () => {
  it('explicit config overrides AWS tag', () => {
    const winner = resolveOwnership([
      claim('team-infra', 'aws-tag', 1),
      claim('team-platform', 'config', 1),
    ]);

    expect(winner.owner).toBe('team-platform');
    expect(winner.source).toBe('config');
  });

  it('CODEOWNERS overrides AWS tag', () => {
    const winner = resolveOwnership([
      claim('team-infra', 'aws-tag', 1),
      claim('team-platform', 'codeowners', 1),
    ]);

    expect(winner.owner).toBe('team-platform');
    expect(winner.source).toBe('codeowners');
  });

  it('AWS tag overrides heuristic', () => {
    const winner = resolveOwnership([
      claim('dev@other.com', 'heuristic', 0.5),
      claim('team-infra', 'aws-tag', 1),
    ]);

    expect(winner.owner).toBe('team-infra');
  });

  it('heuristic does not override explicit config', () => {
    const winner = resolveOwnership([
      claim('team-platform', 'config', 1),
      claim('dev@other.com', 'heuristic', 0.8),
    ]);

    expect(winner.owner).toBe('team-platform');
  });

  it('returns unknown for empty candidates', () => {
    const fallback = resolveOwnership([]);

    expect(fallback.owner).toBe('unknown');
    expect(fallback.source).toBe('heuristic');
    expect(fallback.confidence).toBe(0);
  });

  it('config > codeowners > aws-tag > heuristic (full chain)', () => {
    const winner = resolveOwnership([
      claim('heuristic-team', 'heuristic', 0.3),
      claim('aws-team', 'aws-tag', 0.8),
      claim('codeowners-team', 'codeowners', 0.9),
      claim('config-team', 'config', 1),
    ]);

    expect(winner.owner).toBe('config-team');
  });
});