Scaffold dd0c/portal: AWS+GitHub discovery, catalog service, ownership resolution

- AWS scanner: ECS/Lambda/RDS discovery with partial failure handling
- GitHub scanner: CODEOWNERS parsing, commit-based heuristic ownership, rate limit resilience
- Catalog service: ownership resolution (config > codeowners > aws-tag > heuristic), staged updates for partial scans
- Ownership tests: 6 cases covering full priority chain
- PostgreSQL schema with RLS: services, staged_updates, scan_history, free tier (50 services)
- Fly.io config, Dockerfile
This commit is contained in:
2026-03-01 02:51:02 +00:00
parent ccc4cd1c32
commit 23db74b306
8 changed files with 600 additions and 0 deletions

View File

@@ -0,0 +1,152 @@
import pino from 'pino';
import type { DiscoveredService } from './aws-scanner.js';
import type { GitHubRepo } from './github-scanner.js';
// Module-level pino logger for this file, created with the name 'catalog'.
const logger = pino({ name: 'catalog' });
// --- Ownership Resolution (BMad must-have: explicit > implicit > heuristic) ---

/** Where an ownership claim came from. */
export type OwnerSource = 'config' | 'codeowners' | 'aws-tag' | 'heuristic';

/** A single ownership claim produced by one source. */
export interface OwnershipRecord {
  /** Owner identifier; `'unknown'` is used when no candidate exists. */
  owner: string;
  /** Source that produced this claim. */
  source: OwnerSource;
  /** Confidence in the claim, from 0 to 1. */
  confidence: number;
}

/** Relative authority of each source; the higher number wins. */
const SOURCE_PRIORITY: Record<OwnerSource, number> = {
  config: 4,
  codeowners: 3,
  'aws-tag': 2,
  heuristic: 1,
};

/**
 * Picks the most authoritative ownership claim from `candidates`.
 *
 * Ordering rules:
 * 1. The candidate whose source has the highest priority wins.
 * 2. Among candidates with the same source priority, the higher `confidence` wins.
 * 3. On a complete tie, the candidate that appears first in the input wins.
 *
 * The input array is never reordered or otherwise modified.
 *
 * @param candidates Ownership claims to choose between; may be empty.
 * @returns The winning element of `candidates` (same object reference), or an
 *          `'unknown'` heuristic record with confidence 0 when the list is empty.
 */
export function resolveOwnership(candidates: readonly OwnershipRecord[]): OwnershipRecord {
  let best: OwnershipRecord | undefined;

  for (const candidate of candidates) {
    if (best === undefined) {
      best = candidate;
      continue;
    }
    const priorityGap = SOURCE_PRIORITY[candidate.source] - SOURCE_PRIORITY[best.source];
    // Strict comparisons keep the earliest candidate on a full tie.
    if (priorityGap > 0 || (priorityGap === 0 && candidate.confidence > best.confidence)) {
      best = candidate;
    }
  }

  return best ?? { owner: 'unknown', source: 'heuristic', confidence: 0 };
}
// --- Catalog Service ---
/**
 * One service record in the catalog.
 * Discovery merges hand a `Partial<CatalogEntry>` to the catalog service, so a
 * scanner only supplies the fields it knows about.
 */
export interface CatalogEntry {
id: string;
// Tenant this entry belongs to.
tenantId: string;
// Service name; the catalog service looks entries up by tenant and name.
name: string;
// Kind of resource; AWS discovery copies the scanner's `type` value here.
type: string;
// Resolved owner; AWS full scans fall back to 'unknown' when no owner was found.
owner: string;
// Which source the owner value came from.
ownerSource: OwnerSource;
description?: string;
tier?: 'critical' | 'high' | 'medium' | 'low';
lifecycle?: 'active' | 'deprecated' | 'decommissioned';
links: Record<string, string>; // repo, dashboard, runbook, etc.
tags: Record<string, string>;
// Free-form extra data; AWS discovery passes the scanner's metadata through.
metadata: Record<string, any>;
// Set from the scanner's `discoveredAt` timestamp when AWS results are merged.
lastDiscoveredAt: Date;
createdAt: Date;
updatedAt: Date;
}
/**
 * A pending change produced by a partial scan.
 * Staged updates are handed to the staging step instead of being written
 * straight to the catalog.
 */
export interface StagedUpdate {
// Name of the service the change applies to.
serviceName: string;
// Subset of catalog fields the scan wants to set.
changes: Partial<CatalogEntry>;
// Which producer generated the change.
source: 'aws' | 'github' | 'manual';
}
/**
 * Catalog service handles merging discovery results into the catalog.
 * Partial scan failures stage results without committing (BMad must-have).
 *
 * Persistence is still stubbed: the private helpers at the bottom only carry
 * TODO markers, so `getService` currently always reports "not found".
 */
export class CatalogService {
  // Database handle for the (not yet implemented) persistence helpers.
  private readonly pool: any;

  /**
   * @param pool Database connection pool kept for the persistence helpers.
   */
  constructor(pool: any) {
    this.pool = pool;
  }

  /**
   * Merges the result of an AWS scan into the catalog.
   *
   * @param tenantId  Tenant whose catalog is being updated.
   * @param services  Services reported by the AWS scanner.
   * @param isPartial When true the scan was incomplete, so results are staged
   *                  for review instead of being written to the catalog.
   * @returns Number of staged updates (partial scan) or upserted services (full scan).
   */
  async mergeAwsDiscovery(tenantId: string, services: DiscoveredService[], isPartial: boolean): Promise<number> {
    if (isPartial) {
      // Stage results — don't delete or overwrite existing entries
      logger.warn({ tenantId, count: services.length }, 'Partial scan — staging results');
      return this.stageUpdates(
        tenantId,
        services.map((s): StagedUpdate => ({
          serviceName: s.name,
          changes: {
            type: s.type,
            tags: s.tags,
            // Keep metadata so a staged change carries the same data a full scan would write.
            metadata: s.metadata,
            owner: s.owner,
            ownerSource: s.ownerSource,
            lastDiscoveredAt: s.discoveredAt,
          },
          source: 'aws',
        })),
      );
    }

    // Full scan — upsert all discovered services
    let upserted = 0;
    for (const svc of services) {
      await this.upsertService(tenantId, {
        name: svc.name,
        type: svc.type,
        tags: svc.tags,
        metadata: svc.metadata,
        owner: svc.owner ?? 'unknown',
        ownerSource: svc.ownerSource ?? 'aws-tag',
        lastDiscoveredAt: svc.discoveredAt,
      });
      upserted++;
    }
    return upserted;
  }

  /**
   * Merges the result of a GitHub scan into the catalog.
   *
   * A full scan only touches services that already exist in the catalog, and
   * only writes when ownership resolution picks a different owner or source
   * than the one already stored.
   *
   * @param tenantId  Tenant whose catalog is being updated.
   * @param repos     Repositories reported by the GitHub scanner.
   * @param isPartial When true the scan was incomplete, so results are staged
   *                  for review instead of being written to the catalog.
   * @returns Number of staged updates (partial scan) or number of catalog
   *          entries whose ownership was actually changed (full scan).
   */
  async mergeGitHubDiscovery(tenantId: string, repos: GitHubRepo[], isPartial: boolean): Promise<number> {
    if (isPartial) {
      // Same warning as the AWS path so partial scans are visible for both sources.
      logger.warn({ tenantId, count: repos.length }, 'Partial scan — staging results');
      return this.stageUpdates(
        tenantId,
        repos.map((r): StagedUpdate => ({
          serviceName: r.name,
          changes: {
            owner: r.owner,
            ownerSource: r.ownerSource,
            links: { repo: `https://github.com/${r.fullName}` },
            tags: { language: r.language },
          },
          source: 'github',
        })),
      );
    }

    let updated = 0;
    for (const repo of repos) {
      const existing = await this.getService(tenantId, repo.name);
      if (!existing) {
        // No catalog entry with this name: nothing to update, nothing to count.
        continue;
      }

      // Only update ownership if GitHub source has higher priority.
      // NOTE(review): when the stored owner came from the same source (e.g. an
      // older CODEOWNERS read) the stored value wins, so a changed CODEOWNERS
      // file is not picked up here — confirm whether that is intended.
      const resolved = resolveOwnership([
        { owner: existing.owner, source: existing.ownerSource, confidence: 1 },
        { owner: repo.owner, source: repo.ownerSource, confidence: 0.8 },
      ]);

      const unchanged = resolved.owner === existing.owner && resolved.source === existing.ownerSource;
      if (unchanged) {
        continue;
      }

      await this.updateOwner(tenantId, repo.name, resolved.owner, resolved.source);
      updated++;
    }
    return updated;
  }

  /**
   * Records updates for later review instead of applying them.
   * @returns Number of updates that were staged.
   */
  private async stageUpdates(tenantId: string, updates: StagedUpdate[]): Promise<number> {
    // Write to staging table — admin reviews before committing
    // TODO: INSERT INTO staged_updates
    logger.info({ tenantId, count: updates.length }, 'Staged updates for review');
    return updates.length;
  }

  /** Creates or updates a catalog entry. Not implemented yet. */
  private async upsertService(tenantId: string, data: Partial<CatalogEntry>): Promise<void> {
    // TODO: INSERT ... ON CONFLICT (tenant_id, name) DO UPDATE
  }

  /** Looks up a catalog entry by tenant and name. Not implemented yet: always returns null. */
  private async getService(tenantId: string, name: string): Promise<CatalogEntry | null> {
    // TODO: SELECT from catalog
    return null;
  }

  /** Stores a new owner and owner source for a catalog entry. Not implemented yet. */
  private async updateOwner(tenantId: string, name: string, owner: string, source: OwnerSource): Promise<void> {
    // TODO: UPDATE catalog SET owner, owner_source
  }
}

View File

@@ -0,0 +1,95 @@
import pino from 'pino';
// Module-level pino logger for this file, created with the name 'discovery-aws'.
const logger = pino({ name: 'discovery-aws' });
/**
 * One resource reported by the AWS scanner.
 */
export interface DiscoveredService {
name: string;
type: string; // 'ecs-service', 'lambda', 'rds', 'ec2', etc.
arn: string;
region: string;
account: string;
tags: Record<string, string>;
// Optional: absent when the scanner could not determine an owner.
owner?: string; // From tags or CODEOWNERS
// Which source the `owner` value came from; optional like `owner`.
ownerSource?: 'aws-tag' | 'codeowners' | 'config' | 'heuristic';
// Free-form extra data attached by the scanner.
metadata: Record<string, any>;
// Timestamp attached to the discovery.
discoveredAt: Date;
}
/**
 * Outcome of one AWS scan run.
 */
export interface ScanResult {
// Overall outcome of the run.
status: 'success' | 'partial_failure' | 'failed';
// Number of entries in `services`.
discovered: number;
// One message per scanner step that threw.
errors: string[];
// Everything the steps that did not throw returned.
services: DiscoveredService[];
}
/**
 * AWS Discovery Scanner.
 * Scans ECS services, Lambda functions, RDS instances.
 * Partial failures preserve existing catalog (BMad must-have).
 *
 * The individual scanners are still stubs that return an empty list.
 */
export class AwsDiscoveryScanner {
  private readonly ecsClient: any;
  private readonly lambdaClient: any;
  private readonly rdsClient: any;

  /**
   * @param clients AWS SDK clients kept for the ECS, Lambda and RDS scanners.
   */
  constructor(clients: { ecs: any; lambda: any; rds: any }) {
    this.ecsClient = clients.ecs;
    this.lambdaClient = clients.lambda;
    this.rdsClient = clients.rds;
  }

  /**
   * Runs every resource scanner in turn (ECS, then Lambda, then RDS) and
   * combines the results. A scanner that throws does not stop the others; its
   * error message is recorded and logged instead.
   *
   * Status rules:
   * - `success`: no scanner failed.
   * - `partial_failure`: at least one scanner failed and at least one did not.
   * - `failed`: every scanner failed.
   *
   * The status depends on which scanners failed, not on how many services
   * were found, so a single failure in an account where the other scanners
   * legitimately find nothing is reported as partial rather than total failure.
   *
   * @param region  AWS region passed to each scanner.
   * @param account AWS account passed to each scanner.
   * @returns Combined scan result; this method does not throw on scanner errors.
   */
  async scan(region: string, account: string): Promise<ScanResult> {
    const scanners: Array<{ label: string; run: () => Promise<DiscoveredService[]> }> = [
      { label: 'ECS', run: () => this.scanEcs(region, account) },
      { label: 'Lambda', run: () => this.scanLambda(region, account) },
      { label: 'RDS', run: () => this.scanRds(region, account) },
    ];

    const services: DiscoveredService[] = [];
    const errors: string[] = [];

    for (const { label, run } of scanners) {
      try {
        const found = await run();
        // Append one by one: spreading a very large array into push() can overflow the stack.
        for (const service of found) {
          services.push(service);
        }
      } catch (err) {
        // Anything can be thrown, so only read `.message` from real Error objects.
        const message = err instanceof Error ? err.message : String(err);
        errors.push(`${label} scan failed: ${message}`);
        logger.warn({ region, error: message }, `${label} scan failed`);
      }
    }

    let status: 'success' | 'partial_failure' | 'failed';
    if (errors.length === 0) {
      status = 'success';
    } else if (errors.length < scanners.length) {
      status = 'partial_failure';
    } else {
      status = 'failed';
    }

    return { status, discovered: services.length, errors, services };
  }

  /** Discovers ECS services. Not implemented yet: returns an empty list. */
  private async scanEcs(region: string, account: string): Promise<DiscoveredService[]> {
    // TODO: List clusters → list services → describe services → extract tags
    return [];
  }

  /** Discovers Lambda functions. Not implemented yet: returns an empty list. */
  private async scanLambda(region: string, account: string): Promise<DiscoveredService[]> {
    // TODO: List functions → get tags → map to DiscoveredService
    return [];
  }

  /** Discovers RDS instances. Not implemented yet: returns an empty list. */
  private async scanRds(region: string, account: string): Promise<DiscoveredService[]> {
    // TODO: Describe DB instances → extract tags
    return [];
  }
}

View File

@@ -0,0 +1,139 @@
import pino from 'pino';
import type { DiscoveredService } from './aws-scanner.js';
// Module-level pino logger for this file, created with the name 'discovery-github'.
const logger = pino({ name: 'discovery-github' });
/**
 * Outcome of one GitHub organisation scan.
 */
export interface GitHubScanResult {
// Overall outcome of the run.
status: 'success' | 'partial_failure' | 'failed';
// Number of repositories reported in `repos`.
discovered: number;
// Error messages collected while scanning.
errors: string[];
// Repositories that were processed successfully.
repos: GitHubRepo[];
}
/**
 * One repository reported by the GitHub scanner, with its resolved owner.
 */
export interface GitHubRepo {
// Short repository name.
name: string;
// Full name as reported by the API; the catalog appends it to https://github.com/ to build the repo link.
fullName: string;
// Resolved owner; 'unknown' when neither CODEOWNERS nor commit history gave one.
owner: string;
// Whether `owner` came from a CODEOWNERS file or from the commit-history heuristic.
ownerSource: 'codeowners' | 'heuristic';
// Primary language; the scanner stores 'unknown' when the API reports none.
language: string;
defaultBranch: string;
topics: string[];
// Built from the API's `pushed_at` value.
lastPush: Date;
// Raw CODEOWNERS text, when available.
codeownersContent?: string;
}
/**
 * GitHub Discovery Scanner.
 * Reads repos, CODEOWNERS, and infers ownership from commit history.
 * Partial failures (rate limits) preserve existing catalog entries.
 */
export class GitHubDiscoveryScanner {
  /** Page size used when listing an organisation's repositories. */
  private static readonly REPOS_PER_PAGE = 100;

  /** Locations GitHub searches for a CODEOWNERS file, in lookup order. */
  private static readonly CODEOWNERS_PATHS = ['.github/CODEOWNERS', 'CODEOWNERS', 'docs/CODEOWNERS'] as const;

  private readonly octokit: any;

  /**
   * @param octokit Octokit-style client exposing `repos.listForOrg`,
   *                `repos.getContent` and `repos.listCommits`.
   */
  constructor(octokit: any) {
    this.octokit = octokit;
  }

  /**
   * Scans every repository of `org` and resolves an owner for each.
   *
   * Repositories are listed page by page until a short page is returned, so
   * organisations with more than one page of repositories are fully covered.
   *
   * Failure handling:
   * - A rate-limit error anywhere stops the scan and returns `partial_failure`
   *   with the repositories collected so far.
   * - Any other error on a single repository is recorded and the scan continues.
   * - Any other error while listing returns `partial_failure` if at least one
   *   page was fully processed, otherwise `failed` with no repositories.
   *
   * @param org GitHub organisation login.
   * @returns Scan result; this method does not throw.
   */
  async scan(org: string): Promise<GitHubScanResult> {
    const repos: GitHubRepo[] = [];
    const errors: string[] = [];
    const perPage = GitHubDiscoveryScanner.REPOS_PER_PAGE;
    let completedPages = 0;

    try {
      for (let page = 1; ; page++) {
        const { data } = await this.octokit.repos.listForOrg({
          org,
          per_page: perPage,
          sort: 'pushed',
          page,
        });

        for (const repo of data) {
          try {
            const owner = await this.resolveOwner(org, repo.name, repo.default_branch);
            repos.push({
              name: repo.name,
              fullName: repo.full_name,
              owner: owner.owner,
              ownerSource: owner.source,
              language: repo.language ?? 'unknown',
              defaultBranch: repo.default_branch,
              topics: repo.topics ?? [],
              lastPush: new Date(repo.pushed_at),
            });
          } catch (err) {
            // Once throttled, every following request would fail too: stop the whole scan.
            if (GitHubDiscoveryScanner.isRateLimitError(err)) throw err;
            errors.push(`Repo ${repo.name}: ${GitHubDiscoveryScanner.describeError(err)}`);
          }
        }

        completedPages++;
        // A short page is the last page.
        if (data.length < perPage) break;
      }
    } catch (err) {
      const msg = GitHubDiscoveryScanner.describeError(err);
      if (GitHubDiscoveryScanner.isRateLimitError(err)) {
        logger.warn({ org }, 'GitHub rate limited during scan');
        return { status: 'partial_failure', discovered: repos.length, errors: [...errors, msg], repos };
      }
      if (completedPages > 0) {
        // Earlier pages were processed; keep them rather than discarding the work.
        return { status: 'partial_failure', discovered: repos.length, errors: [...errors, msg], repos };
      }
      return { status: 'failed', discovered: 0, errors: [msg], repos: [] };
    }

    return {
      status: errors.length > 0 ? 'partial_failure' : 'success',
      discovered: repos.length,
      errors,
      repos,
    };
  }

  /**
   * Determines the owner of one repository.
   *
   * Order: CODEOWNERS file (explicit) first, then the most frequent commit
   * author of the last 90 days (heuristic), then `'unknown'`.
   *
   * @throws Re-throws rate-limit errors so the caller can stop the scan;
   *         all other lookup errors are treated as "no information".
   */
  private async resolveOwner(org: string, repo: string, branch: string): Promise<{ owner: string; source: 'codeowners' | 'heuristic' }> {
    // Try CODEOWNERS first (explicit > heuristic)
    for (const path of GitHubDiscoveryScanner.CODEOWNERS_PATHS) {
      try {
        const { data } = await this.octokit.repos.getContent({
          owner: org,
          repo,
          path,
          ref: branch,
        });
        const content = Buffer.from(data.content, 'base64').toString();
        const owner = parseCodeowners(content);
        if (owner) return { owner, source: 'codeowners' };
        // GitHub only uses the first CODEOWNERS file it finds, so stop looking.
        break;
      } catch (err) {
        if (GitHubDiscoveryScanner.isRateLimitError(err)) throw err;
        // No readable CODEOWNERS at this location — try the next one
      }
    }

    // Heuristic: top committer in last 90 days
    try {
      const { data } = await this.octokit.repos.listCommits({
        owner: org,
        repo,
        per_page: 50,
        since: new Date(Date.now() - 90 * 24 * 60 * 60 * 1000).toISOString(),
      });
      const authors = data
        .map((c: any) => c.author?.login)
        .filter(Boolean);
      const topAuthor = mode(authors);
      if (topAuthor) return { owner: topAuthor, source: 'heuristic' };
    } catch (err) {
      if (GitHubDiscoveryScanner.isRateLimitError(err)) throw err;
      // Can't determine owner
    }

    return { owner: 'unknown', source: 'heuristic' };
  }

  /** Returns a printable message for any thrown value. */
  private static describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** True when the error is an HTTP 429 or its message mentions a rate limit. */
  private static isRateLimitError(err: unknown): boolean {
    const status =
      typeof err === 'object' && err !== null ? (err as { status?: unknown }).status : undefined;
    return status === 429 || /rate limit/i.test(GitHubDiscoveryScanner.describeError(err));
  }
}
/**
 * Extracts a single default owner from the text of a CODEOWNERS file.
 *
 * Blank lines and comment lines are ignored (also when indented), and
 * anything after an inline `#` token is dropped. The owner is taken from:
 * 1. the last rule whose pattern is exactly `*` (later rules take precedence
 *    in CODEOWNERS), or `null` if that rule lists no owner;
 * 2. otherwise the first rule that lists an owner.
 *
 * Only a leading `@` is removed from the owner, so `@org/team` becomes
 * `org/team` while an email address is returned unchanged.
 *
 * @param content Raw CODEOWNERS file content.
 * @returns The owner, or `null` when none can be determined.
 */
function parseCodeowners(content: string): string | null {
  const rules: Array<{ pattern: string; owner: string | null }> = [];

  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line === '' || line.startsWith('#')) continue;

    const tokens = line.split(/\s+/);
    const pattern = tokens[0] ?? '';
    const firstOwner = tokens[1];
    // A '#' token starts an inline comment, so it is not an owner.
    const owner =
      firstOwner !== undefined && !firstOwner.startsWith('#') ? firstOwner.replace(/^@/, '') : null;
    rules.push({ pattern, owner: owner === '' ? null : owner });
  }

  // Last matching rule wins in CODEOWNERS — take the last global rule (* @team)
  for (let i = rules.length - 1; i >= 0; i--) {
    const rule = rules[i];
    if (rule !== undefined && rule.pattern === '*') return rule.owner;
  }

  // No global rule: fall back to the first rule that names an owner.
  return rules.find((rule) => rule.owner !== null)?.owner ?? null;
}
/**
 * Returns the most frequent string in `arr`.
 *
 * Counting uses a `Map`, so values that happen to match built-in object
 * property names (such as `constructor` or `toString`) are counted correctly,
 * and ties are broken by which value was seen first.
 *
 * @param arr Values to count; may be empty.
 * @returns The most frequent value, or `null` for an empty input.
 */
function mode(arr: readonly string[]): string | null {
  const counts = new Map<string, number>();
  for (const item of arr) {
    counts.set(item, (counts.get(item) ?? 0) + 1);
  }

  let winner: string | null = null;
  let winnerCount = 0;
  // Map iterates in insertion order, so a strict '>' keeps the first-seen value on ties.
  for (const [value, count] of counts) {
    if (count > winnerCount) {
      winnerCount = count;
      winner = value;
    }
  }
  return winner;
}