Files
dd0c/products/04-lightweight-idp/src/discovery/github-scanner.ts
Max Mayfield 23db74b306 Scaffold dd0c/portal: AWS+GitHub discovery, catalog service, ownership resolution
- AWS scanner: ECS/Lambda/RDS discovery with partial failure handling
- GitHub scanner: CODEOWNERS parsing, commit-based heuristic ownership, rate limit resilience
- Catalog service: ownership resolution (config > codeowners > aws-tag > heuristic), staged updates for partial scans
- Ownership tests: 6 cases covering full priority chain
- PostgreSQL schema with RLS: services, staged_updates, scan_history, free tier (50 services)
- Fly.io config, Dockerfile
2026-03-01 02:51:02 +00:00

140 lines
4.1 KiB
TypeScript

import pino from 'pino';
import type { DiscoveredService } from './aws-scanner.js';
// Module-level pino logger; every entry it emits is tagged with the name 'discovery-github'.
const logger = pino({ name: 'discovery-github' });
/**
 * Outcome of a single organization scan, as returned by
 * `GitHubDiscoveryScanner.scan`.
 */
export interface GitHubScanResult {
/**
 * `success` when no errors were recorded, `partial_failure` when some
 * per-repo errors were recorded or the scan hit a rate limit, and `failed`
 * when listing the organization's repositories failed for any other reason.
 */
status: 'success' | 'partial_failure' | 'failed';
/** Number of entries in `repos` (0 when `status` is `failed`). */
discovered: number;
/** Human-readable error messages collected during the scan. */
errors: string[];
/** Repositories that were resolved successfully; empty when `status` is `failed`. */
repos: GitHubRepo[];
}
/**
 * One repository discovered by the GitHub scanner, together with the owner
 * the scanner resolved for it.
 */
export interface GitHubRepo {
/** Short repository name (the listing's `name` field). */
name: string;
/** Qualified repository name (the listing's `full_name` field). */
fullName: string;
/** Resolved owner; the scanner falls back to the literal `'unknown'` when nothing can be determined. */
owner: string;
/** How `owner` was obtained: from a CODEOWNERS file or from the commit-history heuristic. */
ownerSource: 'codeowners' | 'heuristic';
/** Language reported by the listing; the scanner substitutes `'unknown'` when it is absent. */
language: string;
/** Default branch reported by the listing (the listing's `default_branch` field). */
defaultBranch: string;
/** Topics reported by the listing; the scanner substitutes an empty array when absent. */
topics: string[];
/** Time of the last push, built from the listing's `pushed_at` field. */
lastPush: Date;
/**
 * Optional. The scanner in this file does not set this field.
 * NOTE(review): presumably the raw CODEOWNERS text — confirm against whoever populates it.
 */
codeownersContent?: string;
}
/**
 * GitHub Discovery Scanner.
 *
 * Lists every repository of an organization (following pagination) and
 * resolves an owner for each one: an explicit CODEOWNERS entry when present,
 * otherwise the most frequent commit author of the last 90 days, otherwise
 * the literal `'unknown'`.
 *
 * A rate-limit error anywhere in the scan aborts it and is reported as
 * `partial_failure`, carrying only the repos resolved before the limit was
 * hit, so callers can tell the result is incomplete instead of receiving
 * repos silently downgraded to `'unknown'` ownership.
 */
export class GitHubDiscoveryScanner {
  /** Page size used for the repository listing. */
  private static readonly pageSize = 100;

  /** Upper bound on listing pages, guarding against a listing that never ends. */
  private static readonly maxPages = 100;

  /** Locations GitHub searches for a CODEOWNERS file, in precedence order. */
  private static readonly codeownersPaths = [
    '.github/CODEOWNERS',
    'CODEOWNERS',
    'docs/CODEOWNERS',
  ] as const;

  private octokit: any;

  /**
   * @param octokit Octokit-like client exposing `repos.listForOrg`,
   *   `repos.getContent` and `repos.listCommits`.
   */
  constructor(octokit: any) {
    this.octokit = octokit;
  }

  /**
   * Scans all repositories of `org`.
   *
   * @param org GitHub organization login.
   * @returns `success` when every repo resolved, `partial_failure` when some
   *   repos failed or the scan was rate limited, `failed` when the repository
   *   listing itself failed for another reason. Never rejects for API errors.
   */
  async scan(org: string): Promise<GitHubScanResult> {
    const repos: GitHubRepo[] = [];
    const errors: string[] = [];

    try {
      for (let page = 1; ; page++) {
        if (page > GitHubDiscoveryScanner.maxPages) {
          errors.push(
            `Repository listing truncated after ${GitHubDiscoveryScanner.maxPages} pages`,
          );
          break;
        }

        const { data } = await this.octokit.repos.listForOrg({
          org,
          per_page: GitHubDiscoveryScanner.pageSize,
          sort: 'pushed',
          page,
        });

        for (const repo of data) {
          try {
            const owner = await this.resolveOwner(org, repo.name, repo.default_branch);
            repos.push({
              name: repo.name,
              fullName: repo.full_name,
              owner: owner.owner,
              ownerSource: owner.source,
              language: repo.language ?? 'unknown',
              defaultBranch: repo.default_branch,
              topics: repo.topics ?? [],
              lastPush: new Date(repo.pushed_at),
            });
          } catch (err) {
            // Continuing while rate limited would only fail every remaining
            // repo; hand the error to the outer handler to stop the scan.
            if (GitHubDiscoveryScanner.isRateLimitError(err)) throw err;
            errors.push(`Repo ${repo.name}: ${GitHubDiscoveryScanner.errorMessage(err)}`);
          }
        }

        // A short page is the last page.
        if (data.length < GitHubDiscoveryScanner.pageSize) break;
      }
    } catch (err) {
      const msg = GitHubDiscoveryScanner.errorMessage(err);
      if (GitHubDiscoveryScanner.isRateLimitError(err)) {
        logger.warn({ org }, 'GitHub rate limited during scan');
        return {
          status: 'partial_failure',
          discovered: repos.length,
          errors: [...errors, msg],
          repos,
        };
      }
      return { status: 'failed', discovered: 0, errors: [msg], repos: [] };
    }

    return {
      status: errors.length > 0 ? 'partial_failure' : 'success',
      discovered: repos.length,
      errors,
      repos,
    };
  }

  /**
   * Resolves the owner of one repository.
   *
   * @param org Organization that owns the repository.
   * @param repo Repository name.
   * @param branch Branch whose CODEOWNERS file is read.
   * @returns The owner and how it was determined; `'unknown'` with source
   *   `'heuristic'` when neither strategy yields a result.
   * @throws Re-throws rate-limit errors so the caller can abort the scan;
   *   every other lookup error is treated as "no data" (best effort).
   */
  private async resolveOwner(
    org: string,
    repo: string,
    branch: string,
  ): Promise<{ owner: string; source: 'codeowners' | 'heuristic' }> {
    // Try CODEOWNERS first (explicit > heuristic).
    for (const path of GitHubDiscoveryScanner.codeownersPaths) {
      try {
        const { data } = await this.octokit.repos.getContent({
          owner: org,
          repo,
          path,
          ref: branch,
        });
        // Directory listings and oversized files carry no inline content.
        if (typeof data?.content !== 'string') continue;

        const content = Buffer.from(data.content, 'base64').toString();
        const owner = parseCodeowners(content);
        if (owner) return { owner, source: 'codeowners' };

        // GitHub only honours the first CODEOWNERS file it finds, so a file
        // without a usable owner ends the search.
        break;
      } catch (err) {
        if (GitHubDiscoveryScanner.isRateLimitError(err)) throw err;
        // No CODEOWNERS at this location — try the next one.
      }
    }

    // Heuristic: top committer in last 90 days.
    try {
      const { data } = await this.octokit.repos.listCommits({
        owner: org,
        repo,
        per_page: 50,
        since: new Date(Date.now() - 90 * 24 * 60 * 60 * 1000).toISOString(),
      });
      const authors = data
        .map((c: any) => c.author?.login)
        .filter(Boolean);
      const topAuthor = mode(authors);
      if (topAuthor) return { owner: topAuthor, source: 'heuristic' };
    } catch (err) {
      if (GitHubDiscoveryScanner.isRateLimitError(err)) throw err;
      // Can't determine owner (e.g. empty repository).
    }

    return { owner: 'unknown', source: 'heuristic' };
  }

  /** Extracts a printable message from any thrown value, not just `Error`s. */
  private static errorMessage(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** True when `err` is an HTTP 429 or its message mentions a rate limit. */
  private static isRateLimitError(err: unknown): boolean {
    if (
      typeof err === 'object' &&
      err !== null &&
      (err as { status?: unknown }).status === 429
    ) {
      return true;
    }
    return GitHubDiscoveryScanner.errorMessage(err).toLowerCase().includes('rate limit');
  }
}
/**
 * Picks a single "primary" owner out of CODEOWNERS file content.
 *
 * Only rules that name at least one owner are considered. Because the last
 * matching rule wins in CODEOWNERS, the LAST catch-all rule (pattern exactly
 * `*`) is preferred; without one, the first rule's owner is used.
 *
 * @param content Raw CODEOWNERS text (LF or CRLF line endings).
 * @returns The first owner of the chosen rule with its leading `@` removed
 *   (`@org/team` -> `org/team`; email owners are returned unchanged), or
 *   `null` when no rule names an owner.
 */
function parseCodeowners(content: string): string | null {
  let firstOwner: string | null = null;
  let globalOwner: string | null = null;

  for (const rawLine of content.split(/\r?\n/)) {
    const tokens = rawLine.trim().split(/\s+/);

    // A token starting with '#' begins a comment (whole-line or inline).
    const commentAt = tokens.findIndex(token => token.startsWith('#'));
    const [pattern, ownerToken] = commentAt === -1 ? tokens : tokens.slice(0, commentAt);
    if (!pattern || !ownerToken) continue;

    // Strip only a leading '@' so email owners keep theirs.
    const owner = ownerToken.replace(/^@/, '');
    if (!owner) continue;

    if (firstOwner === null) firstOwner = owner;
    // Exact match: '*.js' is a glob for one file type, not the catch-all rule.
    if (pattern === '*') globalOwner = owner;
  }

  return globalOwner ?? firstOwner;
}
/**
 * Returns the most frequent string in `arr`.
 *
 * Counts are kept in a `Map` rather than a plain object so values that
 * collide with `Object.prototype` members (e.g. `constructor`, `__proto__`)
 * are counted correctly and integer-like values keep their first-seen order.
 *
 * @param arr Values to tally.
 * @returns The value with the highest count; on a tie, the one that appeared
 *   first. `null` when `arr` is empty.
 */
function mode(arr: string[]): string | null {
  const counts = new Map<string, number>();
  for (const item of arr) {
    counts.set(item, (counts.get(item) ?? 0) + 1);
  }

  let best: string | null = null;
  let bestCount = 0;
  // Map iteration follows insertion order, so strict '>' keeps the first-seen value on ties.
  for (const [value, count] of counts) {
    if (count > bestCount) {
      bestCount = count;
      best = value;
    }
  }
  return best;
}