Scaffold dd0c/portal: AWS+GitHub discovery, catalog service, ownership resolution
- AWS scanner: ECS/Lambda/RDS discovery with partial failure handling
- GitHub scanner: CODEOWNERS parsing, commit-based heuristic ownership, rate limit resilience
- Catalog service: ownership resolution (config > codeowners > aws-tag > heuristic), staged updates for partial scans
- Ownership tests: 6 cases covering full priority chain
- PostgreSQL schema with RLS: services, staged_updates, scan_history, free tier (50 services)
- Fly.io config, Dockerfile
This commit is contained in:
139
products/04-lightweight-idp/src/discovery/github-scanner.ts
Normal file
139
products/04-lightweight-idp/src/discovery/github-scanner.ts
Normal file
@@ -0,0 +1,139 @@
|
||||
import pino from 'pino';
|
||||
import type { DiscoveredService } from './aws-scanner.js';
|
||||
|
||||
const logger = pino({ name: 'discovery-github' });
|
||||
|
||||
/**
 * Outcome of one GitHub organisation scan.
 */
export interface GitHubScanResult {
  /**
   * `success` when every repo was processed without error,
   * `partial_failure` when some errors were recorded (including rate limits),
   * `failed` when the scan could not produce any result.
   */
  status: 'success' | 'partial_failure' | 'failed';
  /** Number of entries in `repos`. */
  discovered: number;
  /** Human-readable error messages collected while scanning. */
  errors: string[];
  /** Repositories that were scanned successfully. */
  repos: GitHubRepo[];
}
|
||||
|
||||
/**
 * A repository discovered by the GitHub scanner, with its resolved owner.
 */
export interface GitHubRepo {
  /** Short repository name (without the organisation prefix). */
  name: string;
  /** Repository name as reported by GitHub's `full_name` field. */
  fullName: string;
  /** Resolved owner; `'unknown'` when no owner could be determined. */
  owner: string;
  /** How `owner` was determined: an explicit CODEOWNERS rule or the commit-history heuristic. */
  ownerSource: 'codeowners' | 'heuristic';
  /** Primary language reported by GitHub, or `'unknown'` when none is reported. */
  language: string;
  /** Name of the repository's default branch. */
  defaultBranch: string;
  /** Repository topics; empty when GitHub reports none. */
  topics: string[];
  /** Timestamp of the most recent push. */
  lastPush: Date;
  /** Raw CODEOWNERS text. NOTE(review): not populated by the scanner in this file; confirm who sets it. */
  codeownersContent?: string;
}
|
||||
|
||||
/**
 * GitHub Discovery Scanner.
 * Reads repos, CODEOWNERS, and infers ownership from commit history.
 *
 * Rate limits are reported as `partial_failure` together with whatever was
 * discovered before the limit was hit, so that callers can keep their existing
 * catalog entries instead of overwriting them with incomplete data.
 */
export class GitHubDiscoveryScanner {
  // Octokit client is injected untyped; only `repos.listForOrg`,
  // `repos.getContent` and `repos.listCommits` are used.
  private octokit: any;

  /**
   * @param octokit Octokit-compatible client exposing the `repos.*` methods above.
   */
  constructor(octokit: any) {
    this.octokit = octokit;
  }

  /**
   * Scans every repository of `org` and resolves an owner for each.
   *
   * Never throws: all failures are folded into the returned result.
   *
   * @param org GitHub organisation login.
   * @returns `success` when all repos were processed, `partial_failure` when
   *   the scan was interrupted (e.g. by a rate limit) or some repos failed,
   *   `failed` when nothing could be discovered at all.
   */
  async scan(org: string): Promise<GitHubScanResult> {
    const repos: GitHubRepo[] = [];
    const errors: string[] = [];
    // Results are sorted by push time, so a repo can move between pages while
    // we paginate; remember what we've seen to avoid duplicates.
    const seen = new Set<string>();

    try {
      for (let page = 1; ; page++) {
        const { data } = await this.octokit.repos.listForOrg({
          org,
          per_page: REPOS_PER_PAGE,
          sort: 'pushed',
          page,
        });

        let fresh = 0;
        for (const repo of data) {
          if (seen.has(repo.name)) continue;
          seen.add(repo.name);
          fresh++;

          try {
            const owner = await this.resolveOwner(org, repo.name, repo.default_branch);
            repos.push({
              name: repo.name,
              fullName: repo.full_name,
              owner: owner.owner,
              ownerSource: owner.source,
              language: repo.language ?? 'unknown',
              defaultBranch: repo.default_branch,
              topics: repo.topics ?? [],
              lastPush: new Date(repo.pushed_at),
            });
          } catch (err) {
            // Once rate limited, every further call will fail too: stop the
            // whole scan rather than recording one error per remaining repo.
            if (isRateLimitError(err)) throw err;
            errors.push(`Repo ${repo.name}: ${describeError(err)}`);
          }
        }

        // A short page is the last page; a page with nothing new guards
        // against looping forever on a misbehaving endpoint.
        if (data.length < REPOS_PER_PAGE || fresh === 0) break;
      }
    } catch (err) {
      const msg = describeError(err);
      const rateLimited = isRateLimitError(err);
      if (rateLimited) {
        logger.warn({ org }, 'GitHub rate limited during scan');
      }
      // Keep what was already discovered instead of discarding it.
      if (rateLimited || repos.length > 0) {
        return { status: 'partial_failure', discovered: repos.length, errors: [...errors, msg], repos };
      }
      return { status: 'failed', discovered: 0, errors: [...errors, msg], repos: [] };
    }

    return {
      status: errors.length > 0 ? 'partial_failure' : 'success',
      discovered: repos.length,
      errors,
      repos,
    };
  }

  /**
   * Determines the owner of a single repository.
   *
   * Priority: explicit CODEOWNERS rule, then the most frequent commit author
   * of the last 90 days, then `'unknown'`.
   *
   * @throws Re-throws rate-limit errors so the caller can abort the scan;
   *   every other lookup failure is treated as "no information".
   */
  private async resolveOwner(org: string, repo: string, branch: string): Promise<{ owner: string; source: 'codeowners' | 'heuristic' }> {
    // Try CODEOWNERS first (explicit > heuristic)
    for (const path of CODEOWNERS_PATHS) {
      try {
        const { data } = await this.octokit.repos.getContent({
          owner: org,
          repo,
          path,
          ref: branch,
        });
        // A directory listing or an oversized file carries no inline content.
        if (typeof data?.content !== 'string') continue;

        const content = Buffer.from(data.content, 'base64').toString();
        const owner = parseCodeowners(content);
        if (owner) return { owner, source: 'codeowners' };
        // GitHub only honours the first CODEOWNERS file it finds, so do not
        // consult lower-priority locations once one exists.
        break;
      } catch (err) {
        if (isRateLimitError(err)) throw err;
        // No CODEOWNERS file at this location — try the next one
      }
    }

    // Heuristic: top committer in last 90 days
    try {
      const { data } = await this.octokit.repos.listCommits({
        owner: org,
        repo,
        per_page: 50,
        since: new Date(Date.now() - 90 * 24 * 60 * 60 * 1000).toISOString(),
      });
      const authors = data
        .map((c: any) => c.author?.login)
        .filter(Boolean);
      const topAuthor = mode(authors);
      if (topAuthor) return { owner: topAuthor, source: 'heuristic' };
    } catch (err) {
      if (isRateLimitError(err)) throw err;
      // Can't determine owner (e.g. empty repository)
    }

    return { owner: 'unknown', source: 'heuristic' };
  }
}

/** Page size used when listing an organisation's repositories. */
const REPOS_PER_PAGE = 100;

/** Locations GitHub searches for a CODEOWNERS file, in priority order. */
const CODEOWNERS_PATHS = ['.github/CODEOWNERS', 'CODEOWNERS', 'docs/CODEOWNERS'];

/** Extracts a printable message from any thrown value. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** True when a thrown value looks like a GitHub primary or secondary rate-limit error. */
function isRateLimitError(err: unknown): boolean {
  return describeError(err).toLowerCase().includes('rate limit');
}
|
||||
|
||||
/**
 * Extracts a single repository-level owner from CODEOWNERS text.
 *
 * The owner of the catch-all `*` rule is preferred; because the last matching
 * rule wins in CODEOWNERS, the last `*` rule is used. When there is no `*`
 * rule, the first owner of the first rule that has one is returned.
 *
 * @param content Raw CODEOWNERS file content.
 * @returns The owner without a leading `@` (team handles keep their `org/`
 *   prefix, e-mail owners are returned unchanged), or `null` when no rule
 *   names an owner.
 */
function parseCodeowners(content: string): string | null {
  let globalOwner: string | null = null;
  let firstOwner: string | null = null;

  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();
    // Skip blanks and comments (comments may be indented).
    if (!line || line.startsWith('#')) continue;

    const [pattern, ...rest] = line.split(/\s+/);
    // Anything from an inline `#` onwards is a comment, not an owner.
    const commentAt = rest.findIndex(token => token.startsWith('#'));
    const owners = commentAt === -1 ? rest : rest.slice(0, commentAt);

    // Only strip the handle prefix; an `@` inside an e-mail must survive.
    const owner = owners[0]?.replace(/^@/, '');
    if (!owner) continue; // rule without owners

    // Exact match only: `*.js` is a file-type rule, not the global rule.
    if (pattern === '*') globalOwner = owner;
    if (firstOwner === null) firstOwner = owner;
  }

  return globalOwner ?? firstOwner;
}
|
||||
|
||||
/**
 * Returns the most frequent string in `arr`.
 *
 * Ties are broken in favour of the value that appears first in `arr`.
 *
 * @param arr Values to count.
 * @returns The most frequent value, or `null` for an empty array.
 */
function mode(arr: string[]): string | null {
  // A Map, unlike a plain object, has no inherited keys, so values such as
  // "constructor" or "toString" are counted correctly, and iteration order is
  // always insertion order (plain objects reorder integer-like keys).
  const counts = new Map<string, number>();
  for (const item of arr) {
    counts.set(item, (counts.get(item) ?? 0) + 1);
  }

  let best: string | null = null;
  let bestCount = 0;
  for (const [value, count] of counts) {
    if (count > bestCount) {
      bestCount = count;
      best = value;
    }
  }
  return best;
}
|
||||
Reference in New Issue
Block a user