import pino from 'pino';
import type { DiscoveredService } from './aws-scanner.js';

const logger = pino({ name: 'discovery-github' });

/** Page size used when listing an organization's repositories. */
const REPOS_PER_PAGE = 100;

/**
 * Locations GitHub searches for a CODEOWNERS file, in precedence order.
 * The first file found is the one GitHub uses.
 */
const CODEOWNERS_PATHS = ['.github/CODEOWNERS', 'CODEOWNERS', 'docs/CODEOWNERS'] as const;

/** Look-back window for the "top committer" ownership heuristic (90 days). */
const HEURISTIC_WINDOW_MS = 90 * 24 * 60 * 60 * 1000;

/** Maximum number of recent commits sampled by the ownership heuristic. */
const HEURISTIC_COMMIT_SAMPLE = 50;

/** Outcome of one organization scan. */
export interface GitHubScanResult {
  /**
   * `success`: every listed repo was processed without error.
   * `partial_failure`: some repos were processed but at least one error
   * occurred, or the scan was cut short by a rate limit.
   * `failed`: the scan produced no repos because listing failed.
   */
  status: 'success' | 'partial_failure' | 'failed';
  /** Number of entries in `repos`. */
  discovered: number;
  /** Human-readable error messages collected during the scan. */
  errors: string[];
  repos: GitHubRepo[];
}

/** A repository together with its resolved owner. */
export interface GitHubRepo {
  name: string;
  fullName: string;
  /** Owner handle without a leading `@`, or `'unknown'` if none could be resolved. */
  owner: string;
  /** Whether `owner` came from a CODEOWNERS file or from commit history. */
  ownerSource: 'codeowners' | 'heuristic';
  /** Primary language reported for the repo, or `'unknown'`. */
  language: string;
  defaultBranch: string;
  topics: string[];
  lastPush: Date;
  codeownersContent?: string;
}

/**
 * GitHub Discovery Scanner.
 *
 * Lists an organization's repositories and resolves an owner for each one,
 * preferring CODEOWNERS and falling back to the most frequent recent
 * committer.
 *
 * A rate limit ends the scan early with status `partial_failure` and the
 * repos gathered so far. Presumably the caller keeps existing catalog
 * entries in that case (confirm against the consumer of the scan result).
 */
export class GitHubDiscoveryScanner {
  // Typed as `any` because the client's type is not imported by this file.
  // The code relies on `repos.listForOrg`, `repos.getContent` and
  // `repos.listCommits`, each resolving to an object with a `data` property.
  private octokit: any;

  constructor(octokit: any) {
    this.octokit = octokit;
  }

  /**
   * Scans every repository of `org`.
   *
   * Never rejects: all failures are reported through the returned status and
   * `errors` list.
   *
   * @param org - Organization login to scan.
   * @returns The repos discovered and any errors encountered.
   */
  async scan(org: string): Promise<GitHubScanResult> {
    const repos: GitHubRepo[] = [];
    const errors: string[] = [];

    try {
      // Page through the listing; a single page would silently truncate
      // organizations with more than REPOS_PER_PAGE repositories.
      for (let page = 1; ; page++) {
        const { data } = await this.octokit.repos.listForOrg({
          org,
          per_page: REPOS_PER_PAGE,
          sort: 'pushed',
          page,
        });

        // Repos are processed one at a time, so a rate limit stops the scan
        // at a well-defined point.
        for (const repo of data) {
          try {
            const owner = await this.resolveOwner(org, repo.name, repo.default_branch);
            repos.push({
              name: repo.name,
              fullName: repo.full_name,
              owner: owner.owner,
              ownerSource: owner.source,
              language: repo.language ?? 'unknown',
              defaultBranch: repo.default_branch,
              topics: repo.topics ?? [],
              lastPush: new Date(repo.pushed_at),
            });
          } catch (err) {
            // A rate limit would hit every later request as well, so abort
            // the whole scan; the outer handler reports it.
            if (isRateLimitError(err)) throw err;
            errors.push(`Repo ${repo.name}: ${errorMessage(err)}`);
          }
        }

        // A short page means there is nothing further to fetch.
        if (data.length < REPOS_PER_PAGE) break;
      }
    } catch (err) {
      const msg = errorMessage(err);
      const rateLimited = isRateLimitError(err);
      if (rateLimited) {
        logger.warn({ org }, 'GitHub rate limited during scan');
      }
      // Keep whatever was gathered before the failure rather than discarding it.
      if (rateLimited || repos.length > 0) {
        return {
          status: 'partial_failure',
          discovered: repos.length,
          errors: [...errors, msg],
          repos,
        };
      }
      return { status: 'failed', discovered: 0, errors: [...errors, msg], repos: [] };
    }

    return {
      status: errors.length > 0 ? 'partial_failure' : 'success',
      discovered: repos.length,
      errors,
      repos,
    };
  }

  /**
   * Resolves the owner of one repository.
   *
   * Order of preference: CODEOWNERS (explicit), then the most frequent commit
   * author within the heuristic window, then `'unknown'`.
   *
   * @throws Re-throws rate-limit errors so the scan can stop instead of
   *   recording `'unknown'` owners. All other lookup errors are treated as
   *   "no information" (best effort).
   */
  private async resolveOwner(
    org: string,
    repo: string,
    branch: string,
  ): Promise<{ owner: string; source: 'codeowners' | 'heuristic' }> {
    // Try CODEOWNERS first (explicit > heuristic).
    for (const path of CODEOWNERS_PATHS) {
      const content = await this.fetchFileContent(org, repo, path, branch);
      if (content === null) continue;

      const owner = parseCodeowners(content);
      if (owner) return { owner, source: 'codeowners' };
      // Only the first CODEOWNERS file found is authoritative; if it names no
      // usable owner, skip the remaining locations and use the heuristic.
      break;
    }

    // Heuristic: top committer among recent commits.
    try {
      const { data } = await this.octokit.repos.listCommits({
        owner: org,
        repo,
        per_page: HEURISTIC_COMMIT_SAMPLE,
        since: new Date(Date.now() - HEURISTIC_WINDOW_MS).toISOString(),
      });
      const authors = (data as Array<{ author?: { login?: string } | null }>)
        .map((commit) => commit.author?.login)
        .filter((login): login is string => Boolean(login));
      const topAuthor = mode(authors);
      if (topAuthor) return { owner: topAuthor, source: 'heuristic' };
    } catch (err) {
      if (isRateLimitError(err)) throw err;
      // Otherwise: can't determine owner from history.
    }

    return { owner: 'unknown', source: 'heuristic' };
  }

  /**
   * Fetches a file and returns its decoded text, or `null` when the lookup
   * fails or the response carries no string `content` (for example when the
   * path is a directory).
   *
   * @throws Re-throws rate-limit errors.
   */
  private async fetchFileContent(
    org: string,
    repo: string,
    path: string,
    ref: string,
  ): Promise<string | null> {
    try {
      const { data } = await this.octokit.repos.getContent({ owner: org, repo, path, ref });
      if (typeof data?.content !== 'string') return null;
      return Buffer.from(data.content, 'base64').toString();
    } catch (err) {
      if (isRateLimitError(err)) throw err;
      return null;
    }
  }
}

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(err: unknown): string {
  if (err instanceof Error) return err.message;
  if (typeof err === 'object' && err !== null && 'message' in err) {
    const message = (err as { message: unknown }).message;
    if (typeof message === 'string') return message;
  }
  return String(err);
}

/**
 * Reports whether a thrown value looks like a rate-limit response: either an
 * HTTP 429 `status` or a message mentioning "rate limit" (any letter case).
 */
function isRateLimitError(err: unknown): boolean {
  const status =
    typeof err === 'object' && err !== null ? (err as { status?: unknown }).status : undefined;
  return status === 429 || errorMessage(err).toLowerCase().includes('rate limit');
}

/**
 * Picks a single owner from CODEOWNERS text.
 *
 * Uses the global rule (pattern exactly `*`) when present, taking the last
 * such rule because later rules take precedence in CODEOWNERS. Without a
 * global rule, the first rule in the file is used. In both cases the first
 * owner listed on the rule is returned.
 *
 * @returns The owner without a leading `@`, or `null` when the chosen rule
 *   lists no owner or the file has no rules.
 */
function parseCodeowners(content: string): string | null {
  const rules = content
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line !== '' && !line.startsWith('#'))
    .map((line) => line.split(/\s+/));

  let chosen: string[] | undefined = rules[0];
  for (let i = rules.length - 1; i >= 0; i--) {
    const rule = rules[i];
    if (rule !== undefined && rule[0] === '*') {
      chosen = rule;
      break;
    }
  }
  if (chosen === undefined) return null;

  const candidate = chosen[1];
  // A token starting with '#' begins an inline comment, not an owner.
  if (candidate === undefined || candidate.startsWith('#')) return null;

  // Strip only a leading '@' so email owners (user@example.com) stay intact.
  const owner = candidate.replace(/^@/, '');
  return owner === '' ? null : owner;
}

/**
 * Returns the most frequent string in `arr`, or `null` for an empty array.
 * Ties go to the value that was seen first.
 */
function mode(arr: string[]): string | null {
  // A Map (not a plain object) so values such as "constructor" or "__proto__"
  // cannot collide with inherited properties.
  const freq = new Map<string, number>();
  for (const item of arr) {
    freq.set(item, (freq.get(item) ?? 0) + 1);
  }

  let max = 0;
  let result: string | null = null;
  for (const [key, count] of freq) {
    if (count > max) {
      max = count;
      result = key;
    }
  }
  return result;
}