From ca02fe131b0fa4eeed462868fb4d6c3d0f18e222 Mon Sep 17 00:00:00 2001
From: Jarvis Prime
Date: Mon, 9 Mar 2026 18:19:14 +0000
Subject: [PATCH] Phase 7F: Supergraph Multi-Repo Merge

---
Review notes: rejects duplicate repoIds, drops a ternary whose branches were
identical, fixes the CLI usage text and repoId:path parsing, and removes the
temp directory left behind by the tests. Hunk sizes and the diffstat reflect
the revised files; the index blob hashes are unchanged from the original
commit and no longer match the contents.

 supergraph.js           | 232 ++++++++++++++++++++++++++++++++++++++++
 test/test-supergraph.js | 115 ++++++++++++++++++++
 2 files changed, 347 insertions(+)
 create mode 100644 supergraph.js
 create mode 100644 test/test-supergraph.js

diff --git a/supergraph.js b/supergraph.js
new file mode 100644
index 0000000..78f0da9
--- /dev/null
+++ b/supergraph.js
@@ -0,0 +1,232 @@
+const fs = require('fs');
+const path = require('path');
+const GraphStore = require('./graph.js');
+const NamespaceRegistry = require('./namespace.js');
+
+/**
+ * Phase 7F: Supergraph — Multi-Repo Merge
+ * Merges multiple repo graph snapshots into a unified super-graph,
+ * resolving cross-repo references via the namespace registry.
+ */
+
+/**
+ * Merge multiple repo snapshots into a single super-graph.
+ * Prefixes all node IDs and edge source/targets with `<repoId>::` to avoid
+ * collisions. CALLS/IMPORTS edges whose target is neither a node of the
+ * source repo nor a `dep:` target are looked up in the namespace registry.
+ *
+ * @param {Array<{repoId: string, snapshotPath: string}>} repos
+ * @param {object} opts - { overridesPath }: when set, passed to
+ *   registry.loadOverrides() right after the registry is built
+ * @returns {{ graph: GraphStore, registry: NamespaceRegistry, crossRepoEdges: Array, stats: object }}
+ *   stats.resolvedCalls / stats.unresolvedCalls count IMPORTS edges as well
+ *   as CALLS edges.
+ * @throws {Error} if two entries share a repoId; their prefixed IDs would
+ *   collide and overwrite each other in the merged graph
+ */
+function buildSupergraph(repos, opts = {}) {
+  const snapshots = [];
+  const graphs = [];
+  const seenRepoIds = new Set();
+
+  // Load all snapshots
+  for (const repo of repos) {
+    if (seenRepoIds.has(repo.repoId)) {
+      throw new Error(`Duplicate repoId in supergraph input: ${repo.repoId}`);
+    }
+    seenRepoIds.add(repo.repoId);
+
+    const graph = GraphStore.loadSnapshot(repo.snapshotPath);
+    snapshots.push({ repoId: repo.repoId, snapshot: graph });
+    graphs.push({ repoId: repo.repoId, graph });
+  }
+
+  // Build namespace registry
+  const registry = NamespaceRegistry.build(snapshots);
+  if (opts.overridesPath) {
+    registry.loadOverrides(opts.overridesPath);
+  }
+
+  // Create merged graph
+  const merged = new GraphStore();
+  const crossRepoEdges = [];
+  const stats = {
+    repos: repos.length,
+    totalNodes: 0,
+    totalEdges: 0,
+    crossRepoEdges: 0,
+    resolvedCalls: 0,
+    unresolvedCalls: 0,
+  };
+
+  // 1. Merge all nodes with repo-prefixed IDs
+  for (const { repoId, graph } of graphs) {
+    for (const [id, node] of graph.nodes) {
+      const prefixedId = `${repoId}::${id}`;
+      merged.nodes.set(prefixedId, {
+        ...node,
+        _repo: repoId,
+        _originalId: id,
+      });
+      stats.totalNodes++;
+    }
+
+    // Merge file index
+    for (const [file, ids] of graph.fileIndex) {
+      const prefixedFile = `${repoId}::${file}`;
+      const prefixedIds = (Array.isArray(ids) ? ids : [...ids]).map(id => `${repoId}::${id}`);
+      merged.fileIndex.set(prefixedFile, prefixedIds);
+    }
+  }
+
+  // 2. Merge all edges, resolving cross-repo references
+  for (const { repoId, graph } of graphs) {
+    for (const edge of graph.edges) {
+      const prefixedSource = `${repoId}::${edge.source}`;
+
+      // Targets that are nodes of this repo, and `dep:` targets, stay in the
+      // repo's own namespace.
+      if (graph.nodes.has(edge.target) || edge.target.startsWith('dep:')) {
+        merged.edges.push({
+          type: edge.type,
+          source: prefixedSource,
+          target: `${repoId}::${edge.target}`,
+          _repo: repoId,
+        });
+        stats.totalEdges++;
+        continue;
+      }
+
+      // Try to resolve via namespace registry
+      if (edge.type === 'CALLS' || edge.type === 'IMPORTS') {
+        const resolution = registry.resolve(edge.target, repoId);
+        if (resolution) {
+          const resolvedTarget = `${resolution.resolvedTo.repoId}::${resolution.resolvedTo.entityId}`;
+          const crossEdge = {
+            type: edge.type,
+            source: prefixedSource,
+            target: resolvedTarget,
+            _repo: repoId,
+            _crossRepo: true,
+            _tier: resolution.tier,
+            _confidence: resolution.confidence,
+            _targetRepo: resolution.resolvedTo.repoId,
+          };
+          merged.edges.push(crossEdge);
+          crossRepoEdges.push(crossEdge);
+          stats.totalEdges++;
+          stats.crossRepoEdges++;
+          stats.resolvedCalls++;
+        } else {
+          // Unresolved — keep as-is with repo prefix
+          merged.edges.push({
+            type: edge.type,
+            source: prefixedSource,
+            target: `${repoId}::${edge.target}`,
+            _repo: repoId,
+            _unresolved: true,
+          });
+          stats.totalEdges++;
+          stats.unresolvedCalls++;
+        }
+      } else {
+        // Non-CALLS/IMPORTS edges (CONTAINS, IMPLEMENTS)
+        merged.edges.push({
+          type: edge.type,
+          source: prefixedSource,
+          target: `${repoId}::${edge.target}`,
+          _repo: repoId,
+        });
+        stats.totalEdges++;
+      }
+    }
+  }
+
+  return { graph: merged, registry, crossRepoEdges, stats };
+}
+
+/**
+ * Save the supergraph snapshot to disk.
+ * Creates outputDir if needed and writes supergraph.json, registry.json,
+ * cross-repo-edges.json and stats.json into it.
+ *
+ * @param {object} result - value returned by buildSupergraph()
+ * @param {string} outputDir - directory to write into
+ * @returns {{ graphPath: string, regPath: string, xrepoPath: string, statsPath: string }}
+ */
+function saveSupergraph(result, outputDir) {
+  fs.mkdirSync(outputDir, { recursive: true });
+
+  // Save merged graph
+  const graphPath = path.join(outputDir, 'supergraph.json');
+  const graphData = {
+    nodes: Object.fromEntries(result.graph.nodes),
+    edges: result.graph.edges,
+    fileIndex: Object.fromEntries(result.graph.fileIndex),
+  };
+  fs.writeFileSync(graphPath, JSON.stringify(graphData, null, 2));
+
+  // Save registry
+  const regPath = path.join(outputDir, 'registry.json');
+  fs.writeFileSync(regPath, JSON.stringify(result.registry.toJSON(), null, 2));
+
+  // Save cross-repo edges summary
+  const xrepoPath = path.join(outputDir, 'cross-repo-edges.json');
+  fs.writeFileSync(xrepoPath, JSON.stringify(result.crossRepoEdges, null, 2));
+
+  // Save stats
+  const statsPath = path.join(outputDir, 'stats.json');
+  fs.writeFileSync(statsPath, JSON.stringify(result.stats, null, 2));
+
+  return { graphPath, regPath, xrepoPath, statsPath };
+}
+
+/**
+ * Parse one `repoId:snapshotPath` command-line argument.
+ * Splits on the first colon only, so a snapshot path that itself contains
+ * colons (e.g. a Windows drive letter) is kept intact.
+ *
+ * @param {string} arg
+ * @returns {{repoId: string, snapshotPath: string}}
+ * @throws {Error} if the repoId or the path is missing
+ */
+function parseRepoArg(arg) {
+  const sep = arg.indexOf(':');
+  if (sep <= 0 || sep === arg.length - 1) {
+    throw new Error(`Invalid repo argument "${arg}" (expected repoId:snapshotPath)`);
+  }
+  return { repoId: arg.slice(0, sep), snapshotPath: arg.slice(sep + 1) };
+}
+
+if (require.main === module) {
+  const usage = 'Usage: node supergraph.js <outputDir> <repoId1:snapshot1.json> [repoId2:snapshot2.json ...]';
+  const args = process.argv.slice(2);
+  if (args.length < 2) {
+    console.error(usage);
+    process.exit(1);
+  }
+
+  const outputDir = args[0];
+  let repos;
+  try {
+    repos = args.slice(1).map(arg => parseRepoArg(arg));
+  } catch (err) {
+    console.error(err.message);
+    console.error(usage);
+    process.exit(1);
+  }
+
+  console.log(`Merging ${repos.length} repos...`);
+  const result = buildSupergraph(repos);
+  saveSupergraph(result, outputDir);
+
+  console.log(`Supergraph built:`);
+  console.log(` Repos: ${result.stats.repos}`);
+  console.log(` Nodes: ${result.stats.totalNodes}`);
+  console.log(` Edges: ${result.stats.totalEdges}`);
+  console.log(` Cross-repo edges: ${result.stats.crossRepoEdges}`);
+  console.log(` Resolved: ${result.stats.resolvedCalls}, Unresolved: ${result.stats.unresolvedCalls}`);
+  console.log(` Saved to: ${outputDir}`);
+}
+
+module.exports = { buildSupergraph, saveSupergraph };
diff --git a/test/test-supergraph.js b/test/test-supergraph.js
new file mode 100644
index 0000000..c8f68c0
--- /dev/null
+++ b/test/test-supergraph.js
@@ -0,0 +1,115 @@
+const fs = require('fs');
+const path = require('path');
+const GraphStore = require('../graph.js');
+const { buildSupergraph, saveSupergraph } = require('../supergraph.js');
+
+const FIXTURE_DIR = path.join(__dirname, 'fixtures/system-docs');
+const SNAPSHOT = path.join(FIXTURE_DIR, 'snapshot.json');
+
+let passed = 0;
+let failed = 0;
+
+// Minimal test helper: records and prints a pass/fail line for each check
+// instead of throwing, so every check in the file runs.
+function assert(condition, name) {
+  if (condition) { passed++; console.log(` ✓ ${name}`); }
+  else { failed++; console.log(` ✗ ${name}`); }
+}
+
+console.log('=== 7F: Supergraph (Multi-Repo Merge) Tests ===\n');
+
+// Test 1: Merge two copies of the fixture as separate "repos"
+console.log('Test 1: Basic merge');
+const result = buildSupergraph([
+  { repoId: 'repo-a', snapshotPath: SNAPSHOT },
+  { repoId: 'repo-b', snapshotPath: SNAPSHOT },
+]);
+assert(result.stats.repos === 2, 'Merged 2 repos');
+assert(result.stats.totalNodes > 100, `Total nodes > 100 (${result.stats.totalNodes})`);
+// Each repo has same nodes, so merged should be exactly 2x
+const singleGraph = GraphStore.loadSnapshot(SNAPSHOT);
+assert(result.stats.totalNodes === singleGraph.nodes.size * 2, 'Node count is 2x single repo');
+
+// Test 2: Node ID prefixing
+console.log('\nTest 2: Node ID prefixing');
+const hasRepoANode = [...result.graph.nodes.keys()].some(k => k.startsWith('repo-a::'));
+const hasRepoBNode = [...result.graph.nodes.keys()].some(k => k.startsWith('repo-b::'));
+assert(hasRepoANode, 'Has repo-a prefixed nodes');
+assert(hasRepoBNode, 'Has repo-b prefixed nodes');
+assert(!result.graph.nodes.has('gateway/server.ts:handleRequest'), 'No unprefixed nodes');
+
+// Test 3: Edge prefixing
+console.log('\nTest 3: Edge prefixing');
+const prefixedEdges = result.graph.edges.filter(e => e.source.includes('::'));
+assert(prefixedEdges.length === result.graph.edges.length, 'All edges have prefixed sources');
+assert(result.graph.edges.every(e => e.target.includes('::')), 'All edges have prefixed targets');
+
+// Test 4: Cross-repo resolution (same snapshot = same names, but different repoIds)
+console.log('\nTest 4: Cross-repo edge stats');
+assert(result.stats.totalEdges > 0, `Has edges (${result.stats.totalEdges})`);
+// With identical repos, cross-repo resolution happens when bare-name CALLS targets
+// match entities in the other repo
+console.log(` Cross-repo edges: ${result.stats.crossRepoEdges}`);
+console.log(` Resolved: ${result.stats.resolvedCalls}, Unresolved: ${result.stats.unresolvedCalls}`);
+
+// Test 5: Registry built correctly
+console.log('\nTest 5: Namespace registry');
+const regJson = result.registry.toJSON();
+assert(Object.keys(regJson.byShortName).length > 10, `Registry has >10 names (${Object.keys(regJson.byShortName).length})`);
+assert(Object.keys(regJson.byEntityId).length > 50, `Registry has >50 entity IDs (${Object.keys(regJson.byEntityId).length})`);
+
+// Test 6: Save to disk
+console.log('\nTest 6: Save supergraph');
+const outDir = path.join(__dirname, 'tmp-supergraph');
+if (fs.existsSync(outDir)) fs.rmSync(outDir, { recursive: true, force: true });
+const paths = saveSupergraph(result, outDir);
+assert(fs.existsSync(paths.graphPath), 'supergraph.json saved');
+assert(fs.existsSync(paths.regPath), 'registry.json saved');
+assert(fs.existsSync(paths.xrepoPath), 'cross-repo-edges.json saved');
+assert(fs.existsSync(paths.statsPath), 'stats.json saved');
+
+const savedStats = JSON.parse(fs.readFileSync(paths.statsPath, 'utf8'));
+assert(savedStats.repos === 2, 'Saved stats match');
+// Remove the temporary output so test runs leave no files behind
+fs.rmSync(outDir, { recursive: true, force: true });
+
+// Test 7: Single repo (no cross-repo edges expected)
+console.log('\nTest 7: Single repo merge');
+const single = buildSupergraph([
+  { repoId: 'solo', snapshotPath: SNAPSHOT },
+]);
+assert(single.stats.repos === 1, 'Single repo');
+assert(single.stats.crossRepoEdges === 0, 'No cross-repo edges for single repo');
+assert(single.stats.totalNodes === singleGraph.nodes.size, 'Node count matches original');
+
+// Test 8: OpenClaw full snapshot (if available)
+console.log('\nTest 8: OpenClaw scale');
+const fullSnap = path.join(__dirname, '..', 'snapshots', 'openclaw-full.json');
+if (fs.existsSync(fullSnap)) {
+  const start = Date.now();
+  const fullResult = buildSupergraph([
+    { repoId: 'openclaw', snapshotPath: fullSnap },
+  ]);
+  const elapsed = Date.now() - start;
+  console.log(` OpenClaw: ${fullResult.stats.totalNodes} nodes, ${fullResult.stats.totalEdges} edges in ${elapsed}ms`);
+  assert(fullResult.stats.totalNodes > 20000, `>20k nodes (${fullResult.stats.totalNodes})`);
+  assert(elapsed < 10000, `Completed in <10s (${elapsed}ms)`);
+} else {
+  console.log(' (skipped — openclaw-full.json not found)');
+}
+
+// Test 9: Duplicate repoIds are rejected
+console.log('\nTest 9: Duplicate repoId');
+let duplicateRejected = false;
+try {
+  buildSupergraph([
+    { repoId: 'dup', snapshotPath: SNAPSHOT },
+    { repoId: 'dup', snapshotPath: SNAPSHOT },
+  ]);
+} catch (err) {
+  duplicateRejected = /Duplicate repoId/.test(err.message);
+}
+assert(duplicateRejected, 'Duplicate repoId is rejected');
+
+console.log(`\n=== Results: ${passed} passed, ${failed} failed ===`);
+process.exit(failed > 0 ? 1 : 0);