From af2e54b5f374969b17054e238b3ab0132128beab Mon Sep 17 00:00:00 2001 From: Jarvis Prime Date: Wed, 4 Mar 2026 04:25:14 +0000 Subject: [PATCH] Initial POC: Developer Intelligence knowledge graph - SQLite backend with file/repo/relationship entities - tree-sitter Go AST parser for deterministic import detection - Ollama doc generation with concurrent batch processing - MCP server (FastMCP) for Claude Code integration - Merge simulation with staleness cascade - Lazy refresh for stale relationship and repo docs - CLAUDE.md for agent context --- .mcp.json | 9 ++ CLAUDE.md | 38 ++++++++ README.md | 132 ++++++++++++++++++++++++++ db.py | 229 ++++++++++++++++++++++++++++++++++++++++++++++ ingest.py | 149 ++++++++++++++++++++++++++++++ mcp_server.py | 154 +++++++++++++++++++++++++++++++ refresh_stale.py | 75 +++++++++++++++ requirements.txt | 4 + simulate_merge.py | 99 ++++++++++++++++++++ 9 files changed, 889 insertions(+) create mode 100644 .mcp.json create mode 100644 CLAUDE.md create mode 100644 README.md create mode 100644 db.py create mode 100644 ingest.py create mode 100644 mcp_server.py create mode 100644 refresh_stale.py create mode 100644 requirements.txt create mode 100644 simulate_merge.py diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 0000000..270681b --- /dev/null +++ b/.mcp.json @@ -0,0 +1,9 @@ +{ + "mcpServers": { + "dev-intel": { + "command": "python", + "args": ["mcp_server.py"], + "cwd": "." + } + } +} diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..ef3f273 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,38 @@ +# Developer Intelligence Knowledge Graph + +This project contains a knowledge graph of LLM-generated documentation for a Go codebase. Instead of reading raw source files, query the knowledge graph via MCP tools. + +## How to use + +**Always prefer the knowledge graph over reading raw files.** The graph contains pre-generated English documentation for every file and relationship in the codebase. 
+ +### Query patterns + +- "What does X do?" → `get_file_doc("path/to/file.go")` +- "How do X and Y interact?" → `get_relationship("file_a.go", "file_b.go")` +- "What's this project about?" → `get_repo_overview()` +- "What depends on X?" → `get_dependents("path/to/file.go")` +- "What does X depend on?" → `get_dependencies("path/to/file.go")` +- "Find anything about routing" → `search_docs("routing")` +- "What's outdated?" → `get_stale_docs()` +- "How big is the graph?" → `get_graph_stats()` + +### Schema + +The knowledge graph has three entity types: + +- **File** — one per source file. Has `documentation` (English description of what the file does), `staleness` (fresh or stale), `prev_documentation` (previous version after a merge). +- **Repo** — one per repository. Has `documentation` (project-level summary composed from file docs). +- **Relationship** — import edges between files. Has `documentation` (how the two files interact), `staleness`. + +### Staleness + +When a file changes, its documentation is regenerated immediately. All downstream relationships and the repo summary are marked **stale** — meaning the code has changed but the doc hasn't been regenerated yet. Stale docs are still returned but flagged with `[STALE]`. + +### Tips + +- Start broad (`get_repo_overview`) then drill into specifics (`get_file_doc`, `get_dependents`). +- Use `search_docs` for concept-level queries ("middleware", "authentication", "error handling"). +- If a doc says `[STALE]`, the underlying code changed since it was generated. Mention this to the user. +- `get_dependents` is the impact analysis tool — "what breaks if I change this file?" +- File paths are relative to the repo root (e.g., `echo.go`, `middleware/compress.go`). diff --git a/README.md b/README.md new file mode 100644 index 0000000..cba7f60 --- /dev/null +++ b/README.md @@ -0,0 +1,132 @@ +# Developer Intelligence POC + +A local proof-of-concept that builds a living knowledge graph from a Go codebase. 
Every file gets LLM-generated documentation. Every relationship gets documented. Everything stays current on every merge. Query it from Claude Code via MCP. + +## Quick Start + +```bash +cd dev-intel-poc +pip install -r requirements.txt +python ingest.py # clone echo, parse, generate docs (~15-20 min) +claude --mcp-config .mcp.json # start Claude Code with the knowledge graph +``` + +Then ask Claude Code: +- "How does routing work in echo?" +- "What files depend on context.go?" +- "Give me an overview of this project" + +## Prerequisites + +- Python 3.11+ +- Ollama running at `192.168.86.172:11434` with `qwen2.5:7b` +- Claude Code CLI (`claude`) +- git + +## Demo Walkthrough + +### Act 1: "Here's what your codebase knows about itself" + +After `python ingest.py` completes, start Claude Code: + +```bash +claude --mcp-config .mcp.json +``` + +Ask it: +``` +> What does echo.go do? +> How does echo.go interact with router.go? +> Give me an overview of the whole project +> What files depend on context.go? +> Search for anything related to "middleware" +``` + +Every answer comes from LLM-generated documentation stored in the knowledge graph — not from reading raw source code. + +### Act 2: "Now someone pushes a change" + +In another terminal: +```bash +python simulate_merge.py echo.go +``` + +This: +1. Regenerates echo.go's documentation (reflects the new code) +2. Marks all relationships involving echo.go as STALE +3. Marks the repo summary as STALE + +Back in Claude Code: +``` +> What does echo.go do? # fresh doc — mentions the new tracing feature +> What's the repo overview? # shows [STALE] — knows it's outdated +> Show me all stale docs # lists everything that needs refresh +``` + +### Act 3: "The system heals itself" + +```bash +python refresh_stale.py +``` + +Back in Claude Code: +``` +> What's the repo overview? # fresh again — rewritten to include new capabilities +> Show me all stale docs # "Everything is fresh!" 
+``` + +## Architecture + +``` +ingest.py ──→ repos/target/ (git clone) + │ │ + │ parser.py (tree-sitter AST) + │ │ + │ docgen.py (Ollama qwen2.5:7b) + │ │ + └──────→ devintel.db (SQLite) + │ + mcp_server.py ──→ Claude Code +``` + +No Docker. No external databases. One SQLite file. One MCP server. + +## MCP Tools + +| Tool | What it does | +|---|---| +| `get_file_doc(path)` | Read a file's generated documentation | +| `get_relationship(a, b)` | How two files interact | +| `get_repo_overview()` | Project-level summary | +| `get_dependents(path)` | What breaks if you change this file | +| `get_dependencies(path)` | What this file depends on | +| `search_docs(query)` | Keyword search across all docs | +| `get_stale_docs()` | List outdated documentation | +| `get_graph_stats()` | File count, relationship count, staleness | + +## Project Structure + +``` +dev-intel-poc/ +├── requirements.txt # Python deps +├── .mcp.json # Claude Code MCP config +├── ingest.py # Main ingestion pipeline +├── simulate_merge.py # Simulate a code change +├── refresh_stale.py # Refresh stale docs +├── db.py # SQLite backend +├── parser.py # tree-sitter Go AST parser +├── docgen.py # Ollama doc generation +├── mcp_server.py # MCP server for Claude Code +└── devintel.db # Generated — the knowledge graph +``` + +## Configuration + +| Env Variable | Default | Description | +|---|---|---| +| `OLLAMA_URL` | `http://192.168.86.172:11434` | Ollama endpoint | +| `OLLAMA_MODEL` | `qwen2.5:7b` | Model for doc generation | +| `TARGET_REPO` | `https://github.com/labstack/echo.git` | Repo to ingest | +| `MAX_CONCURRENT` | `4` | Parallel Ollama requests | +| `DEVINTEL_DB` | `./devintel.db` | SQLite database path | +| `REPO_DIR` | `./repos/target` | Cloned repo location | diff --git a/db.py b/db.py new file mode 100644 index 0000000..190dbfe --- /dev/null +++ b/db.py @@ -0,0 +1,229 @@ +"""SQLite backend for Developer Intelligence POC.""" + +import sqlite3 +import os +from datetime import datetime + +DB_PATH 
= os.environ.get("DEVINTEL_DB", os.path.join(os.path.dirname(__file__), "devintel.db")) + + +def get_db() -> sqlite3.Connection: + conn = sqlite3.connect(DB_PATH) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA journal_mode=WAL") + conn.execute("PRAGMA foreign_keys=ON") + return conn + + +def init_db(): + """Create tables if they don't exist.""" + conn = get_db() + conn.executescript(""" + CREATE TABLE IF NOT EXISTS repos ( + name TEXT PRIMARY KEY, + url TEXT, + language TEXT, + documentation TEXT, + staleness TEXT DEFAULT 'fresh', + updated_at TEXT + ); + + CREATE TABLE IF NOT EXISTS files ( + path TEXT PRIMARY KEY, + repo TEXT REFERENCES repos(name), + language TEXT, + documentation TEXT, + prev_documentation TEXT, + functions TEXT, -- JSON array + last_commit TEXT, + staleness TEXT DEFAULT 'fresh', + updated_at TEXT + ); + + CREATE TABLE IF NOT EXISTS relationships ( + from_file TEXT REFERENCES files(path), + to_file TEXT REFERENCES files(path), + rel_type TEXT DEFAULT 'IMPORTS', + documentation TEXT, + staleness TEXT DEFAULT 'fresh', + updated_at TEXT, + PRIMARY KEY (from_file, to_file, rel_type) + ); + + CREATE INDEX IF NOT EXISTS idx_files_repo ON files(repo); + CREATE INDEX IF NOT EXISTS idx_files_staleness ON files(staleness); + CREATE INDEX IF NOT EXISTS idx_rels_staleness ON relationships(staleness); + CREATE INDEX IF NOT EXISTS idx_rels_to ON relationships(to_file); + """) + conn.commit() + conn.close() + + +class GraphDB: + def __init__(self): + init_db() + self.conn = get_db() + + def create_repo(self, name: str, url: str, language: str, documentation: str): + self.conn.execute( + """INSERT INTO repos (name, url, language, documentation, staleness, updated_at) + VALUES (?, ?, ?, ?, 'fresh', ?) 
+ ON CONFLICT(name) DO UPDATE SET + url=excluded.url, language=excluded.language, + documentation=excluded.documentation, staleness='fresh', + updated_at=excluded.updated_at""", + (name, url, language, documentation, datetime.utcnow().isoformat()), + ) + self.conn.commit() + + def create_file(self, path: str, repo: str, language: str, documentation: str, + functions: list[str], commit: str = "initial"): + import json + self.conn.execute( + """INSERT INTO files (path, repo, language, documentation, functions, last_commit, staleness, updated_at) + VALUES (?, ?, ?, ?, ?, ?, 'fresh', ?) + ON CONFLICT(path) DO UPDATE SET + repo=excluded.repo, language=excluded.language, + documentation=excluded.documentation, functions=excluded.functions, + last_commit=excluded.last_commit, staleness='fresh', + updated_at=excluded.updated_at""", + (path, repo, language, documentation, json.dumps(functions), commit, + datetime.utcnow().isoformat()), + ) + self.conn.commit() + + def create_relationship(self, from_file: str, to_file: str, rel_type: str = "IMPORTS", + documentation: str = ""): + self.conn.execute( + """INSERT INTO relationships (from_file, to_file, rel_type, documentation, staleness, updated_at) + VALUES (?, ?, ?, ?, 'fresh', ?) + ON CONFLICT(from_file, to_file, rel_type) DO UPDATE SET + documentation=excluded.documentation, staleness=excluded.staleness, + updated_at=excluded.updated_at""", + (from_file, to_file, rel_type, documentation, datetime.utcnow().isoformat()), + ) + self.conn.commit() + + def mark_relationships_stale(self, file_path: str): + now = datetime.utcnow().isoformat() + self.conn.execute( + "UPDATE relationships SET staleness='stale', updated_at=? WHERE from_file=? OR to_file=?", + (now, file_path, file_path), + ) + self.conn.commit() + + def mark_repo_stale(self, repo: str): + self.conn.execute( + "UPDATE repos SET staleness='stale', updated_at=? 
WHERE name=?", + (datetime.utcnow().isoformat(), repo), + ) + self.conn.commit() + + def update_file_doc(self, path: str, documentation: str, commit: str): + self.conn.execute( + """UPDATE files SET prev_documentation=documentation, + documentation=?, staleness='fresh', last_commit=?, updated_at=? + WHERE path=?""", + (documentation, commit, datetime.utcnow().isoformat(), path), + ) + self.conn.commit() + + def get_file(self, path: str) -> dict | None: + row = self.conn.execute("SELECT * FROM files WHERE path=?", (path,)).fetchone() + return dict(row) if row else None + + def get_repo(self, name: str = None) -> dict | None: + if name: + row = self.conn.execute("SELECT * FROM repos WHERE name=?", (name,)).fetchone() + else: + row = self.conn.execute("SELECT * FROM repos LIMIT 1").fetchone() + return dict(row) if row else None + + def get_dependents(self, path: str) -> list[dict]: + rows = self.conn.execute( + """SELECT r.from_file, r.documentation AS rel_doc, r.staleness AS rel_staleness, + f.documentation AS file_doc + FROM relationships r + JOIN files f ON f.path = r.from_file + WHERE r.to_file = ? + ORDER BY r.from_file""", + (path,), + ).fetchall() + return [dict(r) for r in rows] + + def get_dependencies(self, path: str) -> list[dict]: + rows = self.conn.execute( + """SELECT r.to_file, r.documentation AS rel_doc, r.staleness AS rel_staleness, + f.documentation AS file_doc + FROM relationships r + JOIN files f ON f.path = r.to_file + WHERE r.from_file = ? + ORDER BY r.to_file""", + (path,), + ).fetchall() + return [dict(r) for r in rows] + + def get_relationship(self, from_file: str, to_file: str) -> dict | None: + row = self.conn.execute( + "SELECT * FROM relationships WHERE from_file=? 
AND to_file=?", + (from_file, to_file), + ).fetchone() + return dict(row) if row else None + + def search_docs(self, query: str, limit: int = 10) -> list[dict]: + rows = self.conn.execute( + """SELECT path, documentation, staleness FROM files + WHERE LOWER(documentation) LIKE LOWER(?) + ORDER BY path LIMIT ?""", + (f"%{query}%", limit), + ).fetchall() + return [dict(r) for r in rows] + + def get_stale_relationships(self) -> list[dict]: + rows = self.conn.execute( + """SELECT r.from_file, r.to_file, + f1.documentation AS from_doc, f2.documentation AS to_doc + FROM relationships r + JOIN files f1 ON f1.path = r.from_file + JOIN files f2 ON f2.path = r.to_file + WHERE r.staleness = 'stale'""" + ).fetchall() + return [dict(r) for r in rows] + + def get_stale_repos(self) -> list[dict]: + rows = self.conn.execute( + "SELECT name, url FROM repos WHERE staleness='stale'" + ).fetchall() + return [dict(r) for r in rows] + + def get_repo_files_docs(self, repo: str) -> list[tuple[str, str]]: + rows = self.conn.execute( + "SELECT path, documentation FROM files WHERE repo=? ORDER BY path", + (repo,), + ).fetchall() + return [(r["path"], r["documentation"]) for r in rows] + + def update_relationship_doc(self, from_file: str, to_file: str, documentation: str): + self.conn.execute( + """UPDATE relationships SET documentation=?, staleness='fresh', updated_at=? + WHERE from_file=? AND to_file=?""", + (documentation, datetime.utcnow().isoformat(), from_file, to_file), + ) + self.conn.commit() + + def update_repo_doc(self, name: str, documentation: str): + self.conn.execute( + "UPDATE repos SET documentation=?, staleness='fresh', updated_at=? 
WHERE name=?", + (documentation, datetime.utcnow().isoformat(), name), + ) + self.conn.commit() + + def get_stats(self) -> dict: + files = self.conn.execute("SELECT count(*) AS c FROM files").fetchone()["c"] + rels = self.conn.execute("SELECT count(*) AS c FROM relationships").fetchone()["c"] + stale_f = self.conn.execute("SELECT count(*) AS c FROM files WHERE staleness='stale'").fetchone()["c"] + stale_r = self.conn.execute("SELECT count(*) AS c FROM relationships WHERE staleness='stale'").fetchone()["c"] + return {"files": files, "relationships": rels, "stale_files": stale_f, "stale_relationships": stale_r} + + def close(self): + self.conn.close() diff --git a/ingest.py b/ingest.py new file mode 100644 index 0000000..d194af9 --- /dev/null +++ b/ingest.py @@ -0,0 +1,149 @@ +"""Main ingestion script. Clone repo, parse, generate docs, load SQLite.""" + +import os +import subprocess +import time +import json +from pathlib import Path + +from parser import parse_go_file, filter_imports, get_repo_module, resolve_import_to_file +from docgen import generate_file_doc, generate_repo_doc, generate_docs_batch +from db import GraphDB + +TARGET_REPO = os.environ.get("TARGET_REPO", "https://github.com/labstack/echo.git") +REPO_DIR = os.environ.get("REPO_DIR", os.path.join(os.path.dirname(__file__), "repos", "target")) + + +def clone_repo(): + if Path(REPO_DIR).exists() and (Path(REPO_DIR) / ".git").exists(): + print(f"Repo already cloned at {REPO_DIR}, pulling latest...") + subprocess.run(["git", "-C", REPO_DIR, "pull"], check=True) + else: + print(f"Cloning {TARGET_REPO}...") + Path(REPO_DIR).parent.mkdir(parents=True, exist_ok=True) + subprocess.run(["git", "clone", "--depth=1", TARGET_REPO, REPO_DIR], check=True) + + result = subprocess.run( + ["git", "-C", REPO_DIR, "rev-parse", "HEAD"], + capture_output=True, text=True, check=True, + ) + return result.stdout.strip() + + +def discover_go_files() -> dict[str, str]: + files = {} + repo = Path(REPO_DIR) + for gofile in 
repo.rglob("*.go"): + rel = str(gofile.relative_to(repo)) + # Compare whole path components and the real suffix so that names which merely + # contain "vendor/" or "_test.go" (e.g. "myvendor/x.go") are not dropped. + if "vendor" in Path(rel).parts or rel.endswith("_test.go"): + continue + try: + files[rel] = gofile.read_text(errors="replace") + except Exception as e: + print(f" Skipping {rel}: {e}") + return files + + +def run(): + """Run the six ingestion steps: clone, discover, parse, document files, load SQLite, document repo.""" + start = time.time() + print("=" * 60) + print("Developer Intelligence POC — Ingestion") + print("=" * 60) + + # Step 1: Clone + print("\n[1/6] Cloning repository...") + commit = clone_repo() + repo_name = TARGET_REPO.rstrip("/").split("/")[-1].replace(".git", "") + print(f" Repo: {repo_name}, commit: {commit[:8]}") + + # Step 2: Discover files + print("\n[2/6] Discovering Go files...") + go_files = discover_go_files() + print(f" Found {len(go_files)} Go files (excluding tests and vendor)") + + # Step 3: Parse AST + print("\n[3/6] Parsing AST (tree-sitter)...") + repo_module = get_repo_module(REPO_DIR) + print(f" Module: {repo_module}") + + parsed = {} + all_imports = {} + for rel_path, content in go_files.items(): + info = parse_go_file(rel_path, content, repo_module) + parsed[rel_path] = info + filtered = filter_imports(info.imports, repo_module) + all_imports[rel_path] = filtered + + total_imports = sum(len(v) for v in all_imports.values()) + first_party = sum(1 for v in all_imports.values() for i in v if i.startswith(repo_module)) + print(f" Parsed {len(parsed)} files, {total_imports} filtered imports ({first_party} first-party)") + + # Step 4: Generate file docs + print("\n[4/6] Generating file documentation (Ollama)...") + file_items = [(path, content) for path, content in go_files.items()] + file_docs = generate_docs_batch(file_items, generate_file_doc) + file_doc_map = {file_items[i][0]: file_docs[i] for i in range(len(file_items))} + + # Step 5: Load into SQLite + print("\n[5/6] Loading into database...") + db = GraphDB() + + # files.repo references repos(name) and get_db() turns foreign_keys ON, so the + # repo row has to exist before the first file insert or SQLite rejects it. + # Only create a placeholder when the repo is missing, so a re-ingest keeps the + # existing summary until step 6 upserts the new one. + if db.get_repo(repo_name) is None: + db.create_repo(name=repo_name, url=TARGET_REPO, language="go", documentation="") + + for rel_path, content in go_files.items(): + info = parsed[rel_path] + doc = file_doc_map.get(rel_path, "") + db.create_file( + path=rel_path, + repo=repo_name, + language="go", 
+ documentation=doc, + functions=info.functions, + commit=commit[:8], + ) + + edges_created = 0 + for from_file, imports in all_imports.items(): + for imp in imports: + if not imp.startswith(repo_module): + continue + target_dir = resolve_import_to_file(imp, repo_module, go_files) + if target_dir: + # Find actual files in that directory + for fpath in go_files: + fdir = str(Path(fpath).parent) + if fdir == target_dir or fdir.endswith("/" + target_dir): + db.create_relationship(from_file, fpath) + edges_created += 1 + + print(f" Created {len(go_files)} file nodes, {edges_created} import edges") + + # Step 6: Generate repo-level doc + print("\n[6/6] Generating repo-level documentation...") + readme_path = Path(REPO_DIR) / "README.md" + readme = readme_path.read_text(errors="replace") if readme_path.exists() else "" + + entry_candidates = ["echo.go", "router.go", "context.go", "group.go", "middleware.go"] + entry_files = [] + for candidate in entry_candidates: + for path, content in go_files.items(): + if path.endswith(candidate): + entry_files.append((path, content)) + break + + repo_doc = generate_repo_doc(readme, entry_files) + db.create_repo(name=repo_name, url=TARGET_REPO, language="go", documentation=repo_doc) + + stats = db.get_stats() + elapsed = time.time() - start + print("\n" + "=" * 60) + print("Ingestion complete!") + print(f" Files: {stats['files']}") + print(f" Relationships: {stats['relationships']}") + print(f" Time: {elapsed:.1f}s ({elapsed/60:.1f}m)") + print(f" Database: {os.path.abspath(db.conn.execute('PRAGMA database_list').fetchone()[2])}") + print("=" * 60) + + db.close() + + +if __name__ == "__main__": + run() diff --git a/mcp_server.py b/mcp_server.py new file mode 100644 index 0000000..221fe80 --- /dev/null +++ b/mcp_server.py @@ -0,0 +1,154 @@ +"""MCP server for Developer Intelligence POC. 
Queries SQLite, serves to Claude Code.""" + +import os +import sys +import json + +sys.path.insert(0, os.path.dirname(__file__)) + +from db import GraphDB +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("Developer Intelligence") + + +def _get_db(): + """Open a new GraphDB; every tool below opens one per call and closes it itself.""" + return GraphDB() + + +@mcp.tool() +def get_file_doc(path: str) -> str: + """Get the generated documentation for a source file. Pass the relative file path (e.g. 'echo.go' or 'middleware/compress.go').""" + db = _get_db() + f = db.get_file(path) + db.close() + if not f: + return f"File not found: {path}" + staleness = " [STALE]" if f["staleness"] == "stale" else "" + prev = "" + if f.get("prev_documentation"): + prev = f"\n\n--- Previous version ---\n{f['prev_documentation']}" + return f"{f['documentation']}{staleness}\n\n(commit: {f['last_commit']}, updated: {f['updated_at']}){prev}" + + +@mcp.tool() +def get_relationship(file_a: str, file_b: str) -> str: + """Get the documentation for the import relationship between two files. The two files may be given in either order.""" + db = _get_db() + # Edges are stored directionally (from_file -> to_file). A caller asking how two + # files interact does not necessarily know which one imports the other, so fall + # back to the reverse direction before reporting "no relationship". + rel = db.get_relationship(file_a, file_b) or db.get_relationship(file_b, file_a) + db.close() + if not rel: + return f"No relationship found between {file_a} and {file_b}" + doc = rel["documentation"] or "(no relationship documentation generated yet)" + staleness = " [STALE]" if rel["staleness"] == "stale" else "" + return f"{doc}{staleness}" + + +@mcp.tool() +def get_repo_overview() -> str: + """Get the repo-level documentation summary — a high-level overview of the entire project.""" + db = _get_db() + repo = db.get_repo() + db.close() + if not repo: + return "No repo found" + staleness = " [STALE]" if repo["staleness"] == "stale" else "" + return f"# {repo['name']}{staleness}\n\n{repo['documentation']}" + + +@mcp.tool() +def get_dependents(path: str) -> str: + """Get all files that import/depend on the given file. 
Shows what breaks if you change this file.""" + db = _get_db() + deps = db.get_dependents(path) + db.close() + if not deps: + return f"No files depend on {path}" + lines = [f"Files that depend on {path} ({len(deps)} total):\n"] + for d in deps: + staleness = " [STALE]" if d["rel_staleness"] == "stale" else "" + doc = d["rel_doc"] or "(no relationship doc)" + # files.documentation is a nullable column, so slicing it directly can raise + # TypeError. Only append an ellipsis when the text was actually truncated, + # the same rule search_docs applies to its previews. + file_doc = d["file_doc"] or "(no file documentation)" + preview = file_doc[:150] + "..." if len(file_doc) > 150 else file_doc + lines.append(f" {d['from_file']}{staleness}") + lines.append(f" Relationship: {doc}") + lines.append(f" File: {preview}") + return "\n".join(lines) + + +@mcp.tool() +def get_dependencies(path: str) -> str: + """Get all files that the given file imports/depends on.""" + db = _get_db() + deps = db.get_dependencies(path) + db.close() + if not deps: + return f"{path} has no tracked dependencies" + lines = [f"Dependencies of {path} ({len(deps)} total):\n"] + for d in deps: + staleness = " [STALE]" if d["rel_staleness"] == "stale" else "" + doc = d["rel_doc"] or "(no relationship doc)" + lines.append(f" {d['to_file']}{staleness}") + lines.append(f" Relationship: {doc}") + return "\n".join(lines) + + +@mcp.tool() +def search_docs(query: str) -> str: + """Search across all file documentation by keyword. Use to find files related to a concept (e.g. 'routing', 'middleware', 'authentication').""" + db = _get_db() + results = db.search_docs(query) + db.close() + if not results: + return f"No files found matching '{query}'" + lines = [f"Files matching '{query}' ({len(results)} results):\n"] + for r in results: + staleness = " [STALE]" if r["staleness"] == "stale" else "" + doc = r["documentation"][:200] + "..." 
if len(r["documentation"]) > 200 else r["documentation"] + lines.append(f" {r['path']}{staleness}: {doc}") + return "\n".join(lines) + + +@mcp.tool() +def get_stale_docs() -> str: + """List all entities and relationships with stale (outdated) documentation.""" + db = _get_db() + stale_rels = db.get_stale_relationships() + stale_repos = db.get_stale_repos() + stats = db.get_stats() + db.close() + + lines = ["Stale documentation:\n"] + if stale_repos: + lines.append(f" Repos ({len(stale_repos)}):") + for r in stale_repos: + lines.append(f" {r['name']}") + lines.append(f" Files: {stats['stale_files']} stale") + if stale_rels: + lines.append(f" Relationships ({len(stale_rels)}):") + for r in stale_rels[:20]: # Cap output + lines.append(f" {r['from_file']} -> {r['to_file']}") + if len(stale_rels) > 20: + lines.append(f" ... and {len(stale_rels) - 20} more") + # A stale repo summary also counts: without this check the report could list + # stale repos above and still end with "Everything is fresh!". + if not stale_repos and stats["stale_files"] == 0 and stats["stale_relationships"] == 0: + lines.append(" Everything is fresh!") + return "\n".join(lines) + + +@mcp.tool() +def get_graph_stats() -> str: + """Get overall knowledge graph statistics — file count, relationship count, staleness.""" + db = _get_db() + stats = db.get_stats() + repo = db.get_repo() + db.close() + return json.dumps({ + "repo": repo["name"] if repo else None, + "files": stats["files"], + "relationships": stats["relationships"], + "stale_files": stats["stale_files"], + "stale_relationships": stats["stale_relationships"], + }, indent=2) + + +if __name__ == "__main__": + # With the stdio transport, stdout carries the MCP protocol messages, so any + # human-readable banner has to go to stderr or it corrupts the stream. + print("Starting Developer Intelligence MCP Server (stdio)...", file=sys.stderr) + mcp.run(transport="stdio") diff --git a/refresh_stale.py b/refresh_stale.py new file mode 100644 index 0000000..9d02706 --- /dev/null +++ b/refresh_stale.py @@ -0,0 +1,75 @@ +"""Refresh all stale documentation — relationships and repo summaries.""" + +import sys +import os + +sys.path.insert(0, os.path.dirname(__file__)) + +from db import GraphDB +from docgen import generate_relationship_doc, generate_repo_doc + +REPO_DIR = 
os.environ.get("REPO_DIR", os.path.join(os.path.dirname(__file__), "repos", "target")) + + +def refresh_stale(): + """Regenerate every stale relationship doc, then every stale repo summary, and print before/after counts.""" + db = GraphDB() + + print(f"\n{'='*60}") + print("Refreshing stale documentation") + print(f"{'='*60}") + + stats_before = db.get_stats() + print(f"\nBefore: {stats_before['stale_relationships']} stale relationships") + + # Refresh stale relationship docs + stale_rels = db.get_stale_relationships() + if stale_rels: + print(f"\n[1/2] Regenerating {len(stale_rels)} stale relationship docs...") + for i, rel in enumerate(stale_rels): + doc = generate_relationship_doc( + rel["from_file"], rel["from_doc"] or "", + rel["to_file"], rel["to_doc"] or "", + ) + db.update_relationship_doc(rel["from_file"], rel["to_file"], doc) + if (i + 1) % 5 == 0 or (i + 1) == len(stale_rels): + print(f" Refreshed {i+1}/{len(stale_rels)}") + else: + print("\n[1/2] No stale relationships.") + + # Refresh stale repo docs + stale_repos = db.get_stale_repos() + if stale_repos: + print(f"\n[2/2] Regenerating {len(stale_repos)} stale repo docs...") + for repo in stale_repos: + file_docs = db.get_repo_files_docs(repo["name"]) + priority_names = ["echo.go", "router.go", "context.go", "group.go", "main.go", "server.go"] + entry_files = [] + for name in priority_names: + for path, doc in file_docs: + if path.endswith(name) and doc: + entry_files.append((path, doc)) + break + + readme_path = os.path.join(REPO_DIR, "README.md") + readme = "" + if os.path.exists(readme_path): + # Explicit UTF-8 keeps the result independent of the machine's locale; + # errors="replace" keeps one undecodable byte in a README from aborting the refresh. + with open(readme_path, encoding="utf-8", errors="replace") as f: + readme = f.read() + + repo_doc = generate_repo_doc(readme, entry_files) + db.update_repo_doc(repo["name"], repo_doc) + print(f" Refreshed repo: {repo['name']}") + else: + print("\n[2/2] No stale repos.") + + stats_after = db.get_stats() + print(f"\n{'='*60}") + print("Refresh complete!") + print(f" Stale rels: {stats_before['stale_relationships']} → {stats_after['stale_relationships']}") + print(f"{'='*60}") + + db.close() + + +if __name__ == "__main__": + refresh_stale() diff --git 
a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..fd7d81f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +requests==2.32.3 +tree-sitter==0.24.0 +tree-sitter-go==0.23.4 +mcp==1.9.0 diff --git a/simulate_merge.py b/simulate_merge.py new file mode 100644 index 0000000..6297071 --- /dev/null +++ b/simulate_merge.py @@ -0,0 +1,99 @@ +"""Simulate a merge: modify a file, regenerate its doc, mark downstream stale.""" + +import sys +import os +import time + +sys.path.insert(0, os.path.dirname(__file__)) + +from db import GraphDB +from docgen import generate_file_doc + +REPO_DIR = os.environ.get("REPO_DIR", os.path.join(os.path.dirname(__file__), "repos", "target")) + + +def simulate_merge(file_path: str, new_content: str = None): + """Regenerate the doc of file_path as if a merge changed it, then mark its relationships and repo stale. + + file_path is the graph key of the file (relative to REPO_DIR). If new_content is + None, the file is read from REPO_DIR and a sample Go method is appended to stand + in for the change. Prints progress and returns None; returns early if the file + is missing from the graph or from disk. + """ + db = GraphDB() + + print(f"\n{'='*60}") + print(f"Simulating merge: {file_path}") + print(f"{'='*60}") + + # Get current doc (before) + f = db.get_file(file_path) + if not f: + print(f"ERROR: File {file_path} not found in graph") + db.close() + return + + print(f"\n[BEFORE] Documentation for {file_path}:") + # documentation is a nullable column; an empty preview is better than a TypeError here. + print(f" {(f['documentation'] or '')[:300]}") + + # Read file content (or use provided) + if new_content is None: + full_path = os.path.join(REPO_DIR, file_path) + if os.path.exists(full_path): + # Explicit UTF-8 with replacement so the read does not depend on the locale encoding. + with open(full_path, encoding="utf-8", errors="replace") as fh: + original = fh.read() + new_content = original + """ + +// EnableTracing adds distributed request tracing with correlation IDs +// across the middleware chain. Each request gets a unique trace ID propagated +// through all handlers for end-to-end debugging in microservice architectures. 
+func (e *Echo) EnableTracing(correlationHeader string) { + e.tracingEnabled = true + e.correlationHeader = correlationHeader +} +""" + else: + print(f"ERROR: File not found on disk: {full_path}") + db.close() + return + + # Regenerate doc for changed file + print(f"\n[MERGE] Regenerating documentation for {file_path}...") + new_doc = generate_file_doc(file_path, new_content) + + commit_hash = f"sim-{int(time.time())}" + db.update_file_doc(file_path, new_doc, commit_hash) + + print(f"\n[AFTER] Documentation for {file_path}:") + print(f" {new_doc[:300]}") + + # Mark downstream stale + print(f"\n[CASCADE] Marking downstream as stale...") + db.mark_relationships_stale(file_path) + + # Mark the repo this file belongs to, not whichever repo row comes first. + # get_repo() falls back to the first repo when the file has no repo recorded. + repo = db.get_repo(f["repo"]) + if repo: + db.mark_repo_stale(repo["name"]) + print(f" Repo '{repo['name']}' marked stale") + + stats = db.get_stats() + print(f" Stale relationships: {stats['stale_relationships']}") + + print(f"\n{'='*60}") + print("Merge simulation complete.") + print(f" File doc: REGENERATED (fresh)") + print(f" Relationships: STALE (awaiting refresh)") + print(f" Repo doc: STALE (awaiting refresh)") + print(f"\nRun: python refresh_stale.py") + print(f"Or ask Claude Code — stale docs show [STALE] indicator.") + print(f"{'='*60}") + + db.close() + + +if __name__ == "__main__": + if len(sys.argv) < 2: + # The second argument is optional: a file whose text replaces the target's content. + print("Usage: python simulate_merge.py <file_path> [new_content_file]") + print("Example: python simulate_merge.py echo.go") + sys.exit(1) + + target = sys.argv[1] + content = None + if len(sys.argv) > 2: + with open(sys.argv[2], encoding="utf-8", errors="replace") as f: + content = f.read() + + simulate_merge(target, content)