From af2e54b5f374969b17054e238b3ab0132128beab Mon Sep 17 00:00:00 2001
From: Jarvis Prime <jarvis@openclaw.local>
Date: Wed, 4 Mar 2026 04:25:14 +0000
Subject: [PATCH] Initial POC: Developer Intelligence knowledge graph

- SQLite backend with file/repo/relationship entities
- tree-sitter Go AST parser for deterministic import detection
- Ollama doc generation with concurrent batch processing
- MCP server (FastMCP) for Claude Code integration
- Merge simulation with staleness cascade
- Lazy refresh for stale relationship and repo docs
- CLAUDE.md for agent context
---
 .mcp.json         |   9 ++
 CLAUDE.md         |  38 ++++++++
 README.md         | 132 ++++++++++++++++++++++++++
 db.py             | 229 ++++++++++++++++++++++++++++++++++++++++++++++
 ingest.py         | 149 ++++++++++++++++++++++++++++++
 mcp_server.py     | 154 +++++++++++++++++++++++++++++++
 refresh_stale.py  |  75 +++++++++++++++
 requirements.txt  |   4 +
 simulate_merge.py |  99 ++++++++++++++++++++
 9 files changed, 889 insertions(+)
 create mode 100644 .mcp.json
 create mode 100644 CLAUDE.md
 create mode 100644 README.md
 create mode 100644 db.py
 create mode 100644 ingest.py
 create mode 100644 mcp_server.py
 create mode 100644 refresh_stale.py
 create mode 100644 requirements.txt
 create mode 100644 simulate_merge.py

diff --git a/.mcp.json b/.mcp.json
new file mode 100644
index 0000000..270681b
--- /dev/null
+++ b/.mcp.json
@@ -0,0 +1,9 @@
+{
+  "mcpServers": {
+    "dev-intel": {
+      "command": "python",
+      "args": ["mcp_server.py"],
+      "cwd": "."
+    }
+  }
+}
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..ef3f273
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,38 @@
+# Developer Intelligence Knowledge Graph
+
+This project contains a knowledge graph of LLM-generated documentation for a Go codebase. Instead of reading raw source files, query the knowledge graph via MCP tools.
+
+## How to use
+
+**Always prefer the knowledge graph over reading raw files.** The graph contains pre-generated English documentation for every file and relationship in the codebase.
+
+### Query patterns
+
+- "What does X do?" → `get_file_doc("path/to/file.go")`
+- "How do X and Y interact?" → `get_relationship("file_a.go", "file_b.go")`
+- "What's this project about?" → `get_repo_overview()`
+- "What depends on X?" → `get_dependents("path/to/file.go")`
+- "What does X depend on?" → `get_dependencies("path/to/file.go")`
+- "Find anything about routing" → `search_docs("routing")`
+- "What's outdated?" → `get_stale_docs()`
+- "How big is the graph?" → `get_graph_stats()`
+
+### Schema
+
+The knowledge graph has three entity types:
+
+- **File** — one per source file. Has `documentation` (English description of what the file does), `staleness` (fresh or stale), `prev_documentation` (previous version after a merge).
+- **Repo** — one per repository. Has `documentation` (project-level summary composed from file docs).
+- **Relationship** — import edges between files. Has `documentation` (how the two files interact), `staleness`.
+
+### Staleness
+
+When a file changes, its documentation is regenerated immediately. Every relationship that involves the file (as importer or as import target) and the repo summary are marked **stale** — meaning the code has changed but the doc hasn't been regenerated yet. Stale docs are still returned but flagged with `[STALE]`.
+
+### Tips
+
+- Start broad (`get_repo_overview`) then drill into specifics (`get_file_doc`, `get_dependents`).
+- Use `search_docs` for concept-level queries ("middleware", "authentication", "error handling").
+- If a doc says `[STALE]`, the underlying code changed since it was generated. Mention this to the user.
+- `get_dependents` is the impact analysis tool — "what breaks if I change this file?"
+- File paths are relative to the repo root (e.g., `echo.go`, `middleware/compress.go`).
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..cba7f60
--- /dev/null
+++ b/README.md
@@ -0,0 +1,132 @@
+# Developer Intelligence POC
+
+A local proof-of-concept that builds a living knowledge graph from a Go codebase. Every file gets LLM-generated documentation. Every relationship gets documented. Everything stays current on every merge. Query it from Claude Code via MCP.
+
+## Quick Start
+
+```bash
+cd dev-intel-poc
+pip install -r requirements.txt
+python ingest.py                # clone echo, parse, generate docs (~15-20 min)
+claude --mcp-config .mcp.json   # start Claude Code with the knowledge graph
+```
+
+Then ask Claude Code:
+- "How does routing work in echo?"
+- "What files depend on context.go?"
+- "Give me an overview of this project"
+
+## Prerequisites
+
+- Python 3.11+
+- Ollama with the `qwen2.5:7b` model pulled, reachable at `OLLAMA_URL` (default `http://192.168.86.172:11434`; see Configuration)
+- Claude Code CLI (`claude`)
+- git
+
+## Demo Walkthrough
+
+### Act 1: "Here's what your codebase knows about itself"
+
+After `python ingest.py` completes, start Claude Code:
+
+```bash
+claude --mcp-config .mcp.json
+```
+
+Ask it:
+```
+> What does echo.go do?
+> How does echo.go interact with router.go?
+> Give me an overview of the whole project
+> What files depend on context.go?
+> Search for anything related to "middleware"
+```
+
+Every answer comes from LLM-generated documentation stored in the knowledge graph — not from reading raw source code.
+
+### Act 2: "Now someone pushes a change"
+
+In another terminal:
+```bash
+python simulate_merge.py echo.go
+```
+
+This:
+1. Regenerates echo.go's documentation (reflects the new code)
+2. Marks all relationships involving echo.go as STALE
+3. Marks the repo summary as STALE
+
+Back in Claude Code:
+```
+> What does echo.go do?          # fresh doc — mentions the new tracing feature
+> What's the repo overview?       # shows [STALE] — knows it's outdated
+> Show me all stale docs          # lists everything that needs refresh
+```
+
+### Act 3: "The system heals itself"
+
+```bash
+python refresh_stale.py
+```
+
+Back in Claude Code:
+```
+> What's the repo overview?       # fresh again — rewritten to include new capabilities
+> Show me all stale docs          # "Everything is fresh!"
+```
+
+## Architecture
+
+```
+ingest.py ──→ repos/target/ (git clone)
+    │              │
+    │         parser.py (tree-sitter AST)
+    │              │
+    │         docgen.py (Ollama qwen2.5:7b)
+    │              │
+    └──────→ devintel.db (SQLite)
+                   │
+              mcp_server.py ──→ Claude Code
+```
+
+No Docker. No external databases. One SQLite file. One MCP server.
+
+## MCP Tools
+
+| Tool | What it does |
+|---|---|
+| `get_file_doc(path)` | Read a file's generated documentation |
+| `get_relationship(a, b)` | How two files interact |
+| `get_repo_overview()` | Project-level summary |
+| `get_dependents(path)` | What breaks if you change this file |
+| `get_dependencies(path)` | What this file depends on |
+| `search_docs(query)` | Keyword search across all docs |
+| `get_stale_docs()` | List outdated documentation |
+| `get_graph_stats()` | File count, relationship count, staleness |
+
+## Project Structure
+
+```
+dev-intel-poc/
+├── requirements.txt       # Python deps
+├── .mcp.json              # Claude Code MCP config
+├── ingest.py              # Main ingestion pipeline
+├── simulate_merge.py      # Simulate a code change
+├── refresh_stale.py       # Refresh stale docs
+├── db.py                  # SQLite backend
+├── parser.py              # tree-sitter Go AST parser
+├── docgen.py              # Ollama doc generation
+├── mcp_server.py          # MCP server for Claude Code
+└── devintel.db            # Generated — the knowledge graph
+```
+
+## Configuration
+
+| Env Variable | Default | Description |
+|---|---|---|
+| `OLLAMA_URL` | `http://192.168.86.172:11434` | Ollama endpoint |
+| `OLLAMA_MODEL` | `qwen2.5:7b` | Model for doc generation |
+| `TARGET_REPO` | `https://github.com/labstack/echo.git` | Repo to ingest |
+| `MAX_CONCURRENT` | `4` | Parallel Ollama requests |
+| `DEVINTEL_DB` | `./devintel.db` | SQLite database path |
+| `REPO_DIR` | `./repos/target` | Cloned repo location |
diff --git a/db.py b/db.py
new file mode 100644
index 0000000..190dbfe
--- /dev/null
+++ b/db.py
@@ -0,0 +1,229 @@
+"""SQLite backend for Developer Intelligence POC."""
+
+import sqlite3
+import os
+from datetime import datetime
+
+DB_PATH = os.environ.get("DEVINTEL_DB", os.path.join(os.path.dirname(__file__), "devintel.db"))
+
+
+def get_db() -> sqlite3.Connection:
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    conn.execute("PRAGMA journal_mode=WAL")
+    conn.execute("PRAGMA foreign_keys=ON")
+    return conn
+
+
+def init_db():
+    """Create tables if they don't exist."""
+    conn = get_db()
+    conn.executescript("""
+        CREATE TABLE IF NOT EXISTS repos (
+            name TEXT PRIMARY KEY,
+            url TEXT,
+            language TEXT,
+            documentation TEXT,
+            staleness TEXT DEFAULT 'fresh',
+            updated_at TEXT
+        );
+
+        CREATE TABLE IF NOT EXISTS files (
+            path TEXT PRIMARY KEY,
+            repo TEXT REFERENCES repos(name),
+            language TEXT,
+            documentation TEXT,
+            prev_documentation TEXT,
+            functions TEXT,  -- JSON array
+            last_commit TEXT,
+            staleness TEXT DEFAULT 'fresh',
+            updated_at TEXT
+        );
+
+        CREATE TABLE IF NOT EXISTS relationships (
+            from_file TEXT REFERENCES files(path),
+            to_file TEXT REFERENCES files(path),
+            rel_type TEXT DEFAULT 'IMPORTS',
+            documentation TEXT,
+            staleness TEXT DEFAULT 'fresh',
+            updated_at TEXT,
+            PRIMARY KEY (from_file, to_file, rel_type)
+        );
+
+        CREATE INDEX IF NOT EXISTS idx_files_repo ON files(repo);
+        CREATE INDEX IF NOT EXISTS idx_files_staleness ON files(staleness);
+        CREATE INDEX IF NOT EXISTS idx_rels_staleness ON relationships(staleness);
+        CREATE INDEX IF NOT EXISTS idx_rels_to ON relationships(to_file);
+    """)
+    conn.commit()
+    conn.close()
+
+
+class GraphDB:
+    def __init__(self):
+        init_db()
+        self.conn = get_db()
+
+    def create_repo(self, name: str, url: str, language: str, documentation: str):
+        self.conn.execute(
+            """INSERT INTO repos (name, url, language, documentation, staleness, updated_at)
+               VALUES (?, ?, ?, ?, 'fresh', ?)
+               ON CONFLICT(name) DO UPDATE SET
+                 url=excluded.url, language=excluded.language,
+                 documentation=excluded.documentation, staleness='fresh',
+                 updated_at=excluded.updated_at""",
+            (name, url, language, documentation, datetime.utcnow().isoformat()),
+        )
+        self.conn.commit()
+
+    def create_file(self, path: str, repo: str, language: str, documentation: str,
+                    functions: list[str], commit: str = "initial"):
+        import json
+        self.conn.execute(
+            """INSERT INTO files (path, repo, language, documentation, functions, last_commit, staleness, updated_at)
+               VALUES (?, ?, ?, ?, ?, ?, 'fresh', ?)
+               ON CONFLICT(path) DO UPDATE SET
+                 repo=excluded.repo, language=excluded.language,
+                 documentation=excluded.documentation, functions=excluded.functions,
+                 last_commit=excluded.last_commit, staleness='fresh',
+                 updated_at=excluded.updated_at""",
+            (path, repo, language, documentation, json.dumps(functions), commit,
+             datetime.utcnow().isoformat()),
+        )
+        self.conn.commit()
+
+    def create_relationship(self, from_file: str, to_file: str, rel_type: str = "IMPORTS",
+                            documentation: str = ""):
+        self.conn.execute(
+            """INSERT INTO relationships (from_file, to_file, rel_type, documentation, staleness, updated_at)
+               VALUES (?, ?, ?, ?, 'fresh', ?)
+               ON CONFLICT(from_file, to_file, rel_type) DO UPDATE SET
+                 documentation=excluded.documentation, staleness=excluded.staleness,
+                 updated_at=excluded.updated_at""",
+            (from_file, to_file, rel_type, documentation, datetime.utcnow().isoformat()),
+        )
+        self.conn.commit()
+
+    def mark_relationships_stale(self, file_path: str):
+        now = datetime.utcnow().isoformat()
+        self.conn.execute(
+            "UPDATE relationships SET staleness='stale', updated_at=? WHERE from_file=? OR to_file=?",
+            (now, file_path, file_path),
+        )
+        self.conn.commit()
+
+    def mark_repo_stale(self, repo: str):
+        self.conn.execute(
+            "UPDATE repos SET staleness='stale', updated_at=? WHERE name=?",
+            (datetime.utcnow().isoformat(), repo),
+        )
+        self.conn.commit()
+
+    def update_file_doc(self, path: str, documentation: str, commit: str):
+        self.conn.execute(
+            """UPDATE files SET prev_documentation=documentation,
+                 documentation=?, staleness='fresh', last_commit=?, updated_at=?
+               WHERE path=?""",
+            (documentation, commit, datetime.utcnow().isoformat(), path),
+        )
+        self.conn.commit()
+
+    def get_file(self, path: str) -> dict | None:
+        row = self.conn.execute("SELECT * FROM files WHERE path=?", (path,)).fetchone()
+        return dict(row) if row else None
+
+    def get_repo(self, name: str = None) -> dict | None:
+        if name:
+            row = self.conn.execute("SELECT * FROM repos WHERE name=?", (name,)).fetchone()
+        else:
+            row = self.conn.execute("SELECT * FROM repos LIMIT 1").fetchone()
+        return dict(row) if row else None
+
+    def get_dependents(self, path: str) -> list[dict]:
+        rows = self.conn.execute(
+            """SELECT r.from_file, r.documentation AS rel_doc, r.staleness AS rel_staleness,
+                      f.documentation AS file_doc
+               FROM relationships r
+               JOIN files f ON f.path = r.from_file
+               WHERE r.to_file = ?
+               ORDER BY r.from_file""",
+            (path,),
+        ).fetchall()
+        return [dict(r) for r in rows]
+
+    def get_dependencies(self, path: str) -> list[dict]:
+        rows = self.conn.execute(
+            """SELECT r.to_file, r.documentation AS rel_doc, r.staleness AS rel_staleness,
+                      f.documentation AS file_doc
+               FROM relationships r
+               JOIN files f ON f.path = r.to_file
+               WHERE r.from_file = ?
+               ORDER BY r.to_file""",
+            (path,),
+        ).fetchall()
+        return [dict(r) for r in rows]
+
+    def get_relationship(self, from_file: str, to_file: str) -> dict | None:
+        row = self.conn.execute(
+            "SELECT * FROM relationships WHERE from_file=? AND to_file=?",
+            (from_file, to_file),
+        ).fetchone()
+        return dict(row) if row else None
+
+    def search_docs(self, query: str, limit: int = 10) -> list[dict]:
+        rows = self.conn.execute(
+            """SELECT path, documentation, staleness FROM files
+               WHERE LOWER(documentation) LIKE LOWER(?)
+               ORDER BY path LIMIT ?""",
+            (f"%{query}%", limit),
+        ).fetchall()
+        return [dict(r) for r in rows]
+
+    def get_stale_relationships(self) -> list[dict]:
+        rows = self.conn.execute(
+            """SELECT r.from_file, r.to_file,
+                      f1.documentation AS from_doc, f2.documentation AS to_doc
+               FROM relationships r
+               JOIN files f1 ON f1.path = r.from_file
+               JOIN files f2 ON f2.path = r.to_file
+               WHERE r.staleness = 'stale'"""
+        ).fetchall()
+        return [dict(r) for r in rows]
+
+    def get_stale_repos(self) -> list[dict]:
+        rows = self.conn.execute(
+            "SELECT name, url FROM repos WHERE staleness='stale'"
+        ).fetchall()
+        return [dict(r) for r in rows]
+
+    def get_repo_files_docs(self, repo: str) -> list[tuple[str, str]]:
+        rows = self.conn.execute(
+            "SELECT path, documentation FROM files WHERE repo=? ORDER BY path",
+            (repo,),
+        ).fetchall()
+        return [(r["path"], r["documentation"]) for r in rows]
+
+    def update_relationship_doc(self, from_file: str, to_file: str, documentation: str):
+        self.conn.execute(
+            """UPDATE relationships SET documentation=?, staleness='fresh', updated_at=?
+               WHERE from_file=? AND to_file=?""",
+            (documentation, datetime.utcnow().isoformat(), from_file, to_file),
+        )
+        self.conn.commit()
+
+    def update_repo_doc(self, name: str, documentation: str):
+        self.conn.execute(
+            "UPDATE repos SET documentation=?, staleness='fresh', updated_at=? WHERE name=?",
+            (documentation, datetime.utcnow().isoformat(), name),
+        )
+        self.conn.commit()
+
+    def get_stats(self) -> dict:
+        files = self.conn.execute("SELECT count(*) AS c FROM files").fetchone()["c"]
+        rels = self.conn.execute("SELECT count(*) AS c FROM relationships").fetchone()["c"]
+        stale_f = self.conn.execute("SELECT count(*) AS c FROM files WHERE staleness='stale'").fetchone()["c"]
+        stale_r = self.conn.execute("SELECT count(*) AS c FROM relationships WHERE staleness='stale'").fetchone()["c"]
+        return {"files": files, "relationships": rels, "stale_files": stale_f, "stale_relationships": stale_r}
+
+    def close(self):
+        self.conn.close()
diff --git a/ingest.py b/ingest.py
new file mode 100644
index 0000000..d194af9
--- /dev/null
+++ b/ingest.py
@@ -0,0 +1,149 @@
+"""Main ingestion script. Clone repo, parse, generate docs, load SQLite."""
+
+import os
+import subprocess
+import time
+import json
+from pathlib import Path
+
+from parser import parse_go_file, filter_imports, get_repo_module, resolve_import_to_file
+from docgen import generate_file_doc, generate_repo_doc, generate_docs_batch
+from db import GraphDB
+
+TARGET_REPO = os.environ.get("TARGET_REPO", "https://github.com/labstack/echo.git")
+REPO_DIR = os.environ.get("REPO_DIR", os.path.join(os.path.dirname(__file__), "repos", "target"))
+
+
+def clone_repo():
+    if Path(REPO_DIR).exists() and (Path(REPO_DIR) / ".git").exists():
+        print(f"Repo already cloned at {REPO_DIR}, pulling latest...")
+        subprocess.run(["git", "-C", REPO_DIR, "pull"], check=True)
+    else:
+        print(f"Cloning {TARGET_REPO}...")
+        Path(REPO_DIR).parent.mkdir(parents=True, exist_ok=True)
+        subprocess.run(["git", "clone", "--depth=1", TARGET_REPO, REPO_DIR], check=True)
+
+    result = subprocess.run(
+        ["git", "-C", REPO_DIR, "rev-parse", "HEAD"],
+        capture_output=True, text=True, check=True,
+    )
+    return result.stdout.strip()
+
+
+def discover_go_files() -> dict[str, str]:
+    """Return {repo-relative path: source text} for every non-test, non-vendored .go file under REPO_DIR."""
+    files = {}
+    for gofile in Path(REPO_DIR).rglob("*.go"):
+        rel_path = gofile.relative_to(REPO_DIR)
+        if "vendor" in rel_path.parts or gofile.name.endswith("_test.go"):  # whole component / suffix, not substring
+            continue
+        try:
+            files[str(rel_path)] = gofile.read_text(errors="replace")
+        except Exception as e:
+            print(f"  Skipping {rel_path}: {e}")
+    return files
+
+
+def run():
+    """Run the six-step ingestion: clone, discover, parse, generate file docs, load SQLite, generate the repo doc."""
+    start = time.time()
+    print("=" * 60)
+    print("Developer Intelligence POC — Ingestion")
+    print("=" * 60)
+    # Step 1: Clone
+    print("\n[1/6] Cloning repository...")
+    commit = clone_repo()
+    repo_name = TARGET_REPO.rstrip("/").split("/")[-1].removesuffix(".git")  # strip only a trailing ".git"
+    print(f"  Repo: {repo_name}, commit: {commit[:8]}")
+
+    # Step 2: Discover files
+    print("\n[2/6] Discovering Go files...")
+    go_files = discover_go_files()
+    print(f"  Found {len(go_files)} Go files (excluding tests and vendor)")
+
+    # Step 3: Parse AST
+    print("\n[3/6] Parsing AST (tree-sitter)...")
+    repo_module = get_repo_module(REPO_DIR)
+    print(f"  Module: {repo_module}")
+
+    parsed = {}
+    all_imports = {}
+    for rel_path, content in go_files.items():
+        info = parse_go_file(rel_path, content, repo_module)
+        parsed[rel_path] = info
+        filtered = filter_imports(info.imports, repo_module)
+        all_imports[rel_path] = filtered
+
+    total_imports = sum(len(v) for v in all_imports.values())
+    first_party = sum(1 for v in all_imports.values() for i in v if i.startswith(repo_module))
+    print(f"  Parsed {len(parsed)} files, {total_imports} filtered imports ({first_party} first-party)")
+
+    # Step 4: Generate file docs
+    print("\n[4/6] Generating file documentation (Ollama)...")
+    file_items = list(go_files.items())
+    file_docs = generate_docs_batch(file_items, generate_file_doc)
+    file_doc_map = {file_items[i][0]: file_docs[i] for i in range(len(file_items))}
+
+    # Step 5: Load into SQLite
+    print("\n[5/6] Loading into database...")
+    db = GraphDB()
+
+    for rel_path in go_files:
+        info = parsed[rel_path]
+        doc = file_doc_map.get(rel_path, "")
+        db.create_file(
+            path=rel_path,
+            repo=repo_name,
+            language="go",
+            documentation=doc,
+            functions=info.functions,
+            commit=commit[:8],
+        )
+
+    edges_created = 0
+    for from_file, imports in all_imports.items():
+        for imp in imports:
+            if not imp.startswith(repo_module):
+                continue
+            target_dir = resolve_import_to_file(imp, repo_module, go_files)
+            if target_dir:
+                # Find actual files in that directory
+                for fpath in go_files:
+                    fdir = str(Path(fpath).parent)
+                    if fdir == target_dir or fdir.endswith("/" + target_dir):
+                        db.create_relationship(from_file, fpath)
+                        edges_created += 1
+
+    print(f"  Created {len(go_files)} file nodes, {edges_created} import edges")
+
+    # Step 6: Generate repo-level doc
+    print("\n[6/6] Generating repo-level documentation...")
+    readme_path = Path(REPO_DIR) / "README.md"
+    readme = readme_path.read_text(errors="replace") if readme_path.exists() else ""
+
+    entry_candidates = ["echo.go", "router.go", "context.go", "group.go", "middleware.go"]
+    entry_files = []
+    for candidate in entry_candidates:
+        for path, content in go_files.items():
+            if path.endswith(candidate):
+                entry_files.append((path, content))
+                break
+
+    repo_doc = generate_repo_doc(readme, entry_files)
+    db.create_repo(name=repo_name, url=TARGET_REPO, language="go", documentation=repo_doc)
+
+    stats = db.get_stats()
+    elapsed = time.time() - start
+    print("\n" + "=" * 60)
+    print("Ingestion complete!")
+    print(f"  Files:         {stats['files']}")
+    print(f"  Relationships: {stats['relationships']}")
+    print(f"  Time:          {elapsed:.1f}s ({elapsed/60:.1f}m)")
+    print(f"  Database:      {os.path.abspath(db.conn.execute('PRAGMA database_list').fetchone()[2])}")
+    print("=" * 60)
+
+    db.close()
+
+
+if __name__ == "__main__":
+    run()
diff --git a/mcp_server.py b/mcp_server.py
new file mode 100644
index 0000000..221fe80
--- /dev/null
+++ b/mcp_server.py
@@ -0,0 +1,154 @@
+"""MCP server for Developer Intelligence POC. Queries SQLite, serves to Claude Code."""
+
+import os
+import sys
+import json
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+from db import GraphDB
+from mcp.server.fastmcp import FastMCP
+
+mcp = FastMCP("Developer Intelligence")
+
+
+def _get_db():
+    return GraphDB()
+
+
+@mcp.tool()
+def get_file_doc(path: str) -> str:
+    """Get the generated documentation for a source file. Pass the relative file path (e.g. 'echo.go' or 'middleware/compress.go')."""
+    db = _get_db()
+    f = db.get_file(path)
+    db.close()
+    if not f:
+        return f"File not found: {path}"
+    staleness = " [STALE]" if f["staleness"] == "stale" else ""
+    prev = ""
+    if f.get("prev_documentation"):
+        prev = f"\n\n--- Previous version ---\n{f['prev_documentation']}"
+    return f"{f['documentation']}{staleness}\n\n(commit: {f['last_commit']}, updated: {f['updated_at']}){prev}"
+
+
+@mcp.tool()
+def get_relationship(file_a: str, file_b: str) -> str:
+    """Get the documentation for the import relationship between two files, in either import direction."""
+    db = _get_db()
+    rel = db.get_relationship(file_a, file_b) or db.get_relationship(file_b, file_a)  # edges are stored importer -> imported
+    db.close()
+    if not rel:
+        return f"No relationship found between {file_a} and {file_b}"
+    doc = rel["documentation"] or "(no relationship documentation generated yet)"
+    staleness = " [STALE]" if rel["staleness"] == "stale" else ""
+    return f"{doc}{staleness}"
+
+
+@mcp.tool()
+def get_repo_overview() -> str:
+    """Get the repo-level documentation summary — a high-level overview of the entire project."""
+    db = _get_db()
+    repo = db.get_repo()
+    db.close()
+    if not repo:
+        return "No repo found"
+    staleness = " [STALE]" if repo["staleness"] == "stale" else ""
+    return f"# {repo['name']}{staleness}\n\n{repo['documentation']}"
+
+
+@mcp.tool()
+def get_dependents(path: str) -> str:
+    """Get all files that import/depend on the given file. Shows what breaks if you change this file."""
+    db = _get_db()
+    deps = db.get_dependents(path)
+    db.close()
+    if not deps:
+        return f"No files depend on {path}"
+    lines = [f"Files that depend on {path} ({len(deps)} total):\n"]
+    for d in deps:
+        staleness = " [STALE]" if d["rel_staleness"] == "stale" else ""
+        doc = d["rel_doc"] or "(no relationship doc)"
+        lines.append(f"  {d['from_file']}{staleness}")
+        lines.append(f"    Relationship: {doc}")
+        lines.append(f"    File: {(d['file_doc'] or '')[:150]}...")  # documentation column is nullable
+    return "\n".join(lines)
+
+
+@mcp.tool()
+def get_dependencies(path: str) -> str:
+    """Get all files that the given file imports/depends on."""
+    db = _get_db()
+    deps = db.get_dependencies(path)
+    db.close()
+    if not deps:
+        return f"{path} has no tracked dependencies"
+    lines = [f"Dependencies of {path} ({len(deps)} total):\n"]
+    for d in deps:
+        staleness = " [STALE]" if d["rel_staleness"] == "stale" else ""
+        doc = d["rel_doc"] or "(no relationship doc)"
+        lines.append(f"  {d['to_file']}{staleness}")
+        lines.append(f"    Relationship: {doc}")
+    return "\n".join(lines)
+
+
+@mcp.tool()
+def search_docs(query: str) -> str:
+    """Search across all file documentation by keyword. Use to find files related to a concept (e.g. 'routing', 'middleware', 'authentication')."""
+    db = _get_db()
+    results = db.search_docs(query)
+    db.close()
+    if not results:
+        return f"No files found matching '{query}'"
+    lines = [f"Files matching '{query}' ({len(results)} results):\n"]
+    for r in results:
+        staleness = " [STALE]" if r["staleness"] == "stale" else ""
+        doc = r["documentation"][:200] + "..." if len(r["documentation"]) > 200 else r["documentation"]
+        lines.append(f"  {r['path']}{staleness}: {doc}")
+    return "\n".join(lines)
+
+
+@mcp.tool()
+def get_stale_docs() -> str:
+    """List all entities and relationships with stale (outdated) documentation."""
+    db = _get_db()
+    stale_rels = db.get_stale_relationships()
+    stale_repos = db.get_stale_repos()
+    stats = db.get_stats()
+    db.close()
+
+    lines = ["Stale documentation:\n"]
+    if stale_repos:
+        lines.append(f"  Repos ({len(stale_repos)}):")
+        for r in stale_repos:
+            lines.append(f"    {r['name']}")
+    lines.append(f"  Files: {stats['stale_files']} stale")
+    if stale_rels:
+        lines.append(f"  Relationships ({len(stale_rels)}):")
+        for r in stale_rels[:20]:  # Cap output
+            lines.append(f"    {r['from_file']} -> {r['to_file']}")
+        if len(stale_rels) > 20:
+            lines.append(f"    ... and {len(stale_rels) - 20} more")
+    if not stale_repos and stats["stale_files"] == 0 and stats["stale_relationships"] == 0:  # a stale repo doc is not "fresh"
+        lines.append("  Everything is fresh!")
+    return "\n".join(lines)
+
+
+@mcp.tool()
+def get_graph_stats() -> str:
+    """Get overall knowledge graph statistics — file count, relationship count, staleness."""
+    db = _get_db()
+    stats = db.get_stats()
+    repo = db.get_repo()
+    db.close()
+    return json.dumps({
+        "repo": repo["name"] if repo else None,
+        "files": stats["files"],
+        "relationships": stats["relationships"],
+        "stale_files": stats["stale_files"],
+        "stale_relationships": stats["stale_relationships"],
+    }, indent=2)
+
+
+if __name__ == "__main__":
+    print("Starting Developer Intelligence MCP Server (stdio)...", file=sys.stderr)  # stdout carries the JSON-RPC stream
+    mcp.run(transport="stdio")
diff --git a/refresh_stale.py b/refresh_stale.py
new file mode 100644
index 0000000..9d02706
--- /dev/null
+++ b/refresh_stale.py
@@ -0,0 +1,75 @@
+"""Refresh all stale documentation — relationships and repo summaries."""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+from db import GraphDB
+from docgen import generate_relationship_doc, generate_repo_doc
+
+REPO_DIR = os.environ.get("REPO_DIR", os.path.join(os.path.dirname(__file__), "repos", "target"))
+
+
+def refresh_stale():
+    """Regenerate the docs of every stale relationship, then of every stale repo, marking each fresh as it is stored."""
+    db = GraphDB()
+    print(f"\n{'='*60}")
+    print("Refreshing stale documentation")
+    print(f"{'='*60}")
+
+    stats_before = db.get_stats()
+    print(f"\nBefore: {stats_before['stale_relationships']} stale relationships")
+
+    # Refresh stale relationship docs
+    stale_rels = db.get_stale_relationships()
+    if stale_rels:
+        print(f"\n[1/2] Regenerating {len(stale_rels)} stale relationship docs...")
+        for i, rel in enumerate(stale_rels):
+            doc = generate_relationship_doc(
+                rel["from_file"], rel["from_doc"] or "",
+                rel["to_file"], rel["to_doc"] or "",
+            )
+            db.update_relationship_doc(rel["from_file"], rel["to_file"], doc)
+            if (i + 1) % 5 == 0 or (i + 1) == len(stale_rels):
+                print(f"  Refreshed {i+1}/{len(stale_rels)}")
+    else:
+        print("\n[1/2] No stale relationships.")
+
+    # Refresh stale repo docs
+    stale_repos = db.get_stale_repos()
+    if stale_repos:
+        print(f"\n[2/2] Regenerating {len(stale_repos)} stale repo docs...")
+        for repo in stale_repos:
+            file_docs = db.get_repo_files_docs(repo["name"])
+            priority_names = ["echo.go", "router.go", "context.go", "group.go", "main.go", "server.go"]
+            entry_files = []
+            for name in priority_names:
+                for path, doc in file_docs:
+                    if path.endswith(name) and doc:
+                        entry_files.append((path, doc))
+                        break
+
+            readme_path = os.path.join(REPO_DIR, "README.md")
+            readme = ""
+            if os.path.exists(readme_path):
+                with open(readme_path, errors="replace") as f:  # tolerate undecodable bytes, as ingest.py does
+                    readme = f.read()
+
+            repo_doc = generate_repo_doc(readme, entry_files)
+            db.update_repo_doc(repo["name"], repo_doc)
+            print(f"  Refreshed repo: {repo['name']}")
+    else:
+        print("\n[2/2] No stale repos.")
+
+    stats_after = db.get_stats()
+    print(f"\n{'='*60}")
+    print("Refresh complete!")
+    print(f"  Stale rels:  {stats_before['stale_relationships']} → {stats_after['stale_relationships']}")
+    print(f"{'='*60}")
+
+    db.close()
+
+
+if __name__ == "__main__":
+    refresh_stale()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fd7d81f
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+requests==2.32.3
+tree-sitter==0.24.0
+tree-sitter-go==0.23.4
+mcp==1.9.0
diff --git a/simulate_merge.py b/simulate_merge.py
new file mode 100644
index 0000000..6297071
--- /dev/null
+++ b/simulate_merge.py
@@ -0,0 +1,99 @@
+"""Simulate a merge: modify a file, regenerate its doc, mark downstream stale."""
+
+import sys
+import os
+import time
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+from db import GraphDB
+from docgen import generate_file_doc
+
+REPO_DIR = os.environ.get("REPO_DIR", os.path.join(os.path.dirname(__file__), "repos", "target"))
+
+
+def simulate_merge(file_path: str, new_content: str = None):
+    """Regenerate the doc for file_path (from new_content, or the on-disk file plus a synthetic Go method), then mark its relationships and the repo stale."""
+    db = GraphDB()
+    print(f"\n{'='*60}")
+    print(f"Simulating merge: {file_path}")
+    print(f"{'='*60}")
+
+    # Get current doc (before)
+    f = db.get_file(file_path)
+    if not f:
+        print(f"ERROR: File {file_path} not found in graph")
+        db.close()
+        return
+
+    print(f"\n[BEFORE] Documentation for {file_path}:")
+    print(f"  {(f['documentation'] or '')[:300]}")
+
+    # Read file content (or use provided)
+    if new_content is None:
+        full_path = os.path.join(REPO_DIR, file_path)
+        if os.path.exists(full_path):
+            with open(full_path, errors="replace") as fh:
+                original = fh.read()
+            new_content = original + """
+
+// EnableTracing adds distributed request tracing with correlation IDs
+// across the middleware chain. Each request gets a unique trace ID propagated
+// through all handlers for end-to-end debugging in microservice architectures.
+func (e *Echo) EnableTracing(correlationHeader string) {
+    e.tracingEnabled = true
+    e.correlationHeader = correlationHeader
+}
+"""
+        else:
+            print(f"ERROR: File not found on disk: {full_path}")
+            db.close()
+            return
+
+    # Regenerate doc for changed file
+    print(f"\n[MERGE] Regenerating documentation for {file_path}...")
+    new_doc = generate_file_doc(file_path, new_content)
+
+    commit_hash = f"sim-{int(time.time())}"
+    db.update_file_doc(file_path, new_doc, commit_hash)
+
+    print(f"\n[AFTER] Documentation for {file_path}:")
+    print(f"  {new_doc[:300]}")
+
+    # Mark downstream stale
+    print(f"\n[CASCADE] Marking downstream as stale...")
+    db.mark_relationships_stale(file_path)
+
+    repo = db.get_repo()
+    if repo:
+        db.mark_repo_stale(repo["name"])
+        print(f"  Repo '{repo['name']}' marked stale")
+
+    stats = db.get_stats()
+    print(f"  Stale relationships: {stats['stale_relationships']}")
+
+    print(f"\n{'='*60}")
+    print("Merge simulation complete.")
+    print(f"  File doc:       REGENERATED (fresh)")
+    print(f"  Relationships:  STALE (awaiting refresh)")
+    print(f"  Repo doc:       STALE (awaiting refresh)")
+    print(f"\nRun: python refresh_stale.py")
+    print(f"Or ask Claude Code — stale docs show [STALE] indicator.")
+    print(f"{'='*60}")
+
+    db.close()
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python simulate_merge.py <file_path>")
+        print("Example: python simulate_merge.py echo.go")
+        sys.exit(1)
+
+    target = sys.argv[1]
+    content = None
+    if len(sys.argv) > 2:
+        with open(sys.argv[2]) as f:
+            content = f.read()
+
+    simulate_merge(target, content)