diff --git a/.mcp.json b/.mcp.json
index 270681b..720ab77 100644
--- a/.mcp.json
+++ b/.mcp.json
@@ -1,8 +1,8 @@
 {
   "mcpServers": {
     "dev-intel": {
-      "command": "python",
-      "args": ["mcp_server.py"],
+      "command": "uv",
+      "args": ["run", "python", "mcp_server.py"],
       "cwd": "."
     }
   }
diff --git a/README.md b/README.md
index cba7f60..fed62f7 100644
--- a/README.md
+++ b/README.md
@@ -6,8 +6,8 @@ A local proof-of-concept that builds a living knowledge graph from a Go codebase
 
 ```bash
 cd dev-intel-poc
-pip install -r requirements.txt
-python ingest.py # clone echo, parse, generate docs (~15-20 min)
+uv sync # install deps
+uv run python ingest.py # clone echo, parse, generate docs (~15-20 min)
 claude --mcp-config .mcp.json # start Claude Code with the knowledge graph
 ```
 
@@ -19,6 +19,7 @@ Then ask Claude Code:
 ## Prerequisites
 
 - Python 3.11+
+- [uv](https://docs.astral.sh/uv/) (`curl -LsSf https://astral.sh/uv/install.sh | sh`)
 - Ollama running at `192.168.86.172:11434` with `qwen2.5:7b`
 - Claude Code CLI (`claude`)
 - git
diff --git a/docgen.py b/docgen.py
new file mode 100644
index 0000000..1eb5907
--- /dev/null
+++ b/docgen.py
@@ -0,0 +1,164 @@
+"""Ollama client for generating documentation."""
+
+import concurrent.futures
+import os
+import time
+
+import requests
+
+OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://192.168.86.172:11434")
+OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5:7b")
+# Clamp to at least one worker: ThreadPoolExecutor rejects max_workers <= 0.
+MAX_CONCURRENT = max(1, int(os.environ.get("MAX_CONCURRENT", "4")))
+
+
+def generate_file_doc(filepath: str, content: str) -> str:
+    """Generate documentation for a single file.
+
+    Content longer than 8000 characters is truncated before it is placed in
+    the prompt. Returns the model's reply, or a bracketed failure message.
+    """
+    if len(content) > 8000:
+        content = content[:8000] + "\n\n... [truncated]"
+
+    prompt = f"""You are a senior software engineer documenting a Go codebase.
+
+Describe what this file does in 2-4 sentences. Be specific about:
+- The domain logic and purpose (not just "this file contains functions")
+- Key types, interfaces, or structs defined
+- How it fits into the larger system (if apparent from imports/naming)
+
+Do NOT describe Go syntax or language mechanics. Describe WHAT the code does and WHY.
+
+File: {filepath}
+
+```go
+{content}
+```
+
+Documentation:"""
+
+    return _call_ollama(prompt)
+
+
+def generate_relationship_doc(file_a: str, content_a: str, file_b: str, content_b: str) -> str:
+    """Generate documentation for a relationship between two files.
+
+    Each file's content is truncated to 4000 characters before prompting.
+    """
+    if len(content_a) > 4000:
+        content_a = content_a[:4000] + "\n... [truncated]"
+    if len(content_b) > 4000:
+        content_b = content_b[:4000] + "\n... [truncated]"
+
+    prompt = f"""You are a senior software engineer documenting how two files in a Go codebase interact.
+
+Describe in 1-2 sentences how File A uses or depends on File B. Be specific about which types, functions, or interfaces are shared.
+
+File A: {file_a}
+```go
+{content_a}
+```
+
+File B: {file_b}
+```go
+{content_b}
+```
+
+Relationship:"""
+
+    return _call_ollama(prompt)
+
+
+def generate_repo_doc(readme: str, entry_files: list[tuple[str, str]]) -> str:
+    """Generate repo-level documentation from README and key entry points.
+
+    Uses at most the first 5 entry files (2000 characters each) and the
+    first 3000 characters of the README.
+    """
+    # Slicing already handles short strings, so no length check is needed.
+    files_section = "".join(
+        f"\n--- {path} ---\n{content[:2000]}\n" for path, content in entry_files[:5]
+    )
+    readme_section = readme[:3000]
+
+    prompt = f"""You are a senior software engineer writing a project overview.
+
+Based on the README and key source files below, write a 4-6 sentence summary of this project. Cover:
+- What the project does (its purpose)
+- Key architectural patterns (routing, middleware, etc.)
+- The main abstractions and how they fit together
+
+README:
+{readme_section}
+
+Key source files:
+{files_section}
+
+Project Overview:"""
+
+    return _call_ollama(prompt)
+
+
+def generate_docs_batch(items: list[tuple[str, str]], doc_fn) -> list[str]:
+    """Generate docs for multiple items concurrently.
+
+    Calls doc_fn(filepath, content) for every item on a thread pool of
+    MAX_CONCURRENT workers. The returned list is in the same order as items;
+    an item whose call raised gets a bracketed failure message instead.
+    """
+    results = [None] * len(items)
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_CONCURRENT) as executor:
+        future_to_idx = {}
+        for i, (filepath, content) in enumerate(items):
+            future = executor.submit(doc_fn, filepath, content)
+            future_to_idx[future] = i
+
+        done = 0
+        total = len(items)
+        for future in concurrent.futures.as_completed(future_to_idx):
+            idx = future_to_idx[future]
+            try:
+                results[idx] = future.result()
+            except Exception as e:
+                results[idx] = f"[doc generation failed: {e}]"
+            done += 1
+            if done % 10 == 0 or done == total:
+                print(f" Generated {done}/{total} docs")
+
+    return results
+
+
+def _call_ollama(prompt: str, retries: int = 3) -> str:
+    """Call Ollama API with retries.
+
+    Sleeps 2**attempt seconds between attempts. Never raises: after the last
+    failed attempt a bracketed failure message is returned instead.
+    """
+    # Always make at least one attempt so retries <= 0 cannot fall through
+    # the loop and return None.
+    attempts = max(1, retries)
+    last_error = None
+    for attempt in range(attempts):
+        try:
+            resp = requests.post(
+                f"{OLLAMA_URL}/api/generate",
+                json={
+                    "model": OLLAMA_MODEL,
+                    "prompt": prompt,
+                    "stream": False,
+                    "options": {
+                        "temperature": 0.3,
+                        "num_predict": 256,
+                    },
+                },
+                timeout=120,
+            )
+            resp.raise_for_status()
+            return resp.json()["response"].strip()
+        except Exception as e:  # deliberately broad: any failure is retried, never raised
+            last_error = e
+            if attempt < attempts - 1:
+                time.sleep(2 ** attempt)
+    return f"[doc generation failed after {attempts} attempts: {last_error}]"
diff --git a/go_parser.py b/go_parser.py
new file mode 100644
index 0000000..98b3205
--- /dev/null
+++ b/go_parser.py
@@ -0,0 +1,146 @@
+"""Go AST parser using tree-sitter. Extracts imports and function/method names."""
+
+from dataclasses import dataclass, field
+from pathlib import Path
+
+import tree_sitter_go as tsgo
+from tree_sitter import Language, Parser
+
+GO_LANGUAGE = Language(tsgo.language())
+
+# stdlib packages to filter out
+GO_STDLIB = {
+    "fmt", "os", "io", "log", "net", "http", "context", "sync", "time",
+    "strings", "strconv", "bytes", "errors", "sort", "math", "path",
+    "encoding", "crypto", "reflect", "testing", "flag", "regexp",
+    "bufio", "archive", "compress", "container", "database", "debug",
+    "embed", "go", "hash", "html", "image", "index", "internal",
+    "mime", "plugin", "runtime", "syscall", "text", "unicode", "unsafe",
+    "encoding/json", "encoding/xml", "encoding/base64", "encoding/binary",
+    "encoding/csv", "encoding/gob", "encoding/hex", "encoding/pem",
+    "net/http", "net/url", "net/http/httptest", "io/ioutil", "io/fs",
+    "os/exec", "os/signal", "path/filepath", "sync/atomic",
+    "crypto/tls", "crypto/rand", "crypto/sha256", "crypto/hmac",
+    "log/slog", "testing/fstest",
+}
+
+
+@dataclass
+class FileInfo:
+    """Parse result for one Go source file."""
+
+    path: str
+    content: str
+    # Import paths as written in the file (unfiltered, string delimiters removed).
+    imports: list[str] = field(default_factory=list)
+    # Names of all function declarations, followed by all method declarations.
+    functions: list[str] = field(default_factory=list)
+
+
+def parse_go_file(filepath: str, content: str, repo_module: str) -> FileInfo:
+    """Parse a Go file and extract its imports and function/method names.
+
+    Every function and method declaration is recorded, exported or not.
+    repo_module is currently unused; it is kept so existing callers still work.
+    """
+    parser = Parser(GO_LANGUAGE)
+    tree = parser.parse(content.encode())
+    root = tree.root_node
+
+    info = FileInfo(path=filepath, content=content)
+
+    for node in _find_nodes(root, "import_declaration"):
+        for spec in _find_nodes(node, "import_spec"):
+            path_node = spec.child_by_field_name("path")
+            if path_node:
+                # Import paths may be interpreted ("...") or raw (`...`) string
+                # literals, so strip both kinds of delimiter.
+                import_path = path_node.text.decode().strip('"`')
+                info.imports.append(import_path)
+
+    # Functions first, then methods; two passes keep that ordering.
+    for decl_type in ("function_declaration", "method_declaration"):
+        for node in _find_nodes(root, decl_type):
+            name_node = node.child_by_field_name("name")
+            if name_node:
+                info.functions.append(name_node.text.decode())
+
+    return info
+
+
+def filter_imports(imports: list[str], repo_module: str) -> list[str]:
+    """Keep only first-party imports (same module) and significant third-party.
+
+    Stdlib imports are dropped. Any other import is kept when it belongs to
+    repo_module or its first path element contains a dot (a host name).
+    """
+    result = []
+    for imp in imports:
+        top = imp.split("/")[0]
+        if imp in GO_STDLIB or top in GO_STDLIB:
+            continue
+        if _in_module(imp, repo_module) or "." in top:
+            result.append(imp)
+    return result
+
+
+def get_repo_module(repo_path: str) -> str:
+    """Read the module path from go.mod.
+
+    Returns "" when go.mod is missing or has no single-line module directive.
+    """
+    gomod = Path(repo_path) / "go.mod"
+    if gomod.exists():
+        for line in gomod.read_text(encoding="utf-8").splitlines():
+            # Drop a trailing // comment, then split on any whitespace so
+            # tab-separated and quoted module paths are handled too.
+            tokens = line.split("//", 1)[0].split()
+            if len(tokens) >= 2 and tokens[0] == "module" and tokens[1] != "(":
+                return tokens[1].strip('"`')
+    return ""
+
+
+def resolve_import_to_file(import_path: str, repo_module: str, go_files: dict[str, str]) -> str | None:
+    """Try to resolve an import path to a directory in the repo.
+
+    Returns the directory relative to the module root ("." for the root
+    package), or None when the import is outside repo_module or no file in
+    go_files lives in that directory.
+    """
+    if not _in_module(import_path, repo_module):
+        return None
+    rel_dir = import_path[len(repo_module):].lstrip("/")
+    for fpath in go_files:
+        fdir = str(Path(fpath).parent)
+        if not rel_dir:
+            # Root package: Path("x.go").parent is ".", never "", so it needs
+            # its own comparison.
+            if fdir == ".":
+                return "."
+        elif fdir == rel_dir or fdir.endswith("/" + rel_dir):
+            return rel_dir
+    return None
+
+
+def _in_module(import_path: str, repo_module: str) -> bool:
+    """Return True if import_path is repo_module itself or a package below it."""
+    # An empty module path (no go.mod) must not match every import, and the
+    # prefix must end on a path boundary so "a/b" does not claim "a/bc".
+    if not repo_module:
+        return False
+    return import_path == repo_module or import_path.startswith(repo_module + "/")
+
+
+def _find_nodes(node, type_name: str):
+    """Yield every node of the given type in the subtree rooted at node, in pre-order.
+
+    Uses an explicit stack instead of recursion so deeply nested syntax trees
+    cannot exceed Python's recursion limit.
+    """
+    stack = [node]
+    while stack:
+        current = stack.pop()
+        if current.type == type_name:
+            yield current
+        # Push children reversed so they are popped in source order.
+        stack.extend(reversed(current.children))
diff --git a/ingest.py b/ingest.py
index d194af9..2e49277 100644
--- a/ingest.py
+++ b/ingest.py
@@ -6,7 +6,7 @@ import time
 import json
 from pathlib import Path
 
-from parser import parse_go_file, filter_imports, get_repo_module, resolve_import_to_file
+from go_parser import parse_go_file, filter_imports, get_repo_module, resolve_import_to_file
 from docgen import generate_file_doc, generate_repo_doc, generate_docs_batch
 from db import GraphDB
 
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..5e1c8cf
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,14 @@
+[project]
+name = "dev-intel-poc"
+version = "0.1.0"
+description = "Developer Intelligence POC — knowledge graph from Go codebase"
+requires-python = ">=3.11"
+dependencies = [
+    "requests>=2.32",
+    "tree-sitter>=0.24",
+    "tree-sitter-go>=0.23",
+    "mcp>=1.9",
+]
+
+[tool.uv]
+dev-dependencies = []