Fix: add missing parser/docgen, rename parser→go_parser, add uv packaging
- parser.py renamed to go_parser.py (avoids shadowing Python's standard-library `parser` module)
- docgen.py was missing from the flat structure
- Added pyproject.toml for uv
- Updated .mcp.json to use `uv run`
- Updated README for the uv workflow
This commit is contained in:
@@ -1,8 +1,8 @@
|
|||||||
{
|
{
|
||||||
"mcpServers": {
|
"mcpServers": {
|
||||||
"dev-intel": {
|
"dev-intel": {
|
||||||
"command": "python",
|
"command": "uv",
|
||||||
"args": ["mcp_server.py"],
|
"args": ["run", "python", "mcp_server.py"],
|
||||||
"cwd": "."
|
"cwd": "."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,8 +6,8 @@ A local proof-of-concept that builds a living knowledge graph from a Go codebase
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd dev-intel-poc
|
cd dev-intel-poc
|
||||||
pip install -r requirements.txt
|
uv sync # install deps
|
||||||
python ingest.py # clone echo, parse, generate docs (~15-20 min)
|
uv run python ingest.py # clone echo, parse, generate docs (~15-20 min)
|
||||||
claude --mcp-config .mcp.json # start Claude Code with the knowledge graph
|
claude --mcp-config .mcp.json # start Claude Code with the knowledge graph
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -19,6 +19,7 @@ Then ask Claude Code:
|
|||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
- Python 3.11+
|
- Python 3.11+
|
||||||
|
- [uv](https://docs.astral.sh/uv/) (`curl -LsSf https://astral.sh/uv/install.sh | sh`)
|
||||||
- Ollama running at `192.168.86.172:11434` with `qwen2.5:7b`
|
- Ollama running at `192.168.86.172:11434` with `qwen2.5:7b`
|
||||||
- Claude Code CLI (`claude`)
|
- Claude Code CLI (`claude`)
|
||||||
- git
|
- git
|
||||||
|
|||||||
139
docgen.py
Normal file
139
docgen.py
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
"""Ollama client for generating documentation."""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
import concurrent.futures
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Base URL of the Ollama server; override via the OLLAMA_URL env var.
# NOTE(review): default points at a LAN host — confirm it matches your setup.
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://192.168.86.172:11434")
# Model name passed to every /api/generate call; override via OLLAMA_MODEL.
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5:7b")
# Thread-pool width used by generate_docs_batch; override via MAX_CONCURRENT.
MAX_CONCURRENT = int(os.environ.get("MAX_CONCURRENT", "4"))
|
||||||
|
|
||||||
|
|
||||||
|
def generate_file_doc(filepath: str, content: str) -> str:
    """Ask the model for a short prose description of one Go source file.

    The file body is capped at 8000 characters before prompting so the
    request stays within the model's context window.
    """
    snippet = content[:8000] + "\n\n... [truncated]" if len(content) > 8000 else content

    # The prompt steers the model toward domain intent rather than syntax.
    return _call_ollama(f"""You are a senior software engineer documenting a Go codebase.

Describe what this file does in 2-4 sentences. Be specific about:
- The domain logic and purpose (not just "this file contains functions")
- Key types, interfaces, or structs defined
- How it fits into the larger system (if apparent from imports/naming)

Do NOT describe Go syntax or language mechanics. Describe WHAT the code does and WHY.

File: {filepath}

```go
{snippet}
```

Documentation:""")
|
||||||
|
|
||||||
|
|
||||||
|
def generate_relationship_doc(file_a: str, content_a: str, file_b: str, content_b: str) -> str:
    """Ask the model how file A uses or depends on file B (1-2 sentences).

    Each file body is capped at 4000 characters so the combined prompt
    stays within the model's context window.
    """

    def _clip(text: str) -> str:
        # Truncation marker matches the one used elsewhere in this module.
        return text[:4000] + "\n... [truncated]" if len(text) > 4000 else text

    snippet_a = _clip(content_a)
    snippet_b = _clip(content_b)

    return _call_ollama(f"""You are a senior software engineer documenting how two files in a Go codebase interact.

Describe in 1-2 sentences how File A uses or depends on File B. Be specific about which types, functions, or interfaces are shared.

File A: {file_a}
```go
{snippet_a}
```

File B: {file_b}
```go
{snippet_b}
```

Relationship:""")
|
||||||
|
|
||||||
|
|
||||||
|
def generate_repo_doc(readme: str, entry_files: list[tuple[str, str]]) -> str:
    """Ask the model for a repo-level overview built from the README and
    up to five key entry-point files.

    README input is capped at 3000 characters and each file snippet at
    2000 (slicing past the end of a short string is a no-op).
    """
    files_section = "".join(
        f"\n--- {path} ---\n{content[:2000]}\n" for path, content in entry_files[:5]
    )
    readme_section = readme[:3000]

    prompt = f"""You are a senior software engineer writing a project overview.

Based on the README and key source files below, write a 4-6 sentence summary of this project. Cover:
- What the project does (its purpose)
- Key architectural patterns (routing, middleware, etc.)
- The main abstractions and how they fit together

README:
{readme_section}

Key source files:
{files_section}

Project Overview:"""

    return _call_ollama(prompt)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_docs_batch(items: list[tuple[str, str]], doc_fn) -> list[str]:
|
||||||
|
"""Generate docs for multiple items concurrently."""
|
||||||
|
results = [None] * len(items)
|
||||||
|
|
||||||
|
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_CONCURRENT) as executor:
|
||||||
|
future_to_idx = {}
|
||||||
|
for i, (filepath, content) in enumerate(items):
|
||||||
|
future = executor.submit(doc_fn, filepath, content)
|
||||||
|
future_to_idx[future] = i
|
||||||
|
|
||||||
|
done = 0
|
||||||
|
total = len(items)
|
||||||
|
for future in concurrent.futures.as_completed(future_to_idx):
|
||||||
|
idx = future_to_idx[future]
|
||||||
|
try:
|
||||||
|
results[idx] = future.result()
|
||||||
|
except Exception as e:
|
||||||
|
results[idx] = f"[doc generation failed: {e}]"
|
||||||
|
done += 1
|
||||||
|
if done % 10 == 0 or done == total:
|
||||||
|
print(f" Generated {done}/{total} docs")
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def _call_ollama(prompt: str, retries: int = 3) -> str:
    """POST the prompt to Ollama's /api/generate endpoint.

    Retries up to `retries` times with exponential backoff (1s, 2s, 4s, ...).
    On final failure the error is returned as a bracketed placeholder string
    instead of raising, so batch callers never crash.
    """
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        # Low temperature + bounded output keeps docs focused and cheap.
        "options": {
            "temperature": 0.3,
            "num_predict": 256,
        },
    }

    for attempt in range(retries):
        try:
            resp = requests.post(
                f"{OLLAMA_URL}/api/generate",
                json=payload,
                timeout=120,
            )
            resp.raise_for_status()
            return resp.json()["response"].strip()
        except Exception as e:
            # Last attempt: surface the error as a placeholder string.
            if attempt == retries - 1:
                return f"[doc generation failed after {retries} attempts: {e}]"
            time.sleep(2 ** attempt)
|
||||||
104
go_parser.py
Normal file
104
go_parser.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
"""Go AST parser using tree-sitter. Extracts imports and function calls."""
|
||||||
|
|
||||||
|
import tree_sitter_go as tsgo
|
||||||
|
from tree_sitter import Language, Parser
|
||||||
|
from pathlib import Path
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
# Compiled tree-sitter grammar for Go, shared by every parse_go_file call.
GO_LANGUAGE = Language(tsgo.language())

# stdlib packages to filter out
# filter_imports drops an import when the full path OR its first path
# segment appears here; the multi-segment entries below are redundant with
# their top-level names but kept for explicitness.
GO_STDLIB = {
    "fmt", "os", "io", "log", "net", "http", "context", "sync", "time",
    "strings", "strconv", "bytes", "errors", "sort", "math", "path",
    "encoding", "crypto", "reflect", "testing", "flag", "regexp",
    "bufio", "archive", "compress", "container", "database", "debug",
    "embed", "go", "hash", "html", "image", "index", "internal",
    "mime", "plugin", "runtime", "syscall", "text", "unicode", "unsafe",
    "encoding/json", "encoding/xml", "encoding/base64", "encoding/binary",
    "encoding/csv", "encoding/gob", "encoding/hex", "encoding/pem",
    "net/http", "net/url", "net/http/httptest", "io/ioutil", "io/fs",
    "os/exec", "os/signal", "path/filepath", "sync/atomic",
    "crypto/tls", "crypto/rand", "crypto/sha256", "crypto/hmac",
    "log/slog", "testing/fstest",
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FileInfo:
    """Parsed facts about a single Go source file."""

    # Path of the file as supplied to parse_go_file (presumably repo-relative
    # — confirm against the caller).
    path: str
    # Full source text of the file.
    content: str
    # Import paths found in import declarations, unfiltered.
    imports: list[str] = field(default_factory=list)
    # Names of all function and method declarations (not just exported ones).
    functions: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_go_file(filepath: str, content: str, repo_module: str) -> FileInfo:
    """Parse one Go source file and collect its import paths plus the names
    of every function and method declaration.

    `repo_module` is accepted for signature compatibility but not consulted
    here; import filtering happens later in `filter_imports`.
    """
    root = Parser(GO_LANGUAGE).parse(content.encode()).root_node
    info = FileInfo(path=filepath, content=content)

    # Import specs live inside import declarations; strip the surrounding
    # quotes from each path literal.
    for decl in _find_nodes(root, "import_declaration"):
        for spec in _find_nodes(decl, "import_spec"):
            path_node = spec.child_by_field_name("path")
            if path_node:
                info.imports.append(path_node.text.decode().strip('"'))

    # Plain functions first, then methods — preserves the original ordering.
    for decl_kind in ("function_declaration", "method_declaration"):
        for decl in _find_nodes(root, decl_kind):
            name_node = decl.child_by_field_name("name")
            if name_node:
                info.functions.append(name_node.text.decode())

    return info
|
||||||
|
|
||||||
|
|
||||||
|
def filter_imports(imports: list[str], repo_module: str) -> list[str]:
    """Keep only first-party imports (same module) and significant third-party.

    Drops anything whose full path or first segment is in GO_STDLIB; keeps
    imports under `repo_module` and third-party paths (first segment contains
    a dot, e.g. "github.com").
    """

    def _keep(imp: str) -> bool:
        top = imp.partition("/")[0]
        if imp in GO_STDLIB or top in GO_STDLIB:
            return False
        return imp.startswith(repo_module) or "." in top

    return [imp for imp in imports if _keep(imp)]
|
||||||
|
|
||||||
|
|
||||||
|
def get_repo_module(repo_path: str) -> str:
    """Return the module path declared in `<repo_path>/go.mod`.

    Returns "" when go.mod is absent or contains no `module` directive.
    """
    gomod = Path(repo_path) / "go.mod"
    if not gomod.exists():
        return ""
    for line in gomod.read_text().splitlines():
        # The directive looks like: module github.com/org/repo
        if line.startswith("module "):
            return line.removeprefix("module ").strip()
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_import_to_file(import_path: str, repo_module: str, go_files: dict[str, str]) -> str | None:
|
||||||
|
"""Try to resolve an import path to a directory in the repo."""
|
||||||
|
if not import_path.startswith(repo_module):
|
||||||
|
return None
|
||||||
|
rel_dir = import_path[len(repo_module):].lstrip("/")
|
||||||
|
for fpath in go_files:
|
||||||
|
fdir = str(Path(fpath).parent)
|
||||||
|
if fdir == rel_dir or fdir.endswith("/" + rel_dir):
|
||||||
|
return rel_dir
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _find_nodes(node, type_name: str):
|
||||||
|
"""Recursively find all nodes of a given type."""
|
||||||
|
if node.type == type_name:
|
||||||
|
yield node
|
||||||
|
for child in node.children:
|
||||||
|
yield from _find_nodes(child, type_name)
|
||||||
@@ -6,7 +6,7 @@ import time
|
|||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from parser import parse_go_file, filter_imports, get_repo_module, resolve_import_to_file
|
from go_parser import parse_go_file, filter_imports, get_repo_module, resolve_import_to_file
|
||||||
from docgen import generate_file_doc, generate_repo_doc, generate_docs_batch
|
from docgen import generate_file_doc, generate_repo_doc, generate_docs_batch
|
||||||
from db import GraphDB
|
from db import GraphDB
|
||||||
|
|
||||||
|
|||||||
14
pyproject.toml
Normal file
14
pyproject.toml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
[project]
|
||||||
|
name = "dev-intel-poc"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Developer Intelligence POC — knowledge graph from Go codebase"
|
||||||
|
requires-python = ">=3.11"
|
||||||
|
dependencies = [
|
||||||
|
"requests>=2.32",
|
||||||
|
"tree-sitter>=0.24",
|
||||||
|
"tree-sitter-go>=0.23",
|
||||||
|
"mcp>=1.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.uv]
|
||||||
|
dev-dependencies = []
|
||||||
Reference in New Issue
Block a user