Fix: add missing parser/docgen, rename parser→go_parser, add uv packaging
- parser.py renamed to go_parser.py (avoids shadowing Python's standard-library `parser` module)
- docgen.py was missing from the flat structure
- Added pyproject.toml for uv
- Updated .mcp.json to use `uv run`
- Updated README for the uv workflow
This commit is contained in:
@@ -1,8 +1,8 @@
|
|||||||
{
|
{
|
||||||
"mcpServers": {
|
"mcpServers": {
|
||||||
"dev-intel": {
|
"dev-intel": {
|
||||||
"command": "python",
|
"command": "uv",
|
||||||
"args": ["mcp_server.py"],
|
"args": ["run", "python", "mcp_server.py"],
|
||||||
"cwd": "."
|
"cwd": "."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,8 +6,8 @@ A local proof-of-concept that builds a living knowledge graph from a Go codebase
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd dev-intel-poc
|
cd dev-intel-poc
|
||||||
pip install -r requirements.txt
|
uv sync # install deps
|
||||||
python ingest.py # clone echo, parse, generate docs (~15-20 min)
|
uv run python ingest.py # clone echo, parse, generate docs (~15-20 min)
|
||||||
claude --mcp-config .mcp.json # start Claude Code with the knowledge graph
|
claude --mcp-config .mcp.json # start Claude Code with the knowledge graph
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -19,6 +19,7 @@ Then ask Claude Code:
|
|||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
- Python 3.11+
|
- Python 3.11+
|
||||||
|
- [uv](https://docs.astral.sh/uv/) (`curl -LsSf https://astral.sh/uv/install.sh | sh`)
|
||||||
- Ollama running at `192.168.86.172:11434` with `qwen2.5:7b`
|
- Ollama running at `192.168.86.172:11434` with `qwen2.5:7b`
|
||||||
- Claude Code CLI (`claude`)
|
- Claude Code CLI (`claude`)
|
||||||
- git
|
- git
|
||||||
|
|||||||
139
docgen.py
Normal file
139
docgen.py
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
"""Ollama client for generating documentation."""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
import concurrent.futures
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Base URL of the Ollama server; override via the OLLAMA_URL env var.
# NOTE(review): default points at a LAN host — confirm it matches your setup.
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://192.168.86.172:11434")
# Model name passed to every /api/generate call; override via OLLAMA_MODEL.
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5:7b")
# Thread-pool width used by generate_docs_batch; override via MAX_CONCURRENT.
MAX_CONCURRENT = int(os.environ.get("MAX_CONCURRENT", "4"))
|
||||||
|
|
||||||
|
|
||||||
|
def generate_file_doc(filepath: str, content: str) -> str:
    """Ask the model for a short prose description of one Go source file.

    The file body is capped at 8000 characters before prompting so the
    request stays within the model's context window.
    """
    snippet = content[:8000] + "\n\n... [truncated]" if len(content) > 8000 else content

    # The prompt steers the model toward domain intent rather than syntax.
    return _call_ollama(f"""You are a senior software engineer documenting a Go codebase.

Describe what this file does in 2-4 sentences. Be specific about:
- The domain logic and purpose (not just "this file contains functions")
- Key types, interfaces, or structs defined
- How it fits into the larger system (if apparent from imports/naming)

Do NOT describe Go syntax or language mechanics. Describe WHAT the code does and WHY.

File: {filepath}

```go
{snippet}
```

Documentation:""")
|
||||||
|
|
||||||
|
|
||||||
|
def generate_relationship_doc(file_a: str, content_a: str, file_b: str, content_b: str) -> str:
    """Ask the model how file A uses or depends on file B (1-2 sentences).

    Each file body is capped at 4000 characters so the combined prompt
    stays within the model's context window.
    """

    def _clip(text: str) -> str:
        # Truncation marker matches the one used elsewhere in this module.
        return text[:4000] + "\n... [truncated]" if len(text) > 4000 else text

    snippet_a = _clip(content_a)
    snippet_b = _clip(content_b)

    return _call_ollama(f"""You are a senior software engineer documenting how two files in a Go codebase interact.

Describe in 1-2 sentences how File A uses or depends on File B. Be specific about which types, functions, or interfaces are shared.

File A: {file_a}
```go
{snippet_a}
```

File B: {file_b}
```go
{snippet_b}
```

Relationship:""")
|
||||||
|
|
||||||
|
|
||||||
|
def generate_repo_doc(readme: str, entry_files: list[tuple[str, str]]) -> str:
    """Ask the model for a repo-level overview built from the README and
    up to five key entry-point files.

    README input is capped at 3000 characters and each file snippet at
    2000 (slicing past the end of a short string is a no-op).
    """
    files_section = "".join(
        f"\n--- {path} ---\n{content[:2000]}\n" for path, content in entry_files[:5]
    )
    readme_section = readme[:3000]

    prompt = f"""You are a senior software engineer writing a project overview.

Based on the README and key source files below, write a 4-6 sentence summary of this project. Cover:
- What the project does (its purpose)
- Key architectural patterns (routing, middleware, etc.)
- The main abstractions and how they fit together

README:
{readme_section}

Key source files:
{files_section}

Project Overview:"""

    return _call_ollama(prompt)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_docs_batch(items: list[tuple[str, str]], doc_fn) -> list[str]:
|
||||||
|
"""Generate docs for multiple items concurrently."""
|
||||||
|
results = [None] * len(items)
|
||||||
|
|
||||||
|
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_CONCURRENT) as executor:
|
||||||
|
future_to_idx = {}
|
||||||
|
for i, (filepath, content) in enumerate(items):
|
||||||
|
future = executor.submit(doc_fn, filepath, content)
|
||||||
|
future_to_idx[future] = i
|
||||||
|
|
||||||
|
done = 0
|
||||||
|
total = len(items)
|
||||||
|
for future in concurrent.futures.as_completed(future_to_idx):
|
||||||
|
idx = future_to_idx[future]
|
||||||
|
try:
|
||||||
|
results[idx] = future.result()
|
||||||
|
except Exception as e:
|
||||||
|
results[idx] = f"[doc generation failed: {e}]"
|
||||||
|
done += 1
|
||||||
|
if done % 10 == 0 or done == total:
|
||||||
|
print(f" Generated {done}/{total} docs")
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def _call_ollama(prompt: str, retries: int = 3) -> str:
    """POST the prompt to Ollama's /api/generate endpoint.

    Retries up to `retries` times with exponential backoff (1s, 2s, 4s, ...).
    On final failure the error is returned as a bracketed placeholder string
    instead of raising, so batch callers never crash.
    """
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        # Low temperature + bounded output keeps docs focused and cheap.
        "options": {
            "temperature": 0.3,
            "num_predict": 256,
        },
    }

    for attempt in range(retries):
        try:
            resp = requests.post(
                f"{OLLAMA_URL}/api/generate",
                json=payload,
                timeout=120,
            )
            resp.raise_for_status()
            return resp.json()["response"].strip()
        except Exception as e:
            # Last attempt: surface the error as a placeholder string.
            if attempt == retries - 1:
                return f"[doc generation failed after {retries} attempts: {e}]"
            time.sleep(2 ** attempt)
|
||||||
104
go_parser.py
Normal file
104
go_parser.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
"""Go AST parser using tree-sitter. Extracts imports and function calls."""
|
||||||
|
|
||||||
|
import tree_sitter_go as tsgo
|
||||||
|
from tree_sitter import Language, Parser
|
||||||
|
from pathlib import Path
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
# Compiled tree-sitter grammar for Go, shared by every parse_go_file call.
GO_LANGUAGE = Language(tsgo.language())

# stdlib packages to filter out
# filter_imports drops an import when the full path OR its first path
# segment appears here; the multi-segment entries below are redundant with
# their top-level names but kept for explicitness.
GO_STDLIB = {
    "fmt", "os", "io", "log", "net", "http", "context", "sync", "time",
    "strings", "strconv", "bytes", "errors", "sort", "math", "path",
    "encoding", "crypto", "reflect", "testing", "flag", "regexp",
    "bufio", "archive", "compress", "container", "database", "debug",
    "embed", "go", "hash", "html", "image", "index", "internal",
    "mime", "plugin", "runtime", "syscall", "text", "unicode", "unsafe",
    "encoding/json", "encoding/xml", "encoding/base64", "encoding/binary",
    "encoding/csv", "encoding/gob", "encoding/hex", "encoding/pem",
    "net/http", "net/url", "net/http/httptest", "io/ioutil", "io/fs",
    "os/exec", "os/signal", "path/filepath", "sync/atomic",
    "crypto/tls", "crypto/rand", "crypto/sha256", "crypto/hmac",
    "log/slog", "testing/fstest",
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FileInfo:
    """Parsed facts about a single Go source file."""

    # Path of the file as supplied to parse_go_file (presumably repo-relative
    # — confirm against the caller).
    path: str
    # Full source text of the file.
    content: str
    # Import paths found in import declarations, unfiltered.
    imports: list[str] = field(default_factory=list)
    # Names of all function and method declarations (not just exported ones).
    functions: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_go_file(filepath: str, content: str, repo_module: str) -> FileInfo:
    """Parse one Go source file and collect its import paths plus the names
    of every function and method declaration.

    `repo_module` is accepted for signature compatibility but not consulted
    here; import filtering happens later in `filter_imports`.
    """
    root = Parser(GO_LANGUAGE).parse(content.encode()).root_node
    info = FileInfo(path=filepath, content=content)

    # Import specs live inside import declarations; strip the surrounding
    # quotes from each path literal.
    for decl in _find_nodes(root, "import_declaration"):
        for spec in _find_nodes(decl, "import_spec"):
            path_node = spec.child_by_field_name("path")
            if path_node:
                info.imports.append(path_node.text.decode().strip('"'))

    # Plain functions first, then methods — preserves the original ordering.
    for decl_kind in ("function_declaration", "method_declaration"):
        for decl in _find_nodes(root, decl_kind):
            name_node = decl.child_by_field_name("name")
            if name_node:
                info.functions.append(name_node.text.decode())

    return info
|
||||||
|
|
||||||
|
|
||||||
|
def filter_imports(imports: list[str], repo_module: str) -> list[str]:
    """Keep only first-party imports (same module) and significant third-party.

    Drops anything whose full path or first segment is in GO_STDLIB; keeps
    imports under `repo_module` and third-party paths (first segment contains
    a dot, e.g. "github.com").
    """

    def _keep(imp: str) -> bool:
        top = imp.partition("/")[0]
        if imp in GO_STDLIB or top in GO_STDLIB:
            return False
        return imp.startswith(repo_module) or "." in top

    return [imp for imp in imports if _keep(imp)]
|
||||||
|
|
||||||
|
|
||||||
|
def get_repo_module(repo_path: str) -> str:
    """Return the module path declared in `<repo_path>/go.mod`.

    Returns "" when go.mod is absent or contains no `module` directive.
    """
    gomod = Path(repo_path) / "go.mod"
    if not gomod.exists():
        return ""
    for line in gomod.read_text().splitlines():
        # The directive looks like: module github.com/org/repo
        if line.startswith("module "):
            return line.removeprefix("module ").strip()
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_import_to_file(import_path: str, repo_module: str, go_files: dict[str, str]) -> str | None:
|
||||||
|
"""Try to resolve an import path to a directory in the repo."""
|
||||||
|
if not import_path.startswith(repo_module):
|
||||||
|
return None
|
||||||
|
rel_dir = import_path[len(repo_module):].lstrip("/")
|
||||||
|
for fpath in go_files:
|
||||||
|
fdir = str(Path(fpath).parent)
|
||||||
|
if fdir == rel_dir or fdir.endswith("/" + rel_dir):
|
||||||
|
return rel_dir
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _find_nodes(node, type_name: str):
|
||||||
|
"""Recursively find all nodes of a given type."""
|
||||||
|
if node.type == type_name:
|
||||||
|
yield node
|
||||||
|
for child in node.children:
|
||||||
|
yield from _find_nodes(child, type_name)
|
||||||
@@ -6,7 +6,7 @@ import time
|
|||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from parser import parse_go_file, filter_imports, get_repo_module, resolve_import_to_file
|
from go_parser import parse_go_file, filter_imports, get_repo_module, resolve_import_to_file
|
||||||
from docgen import generate_file_doc, generate_repo_doc, generate_docs_batch
|
from docgen import generate_file_doc, generate_repo_doc, generate_docs_batch
|
||||||
from db import GraphDB
|
from db import GraphDB
|
||||||
|
|
||||||
|
|||||||
14
pyproject.toml
Normal file
14
pyproject.toml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
[project]
|
||||||
|
name = "dev-intel-poc"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Developer Intelligence POC — knowledge graph from Go codebase"
|
||||||
|
requires-python = ">=3.11"
|
||||||
|
dependencies = [
|
||||||
|
"requests>=2.32",
|
||||||
|
"tree-sitter>=0.24",
|
||||||
|
"tree-sitter-go>=0.23",
|
||||||
|
"mcp>=1.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.uv]
|
||||||
|
dev-dependencies = []
|
||||||
Reference in New Issue
Block a user