"""LLM client for generating documentation. Supports Ollama and OpenAI-compatible APIs."""
# Third-party HTTP client (only non-stdlib dependency).
import requests
# Stdlib: env/config, thread-pool fan-out, retry backoff, .env path handling.
import os
import concurrent.futures
import time

from pathlib import Path
# Load .env if present
_env_file = Path ( __file__ ) . parent / " .env "
if _env_file . exists ( ) :
for line in _env_file . read_text ( ) . splitlines ( ) :
line = line . strip ( )
if line and not line . startswith ( " # " ) and " = " in line :
key , _ , val = line . partition ( " = " )
os . environ . setdefault ( key . strip ( ) , val . strip ( ) )
# Backend selection: "ollama" (local Ollama server) or "openai"
# (any OpenAI-compatible chat-completions gateway).
LLM_BACKEND = os.environ.get("LLM_BACKEND", "ollama")

# Ollama settings
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://192.168.86.172:11434")  # base URL, no trailing slash
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5:7b")

# OpenAI-compatible settings (works with Kiro gateway, OpenRouter, etc.)
OPENAI_URL = os.environ.get("OPENAI_URL", "http://192.168.86.11:8000")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "claude-haiku-4")
# Some local gateways ignore auth entirely, hence the placeholder default.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "not-needed")

# Upper bound on concurrent LLM requests in generate_docs_batch.
MAX_CONCURRENT = int(os.environ.get("MAX_CONCURRENT", "4"))
def generate_file_doc(filepath: str, content: str, max_chars: int = 8000) -> str:
    """Generate documentation for a single file.

    Args:
        filepath: Repo-relative path of the file, included in the prompt.
        content: Source text; truncated to ``max_chars`` to bound prompt size.
        max_chars: Truncation cap (default 8000, the previous hard-coded value).

    Returns:
        The LLM-generated description, or an error placeholder string if the
        backend call ultimately fails (see _call_llm).
    """
    if len(content) > max_chars:
        content = content[:max_chars] + "\n\n... [truncated]"

    # NOTE(review): prompt is Go-specific ("documenting a Go codebase") —
    # matches the original; confirm before pointing this at other languages.
    prompt = f"""You are a senior software engineer documenting a Go codebase.

Describe what this file does in 2-4 sentences. Be specific about:
- The domain logic and purpose (not just "this file contains functions")
- Key types, interfaces, or structs defined
- How it fits into the larger system (if apparent from imports/naming)

Do NOT describe Go syntax or language mechanics. Describe WHAT the code does and WHY.

File: {filepath}
```go
{content}
```

Documentation:"""

    return _call_llm(prompt)
def generate_relationship_doc(
    file_a: str,
    content_a: str,
    file_b: str,
    content_b: str,
    max_chars: int = 4000,
) -> str:
    """Generate documentation for a relationship between two files.

    Args:
        file_a: Path of the depending file.
        content_a: Source of file A; truncated to ``max_chars``.
        file_b: Path of the depended-on file.
        content_b: Source of file B; truncated to ``max_chars``.
        max_chars: Per-file truncation cap (default 4000, the previous
            hard-coded value) — smaller than the single-file cap because
            two files share one prompt.

    Returns:
        The LLM-generated relationship description, or an error placeholder
        string if the backend call ultimately fails (see _call_llm).
    """
    if len(content_a) > max_chars:
        content_a = content_a[:max_chars] + "\n... [truncated]"
    if len(content_b) > max_chars:
        content_b = content_b[:max_chars] + "\n... [truncated]"

    prompt = f"""You are a senior software engineer documenting how two files in a Go codebase interact.

Describe in 1-2 sentences how File A uses or depends on File B. Be specific about which types, functions, or interfaces are shared.

File A: {file_a}
```go
{content_a}
```

File B: {file_b}
```go
{content_b}
```

Relationship:"""

    return _call_llm(prompt)
def generate_repo_doc(readme: str, entry_files: list[tuple[str, str]]) -> str:
    """Generate repo-level documentation from README and key entry points.

    Builds a structured onboarding-style prompt from the (truncated) README
    and up to five entry-point files, then asks the LLM for a markdown
    overview. Uses a larger completion budget (1024 tokens) than the
    per-file docs.
    """
    # At most five entry files, each capped at 2000 chars of content.
    file_blocks = [f"\n--- {path} ---\n{content[:2000]}\n" for path, content in entry_files[:5]]
    files_section = "".join(file_blocks)
    readme_section = readme[:3000]

    prompt = f"""You are a senior software engineer writing a project overview that will be consumed by AI coding agents and new developers joining the team.

Write a concise but opinionated overview of this project. This should read like the best possible onboarding document — the kind a senior engineer writes after spending a week with the codebase. It should help someone (human or AI) understand the system well enough to start making changes confidently.

Structure your response with these sections (use markdown headers):

## What This Project Does
One paragraph. What problem does it solve? Who uses it? Be specific, not generic.

## Architecture
How is the code organized? What are the key abstractions and how do they compose? Mention the main entry points and the flow of a typical request/operation through the system. Name specific files and types.

## Key Patterns
What conventions does this codebase follow? Middleware chains, handler signatures, error handling patterns, configuration approach. An agent needs to match these patterns when writing new code.

## Where Things Live
A brief map: which directories/files own which concerns. Focus on the non-obvious — things a newcomer would waste time searching for.

## Gotchas
Anything surprising, non-obvious, or easy to get wrong. Race conditions, initialization order, naming conventions that break expectations.

Be direct and opinionated. Say "the router is the heart of the system" not "the project contains routing functionality." Use file names and type names. Skip generic statements about Go or web frameworks.

README:
{readme_section}

Key source files (with their generated documentation):
{files_section}

Project Overview:"""

    return _call_llm(prompt, max_tokens=1024)
def generate_docs_batch ( items : list [ tuple [ str , str ] ] , doc_fn ) - > list [ str ] :
""" Generate docs for multiple items concurrently. """
results = [ None ] * len ( items )
with concurrent . futures . ThreadPoolExecutor ( max_workers = MAX_CONCURRENT ) as executor :
future_to_idx = { }
for i , ( filepath , content ) in enumerate ( items ) :
future = executor . submit ( doc_fn , filepath , content )
future_to_idx [ future ] = i
done = 0
total = len ( items )
for future in concurrent . futures . as_completed ( future_to_idx ) :
idx = future_to_idx [ future ]
try :
results [ idx ] = future . result ( )
except Exception as e :
results [ idx ] = f " [doc generation failed: { e } ] "
done + = 1
if done % 10 == 0 or done == total :
print ( f " Generated { done } / { total } docs " )
return results
2026-03-05 04:25:33 +00:00
def _call_ollama(prompt: str, retries: int = 3, max_tokens: int = 256) -> str:
    """Call Ollama's /api/generate endpoint with retries.

    Retries with exponential backoff (1s, 2s, 4s, ...). Never raises:
    after ``retries`` failed attempts the error is folded into a
    "[doc generation failed ...]" placeholder string so batch callers
    can keep going.
    """
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,  # single JSON response, not a streamed one
        "options": {
            "temperature": 0.3,  # low temperature for factual, stable docs
            "num_predict": max_tokens,
        },
    }
    for attempt in range(retries):
        try:
            resp = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=120)
            resp.raise_for_status()
            return resp.json()["response"].strip()
        except Exception as e:
            # Last attempt: give up and return the placeholder.
            if attempt == retries - 1:
                return f"[doc generation failed after {retries} attempts: {e}]"
            time.sleep(2 ** attempt)
def _call_openai(prompt: str, retries: int = 3, max_tokens: int = 256) -> str:
    """Call an OpenAI-compatible chat-completions API (Kiro gateway, OpenRouter, etc.).

    Retries with exponential backoff (1s, 2s, 4s, ...). Never raises:
    after ``retries`` failed attempts the error is folded into a
    "[doc generation failed ...]" placeholder string so batch callers
    can keep going.
    """
    endpoint = f"{OPENAI_URL}/v1/chat/completions"
    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
    body = {
        "model": OPENAI_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,  # low temperature for factual, stable docs
        "max_tokens": max_tokens,
    }
    for attempt in range(retries):
        try:
            resp = requests.post(endpoint, headers=headers, json=body, timeout=120)
            resp.raise_for_status()
            return resp.json()["choices"][0]["message"]["content"].strip()
        except Exception as e:
            # Last attempt: give up and return the placeholder.
            if attempt == retries - 1:
                return f"[doc generation failed after {retries} attempts: {e}]"
            time.sleep(2 ** attempt)
def _call_llm(prompt: str, max_tokens: int = 256) -> str:
    """Route a prompt to the backend selected by LLM_BACKEND.

    "openai" dispatches to _call_openai; anything else falls back to
    _call_ollama (the default backend).
    """
    backend = _call_openai if LLM_BACKEND == "openai" else _call_ollama
    return backend(prompt, max_tokens=max_tokens)