Add OpenAI-compatible backend support (Kiro gateway, OpenRouter)
- `LLM_BACKEND=openai` routes requests to `/v1/chat/completions`
- Default backend remains `ollama` (unchanged behavior)
- For the Kiro gateway: `LLM_BACKEND=openai OPENAI_URL=http://192.168.86.11:8000 OPENAI_MODEL=claude-haiku-4`
- Updated README with the new environment variables
This commit is contained in:
@@ -125,9 +125,13 @@ dev-intel-poc/
|
|||||||
|
|
||||||
| Env Variable | Default | Description |
|---|---|---|
| `LLM_BACKEND` | `ollama` | `ollama` or `openai` (for Kiro gateway, OpenRouter, etc.) |
| `OLLAMA_URL` | `http://192.168.86.172:11434` | Ollama endpoint |
| `OLLAMA_MODEL` | `qwen2.5:7b` | Ollama model |
| `OPENAI_URL` | `http://192.168.86.11:8000` | OpenAI-compatible endpoint (Kiro gateway) |
| `OPENAI_MODEL` | `claude-haiku-4` | Model name for OpenAI-compatible API |
| `OPENAI_API_KEY` | `not-needed` | API key (if required by endpoint) |
| `TARGET_REPO` | `https://github.com/labstack/echo.git` | Repo to ingest |
| `MAX_CONCURRENT` | `4` | Parallel LLM requests |
| `DEVINTEL_DB` | `./devintel.db` | SQLite database path |
| `REPO_DIR` | `./repos/target` | Cloned repo location |
|||||||
49
docgen.py
49
docgen.py
@@ -1,12 +1,22 @@
|
|||||||
"""Ollama client for generating documentation."""
|
"""LLM client for generating documentation. Supports Ollama and OpenAI-compatible APIs."""
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import os
|
import os
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
# Backend selector: "ollama" (the default) or "openai".
LLM_BACKEND = os.getenv("LLM_BACKEND", "ollama")

# Ollama backend settings.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.86.172:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5:7b")

# OpenAI-compatible backend settings (works with Kiro gateway, OpenRouter, etc.).
OPENAI_URL = os.getenv("OPENAI_URL", "http://192.168.86.11:8000")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "claude-haiku-4")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "not-needed")

# Upper bound on concurrent LLM requests.
MAX_CONCURRENT = int(os.getenv("MAX_CONCURRENT", "4"))
|
||||||
|
|
||||||
|
|
||||||
@@ -32,7 +42,7 @@ File: {filepath}
|
|||||||
|
|
||||||
Documentation:"""
|
Documentation:"""
|
||||||
|
|
||||||
return _call_ollama(prompt)
|
return _call_llm(prompt)
|
||||||
|
|
||||||
|
|
||||||
def generate_relationship_doc(file_a: str, content_a: str, file_b: str, content_b: str) -> str:
|
def generate_relationship_doc(file_a: str, content_a: str, file_b: str, content_b: str) -> str:
|
||||||
@@ -58,7 +68,7 @@ File B: {file_b}
|
|||||||
|
|
||||||
Relationship:"""
|
Relationship:"""
|
||||||
|
|
||||||
return _call_ollama(prompt)
|
return _call_llm(prompt)
|
||||||
|
|
||||||
|
|
||||||
def generate_repo_doc(readme: str, entry_files: list[tuple[str, str]]) -> str:
|
def generate_repo_doc(readme: str, entry_files: list[tuple[str, str]]) -> str:
|
||||||
@@ -85,7 +95,7 @@ Key source files:
|
|||||||
|
|
||||||
Project Overview:"""
|
Project Overview:"""
|
||||||
|
|
||||||
return _call_ollama(prompt)
|
return _call_llm(prompt)
|
||||||
|
|
||||||
|
|
||||||
def generate_docs_batch(items: list[tuple[str, str]], doc_fn) -> list[str]:
|
def generate_docs_batch(items: list[tuple[str, str]], doc_fn) -> list[str]:
|
||||||
@@ -137,3 +147,34 @@ def _call_ollama(prompt: str, retries: int = 3) -> str:
|
|||||||
time.sleep(2 ** attempt)
|
time.sleep(2 ** attempt)
|
||||||
continue
|
continue
|
||||||
return f"[doc generation failed after {retries} attempts: {e}]"
|
return f"[doc generation failed after {retries} attempts: {e}]"
|
||||||
|
|
||||||
|
|
||||||
|
def _call_openai(prompt: str, retries: int = 3) -> str:
    """Send *prompt* to an OpenAI-compatible chat-completions endpoint.

    Works with any server exposing ``/v1/chat/completions`` (Kiro gateway,
    OpenRouter, etc.). Retries with exponential backoff; after the final
    failed attempt a bracketed error string is returned instead of raising,
    so batch doc generation keeps going.
    """
    # Request pieces are loop-invariant — build them once up front.
    endpoint = f"{OPENAI_URL}/v1/chat/completions"
    auth_headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
    payload = {
        "model": OPENAI_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
        "max_tokens": 256,
    }

    for attempt in range(retries):
        try:
            response = requests.post(
                endpoint,
                headers=auth_headers,
                json=payload,
                timeout=120,
            )
            response.raise_for_status()
            body = response.json()
            return body["choices"][0]["message"]["content"].strip()
        except Exception as exc:
            # Broad catch is deliberate: any failure (network, HTTP status,
            # malformed body) is retried, then reported as text.
            if attempt == retries - 1:
                return f"[doc generation failed after {retries} attempts: {exc}]"
            time.sleep(2 ** attempt)
|
||||||
|
|
||||||
|
|
||||||
|
def _call_llm(prompt: str) -> str:
    """Dispatch *prompt* to whichever backend LLM_BACKEND selects."""
    backend = _call_openai if LLM_BACKEND == "openai" else _call_ollama
    return backend(prompt)
|
||||||
|
|||||||
Reference in New Issue
Block a user