Add OpenAI-compatible backend support (Kiro gateway, OpenRouter)

- LLM_BACKEND=openai routes to /v1/chat/completions
- Default: ollama (unchanged)
- For Kiro gateway: LLM_BACKEND=openai OPENAI_URL=http://192.168.86.11:8000 OPENAI_MODEL=claude-haiku-4
- Updated README with new env vars
Jarvis Prime
2026-03-04 04:37:46 +00:00
parent 20253329e4
commit 65e114a5d6
2 changed files with 51 additions and 6 deletions


@@ -125,9 +125,13 @@ dev-intel-poc/
 | Env Variable | Default | Description |
 |---|---|---|
+| `LLM_BACKEND` | `ollama` | `ollama` or `openai` (for Kiro gateway, OpenRouter, etc.) |
 | `OLLAMA_URL` | `http://192.168.86.172:11434` | Ollama endpoint |
-| `OLLAMA_MODEL` | `qwen2.5:7b` | Model for doc generation |
+| `OLLAMA_MODEL` | `qwen2.5:7b` | Ollama model |
+| `OPENAI_URL` | `http://192.168.86.11:8000` | OpenAI-compatible endpoint (Kiro gateway) |
+| `OPENAI_MODEL` | `claude-haiku-4` | Model name for OpenAI-compatible API |
+| `OPENAI_API_KEY` | `not-needed` | API key (if required by endpoint) |
 | `TARGET_REPO` | `https://github.com/labstack/echo.git` | Repo to ingest |
-| `MAX_CONCURRENT` | `4` | Parallel Ollama requests |
+| `MAX_CONCURRENT` | `4` | Parallel LLM requests |
 | `DEVINTEL_DB` | `./devintel.db` | SQLite database path |
 | `REPO_DIR` | `./repos/target` | Cloned repo location |

@@ -1,12 +1,22 @@
"""Ollama client for generating documentation."""
"""LLM client for generating documentation. Supports Ollama and OpenAI-compatible APIs."""
import requests
import os
import concurrent.futures
import time
# Backend: "ollama" or "openai"
LLM_BACKEND = os.environ.get("LLM_BACKEND", "ollama")
# Ollama settings
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://192.168.86.172:11434")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5:7b")
# OpenAI-compatible settings (works with Kiro gateway, OpenRouter, etc.)
OPENAI_URL = os.environ.get("OPENAI_URL", "http://192.168.86.11:8000")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "claude-haiku-4")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "not-needed")
MAX_CONCURRENT = int(os.environ.get("MAX_CONCURRENT", "4"))
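One behavioral note on the block above: the os.environ.get calls run at import time, so flipping LLM_BACKEND later in the same process has no effect without a module reload. A minimal sketch, assuming the module imports as llm_client (its filename is not shown in this diff):

import importlib
import os

import llm_client  # hypothetical module name

os.environ["LLM_BACKEND"] = "openai"
importlib.reload(llm_client)  # re-evaluates the module-level env lookups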
@@ -32,7 +42,7 @@ File: {filepath}
Documentation:"""
return _call_ollama(prompt)
return _call_llm(prompt)
def generate_relationship_doc(file_a: str, content_a: str, file_b: str, content_b: str) -> str:
@@ -58,7 +68,7 @@ File B: {file_b}
Relationship:"""
return _call_ollama(prompt)
return _call_llm(prompt)
def generate_repo_doc(readme: str, entry_files: list[tuple[str, str]]) -> str:
@@ -85,7 +95,7 @@ Key source files:
Project Overview:"""
return _call_ollama(prompt)
return _call_llm(prompt)
def generate_docs_batch(items: list[tuple[str, str]], doc_fn) -> list[str]:
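The body of generate_docs_batch lies outside this hunk; presumably it fans the prompts out over a thread pool bounded by MAX_CONCURRENT. A purely illustrative reconstruction, not the committed code:

import concurrent.futures
import os

MAX_CONCURRENT = int(os.environ.get("MAX_CONCURRENT", "4"))

def generate_docs_batch(items, doc_fn):
    # Illustrative only: apply doc_fn to each tuple's elements in parallel,
    # capped at MAX_CONCURRENT workers, preserving input order.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_CONCURRENT) as pool:
        return list(pool.map(lambda item: doc_fn(*item), items))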
@@ -137,3 +147,34 @@ def _call_ollama(prompt: str, retries: int = 3) -> str:
                 time.sleep(2 ** attempt)
                 continue
             return f"[doc generation failed after {retries} attempts: {e}]"
+
+
+def _call_openai(prompt: str, retries: int = 3) -> str:
+    """Call OpenAI-compatible API (Kiro gateway, OpenRouter, etc.)."""
+    for attempt in range(retries):
+        try:
+            resp = requests.post(
+                f"{OPENAI_URL}/v1/chat/completions",
+                headers={"Authorization": f"Bearer {OPENAI_API_KEY}"},
+                json={
+                    "model": OPENAI_MODEL,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "temperature": 0.3,
+                    "max_tokens": 256,
+                },
+                timeout=120,
+            )
+            resp.raise_for_status()
+            return resp.json()["choices"][0]["message"]["content"].strip()
+        except Exception as e:
+            if attempt < retries - 1:
+                time.sleep(2 ** attempt)
+                continue
+            return f"[doc generation failed after {retries} attempts: {e}]"
+
+
+def _call_llm(prompt: str) -> str:
+    """Route to the configured backend."""
+    if LLM_BACKEND == "openai":
+        return _call_openai(prompt)
+    return _call_ollama(prompt)
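
A quick smoke test of the routing, as a sketch (again assuming the hypothetical llm_client module name; _call_llm is internal, so production callers would go through the generate_* helpers):

import os

os.environ["LLM_BACKEND"] = "openai"  # or "ollama", the default

import llm_client  # hypothetical module name

# _call_llm dispatches on LLM_BACKEND: "openai" -> _call_openai, else _call_ollama.
print(llm_client._call_llm("Reply with the single word OK."))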