commit 60850d975e52a548e96647dd1d74244da49e5079 Author: Jarvis Prime Date: Mon Mar 23 12:22:54 2026 +0000 Initial commit: OpenAI-compatible Piper TTS proxy diff --git a/README.md b/README.md new file mode 100644 index 0000000..0712d5c --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +# piper-openai-proxy + +OpenAI-compatible TTS API proxy for [Piper](https://github.com/rhasspy/piper) via the [Wyoming protocol](https://github.com/rhasspy/wyoming). + +Any tool that speaks the OpenAI `/v1/audio/speech` API can use your local Piper instance without modification. + +## Usage + +```bash +python3 server.py +``` + +Environment variables: +- `PIPER_HOST` — Piper Wyoming host (default: `192.168.86.11`) +- `PIPER_PORT` — Piper Wyoming port (default: `10200`) +- `LISTEN_PORT` — Proxy listen port (default: `8951`) + +## API + +```bash +# Generate speech +curl -X POST http://localhost:8951/v1/audio/speech \ + -H "Content-Type: application/json" \ + -d '{"input": "Hello world"}' \ + -o speech.wav + +# Health check +curl http://localhost:8951/health +``` + +## Requirements + +- Python 3.8+ +- No dependencies (stdlib only) +- A running [Piper Wyoming server](https://github.com/rhasspy/wyoming-piper) + +## How it works + +The proxy translates OpenAI TTS API requests into Wyoming protocol messages over a raw TCP socket. Piper generates the audio, and the proxy wraps the raw PCM in a WAV container and returns it. 
# ---------------------------------------------------------------------------
# Recovered from a flattened `git show` dump.  The tail of README.md and the
# diff header that preceded this module are preserved here as comments:
#
#   ## License
#
#   MIT
#
#   diff --git a/server.py b/server.py
#   new file mode 100644
#   index 0000000..4d987de
#   --- /dev/null
#   +++ b/server.py
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
"""OpenAI-compatible TTS API proxy for Piper (Wyoming protocol).

Accepts ``POST /v1/audio/speech`` with a JSON body containing ``input``,
forwards the text to a Piper server over a raw TCP socket using Wyoming
protocol events, and returns the synthesized audio wrapped in a WAV
container.  The response is always ``audio/wav``; other fields of the
request body are ignored.

Configuration is read from the environment at import time:

* ``PIPER_HOST``  -- Piper Wyoming host (default ``192.168.86.11``)
* ``PIPER_PORT``  -- Piper Wyoming port (default ``10200``)
* ``LISTEN_PORT`` -- port this proxy listens on (default ``8951``)
"""
import io
import json
import os
import socket
import struct
import sys
import wave
from http.server import HTTPServer, BaseHTTPRequestHandler

PIPER_HOST = os.environ.get("PIPER_HOST", "192.168.86.11")
PIPER_PORT = int(os.environ.get("PIPER_PORT", "10200"))
LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "8951"))

# Audio format assumed until an "audio-start" event says otherwise.
_DEFAULT_RATE = 22050
_DEFAULT_WIDTH = 2
_DEFAULT_CHANNELS = 1

# Seconds to wait on connect and on each socket read/write to Piper.
_PIPER_TIMEOUT = 30


def _read_exact(stream, length):
    """Read exactly *length* bytes from *stream* or raise ConnectionError."""
    data = stream.read(length)
    if data is None or len(data) != length:
        got = 0 if data is None else len(data)
        raise ConnectionError(
            f"Piper closed the connection mid-message "
            f"(expected {length} bytes, got {got})"
        )
    return data


def _read_audio_events(stream):
    """Consume Wyoming events from *stream* until "audio-stop" or EOF.

    Each event is one JSON header line, optionally followed by
    ``data_length`` bytes of JSON and ``payload_length`` bytes of binary
    payload.  Returns ``(pcm_bytes, rate, width, channels)``.
    """
    chunks = []
    rate, width, channels = _DEFAULT_RATE, _DEFAULT_WIDTH, _DEFAULT_CHANNELS

    while True:
        line = stream.readline()
        if not line:
            # EOF on a message boundary: return whatever was received.
            break

        header = json.loads(line.decode().strip())
        event_type = header.get("type")

        # `or 0` / `or {}` tolerate explicit JSON nulls in the header.
        data_len = header.get("data_length") or 0
        payload_len = header.get("payload_length") or 0
        data = header.get("data") or {}
        if not isinstance(data, dict):
            data = {}

        # Both trailing sections must always be consumed, even for event
        # types we do not care about, to stay aligned on the stream.
        if data_len > 0:
            extra = json.loads(_read_exact(stream, data_len).decode())
            if isinstance(extra, dict):
                # Event data may arrive inline, in the trailing block, or both.
                data = {**data, **extra}
        payload = _read_exact(stream, payload_len) if payload_len > 0 else b""

        if event_type == "audio-start":
            rate = data.get("rate", _DEFAULT_RATE)
            width = data.get("width", _DEFAULT_WIDTH)
            channels = data.get("channels", _DEFAULT_CHANNELS)
        elif event_type == "audio-chunk":
            chunks.append(payload)
        elif event_type == "audio-stop":
            break

    # Join once at the end instead of repeated bytes concatenation.
    return b"".join(chunks), rate, width, channels


def piper_synthesize(text):
    """Send *text* to Piper via the Wyoming protocol and collect the audio.

    Opens a TCP connection to ``PIPER_HOST:PIPER_PORT``, sends a single
    ``synthesize`` event and reads events until ``audio-stop`` (or the peer
    closes the connection).

    Returns:
        A tuple ``(pcm_bytes, rate, width, channels)``.

    Raises:
        OSError: on connection failure or timeout (``ConnectionError`` if
            the peer disconnects in the middle of a message).
        ValueError: if Piper sends a header or data block that is not JSON.
    """
    request = json.dumps({"type": "synthesize", "data": {"text": text}}) + "\n"

    # Context managers guarantee the socket is closed on every exit path.
    with socket.create_connection(
        (PIPER_HOST, PIPER_PORT), timeout=_PIPER_TIMEOUT
    ) as sock:
        sock.sendall(request.encode())
        with sock.makefile("rb") as stream:
            return _read_audio_events(stream)


def pcm_to_wav(pcm_data, rate, width, channels):
    """Wrap raw PCM in a WAV container.

    Args:
        pcm_data: raw PCM frames as bytes.
        rate: sample rate in Hz.
        width: sample width in bytes.
        channels: number of audio channels.

    Returns:
        The complete WAV file contents as bytes.
    """
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(width)
        wf.setframerate(rate)
        wf.writeframes(pcm_data)
    return buf.getvalue()


class TTSHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing the speech endpoint and simple health checks."""

    _SPEECH_PATHS = ("/v1/audio/speech", "/v1/audio/speech/")
    _HEALTH_PATHS = ("/v1/models", "/health", "/healthz")

    def _route(self):
        """Return the request path without any query string."""
        return self.path.split("?", 1)[0]

    def _send_bytes(self, status, body=b"", content_type="application/json"):
        """Send a complete response with correct length/type headers."""
        self.send_response(status)
        if body:
            self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if body:
            self.wfile.write(body)

    def _send_error_json(self, status, message):
        """Send ``{"error": message}`` as a JSON response."""
        self._send_bytes(status, json.dumps({"error": message}).encode())

    def _read_json_body(self):
        """Parse the request body as a JSON object.

        Returns an empty dict when there is no body.  Raises ValueError with
        a client-presentable message when the body cannot be used.
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            raise ValueError("invalid Content-Length header") from None
        if length <= 0:
            # A negative length would make rfile.read() block until EOF.
            return {}
        try:
            body = json.loads(self.rfile.read(length))
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError.
            raise ValueError("request body is not valid JSON") from None
        if not isinstance(body, dict):
            raise ValueError("request body must be a JSON object")
        return body

    def _handle_speech(self):
        """Synthesize the ``input`` text and reply with WAV audio."""
        try:
            body = self._read_json_body()
        except ValueError as err:
            self._send_error_json(400, str(err))
            return

        text = body.get("input", "")
        if not text:
            self._send_bytes(400, b'{"error": "missing input"}')
            return
        if not isinstance(text, str):
            self._send_error_json(400, "input must be a string")
            return

        try:
            pcm, rate, width, channels = piper_synthesize(text)
            wav_data = pcm_to_wav(pcm, rate, width, channels)
        except Exception as err:
            # Top-level boundary: report any backend failure to the client.
            self._send_error_json(500, str(err))
            return

        self._send_bytes(200, wav_data, "audio/wav")

    def do_POST(self):
        """Serve the speech endpoint; also answer health paths on POST."""
        path = self._route()
        if path in self._SPEECH_PATHS:
            self._handle_speech()
        elif path in self._HEALTH_PATHS:
            self._send_bytes(200, b'{"status":"ok","provider":"piper"}')
        else:
            self._send_bytes(404)

    def do_GET(self):
        """Answer health checks (including ``/``); 404 for anything else."""
        if self._route() in self._HEALTH_PATHS + ("/",):
            self._send_bytes(200, b'{"status":"ok","provider":"piper-proxy"}')
        else:
            self._send_bytes(404)

    def log_message(self, format, *args):
        """Print the fully formatted log line with a ``[piper-proxy]`` prefix."""
        # The base class calls this with varying arg counts, so format the
        # whole message rather than indexing into args.
        message = format % args if args else format
        print(f"[piper-proxy] {message}")


def main():
    """Start the proxy and serve until interrupted."""
    print(f"Piper OpenAI TTS proxy starting on :{LISTEN_PORT}")
    print(f"  Piper backend: {PIPER_HOST}:{PIPER_PORT}")
    print("  Endpoint: POST /v1/audio/speech")
    server = HTTPServer(("0.0.0.0", LISTEN_PORT), TTSHandler)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("[piper-proxy] shutting down")
    finally:
        # Release the listening socket on every exit path.
        server.server_close()


if __name__ == "__main__":
    main()