Initial commit: OpenAI-compatible Piper TTS proxy

This commit is contained in:
Jarvis Prime
2026-03-23 12:22:54 +00:00
commit 60850d975e
2 changed files with 157 additions and 0 deletions

43
README.md Normal file
View File

@@ -0,0 +1,43 @@
# piper-openai-proxy
OpenAI-compatible TTS API proxy for [Piper](https://github.com/rhasspy/piper) via the [Wyoming protocol](https://github.com/rhasspy/wyoming).
Any tool that speaks the OpenAI `/v1/audio/speech` API can use your local Piper instance without modification.
## Usage
```bash
python3 server.py
```
Environment variables:
- `PIPER_HOST` — Piper Wyoming host (default: `192.168.86.11`)
- `PIPER_PORT` — Piper Wyoming port (default: `10200`)
- `LISTEN_PORT` — Proxy listen port (default: `8951`)
## API
```bash
# Generate speech
curl -X POST http://localhost:8951/v1/audio/speech \
-H "Content-Type: application/json" \
-d '{"input": "Hello world"}' \
-o speech.wav
# Health check
curl http://localhost:8951/health
```
## Requirements
- Python 3.8+
- No dependencies (stdlib only)
- A running [Piper Wyoming server](https://github.com/rhasspy/wyoming-piper)
## How it works
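The proxy translates OpenAI TTS API requests into Wyoming protocol messages over a raw TCP socket. Piper generates the audio, and the proxy wraps the raw PCM in a WAV container and returns it. Only the `input` field of the request body is used; other OpenAI parameters are ignored, and the response is always WAV (`audio/wav`).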
## License
MIT

114
server.py Normal file
View File

@@ -0,0 +1,114 @@
#!/usr/bin/env python3
"""OpenAI-compatible TTS API proxy for Piper (Wyoming protocol)."""
import socket, json, struct, io, wave, os, sys
from http.server import HTTPServer, BaseHTTPRequestHandler
# Address of the Piper Wyoming server that piper_synthesize() connects to.
# Each value can be overridden through the environment variable of the same
# name; the literals below are only the fallbacks.
PIPER_HOST = os.environ.get("PIPER_HOST", "192.168.86.11")
PIPER_PORT = int(os.environ.get("PIPER_PORT", "10200"))
# TCP port the proxy's own HTTP server binds to (see the __main__ block).
LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "8951"))
def _read_exact(stream, length):
    """Read exactly *length* bytes from *stream* or raise ConnectionError."""
    chunk = stream.read(length)
    if len(chunk) != length:
        raise ConnectionError(
            "Piper closed the connection mid-event "
            f"(expected {length} bytes, got {len(chunk)})"
        )
    return chunk


def piper_synthesize(text, host=None, port=None, timeout=30):
    """Synthesize *text* with Piper over the Wyoming protocol.

    Opens a TCP connection, sends one ``synthesize`` event and collects the
    payloads of every ``audio-chunk`` event until ``audio-stop`` arrives or
    the server closes the connection.

    Args:
        text: The text to speak.
        host: Piper host; defaults to the module-level ``PIPER_HOST``.
        port: Piper port; defaults to the module-level ``PIPER_PORT``.
        timeout: Socket timeout in seconds for connecting and for each read.

    Returns:
        A ``(pcm_bytes, rate, width, channels)`` tuple. The format values
        come from the ``audio-start`` event and fall back to 22050 Hz,
        2 bytes per sample, 1 channel when the server does not supply them.

    Raises:
        OSError: The connection failed, timed out, or was cut off in the
            middle of an event (``ConnectionError``).
        ValueError: The server sent a header or data section that is not
            valid JSON.
    """
    address = (
        PIPER_HOST if host is None else host,
        PIPER_PORT if port is None else port,
    )
    request = json.dumps({"type": "synthesize", "data": {"text": text}}) + "\n"

    chunks = []
    rate, width, channels = 22050, 2, 1

    # Both context managers guarantee the socket and its buffered reader are
    # released even when parsing fails or the read times out.
    with socket.create_connection(address, timeout=timeout) as sock:
        with sock.makefile("rb") as stream:
            sock.sendall(request.encode())
            while True:
                line = stream.readline()
                if not line:
                    # Server hung up between events: return what we have.
                    break
                header = json.loads(line.decode().strip())
                event_type = header.get("type")
                data_len = header.get("data_length") or 0
                payload_len = header.get("payload_length") or 0

                # Event data may be inline in the header, in a separate
                # section of data_length bytes, or both; the separate
                # section takes precedence on key clashes.
                event_data = dict(header.get("data") or {})
                if data_len > 0:
                    extra = _read_exact(stream, data_len)
                    event_data.update(json.loads(extra.decode()))

                # The payload must be consumed for every event type so the
                # stream stays aligned on the next header line.
                payload = b""
                if payload_len > 0:
                    payload = _read_exact(stream, payload_len)

                if event_type == "audio-start" and event_data:
                    rate = event_data.get("rate", 22050)
                    width = event_data.get("width", 2)
                    channels = event_data.get("channels", 1)
                elif event_type == "audio-chunk":
                    chunks.append(payload)
                elif event_type == "audio-stop":
                    break

    # Join once at the end instead of repeatedly re-copying a growing bytes.
    return b"".join(chunks), rate, width, channels
def pcm_to_wav(pcm_data, rate, width, channels):
    """Return *pcm_data* framed as a complete in-memory WAV file.

    *rate* is the sample rate in Hz, *width* the sample width in bytes and
    *channels* the channel count; all three are written to the WAV header.
    """
    container = io.BytesIO()
    writer = wave.open(container, "wb")
    try:
        # Positional order: nchannels, sampwidth, framerate, nframes,
        # comptype, compname. nframes is patched on close.
        writer.setparams((channels, width, rate, 0, "NONE", "not compressed"))
        writer.writeframes(pcm_data)
    finally:
        writer.close()
    return container.getvalue()
class TTSHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing Piper through an OpenAI-style speech endpoint.

    ``POST /v1/audio/speech`` expects a JSON object with an ``input`` string
    and answers with a WAV file. The status paths answer with a small JSON
    document on both GET and POST. Anything else is a 404.
    """

    SPEECH_PATHS = ("/v1/audio/speech", "/v1/audio/speech/")
    STATUS_PATHS = ("/v1/models", "/health", "/healthz")

    def _send_body(self, status, body, content_type="application/json"):
        """Send a complete response with explicit type and length headers."""
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_error_json(self, status, message):
        """Send ``{"error": message}`` with the given HTTP status."""
        self._send_body(status, json.dumps({"error": message}).encode())

    def _send_not_found(self):
        """Send an empty 404 response."""
        self.send_response(404)
        self.send_header("Content-Length", "0")
        self.end_headers()

    def do_POST(self):
        """Handle the speech endpoint, plus status paths reached via POST."""
        if self.path not in self.SPEECH_PATHS:
            # Health check / model list
            if self.path in self.STATUS_PATHS:
                self._send_body(200, b'{"status":"ok","provider":"piper"}')
            else:
                self._send_not_found()
            return

        try:
            content_len = int(self.headers.get("Content-Length", 0))
        except ValueError:
            content_len = -1
        if content_len < 0:
            # A negative length would make rfile.read() block until EOF.
            self._send_error_json(400, "invalid Content-Length")
            return

        raw = self.rfile.read(content_len) if content_len else b""
        try:
            body = json.loads(raw) if raw else {}
        except ValueError:
            # Covers both JSONDecodeError and undecodable bytes.
            self._send_error_json(400, "request body is not valid JSON")
            return

        text = body.get("input", "") if isinstance(body, dict) else ""
        if not text:
            self._send_error_json(400, "missing input")
            return
        if not isinstance(text, str):
            self._send_error_json(400, "input must be a string")
            return

        # Only synthesis is guarded: once the 200 headers are on the wire a
        # second (500) status line must not follow them.
        try:
            pcm, rate, width, channels = piper_synthesize(text)
            wav_data = pcm_to_wav(pcm, rate, width, channels)
        except Exception as e:
            self._send_error_json(500, str(e))
            return
        self._send_body(200, wav_data, content_type="audio/wav")

    def do_GET(self):
        """Answer the status paths (and ``/``) with a JSON status document."""
        if self.path in self.STATUS_PATHS or self.path == "/":
            self._send_body(200, b'{"status":"ok","provider":"piper-proxy"}')
        else:
            self._send_not_found()

    def log_message(self, format, *args):
        """Print the fully formatted log line with a ``[piper-proxy]`` tag."""
        # The base class passes a %-style format plus its arguments; printing
        # only args[0] loses information and fails when there are no args.
        print(f"[piper-proxy] {format % args}")
if __name__ == "__main__":
    print(f"Piper OpenAI TTS proxy starting on :{LISTEN_PORT}")
    print(f" Piper backend: {PIPER_HOST}:{PIPER_PORT}")
    print(" Endpoint: POST /v1/audio/speech")
    # The context manager closes the listening socket on every exit path,
    # so the port is released even when serve_forever() is interrupted.
    with HTTPServer(("0.0.0.0", LISTEN_PORT), TTSHandler) as server:
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop the proxy; exit without a
            # traceback.
            print("Piper OpenAI TTS proxy stopped")