Files
voice-kiro/app.py

87 lines
2.5 KiB
Python
Raw Normal View History

from flask import Flask, request, jsonify, send_from_directory
import subprocess
import requests
import tempfile
import os
# Flask application; static assets are served from the "static" folder.
app = Flask(__name__, static_folder="static")

# Speech-to-text endpoint and model name. Both can be overridden through the
# environment; the defaults below are used otherwise.
WHISPER_URL = os.environ.get("WHISPER_URL", "http://192.168.86.11:8950/v1/audio/transcriptions")
WHISPER_MODEL = os.environ.get("WHISPER_MODEL", "small.en")

# tmux server socket (passed to ``tmux -S``) and the session that receives
# keystrokes. Like the Whisper settings, both honour an environment override
# so the app can be pointed at a different tmux server without a code change.
TMUX_SOCKET = os.environ.get("TMUX_SOCKET", "/tmp/tmux-host/default")
TMUX_SESSION = os.environ.get("TMUX_SESSION", "kiro")
def tmux(*args):
    """Run a tmux command against the server socket at ``TMUX_SOCKET``.

    Args:
        *args: The tmux subcommand and its arguments, appended unchanged
            after ``tmux -S TMUX_SOCKET``.

    Returns:
        The ``subprocess.CompletedProcess`` for the finished command, with
        stdout and stderr captured as text.
    """
    command = ["tmux", "-S", TMUX_SOCKET, *args]
    return subprocess.run(command, capture_output=True, text=True)
def get_pane_content():
    """Return the captured text of the ``TMUX_SESSION`` pane.

    Invokes ``tmux capture-pane -p -S -50`` and returns whatever the command
    wrote to stdout; the exit status is not inspected.
    """
    capture = tmux("capture-pane", "-t", TMUX_SESSION, "-p", "-S", "-50")
    return capture.stdout
@app.route("/")
def index():
    """Serve ``index.html`` from the ``static`` directory as the landing page."""
    page = "index.html"
    return send_from_directory("static", page)
@app.route("/send", methods=["POST"])
def send_voice():
    """Transcribe an uploaded audio clip and type the result into tmux.

    Expects a multipart upload with an ``audio`` file part. The clip is
    posted to ``WHISPER_URL``; the returned transcription is typed into
    ``TMUX_SESSION`` and followed by Enter.

    Returns:
        ``{"text": <transcription>}`` on success. On failure, a JSON object
        with an ``error`` message and a 4xx/5xx status code.
    """
    audio = request.files.get("audio")
    if not audio:
        return jsonify({"error": "no audio"}), 400

    # Only reserve a path here; the handle is closed immediately so the upload
    # can be written to it and then reopened for reading.
    with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
        tmp = f.name
    try:
        # Saving inside the try guarantees the temp file is removed even when
        # writing the upload fails.
        audio.save(tmp)
        # The context manager closes the read handle once the request is done.
        with open(tmp, "rb") as clip:
            resp = requests.post(
                WHISPER_URL,
                files={"file": (audio.filename or "audio.webm", clip, audio.content_type or "audio/webm")},
                data={"model": WHISPER_MODEL},
                # (connect, read) seconds: never block a worker forever on an
                # unreachable or stalled transcription server.
                timeout=(10, 120),
            )
    except requests.RequestException as exc:
        return jsonify({"error": f"whisper request failed: {exc}"}), 502
    finally:
        os.unlink(tmp)

    if resp.status_code != 200:
        return jsonify({"error": f"whisper {resp.status_code}: {resp.text[:200]}"}), 400

    try:
        payload = resp.json()
    except ValueError:
        return jsonify({"error": "whisper returned a non-JSON response"}), 502
    # Tolerate a missing/null "text" field or a non-object body.
    raw = payload.get("text") if isinstance(payload, dict) else None
    text = raw.strip() if isinstance(raw, str) else ""
    if not text:
        return jsonify({"error": "empty transcription"}), 400

    r = tmux("has-session", "-t", TMUX_SESSION)
    if r.returncode != 0:
        return jsonify({"error": f"no host tmux session '{TMUX_SESSION}' — start it on the host first"}), 400

    # -l sends the text literally, so a transcription such as "Enter" or "Up"
    # is typed rather than interpreted as a key name; "--" keeps text that
    # starts with "-" from being parsed as an option. Enter has to go in a
    # separate call because -l would type the word out.
    sent = tmux("send-keys", "-t", TMUX_SESSION, "-l", "--", text)
    if sent.returncode == 0:
        sent = tmux("send-keys", "-t", TMUX_SESSION, "Enter")
    if sent.returncode != 0:
        return jsonify({"error": f"tmux send-keys failed: {sent.stderr.strip()}"}), 500
    return jsonify({"text": text})
@app.route("/output")
def output():
    """Return the current pane capture as JSON under the ``output`` key."""
    pane_text = get_pane_content()
    return jsonify({"output": pane_text})
@app.route("/cancel", methods=["POST"])
def send_cancel():
    """Send ``C-c`` to the tmux session and acknowledge the request.

    The tmux result is not inspected; the response is always ``{"ok": true}``.
    """
    interrupt_key = "C-c"
    tmux("send-keys", "-t", TMUX_SESSION, interrupt_key)
    acknowledgement = {"ok": True}
    return jsonify(acknowledgement)
@app.route("/text", methods=["POST"])
def send_text():
    """Type a line of text from a JSON request body into the tmux session.

    Expects a JSON object with a string ``text`` field. The stripped text is
    typed into ``TMUX_SESSION`` and followed by Enter.

    Returns:
        ``{"text": <text>}`` on success. On failure, a JSON object with an
        ``error`` message and a 4xx/5xx status code.
    """
    # silent=True yields None for a missing or malformed JSON body, so bad
    # input falls through to the same "empty" error as a blank string.
    payload = request.get_json(silent=True)
    raw = payload.get("text") if isinstance(payload, dict) else None
    text = raw.strip() if isinstance(raw, str) else ""
    if not text:
        return jsonify({"error": "empty"}), 400

    r = tmux("has-session", "-t", TMUX_SESSION)
    if r.returncode != 0:
        return jsonify({"error": f"no host tmux session '{TMUX_SESSION}'"}), 400

    # -l sends the text literally, so input such as "Enter" or "C-c" is typed
    # rather than interpreted as a key name; "--" keeps text that starts with
    # "-" from being parsed as an option. Enter has to go in a separate call
    # because -l would type the word out.
    sent = tmux("send-keys", "-t", TMUX_SESSION, "-l", "--", text)
    if sent.returncode == 0:
        sent = tmux("send-keys", "-t", TMUX_SESSION, "Enter")
    if sent.returncode != 0:
        return jsonify({"error": f"tmux send-keys failed: {sent.stderr.strip()}"}), 500
    return jsonify({"text": text})
if __name__ == "__main__":
    # Start Flask's built-in server only when this file is executed directly.
    app.run(port=8951, host="0.0.0.0")