Voice app with floating record/cancel buttons and terminal view

This commit is contained in:
brian
2026-05-27 13:14:46 -07:00
commit 20cdfe48d3
5 changed files with 244 additions and 0 deletions

8
Dockerfile Normal file
View File

@@ -0,0 +1,8 @@
# Image for the voice-to-tmux web app: slim Python 3.11 base.
FROM python:3.11-slim
# Install tmux (app.py shells out to it) plus the docker.io, git, curl and sudo packages;
# the apt lists are removed in the same layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends tmux docker.io git curl sudo && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# requirements.txt is copied and installed before the application code so the
# pip layer can be reused from cache when only app.py or static/ change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY app.py .
COPY static/ static/
# Start the app by running app.py directly.
CMD ["python", "app.py"]

86
app.py Normal file
View File

@@ -0,0 +1,86 @@
from flask import Flask, request, jsonify, send_from_directory
import subprocess
import requests
import tempfile
import os
# Flask application object; the ./static folder is registered as its static folder.
app = Flask(__name__, static_folder="static")

# Transcription endpoint and model name; both can be overridden via the environment.
WHISPER_URL = os.getenv(
    "WHISPER_URL",
    "http://192.168.86.11:8950/v1/audio/transcriptions",
)
WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small.en")

# Socket path handed to `tmux -S`, and the session that keystrokes are sent to.
TMUX_SOCKET = "/tmp/tmux-host/default"
TMUX_SESSION = os.getenv("TMUX_SESSION", "kiro")
def tmux(*args):
    """Run ``tmux -S TMUX_SOCKET`` followed by the given arguments.

    Returns the ``subprocess.CompletedProcess`` with stdout and stderr
    captured as text. A non-zero exit status is not raised; callers inspect
    ``returncode`` themselves.
    """
    command = ["tmux", "-S", TMUX_SOCKET, *args]
    return subprocess.run(command, capture_output=True, text=True)
def get_pane_content():
    """Return the stdout of ``tmux capture-pane -p -S -50`` for TMUX_SESSION."""
    captured = tmux("capture-pane", "-t", TMUX_SESSION, "-p", "-S", "-50")
    return captured.stdout
@app.route("/")
def index():
    """Serve the single-page UI from static/index.html."""
    page = "index.html"
    return send_from_directory("static", page)
@app.route("/send", methods=["POST"])
def send_voice():
    """Transcribe an uploaded recording and type the text into the tmux session.

    Expects a multipart upload with the recording in the ``audio`` field.
    The file is forwarded to WHISPER_URL, and the returned ``text`` is typed
    into TMUX_SESSION followed by Enter.

    Returns ``{"text": ...}`` on success. On failure returns
    ``{"error": ...}`` with status 400 (missing audio, non-200 from the
    transcription service, empty transcription, missing tmux session),
    502 (service unreachable or reply is not JSON) or 500 (tmux refused
    the keys).
    """
    audio = request.files.get("audio")
    if not audio:
        return jsonify({"error": "no audio"}), 400

    # Reserve a temp path, then write to it by name once the handle is closed.
    with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
        tmp = f.name
    try:
        audio.save(tmp)
        # The context manager closes the upload handle, which the original leaked.
        with open(tmp, "rb") as upload:
            resp = requests.post(
                WHISPER_URL,
                files={"file": (audio.filename or "audio.webm", upload, audio.content_type or "audio/webm")},
                data={"model": WHISPER_MODEL},
                # (connect, read) seconds: without a timeout a hung service
                # would block this request handler indefinitely.
                timeout=(5, 120),
            )
    except requests.RequestException as exc:
        return jsonify({"error": f"whisper unreachable: {exc}"}), 502
    finally:
        os.unlink(tmp)

    if resp.status_code != 200:
        return jsonify({"error": f"whisper {resp.status_code}: {resp.text[:200]}"}), 400
    try:
        payload = resp.json()
    except ValueError:
        return jsonify({"error": "whisper returned invalid JSON"}), 502
    raw = payload.get("text") if isinstance(payload, dict) else None
    text = raw.strip() if isinstance(raw, str) else ""
    if not text:
        return jsonify({"error": "empty transcription"}), 400

    r = tmux("has-session", "-t", TMUX_SESSION)
    if r.returncode != 0:
        return jsonify({"error": f"no host tmux session '{TMUX_SESSION}' — start it on the host first"}), 400

    # -l sends the text literally, so a transcription such as "Enter" or "Up"
    # is typed rather than interpreted as a key name; "--" keeps text that
    # starts with "-" from being parsed as flags. Enter is sent separately
    # because -l would otherwise type the word "Enter".
    sent = tmux("send-keys", "-t", TMUX_SESSION, "-l", "--", text)
    if sent.returncode == 0:
        sent = tmux("send-keys", "-t", TMUX_SESSION, "Enter")
    if sent.returncode != 0:
        return jsonify({"error": f"tmux send-keys failed: {sent.stderr.strip()}"}), 500
    return jsonify({"text": text})
@app.route("/output")
def output():
    """Return the current tmux pane capture as ``{"output": ...}`` JSON."""
    pane_text = get_pane_content()
    return jsonify({"output": pane_text})
@app.route("/cancel", methods=["POST"])
def send_cancel():
    """Send the ``C-c`` key to TMUX_SESSION and reply ``{"ok": true}``.

    The tmux result is not inspected; the reply is the same either way.
    """
    interrupt_key = "C-c"
    tmux("send-keys", "-t", TMUX_SESSION, interrupt_key)
    return jsonify({"ok": True})
@app.route("/text", methods=["POST"])
def send_text():
    """Type a text message into the tmux session, followed by Enter.

    Expects a JSON object body with a string ``text`` field.

    Returns ``{"text": ...}`` on success. On failure returns
    ``{"error": ...}`` with status 400 (body is not a JSON object, ``text``
    is missing, not a string, or blank; or the tmux session does not exist)
    or 500 (tmux refused the keys).
    """
    # silent=True yields None instead of an HTML error page for a missing or
    # malformed JSON body, so the client always receives a JSON error.
    payload = request.get_json(silent=True)
    raw = payload.get("text") if isinstance(payload, dict) else None
    text = raw.strip() if isinstance(raw, str) else ""
    if not text:
        return jsonify({"error": "empty"}), 400

    r = tmux("has-session", "-t", TMUX_SESSION)
    if r.returncode != 0:
        return jsonify({"error": f"no host tmux session '{TMUX_SESSION}'"}), 400

    # -l sends the text literally, so input such as "Enter" or "C-c" is typed
    # rather than interpreted as a key name; "--" keeps text that starts with
    # "-" from being parsed as flags. Enter is sent separately because -l
    # would otherwise type the word "Enter".
    sent = tmux("send-keys", "-t", TMUX_SESSION, "-l", "--", text)
    if sent.returncode == 0:
        sent = tmux("send-keys", "-t", TMUX_SESSION, "Enter")
    if sent.returncode != 0:
        return jsonify({"error": f"tmux send-keys failed: {sent.stderr.strip()}"}), 500
    return jsonify({"text": text})
if __name__ == "__main__":
    # Run Flask's built-in server on every interface, port 8951.
    listen = {"host": "0.0.0.0", "port": 8951}
    app.run(**listen)

17
docker-compose.yml Normal file
View File

@@ -0,0 +1,17 @@
# Compose definition for the voice app container (built from the Dockerfile in this directory).
services:
  voice-kiro:
    build: .
    restart: unless-stopped
    ports:
      # Host port 8952 -> container port 8951, the port app.py listens on.
      - "8952:8951"
    volumes:
      # kiro-cli binaries from the host, mounted read-only.
      - /mnt/data/home/brian/.local/bin/kiro-cli:/usr/local/bin/kiro-cli:ro
      - /mnt/data/home/brian/.local/bin/kiro-cli-chat:/usr/local/bin/kiro-cli-chat:ro
      # kiro-cli data and config directories, mounted read-write under /root.
      - /mnt/data/home/brian/.local/share/kiro-cli:/root/.local/share/kiro-cli
      - /mnt/data/home/brian/.kiro:/root/.kiro
      - /mnt/data/home/brian/services:/services
      # NOTE(review): mounting the Docker socket gives the container control of
      # the host's Docker daemon — confirm this is intended.
      - /var/run/docker.sock:/var/run/docker.sock
      # Host tmux socket directory; app.py talks to /tmp/tmux-host/default.
      # Presumably 3000 is the host user's UID — verify on the host.
      - /tmp/tmux-3000:/tmp/tmux-host
    environment:
      # Same values as the defaults in app.py; set here so they can be changed without rebuilding.
      - WHISPER_URL=http://192.168.86.11:8950/v1/audio/transcriptions
      - WHISPER_MODEL=small.en

2
requirements.txt Normal file
View File

@@ -0,0 +1,2 @@
flask==3.1.1
requests==2.32.3

131
static/index.html Normal file
View File

@@ -0,0 +1,131 @@
<!DOCTYPE html>
<!-- Single-page UI: terminal output view, text input bar, and two floating buttons (record, ^C). -->
<html lang="en">
<head>
<meta charset="utf-8">
<!-- NOTE(review): user-scalable=no disables pinch zoom; confirm this is intended. -->
<meta name="viewport" content="width=device-width,initial-scale=1,user-scalable=no">
<title>Voice → Kiro</title>
<style>
*{box-sizing:border-box;margin:0;padding:0}
/* Column layout filling the dynamic viewport height: output on top, input bar below. */
body{font-family:system-ui;background:#1a1a2e;color:#eee;display:flex;flex-direction:column;height:100dvh;padding:0}
/* Scrollable monospace area; pre-wrap keeps the line breaks of the captured text. */
#output{flex:1;width:100%;background:#0f0f1a;padding:.5rem;overflow-y:auto;font-family:monospace;font-size:.7rem;white-space:pre-wrap;color:#ccc}
#bottom{width:100%;padding:.4rem;background:#16213e;display:flex;gap:.4rem;align-items:center}
/* Status text is limited to 40% of the bar and truncated with an ellipsis. */
#status{font-size:.75rem;color:#aaa;padding:0 .4rem;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;max-width:40%}
#textinput{flex:1;padding:.5rem;border-radius:6px;border:1px solid #333;background:#0f0f1a;color:#eee;font-size:.9rem}
/* Floating action buttons: fixed-position circles above the page content. */
.fab{position:fixed;border-radius:50%;border:none;color:#fff;font-size:.7rem;cursor:pointer;touch-action:none;user-select:none;z-index:100;display:flex;align-items:center;justify-content:center;text-align:center;box-shadow:0 2px 8px rgba(0,0,0,.5)}
#btn{width:70px;height:70px;background:#16213e;bottom:4rem;right:1rem;font-size:.8rem}
/* Applied by the page script while a recording is in progress. */
#btn.recording{background:#e94560;transform:scale(1.1)}
#cancelBtn{width:50px;height:50px;background:#e94560;bottom:4rem;left:1rem;font-size:.65rem}
</style>
</head>
<body>
<!-- Filled by the page script with the text returned from /output. -->
<div id="output"></div>
<div id="bottom">
<span id="status"></span>
<!-- Submitting this form posts the typed message to /text (handled in the page script). -->
<form id="textform" autocomplete="off" style="flex:1;display:flex"><input id="textinput" type="text" placeholder="Message..." autocomplete="off"></form>
</div>
<!-- Hold-to-record button and the button that posts to /cancel. -->
<button id="btn" class="fab">Hold<br>to Talk</button>
<button id="cancelBtn" class="fab">⌃C</button>
<script>
const btn=document.getElementById('btn'),status=document.getElementById('status'),output=document.getElementById('output');
let mediaRec,chunks=[];
// True from start() until stop(). Lets start() notice a release that happened
// while the microphone stream was still being acquired.
let holding=false;

/**
 * Begin a recording: acquire the microphone, create a MediaRecorder and
 * switch the button to its "recording" look. When the recorder stops, the
 * collected audio is handed to send() as a Blob.
 *
 * Errors from getUserMedia or MediaRecorder are shown in the status line.
 */
function start(){
  // Ignore a second press while one is already pending or recording, so an
  // in-flight recorder is never orphaned.
  if(holding)return;
  holding=true;
  navigator.mediaDevices.getUserMedia({audio:true}).then(stream=>{
    const release=()=>stream.getTracks().forEach(t=>t.stop());
    if(!holding){
      // The button was released before the stream arrived; starting now
      // would leave a recording that nothing ever stops.
      release();
      return;
    }
    try{
      const mimeType=MediaRecorder.isTypeSupported('audio/webm;codecs=opus')?'audio/webm;codecs=opus':'audio/mp4';
      // Per-recording locals: the stop handler must use this recorder and
      // this chunk list even if the globals are reassigned later.
      const rec=new MediaRecorder(stream,{mimeType});
      const parts=[];
      rec.ondataavailable=e=>parts.push(e.data);
      rec.onstop=()=>{
        release();
        const ext=rec.mimeType.includes('mp4')?'mp4':'webm';
        send(new Blob(parts,{type:rec.mimeType}),ext);
      };
      mediaRec=rec;
      chunks=parts;
      rec.start();
    }catch(err){
      // Do not keep the microphone open when the recorder cannot be created.
      release();
      throw err;
    }
    btn.classList.add('recording');
    btn.textContent='●';
    status.textContent='';
  }).catch(e=>{
    holding=false;
    status.textContent='Mic: '+e.message;
  });
}

/**
 * End the current press: stop the recorder if it is running (which triggers
 * the upload) and restore the button's idle look.
 */
function stop(){
  holding=false;
  if(mediaRec&&mediaRec.state==='recording'){
    mediaRec.stop();
    btn.classList.remove('recording');
    btn.innerHTML='Hold<br>to Talk';
  }
}
/**
 * Upload a recording to /send as multipart field "audio" (filename
 * "audio.<ext>") and show the outcome in the status line. On success the
 * first 30 characters of the transcription are shown and the output view
 * is refreshed.
 */
function send(blob, ext){
  const form=new FormData();
  form.append('audio',blob,'audio.'+ext);
  status.textContent='Transcribing...';
  const onReply=reply=>{
    if(reply.error){
      status.textContent='Err: '+reply.error;
      return;
    }
    status.textContent='✓ '+reply.text.slice(0,30);
    pollOutput();
  };
  fetch('/send',{method:'POST',body:form})
    .then(res=>res.json())
    .then(onReply)
    .catch(err=>{status.textContent='Fail: '+err.message});
}
/**
 * Fetch /output and replace the output view's text with it. The view is
 * scrolled to the bottom only if it was within 50px of the bottom before
 * the update. Fetch and parse errors are ignored.
 */
function pollOutput(){
  fetch('/output')
    .then(res=>res.json())
    .then(data=>{
      const gap=output.scrollHeight-output.scrollTop-output.clientHeight;
      const pinned=gap<50;
      output.textContent=data.output;
      if(pinned){
        output.scrollTop=output.scrollHeight;
      }
    })
    .catch(()=>{});
}
// Refresh once per second, plus once immediately on load.
setInterval(pollOutput,1000);
pollOutput();
// Text form: post the trimmed message to /text as JSON instead of letting the
// browser submit the form; blank input is ignored.
document.getElementById('textform').addEventListener('submit',evt=>{
  evt.preventDefault();
  const field=document.getElementById('textinput');
  const message=field.value.trim();
  if(message.length===0)return;
  field.value='';
  status.textContent='Sending...';
  const request={
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body:JSON.stringify({text:message})
  };
  const onReply=reply=>{
    if(reply.error){
      status.textContent='Err: '+reply.error;
      return;
    }
    status.textContent='✓ '+reply.text.slice(0,30);
    pollOutput();
  };
  fetch('/text',request)
    .then(res=>res.json())
    .then(onReply)
    .catch(err=>{status.textContent='Fail: '+err.message});
});
// Cancel button: POST to /cancel and note it in the status line; errors are ignored.
document.getElementById('cancelBtn').addEventListener('click',()=>{
  const markSent=()=>{status.textContent='⌃C sent'};
  fetch('/cancel',{method:'POST'}).then(markSent).catch(()=>{});
});
// Record button: press starts recording, release or cancel stops it. The
// pointer is captured so the release is delivered to the button.
btn.addEventListener('pointerdown',evt=>{
  if(evt.pointerId!==undefined){
    btn.setPointerCapture(evt.pointerId);
  }
  evt.preventDefault();
  start();
});
['pointerup','pointercancel'].forEach(type=>btn.addEventListener(type,stop));
// Draggable FABs
// Each .fab can be repositioned by dragging. A gesture counts as a drag once
// the pointer has moved more than 10px (|dx|+|dy|) from where it was pressed.
document.querySelectorAll('.fab').forEach(fab=>{
  // pressed: a pointer is currently down on this button.
  // dragging: the current/last gesture passed the drag threshold.
  let pressed=false,dragging=false,ox,oy,sx,sy;
  fab.addEventListener('pointerdown',e=>{
    pressed=true;
    dragging=false;
    ox=e.clientX;oy=e.clientY;
    sx=fab.offsetLeft;sy=fab.offsetTop;
    // Capture the pointer so moves keep arriving when it outruns the button.
    if(e.pointerId!==undefined)fab.setPointerCapture(e.pointerId);
  });
  fab.addEventListener('pointermove',e=>{
    // Without this guard a plain mouse hover after the first click would be
    // measured against the old press position and move the button.
    if(!pressed)return;
    const dx=e.clientX-ox,dy=e.clientY-oy;
    if(!dragging&&Math.abs(dx)+Math.abs(dy)>10)dragging=true;
    if(dragging){
      // Switch from the CSS right/bottom anchoring to explicit left/top.
      fab.style.left=(sx+dx)+'px';
      fab.style.top=(sy+dy)+'px';
      fab.style.right='auto';
      fab.style.bottom='auto';
      e.preventDefault();
    }
  });
  fab.addEventListener('pointerup',e=>{
    pressed=false;
    if(dragging)e.stopImmediatePropagation();
  });
  fab.addEventListener('pointercancel',()=>{
    pressed=false;
    dragging=false;
  });
  // A drag that ends over the button still produces a click; swallow it so
  // repositioning a button does not trigger its action. Registered for the
  // capture phase so it runs ahead of the button's own click listeners.
  fab.addEventListener('click',e=>{
    if(!dragging)return;
    dragging=false;
    e.stopImmediatePropagation();
    e.preventDefault();
  },true);
});
</script>
</body>
</html>