Voice app with floating record/cancel buttons and terminal view
This commit is contained in:
8
Dockerfile
Normal file
8
Dockerfile
Normal file
@@ -0,0 +1,8 @@
|
||||
# Image for the Flask voice app defined in app.py.
FROM python:3.11-slim

# System packages. app.py shells out to the tmux client; the remaining tools
# are installed alongside it. The apt lists are removed in the same layer to
# keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        tmux \
        docker.io \
        git \
        curl \
        sudo \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies before copying the application so this layer is
# reused when only the application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code and the static front end.
COPY app.py .
COPY static/ static/

CMD ["python", "app.py"]
|
||||
86
app.py
Normal file
86
app.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from flask import Flask, request, jsonify, send_from_directory
|
||||
import subprocess
|
||||
import requests
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
# Flask application; the front end lives in the "static" folder.
app = Flask(__name__, static_folder="static")

# Transcription endpoint and model name, both overridable through the environment.
WHISPER_URL = os.getenv("WHISPER_URL", "http://192.168.86.11:8950/v1/audio/transcriptions")
WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small.en")

# Socket of the tmux server that commands are sent to, and the session to target.
TMUX_SOCKET = "/tmp/tmux-host/default"
TMUX_SESSION = os.getenv("TMUX_SESSION", "kiro")
|
||||
|
||||
|
||||
def tmux(*args):
    """Run ``tmux`` against the socket at ``TMUX_SOCKET`` with the given arguments.

    Args:
        *args: tmux sub-command and its arguments, one string per argument.

    Returns:
        The ``subprocess.CompletedProcess`` of the call, with stdout and
        stderr captured as text. A non-zero exit status is not raised; callers
        inspect ``returncode`` themselves.
    """
    command = ["tmux", "-S", TMUX_SOCKET, *args]
    return subprocess.run(command, capture_output=True, text=True)
|
||||
|
||||
|
||||
def get_pane_content():
    """Return the text currently shown in the ``TMUX_SESSION`` pane.

    ``capture-pane -p`` prints the capture to stdout and ``-S -50`` starts it
    50 lines back in the pane history.
    """
    capture = tmux("capture-pane", "-t", TMUX_SESSION, "-p", "-S", "-50")
    return capture.stdout
|
||||
|
||||
|
||||
@app.route("/")
def index():
    """Serve the single-page front end (``static/index.html``)."""
    return send_from_directory(directory="static", path="index.html")
|
||||
|
||||
|
||||
@app.route("/send", methods=["POST"])
def send_voice():
    """Transcribe an uploaded audio clip and type the text into the tmux session.

    Expects a multipart upload with the recording in the ``audio`` field. The
    clip is forwarded to the transcription endpoint at ``WHISPER_URL``; the
    returned text is typed into ``TMUX_SESSION`` and followed by Enter.

    Returns:
        JSON ``{"text": <transcription>}`` on success, otherwise
        ``{"error": <message>}`` with status 400 (missing upload, non-200
        reply from the transcription service, empty transcription, missing
        tmux session) or 502 (transcription service unreachable, timed out,
        or replied with a body that is not JSON).
    """
    audio = request.files.get("audio")
    if not audio:
        return jsonify({"error": "no audio"}), 400

    # The context manager closes and deletes the spool file on every exit
    # path, so neither the file nor its handle can leak.
    with tempfile.NamedTemporaryFile(suffix=".webm") as spool:
        audio.save(spool)
        spool.seek(0)
        try:
            resp = requests.post(
                WHISPER_URL,
                files={"file": (audio.filename or "audio.webm", spool, audio.content_type or "audio/webm")},
                data={"model": WHISPER_MODEL},
                # (connect, read) seconds; without a timeout a dead endpoint
                # would block this request forever.
                timeout=(5, 120),
            )
        except requests.RequestException as exc:
            # Keep the reply JSON so the front end can display the reason.
            return jsonify({"error": f"whisper unreachable: {exc}"}), 502

    if resp.status_code != 200:
        return jsonify({"error": f"whisper {resp.status_code}: {resp.text[:200]}"}), 400

    try:
        payload = resp.json()
    except ValueError:
        return jsonify({"error": "whisper returned invalid JSON"}), 502

    text = payload.get("text") if isinstance(payload, dict) else None
    text = text.strip() if isinstance(text, str) else ""
    if not text:
        return jsonify({"error": "empty transcription"}), 400

    if tmux("has-session", "-t", TMUX_SESSION).returncode != 0:
        return jsonify({"error": f"no host tmux session '{TMUX_SESSION}' — start it on the host first"}), 400

    # -l types the text literally, so a transcription such as "Enter" or "C-c"
    # is not interpreted as a key name; "--" stops option parsing for text
    # that starts with "-". Enter is then sent as a real key press.
    tmux("send-keys", "-t", TMUX_SESSION, "-l", "--", text)
    tmux("send-keys", "-t", TMUX_SESSION, "Enter")
    return jsonify({"text": text})
|
||||
|
||||
|
||||
@app.route("/output")
def output():
    """Return the captured tmux pane text as JSON ``{"output": <text>}``."""
    pane_text = get_pane_content()
    return jsonify({"output": pane_text})
|
||||
|
||||
|
||||
@app.route("/cancel", methods=["POST"])
def send_cancel():
    """Send Ctrl-C to the tmux session.

    Returns:
        JSON ``{"ok": true}`` when tmux accepted the key press. If tmux exits
        with a non-zero status (for example because the session does not
        exist), ``{"ok": false, "error": <message>}`` with status 400 instead
        of reporting a success that did not happen.
    """
    result = tmux("send-keys", "-t", TMUX_SESSION, "C-c")
    if result.returncode != 0:
        detail = result.stderr.strip() or f"no host tmux session '{TMUX_SESSION}'"
        return jsonify({"ok": False, "error": detail}), 400
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
@app.route("/text", methods=["POST"])
def send_text():
    """Type a text message into the tmux session and press Enter.

    Expects a JSON object body of the form ``{"text": <message>}``.

    Returns:
        JSON ``{"text": <message>}`` on success, otherwise
        ``{"error": <message>}`` with status 400 when the message is missing,
        empty, or not a string, or when the tmux session does not exist.
    """
    # silent=True yields None instead of raising for a missing or malformed
    # JSON body, so every failure below is reported as JSON.
    payload = request.get_json(silent=True)
    text = payload.get("text") if isinstance(payload, dict) else None
    text = text.strip() if isinstance(text, str) else ""
    if not text:
        return jsonify({"error": "empty"}), 400

    if tmux("has-session", "-t", TMUX_SESSION).returncode != 0:
        return jsonify({"error": f"no host tmux session '{TMUX_SESSION}'"}), 400

    # -l types the text literally, so a message such as "Enter" or "C-c" is
    # not interpreted as a key name; "--" stops option parsing for text that
    # starts with "-". Enter is then sent as a real key press.
    tmux("send-keys", "-t", TMUX_SESSION, "-l", "--", text)
    tmux("send-keys", "-t", TMUX_SESSION, "Enter")
    return jsonify({"text": text})
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Listen on every interface so the port published by the container is reachable.
    app.run(port=8951, host="0.0.0.0")
|
||||
17
docker-compose.yml
Normal file
17
docker-compose.yml
Normal file
@@ -0,0 +1,17 @@
|
||||
services:
  voice-kiro:
    build: .
    restart: unless-stopped
    ports:
      # host 8952 -> container 8951 (the port app.py listens on)
      - "8952:8951"
    volumes:
      # kiro-cli binaries (read-only) and their state directories from the host
      - /mnt/data/home/brian/.local/bin/kiro-cli:/usr/local/bin/kiro-cli:ro
      - /mnt/data/home/brian/.local/bin/kiro-cli-chat:/usr/local/bin/kiro-cli-chat:ro
      - /mnt/data/home/brian/.local/share/kiro-cli:/root/.local/share/kiro-cli
      - /mnt/data/home/brian/.kiro:/root/.kiro
      - /mnt/data/home/brian/services:/services
      # NOTE(review): mounting the Docker socket gives the container control of
      # the host Docker daemon.
      - /var/run/docker.sock:/var/run/docker.sock
      # Host tmux socket directory; app.py uses /tmp/tmux-host/default.
      # "3000" is presumably the host user's UID; verify on the host.
      - /tmp/tmux-3000:/tmp/tmux-host
    environment:
      WHISPER_URL: "http://192.168.86.11:8950/v1/audio/transcriptions"
      WHISPER_MODEL: "small.en"
|
||||
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
flask==3.1.1
|
||||
requests==2.32.3
|
||||
131
static/index.html
Normal file
131
static/index.html
Normal file
@@ -0,0 +1,131 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width,initial-scale=1,user-scalable=no">
|
||||
<title>Voice → Kiro</title>
|
||||
<style>
|
||||
*{box-sizing:border-box;margin:0;padding:0}
|
||||
body{font-family:system-ui;background:#1a1a2e;color:#eee;display:flex;flex-direction:column;height:100dvh;padding:0}
|
||||
#output{flex:1;width:100%;background:#0f0f1a;padding:.5rem;overflow-y:auto;font-family:monospace;font-size:.7rem;white-space:pre-wrap;color:#ccc}
|
||||
#bottom{width:100%;padding:.4rem;background:#16213e;display:flex;gap:.4rem;align-items:center}
|
||||
#status{font-size:.75rem;color:#aaa;padding:0 .4rem;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;max-width:40%}
|
||||
#textinput{flex:1;padding:.5rem;border-radius:6px;border:1px solid #333;background:#0f0f1a;color:#eee;font-size:.9rem}
|
||||
.fab{position:fixed;border-radius:50%;border:none;color:#fff;font-size:.7rem;cursor:pointer;touch-action:none;user-select:none;z-index:100;display:flex;align-items:center;justify-content:center;text-align:center;box-shadow:0 2px 8px rgba(0,0,0,.5)}
|
||||
#btn{width:70px;height:70px;background:#16213e;bottom:4rem;right:1rem;font-size:.8rem}
|
||||
#btn.recording{background:#e94560;transform:scale(1.1)}
|
||||
#cancelBtn{width:50px;height:50px;background:#e94560;bottom:4rem;left:1rem;font-size:.65rem}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div id="output"></div>
|
||||
<div id="bottom">
|
||||
<span id="status"></span>
|
||||
<form id="textform" autocomplete="off" style="flex:1;display:flex"><input id="textinput" type="text" placeholder="Message..." autocomplete="off"></form>
|
||||
</div>
|
||||
<button id="btn" class="fab">Hold<br>to Talk</button>
|
||||
<button id="cancelBtn" class="fab">⌃C</button>
|
||||
<script>
|
||||
// DOM handles used throughout this script.
const btn = document.getElementById('btn');
const status = document.getElementById('status');
const output = document.getElementById('output');

// Current MediaRecorder (undefined until the first recording) and the audio
// chunks it has delivered so far.
let mediaRec;
let chunks = [];
|
||||
|
||||
// True from start() until stop(). getUserMedia resolves asynchronously, so
// this is how start() learns that the button was released in the meantime.
let recordingWanted = false;

// Ask for the microphone and begin recording. When the recorder stops, the
// microphone is released and the captured audio is handed to send().
function start(){
  // mediaDevices is only exposed in secure contexts (HTTPS or localhost);
  // report that instead of throwing inside the pointer handler.
  if(!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia){
    status.textContent = 'Mic: unavailable (requires HTTPS or localhost)';
    return;
  }
  recordingWanted = true;
  navigator.mediaDevices.getUserMedia({audio:true}).then(stream=>{
    const releaseMic = () => stream.getTracks().forEach(t=>t.stop());

    // The button was released before the stream arrived: do not start a
    // recording that nothing would ever stop.
    if(!recordingWanted){
      releaseMic();
      return;
    }

    let rec;
    try{
      const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus') ? 'audio/webm;codecs=opus' : 'audio/mp4';
      rec = new MediaRecorder(stream,{mimeType});
    }catch(e){
      // Neither container is supported: release the microphone before the
      // error reaches the .catch below.
      releaseMic();
      throw e;
    }

    // Per-recording chunk list, so a later recording cannot mix into this one.
    const parts = [];
    rec.ondataavailable = e => parts.push(e.data);
    rec.onstop = () => {
      releaseMic();
      const ext = rec.mimeType.includes('mp4') ? 'mp4' : 'webm';
      send(new Blob(parts,{type:rec.mimeType}), ext);
    };

    mediaRec = rec;
    chunks = parts;
    rec.start();
    btn.classList.add('recording');
    btn.textContent = '●';
    status.textContent = '';
  }).catch(e=>{status.textContent='Mic: '+e.message});
}

// Stop the active recording, if any, and restore the button label. Also
// cancels a start() that is still waiting for the microphone.
function stop(){
  recordingWanted = false;
  if(mediaRec && mediaRec.state==='recording'){
    mediaRec.stop();
    btn.classList.remove('recording');
    btn.innerHTML = 'Hold<br>to Talk';
  }
}
|
||||
|
||||
// Upload a recorded clip to /send and show the transcription (or the error)
// in the status line. `ext` is the file extension used for the upload name.
function send(blob, ext){
  status.textContent = 'Transcribing...';

  const form = new FormData();
  form.append('audio', blob, 'audio.'+ext);

  const showReply = reply => {
    if(!reply.error){
      status.textContent = '✓ '+reply.text.slice(0,30);
      pollOutput();
      return;
    }
    status.textContent = 'Err: '+reply.error;
  };
  const showFailure = err => {
    status.textContent = 'Fail: '+err.message;
  };

  fetch('/send', {method:'POST', body:form})
    .then(res => res.json())
    .then(showReply)
    .catch(showFailure);
}
|
||||
|
||||
// Fetch the terminal text from /output and display it. The view follows new
// output only if it was already within 50px of the bottom.
function pollOutput(){
  fetch('/output')
    .then(res => res.json())
    .then(data => {
      const distanceFromBottom = output.scrollHeight - output.scrollTop - output.clientHeight;
      const wasAtBottom = distanceFromBottom < 50;
      output.textContent = data.output;
      if(wasAtBottom){
        output.scrollTop = output.scrollHeight;
      }
    })
    .catch(() => {});
}

// Refresh once a second, and once immediately on load.
setInterval(pollOutput, 1000);
pollOutput();
|
||||
|
||||
// Submit typed text to /text instead of letting the form navigate.
document.getElementById('textform').addEventListener('submit', evt => {
  evt.preventDefault();

  const field = document.getElementById('textinput');
  const message = field.value.trim();
  if(message){
    field.value = '';
    status.textContent = 'Sending...';

    const request = {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify({text:message})
    };
    fetch('/text', request)
      .then(res => res.json())
      .then(reply => {
        if(!reply.error){
          status.textContent = '✓ '+reply.text.slice(0,30);
          pollOutput();
          return;
        }
        status.textContent = 'Err: '+reply.error;
      })
      .catch(err => { status.textContent = 'Fail: '+err.message; });
  }
});
|
||||
|
||||
// Ask the server to send Ctrl-C to the terminal and report what happened.
document.getElementById('cancelBtn').addEventListener('click', () => {
  fetch('/cancel', {method:'POST'})
    .then(r => {
      // fetch resolves for any HTTP status, so check r.ok rather than
      // claiming success on an error response.
      status.textContent = r.ok ? '⌃C sent' : '⌃C failed ('+r.status+')';
    })
    .catch(e => { status.textContent = 'Fail: '+e.message; });
});
|
||||
|
||||
// Hold-to-talk: record while the button is pressed. The pointer is captured
// so the release is delivered to the button even if the pointer leaves it.
btn.addEventListener('pointerdown', evt => {
  if(evt.pointerId !== undefined){
    btn.setPointerCapture(evt.pointerId);
  }
  evt.preventDefault();
  start();
});
['pointerup', 'pointercancel'].forEach(type => btn.addEventListener(type, stop));
|
||||
|
||||
// Draggable FABs
// Each floating button can be repositioned by pressing and moving it more
// than 10px. A press that turned into a drag does not trigger the button.
document.querySelectorAll('.fab').forEach(fab => {
  // pressed: a pointer is currently down on this button
  // dragging: the current press has moved past the 10px threshold
  // suppressClick: the click that follows the last drag must be discarded
  let pressed = false, dragging = false, suppressClick = false;
  let ox, oy, sx, sy;

  fab.addEventListener('pointerdown', e => {
    pressed = true;
    dragging = false;
    suppressClick = false;
    ox = e.clientX; oy = e.clientY;
    sx = fab.offsetLeft; sy = fab.offsetTop;
    // Capture the pointer so fast movements that leave the button still
    // reach it.
    if(e.pointerId !== undefined) fab.setPointerCapture(e.pointerId);
  });

  fab.addEventListener('pointermove', e => {
    // Ignore hover movement; without this guard the stale coordinates of an
    // earlier press make the button jump when the mouse passes over it.
    if(!pressed) return;
    const dx = e.clientX - ox, dy = e.clientY - oy;
    if(!dragging && Math.abs(dx) + Math.abs(dy) > 10) dragging = true;
    if(dragging){
      fab.style.left = (sx + dx) + 'px';
      fab.style.top = (sy + dy) + 'px';
      fab.style.right = 'auto';
      fab.style.bottom = 'auto';
      e.preventDefault();
    }
  });

  fab.addEventListener('pointerup', e => {
    pressed = false;
    if(dragging){
      suppressClick = true;
      e.stopImmediatePropagation();
    }
    dragging = false;
  });

  fab.addEventListener('pointercancel', () => {
    pressed = false;
    dragging = false;
  });

  // Stopping pointerup does not cancel the click that follows it. Swallow
  // that click during the document's capture phase, which runs before any
  // listener on the button itself.
  document.addEventListener('click', e => {
    if(suppressClick && fab.contains(e.target)){
      suppressClick = false;
      e.preventDefault();
      e.stopImmediatePropagation();
    }
  }, true);
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user