|
@@ -0,0 +1,171 @@
|
|
|
|
|
+#!/usr/bin/env python3
|
|
|
|
|
+"""
|
|
|
|
|
+Minimal whisper.cpp HTTP server (one-shot transcription)
|
|
|
|
|
+
|
|
|
|
|
+POST /transcribe
|
|
|
|
|
+ multipart/form-data with field: file=@audio.(ogg|mp3|wav|m4a|...)
|
|
|
|
|
+Returns:
|
|
|
|
|
+ { "text": "..." }
|
|
|
|
|
+
|
|
|
|
|
+If transcription is empty, it returns diagnostics (stdout/stderr/cmd) to help debug.
|
|
|
|
|
+
|
|
|
|
|
+Notes:
|
|
|
|
|
+- This version forces English (-l en). Change to "de" if needed.
|
|
|
|
|
+- It converts any input to 16kHz mono WAV via ffmpeg for robustness.
|
|
|
|
|
+"""
|
|
|
|
|
+
|
|
|
|
|
+from flask import Flask, request, jsonify
|
|
|
|
|
+import subprocess
|
|
|
|
|
+import os
|
|
|
|
|
+import tempfile
|
|
|
|
|
+
|
|
|
|
|
app = Flask(__name__)

# Adjust these paths for your machine, either by editing the defaults below
# or by exporting WHISPER_BIN / WHISPER_MODEL before starting the server.
WHISPER_BIN = os.path.expanduser(
    os.environ.get("WHISPER_BIN", "~/whisper.cpp/build/bin/whisper-cli")
)  # e.g. ~/whisper.cpp/main or ~/whisper.cpp/build/bin/whisper-cli
MODEL_PATH = os.path.expanduser(
    os.environ.get("WHISPER_MODEL", "~/whisper.cpp/models/ggml-small.bin")
)

# Language passed to whisper via "-l" ("en", "de", ...). Override with the
# WHISPER_LANG environment variable. A value of None (or an empty
# WHISPER_LANG) only omits the "-l" flag, leaving the choice to the binary's
# built-in default.
# NOTE(review): whisper.cpp's CLI help lists "auto" as the value that enables
# language auto-detection and "en" as the default -- confirm with
# `whisper-cli --help` for your build before relying on None for detection.
FORCE_LANG = os.environ.get("WHISPER_LANG", "en") or None

# ffmpeg settings for stable whisper input (sample rate in Hz, channel count).
# Kept as strings because they are passed straight onto the ffmpeg command line.
WAV_AR = "16000"
WAV_AC = "1"
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def _run(cmd, timeout=300):
    """Run a command to completion and capture its output.

    Args:
        cmd: Argument list handed to ``subprocess.run`` (no shell is involved).
        timeout: Seconds to wait before ``subprocess.run`` gives up on the child.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple with both streams decoded as
        UTF-8 text.

    Raises:
        subprocess.TimeoutExpired: If the command runs longer than *timeout*.
        OSError: If the executable cannot be started (for example, not found).
    """
    completed = subprocess.run(
        cmd,
        # Do not let the child inherit the server's stdin: tools such as
        # ffmpeg read it as an interactive control channel, which can stall
        # or misbehave when the server runs in the background.
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout,
    )
    # "replace" keeps undecodable bytes visible (as U+FFFD) in diagnostics
    # instead of silently dropping them.
    return (
        completed.returncode,
        completed.stdout.decode("utf-8", errors="replace"),
        completed.stderr.decode("utf-8", errors="replace"),
    )
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
@app.get("/health")
def health():
    """Report whether the transcription dependencies look usable.

    Checks that the whisper binary is an executable file, that the model file
    exists, and that ``ffmpeg -version`` runs successfully.

    Returns:
        A JSON response ``{"ok": bool, "problems": [str, ...]}`` where
        ``problems`` holds one message per failed check and ``ok`` is true
        only when that list is empty.
    """
    problems = []

    # A path that exists but is a directory or lacks the execute bit would
    # still fail at transcription time, so report it here.
    if not os.path.exists(WHISPER_BIN):
        problems.append(f"WHISPER_BIN not found: {WHISPER_BIN}")
    elif not (os.path.isfile(WHISPER_BIN) and os.access(WHISPER_BIN, os.X_OK)):
        problems.append(f"WHISPER_BIN is not an executable file: {WHISPER_BIN}")

    if not os.path.exists(MODEL_PATH):
        problems.append(f"MODEL_PATH not found: {MODEL_PATH}")
    elif not os.path.isfile(MODEL_PATH):
        problems.append(f"MODEL_PATH is not a file: {MODEL_PATH}")

    # Check ffmpeg availability
    try:
        code, _, _ = _run(["ffmpeg", "-version"], timeout=10)
    except Exception as e:
        # Deliberately broad: a health probe should report the failure
        # (missing binary, timeout, ...) rather than raise.
        problems.append(f"ffmpeg check failed: {e}")
    else:
        if code != 0:
            problems.append("ffmpeg not working")

    return jsonify({"ok": not problems, "problems": problems})
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
@app.post("/transcribe")
def transcribe():
    """Transcribe an uploaded audio file with whisper.cpp.

    Expects ``multipart/form-data`` with the audio in the field ``file``. The
    upload is converted to a mono PCM WAV with ffmpeg (sample rate ``WAV_AR``,
    channels ``WAV_AC``) and then passed to the whisper binary, whose ``.txt``
    output is returned.

    Returns:
        * ``200 {"text": "..."}`` on success.
        * ``200`` with ``text == ""`` plus ``note``/``returncode``/``stdout``/
          ``stderr``/``cmd`` diagnostics when the transcript is empty.
        * ``400`` when the ``file`` field is missing or has no filename.
        * ``500`` when ffmpeg fails or an unexpected exception occurs.
        * ``504`` when ffmpeg or whisper exceeds its timeout.
    """
    if "file" not in request.files:
        return jsonify({"error": "no file field; expected multipart/form-data with field 'file'"}), 400

    up = request.files["file"]
    if not up.filename:
        return jsonify({"error": "empty filename"}), 400

    # The filename is client-controlled: keep only a short alphanumeric
    # extension and fall back to ".bin" for anything else.
    ext = os.path.splitext(up.filename)[1]
    if not (1 < len(ext) <= 16 and ext[1:].isalnum()):
        ext = ".bin"

    try:
        # One private directory holds every artifact (upload, WAV, whisper
        # output). It is removed on every exit path, including files whisper
        # writes that we do not read.
        with tempfile.TemporaryDirectory(prefix="whisper-") as workdir:
            # "input" and "audio" differ so a .wav upload never collides with
            # the converted file.
            in_path = os.path.join(workdir, "input" + ext)
            wav_path = os.path.join(workdir, "audio.wav")
            # whisper appends the extension itself (we set -of explicitly)
            out_base = os.path.join(workdir, "transcript")

            up.save(in_path)

            # 1) Convert input -> wav
            ffmpeg_cmd = [
                "ffmpeg",
                "-hide_banner",
                "-nostdin",  # never wait for interactive input
                "-loglevel", "error",
                "-y",
                "-i", in_path,
                "-ar", WAV_AR,
                "-ac", WAV_AC,
                "-c:a", "pcm_s16le",
                wav_path,
            ]
            ff_code, _, ff_err = _run(ffmpeg_cmd, timeout=120)
            if ff_code != 0:
                return jsonify({
                    "error": "ffmpeg conversion failed",
                    "returncode": ff_code,
                    "stderr": ff_err[-4000:],
                    "cmd": ffmpeg_cmd,
                }), 500

            # 2) Run whisper.cpp
            # whisper.cpp flags vary slightly by build; these are commonly
            # supported in whisper.cpp "main"
            whisper_cmd = [
                WHISPER_BIN,
                "-m", MODEL_PATH,
                "-f", wav_path,
                "-otxt",
                "-of", out_base,
                "-nt",
            ]
            if FORCE_LANG:
                whisper_cmd += ["-l", FORCE_LANG]

            w_code, w_out, w_err = _run(whisper_cmd, timeout=300)

            # A missing output file is treated like an empty transcript.
            # errors="replace" keeps a stray invalid byte from turning a
            # finished transcription into a 500.
            try:
                with open(out_base + ".txt", "r", encoding="utf-8", errors="replace") as fh:
                    text = fh.read().strip()
            except FileNotFoundError:
                text = ""

            # If empty, return diagnostics (don't hide it behind a 500)
            if not text:
                return jsonify({
                    "text": "",
                    "note": "empty transcript; returning diagnostics",
                    "returncode": w_code,
                    "stdout": w_out[-4000:],
                    "stderr": w_err[-4000:],
                    "cmd": whisper_cmd,
                }), 200

            return jsonify({"text": text}), 200

    except subprocess.TimeoutExpired:
        return jsonify({"error": "timeout"}), 504
    except Exception as e:
        # Request boundary: report the failure as JSON rather than let it
        # propagate out of the handler.
        return jsonify({"error": f"server exception: {e}"}), 500
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
if __name__ == "__main__":
    # Listen on LAN by default (all interfaces, port 5005). The endpoints in
    # this file perform no authentication, so set WHISPER_HOST=127.0.0.1 to
    # restrict the server to this machine, or WHISPER_PORT to change the port.
    app.run(
        host=os.environ.get("WHISPER_HOST", "0.0.0.0"),
        port=int(os.environ.get("WHISPER_PORT", "5005")),
    )
|