2 달 전 · e8f88eb57c
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,14 @@
 
															+# Byte-compiled / optimized / DLL files
														
 
															+__pycache__/
														
 
															+*.py[codz]
														
 
															+*$py.class
														
 
															+
														
 
															+# Environments
														
 
															+.env
														
 
															+.envrc
														
 
															+.venv
														
 
															+env/
														
 
															+venv/
														
 
															+ENV/
														
 
															+env.bak/
														
 
															+venv.bak/
														
--- a/README.md
+++ b/README.md
@@ -0,0 +1,3 @@
 
															+# Whisper STT Server
														
 
															+
														
 
															+This first Flask version works for me.
														
--- a/server.py
+++ b/server.py
@@ -0,0 +1,171 @@
 
															+#!/usr/bin/env python3
														
 
															+"""
														
 
															+Minimal whisper.cpp HTTP server (one-shot transcription)
														
 
															+
														
 
															+POST /transcribe
														
 
															+  multipart/form-data with field: file=@audio.(ogg|mp3|wav|m4a|...)
														
 
															+Returns:
														
 
															+  { "text": "..." }
														
 
															+
														
 
															+If transcription is empty, it returns diagnostics (stdout/stderr/cmd) to help debug.
														
 
															+
														
 
															+Notes:
														
 
															+- This version forces English (-l en). Change to "de" if needed.
														
 
															+- It converts any input to 16kHz mono WAV via ffmpeg for robustness.
														
 
															+"""
														
 
															+
														
 
															+from flask import Flask, request, jsonify
														
 
															+import subprocess
														
 
															+import os
														
 
															+import tempfile
														
 
															+
														
 
# Flask application object; the route decorators in this file register their handlers on it.
app = Flask(__name__)
														
 
															+
														
 
															+# Adjust these paths for your machine:
														
 
															+WHISPER_BIN = os.path.expanduser("~/whisper.cpp/build/bin/whisper-cli")  # e.g. ~/whisper.cpp/main or ~/whisper.cpp/build/bin/whisper-cli
														
 
															+MODEL_PATH = os.path.expanduser("~/whisper.cpp/models/ggml-small.bin")
														
 
															+
														
 
															+# Language to force; set to None to let whisper auto-detect (I recommend forcing for reliability)
														
 
															+FORCE_LANG = "en"   # "en" or "de" etc.
														
 
															+
														
 
															+# ffmpeg settings for stable whisper input
														
 
															+WAV_AR = "16000"
														
 
															+WAV_AC = "1"
														
 
															+
														
 
															+
														
 
															+def _run(cmd, timeout=300):
														
 
															+    """Run subprocess and return (returncode, stdout_str, stderr_str)."""
														
 
															+    r = subprocess.run(
														
 
															+        cmd,
														
 
															+        stdout=subprocess.PIPE,
														
 
															+        stderr=subprocess.PIPE,
														
 
															+        timeout=timeout,
														
 
															+    )
														
 
															+    return (
														
 
															+        r.returncode,
														
 
															+        r.stdout.decode("utf-8", errors="ignore"),
														
 
															+        r.stderr.decode("utf-8", errors="ignore"),
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+@app.get("/health")
														
 
															+def health():
														
 
															+    ok = True
														
 
															+    problems = []
														
 
															+
														
 
															+    if not os.path.exists(WHISPER_BIN):
														
 
															+        ok = False
														
 
															+        problems.append(f"WHISPER_BIN not found: {WHISPER_BIN}")
														
 
															+    if not os.path.exists(MODEL_PATH):
														
 
															+        ok = False
														
 
															+        problems.append(f"MODEL_PATH not found: {MODEL_PATH}")
														
 
															+
														
 
															+    # Check ffmpeg availability
														
 
															+    try:
														
 
															+        code, out, err = _run(["ffmpeg", "-version"], timeout=10)
														
 
															+        if code != 0:
														
 
															+            ok = False
														
 
															+            problems.append("ffmpeg not working")
														
 
															+    except Exception as e:
														
 
															+        ok = False
														
 
															+        problems.append(f"ffmpeg check failed: {e}")
														
 
															+
														
 
															+    return jsonify({"ok": ok, "problems": problems})
														
 
															+
														
 
															+
														
 
															+@app.post("/transcribe")
														
 
															+def transcribe():
														
 
															+    if "file" not in request.files:
														
 
															+        return jsonify({"error": "no file field; expected multipart/form-data with field 'file'"}), 400
														
 
															+
														
 
															+    up = request.files["file"]
														
 
															+    if not up.filename:
														
 
															+        return jsonify({"error": "empty filename"}), 400
														
 
															+
														
 
															+    # Store uploaded audio to temp
														
 
															+    in_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(up.filename)[1] or ".bin")
														
 
															+    up.save(in_tmp.name)
														
 
															+    in_tmp.close()
														
 
															+
														
 
															+    # Convert to WAV 16kHz mono (whisper-friendly)
														
 
															+    wav_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
														
 
															+    wav_tmp.close()
														
 
															+
														
 
															+    # Where whisper.cpp will write output .txt (we set -of explicitly)
														
 
															+    out_base = tempfile.NamedTemporaryFile(delete=False)
														
 
															+    out_base.close()
														
 
															+    os.unlink(out_base.name)  # we only want the path, whisper will create files
														
 
															+
														
 
															+    try:
														
 
															+        # 1) Convert input -> wav
														
 
															+        ffmpeg_cmd = [
														
 
															+            "ffmpeg",
														
 
															+            "-hide_banner",
														
 
															+            "-loglevel", "error",
														
 
															+            "-y",
														
 
															+            "-i", in_tmp.name,
														
 
															+            "-ar", WAV_AR,
														
 
															+            "-ac", WAV_AC,
														
 
															+            "-c:a", "pcm_s16le",
														
 
															+            wav_tmp.name,
														
 
															+        ]
														
 
															+        ff_code, ff_out, ff_err = _run(ffmpeg_cmd, timeout=120)
														
 
															+        if ff_code != 0:
														
 
															+            return jsonify({
														
 
															+                "error": "ffmpeg conversion failed",
														
 
															+                "returncode": ff_code,
														
 
															+                "stderr": ff_err[-4000:],
														
 
															+                "cmd": ffmpeg_cmd,
														
 
															+            }), 500
														
 
															+
														
 
															+        # 2) Run whisper.cpp
														
 
															+        # whisper.cpp flags vary slightly by build; these are commonly supported in whisper.cpp "main"
														
 
															+        whisper_cmd = [
														
 
															+            WHISPER_BIN,
														
 
															+            "-m", MODEL_PATH,
														
 
															+            "-f", wav_tmp.name,
														
 
															+            "-otxt",
														
 
															+            "-of", out_base.name,
														
 
															+	    "-nt"
														
 
															+        ]
														
 
															+        if FORCE_LANG:
														
 
															+            whisper_cmd += ["-l", FORCE_LANG]
														
 
															+
														
 
															+        w_code, w_out, w_err = _run(whisper_cmd, timeout=300)
														
 
															+
														
 
															+        txt_path = out_base.name + ".txt"
														
 
															+        text = ""
														
 
															+        if os.path.exists(txt_path):
														
 
															+            with open(txt_path, "r", encoding="utf-8") as fh:
														
 
															+                text = fh.read().strip()
														
 
															+
														
 
															+        # If empty, return diagnostics (don’t hide it behind a 500)
														
 
															+        if not text:
														
 
															+            return jsonify({
														
 
															+                "text": "",
														
 
															+                "note": "empty transcript; returning diagnostics",
														
 
															+                "returncode": w_code,
														
 
															+                "stdout": w_out[-4000:],
														
 
															+                "stderr": w_err[-4000:],
														
 
															+                "cmd": whisper_cmd,
														
 
															+            }), 200
														
 
															+
														
 
															+        return jsonify({"text": text}), 200
														
 
															+
														
 
															+    except subprocess.TimeoutExpired:
														
 
															+        return jsonify({"error": "timeout"}), 504
														
 
															+    except Exception as e:
														
 
															+        return jsonify({"error": f"server exception: {e}"}), 500
														
 
															+    finally:
														
 
															+        # cleanup
														
 
															+        for p in [in_tmp.name, wav_tmp.name, out_base.name + ".txt", out_base.name + ".json", out_base.name + ".srt", out_base.name + ".vtt"]:
														
 
															+            try:
														
 
															+                if os.path.exists(p):
														
 
															+                    os.unlink(p)
														
 
															+            except Exception:
														
 
															+                pass
														
 
															+
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    # Listen on LAN
														
 
															+    app.run(host="0.0.0.0", port=5005)