Browse Source

first commit

Lukas Goldschmidt 1 week ago
commit
0c481ee18b
4 changed files with 66 additions and 0 deletions
  1. 50 0
      Dockerfile
  2. 1 0
      build.sh
  3. 2 0
      run.sh
  4. 13 0
      tts_server.py

+ 50 - 0
Dockerfile

@@ -0,0 +1,50 @@
+# CUDA 11.8 image that serves Coqui TTS through uvicorn on port 5002.
+FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
+
+# Build-time only, so the apt setting does not persist into running containers.
+ARG DEBIAN_FRONTEND=noninteractive
+# TTS_HOME is the variable Coqui TTS reads to locate its model directory.
+ENV PYTHONUNBUFFERED=1 COQUI_TTS_CACHE=/models TORCH_HOME=/models TTS_HOME=/models
+
+WORKDIR /app
+
+# Ubuntu repositories
+RUN printf 'deb http://archive.ubuntu.com/ubuntu jammy main universe\n\
+deb http://archive.ubuntu.com/ubuntu jammy-updates main universe\n\
+deb http://archive.ubuntu.com/ubuntu jammy-security main universe\n' > /etc/apt/sources.list
+
+# System dependencies (sorted alphabetically; apt lists removed in the same layer)
+RUN apt-get update && apt-get install -y \
+    espeak-ng \
+    ffmpeg \
+    git \
+    libsndfile1 \
+    python3 \
+    python3-pip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip
+RUN pip3 install --no-cache-dir --upgrade pip
+
+# CUDA-enabled PyTorch
+RUN pip3 install --no-cache-dir \
+    torch \
+    torchvision \
+    torchaudio \
+    --index-url https://download.pytorch.org/whl/cu118
+
+# Install Coqui TTS + server (quoted so the shell cannot glob-expand the extras)
+RUN pip3 install --no-cache-dir \
+    'TTS[all]' \
+    fastapi \
+    uvicorn
+
+# Bake the server in so the image runs without a bind mount; mounting a file over it still works.
+COPY tts_server.py /app/tts_server.py
+
+# Ports and optional volume mounts (directories are created before VOLUME is declared)
+RUN mkdir -p /models /voices
+EXPOSE 5002
+VOLUME ["/models", "/voices"]
+
+ENTRYPOINT ["python3", "-m", "uvicorn", "tts_server:app", "--host", "0.0.0.0", "--port", "5002"]

+ 1 - 0
build.sh

@@ -0,0 +1 @@
+docker build --tag coqui-tts-server . # build the image from the Dockerfile in this directory

+ 2 - 0
run.sh

@@ -0,0 +1,2 @@
+docker run -d --gpus all -p 5002:5002  $(pwd)/tts_server.py:/app/tts_server.py  -v $(pwd)/models:/models  -v $(pwd)/voices:/voices  --name coqui-tts-server coqui-tts-server
+

+ 13 - 0
tts_server.py

@@ -0,0 +1,13 @@
+from fastapi import FastAPI, Query
+from fastapi.responses import FileResponse
+from TTS.api import TTS
+import tempfile
+
+app = FastAPI()
+tts = TTS(model_name="tts_models/en/ljspeech/vits", gpu=True)
+
+@app.get("/api/tts")
+def synth(text: str = Query(...)):
+    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+    tts.tts_to_file(text=text, file_path=tmp.name)
+    return FileResponse(tmp.name, media_type="audio/wav", filename="speech.wav")