# syntax=docker/dockerfile:1

# ── Base image ────────────────────────────────────────────────────────────────
# Use CUDA-enabled image so the container can use GPU if available.
# Falls back to CPU automatically (same as bare-metal behaviour).
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# ── System deps ───────────────────────────────────────────────────────────────
# update + install in one layer (avoids stale apt cache), list cleaned in the
# same layer so it never persists in the image; packages sorted alphabetically.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# ── Working directory ─────────────────────────────────────────────────────────
WORKDIR /app

# ── Python dependencies ───────────────────────────────────────────────────────
# Copied separately from the app code so this (slow) layer stays cached until
# requirements.txt itself changes. --no-cache-dir on both pip calls (DL3042).
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# ── Application code ──────────────────────────────────────────────────────────
COPY reranker_server.py .

# ── HuggingFace cache ─────────────────────────────────────────────────────────
# Point HF libraries at a path inside /app/hf_cache so it can be bind-mounted
# from the host directory, making model downloads persist across container runs.
ENV HF_HOME=/app/hf_cache \
    SENTENCE_TRANSFORMERS_HOME=/app/hf_cache

# Create the directory in the image as a fallback (overridden by the mount).
RUN mkdir -p /app/hf_cache

# ── Non-root user ─────────────────────────────────────────────────────────────
# Fixed UID/GID so orchestrators (e.g. Kubernetes runAsNonRoot) can verify it.
# NOTE(review): if /app/hf_cache is bind-mounted from the host, the host
# directory must be writable by uid 10001 — confirm before deploying.
RUN groupadd --system --gid 10001 app && \
    useradd --system --uid 10001 --gid app --home /app --no-create-home app && \
    chown -R app:app /app
USER app

# ── Expose port ───────────────────────────────────────────────────────────────
# Documentation only; publish with -p 5200:5200 at run time.
EXPOSE 5200

# NOTE(review): consider adding a HEALTHCHECK once the server's health endpoint
# is confirmed (curl is already installed), e.g.:
#   HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
#     CMD curl -fsS http://localhost:5200/<health-path> || exit 1

# ── Entrypoint ────────────────────────────────────────────────────────────────
# Exec (JSON-array) form so uvicorn runs as PID 1 and receives SIGTERM directly.
CMD ["uvicorn", "reranker_server:app", "--host", "0.0.0.0", "--port", "5200"]