# syntax=docker/dockerfile:1

# ── Base image ────────────────────────────────────────────────────────────────
# Use CUDA-enabled image so the container can use GPU if available.
# Falls back to CPU automatically (same as bare-metal behaviour).
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# ── System deps ───────────────────────────────────────────────────────────────
# update + install in one layer (avoids stale apt cache), list cleaned in the
# same layer so it never persists in the image; packages sorted alphabetically.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# ── Working directory ─────────────────────────────────────────────────────────
WORKDIR /app

# ── Python dependencies ───────────────────────────────────────────────────────
# Copied separately from the app code so this (slow) layer stays cached until
# requirements.txt itself changes. --no-cache-dir on both pip calls (DL3042).
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# ── Application code ──────────────────────────────────────────────────────────
COPY reranker_server.py .

# ── HuggingFace cache ─────────────────────────────────────────────────────────
# Point HF libraries at a path inside /app/hf_cache so it can be bind-mounted
# from the host directory, making model downloads persist across container runs.
ENV HF_HOME=/app/hf_cache \
    SENTENCE_TRANSFORMERS_HOME=/app/hf_cache

# Create the directory in the image as a fallback (overridden by the mount).
RUN mkdir -p /app/hf_cache

# ── Non-root user ─────────────────────────────────────────────────────────────
# Fixed UID/GID so orchestrators (e.g. Kubernetes runAsNonRoot) can verify it.
# NOTE(review): if /app/hf_cache is bind-mounted from the host, the host
# directory must be writable by uid 10001 — confirm before deploying.
RUN groupadd --system --gid 10001 app && \
    useradd --system --uid 10001 --gid app --home /app --no-create-home app && \
    chown -R app:app /app
USER app

# ── Expose port ───────────────────────────────────────────────────────────────
# Documentation only; publish with -p 5200:5200 at run time.
EXPOSE 5200

# NOTE(review): consider adding a HEALTHCHECK once the server's health endpoint
# is confirmed (curl is already installed), e.g.:
#   HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
#     CMD curl -fsS http://localhost:5200/<health-path> || exit 1

# ── Entrypoint ────────────────────────────────────────────────────────────────
# Exec (JSON-array) form so uvicorn runs as PID 1 and receives SIGTERM directly.
CMD ["uvicorn", "reranker_server:app", "--host", "0.0.0.0", "--port", "5200"]