# syntax=docker/dockerfile:1

# ── Base image ────────────────────────────────────────────────────────────────
# CUDA-enabled runtime so the container can use the GPU when available.
# Falls back to CPU automatically (same as bare-metal behaviour).
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# ── System deps ───────────────────────────────────────────────────────────────
# build-essential/cmake cover requirements that compile native extensions;
# curl doubles as the HEALTHCHECK probe. Cache cleaned in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# ── Working directory ─────────────────────────────────────────────────────────
WORKDIR /app

# ── Python dependencies ───────────────────────────────────────────────────────
# Copy the manifest alone first so the (slow) dependency layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# ── Application code ──────────────────────────────────────────────────────────
COPY reranker_server.py .

# ── HuggingFace cache ─────────────────────────────────────────────────────────
# Point HF libraries at a path inside /app/hf_cache so it can be bind-mounted
# from the host directory, making model downloads persist across container runs.
ENV HF_HOME=/app/hf_cache \
    SENTENCE_TRANSFORMERS_HOME=/app/hf_cache

# ── Non-root user ─────────────────────────────────────────────────────────────
# Stable numeric UID/GID so orchestrators (e.g. k8s runAsNonRoot) can verify it.
# Also creates /app/hf_cache in the image as a fallback (overridden by the
# mount). NOTE(review): a host-mounted cache dir must be writable by UID 10001.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --home /app app \
    && mkdir -p /app/hf_cache \
    && chown -R app:app /app
USER app

# ── Expose port ───────────────────────────────────────────────────────────────
# Documentation only; publish with `docker run -p 5200:5200`.
EXPOSE 5200

# ── Healthcheck ───────────────────────────────────────────────────────────────
# Long start-period: first boot may download models into the (empty) cache.
# NOTE(review): probes FastAPI's auto-generated /docs route — switch to a
# dedicated /health endpoint if reranker_server defines one.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:5200/docs || exit 1

# ── Entrypoint ────────────────────────────────────────────────────────────────
# Exec form so uvicorn is PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "reranker_server:app", "--host", "0.0.0.0", "--port", "5200"]