# Dockerfile (2.5 KB)
  1. # ── Base image ────────────────────────────────────────────────────────────────
  2. # Use CUDA-enabled image so the container can use GPU if available.
  3. # Falls back to CPU automatically (same as bare-metal behaviour).
  4. FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime
  5. # ── System deps ───────────────────────────────────────────────────────────────
  6. RUN apt-get update && apt-get install -y --no-install-recommends \
  7. git \
  8. curl \
  9. build-essential \
  10. cmake \
  11. && rm -rf /var/lib/apt/lists/*
  12. # ── Working directory ─────────────────────────────────────────────────────────
  13. WORKDIR /app
  14. # ── Python dependencies ───────────────────────────────────────────────────────
  15. COPY requirements.txt .
  16. RUN pip install --upgrade pip && \
  17. pip install --no-cache-dir -r requirements.txt
  18. # ── Application code ──────────────────────────────────────────────────────────
  19. COPY reranker_server.py .
  20. # ── HuggingFace cache ─────────────────────────────────────────────────────────
  21. # Point HF libraries at a path inside /app/hf_cache so it can be bind-mounted
  22. # from the host directory, making model downloads persist across container runs.
  23. ENV HF_HOME=/app/hf_cache
  24. ENV SENTENCE_TRANSFORMERS_HOME=/app/hf_cache
  25. # Create the directory in the image as a fallback (overridden by the mount).
  26. RUN mkdir -p /app/hf_cache
  27. # ── Expose port ───────────────────────────────────────────────────────────────
  28. EXPOSE 5200
  29. # ── Entrypoint ────────────────────────────────────────────────────────────────
  30. CMD ["uvicorn", "reranker_server:app", "--host", "0.0.0.0", "--port", "5200"]