Files
SaaS-PDF/backend/Dockerfile

54 lines
1.4 KiB
Docker

# Base image: Debian bookworm "slim" variant of the official Python 3.12 image.
FROM python:3.12-slim-bookworm
# Prevent interactive prompts during package installation.
# Declared as ARG (not ENV) so it is visible to the RUN steps of this build
# but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
# System packages used for file processing, one per line and sorted
# alphabetically so additions and removals produce minimal diffs.
#   - curl is used by the HEALTHCHECK instruction in this file
#   - default-jre-headless supplies the Java runtime that tabula-py needs
# The apt package index is removed in the same layer so it never reaches the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        default-jre-headless \
        ffmpeg \
        ghostscript \
        imagemagick \
        libmagic1 \
        libreoffice-calc \
        libreoffice-core \
        libreoffice-draw \
        libreoffice-impress \
        libreoffice-writer \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-ara \
        tesseract-ocr-eng \
        tesseract-ocr-fra \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# tabula-py (extract-tables, pdf-to-excel) needs Java on PATH.
# JAVA_HOME points at /usr/lib/jvm/default-java (presumably provided by the
# default-jre-headless package installed in this image - verify if the JRE changes).
# These stay as two separate ENV instructions: PATH refers to JAVA_HOME, and a
# variable is only available for substitution in instructions after the one that sets it.
ENV JAVA_HOME=/usr/lib/jvm/default-java
ENV PATH="$JAVA_HOME/bin:$PATH"
# Application home; relative paths in the instructions below resolve against it.
WORKDIR /app
# Install Python dependencies from requirements.txt alone, before the rest of
# the source is copied, so this layer is reused until requirements.txt changes.
COPY requirements.txt ./
# The trailing import is a build-time smoke test: the build fails here if
# PyPDF2 cannot be imported after installation.
RUN pip install --no-cache-dir --requirement requirements.txt \
    && python -c "import PyPDF2; print('PyPDF2 OK')"
# Create a dedicated unprivileged account for the service. The container
# processes files with LibreOffice, Ghostscript, ImageMagick and ffmpeg, so it
# should not run as root. A fixed numeric UID/GID lets orchestrators verify the
# non-root user; --create-home gives tools that need a writable HOME a place to
# store their profile.
RUN groupadd --gid 10001 app \
    && useradd --no-log-init --uid 10001 --gid app --create-home --shell /usr/sbin/nologin app
# Copy application code, owned by the runtime user (avoids a second chown layer)
COPY --chown=app:app . .
# Create temp and persistence directories and hand them (and /app itself, which
# WORKDIR created as root) to the runtime user so the app can still write there.
# NOTE(review): volumes or bind mounts that already hold root-owned data under
# /app/data must be chown'ed to 10001:10001 when rolling this image out.
RUN mkdir -p /tmp/uploads /tmp/outputs /app/data \
    && chown app:app /app /app/data /tmp/uploads /tmp/outputs
# Everything from here on (health check and the main process) runs unprivileged
USER app
# Document the TCP port Gunicorn binds to (EXPOSE does not publish it)
EXPOSE 5000/tcp
# Liveness probe: request /api/health every 30s, allow 10s per attempt, and
# report the container unhealthy after 3 consecutive failures.
HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
    CMD curl --fail http://localhost:5000/api/health || exit 1
# Serve wsgi:app with Gunicorn on all interfaces, port 5000: 4 workers and a
# 120s worker timeout. --preload loads the application before the workers are
# forked, which ensures DB tables are created once rather than once per worker.
# Exec (JSON) form keeps Gunicorn as PID 1 so it receives stop signals directly.
CMD ["gunicorn", "--bind=0.0.0.0:5000", "--workers=4", "--timeout=120", "--preload", "wsgi:app"]