From 56c203189fb40e21fb794823a37e5ef840e73fd3 Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Wed, 21 May 2025 21:30:00 +0000
Subject: [PATCH] dockerize

---
Note: the blob id on the crawler/Dockerfile "index" line was not recomputed
after the Dockerfile contents in this patch were revised.

 crawler/.dockerignore |  1 +
 crawler/Dockerfile    | 53 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 crawler/.dockerignore
 create mode 100644 crawler/Dockerfile

diff --git a/crawler/.dockerignore b/crawler/.dockerignore
new file mode 100644
index 0000000..1269488
--- /dev/null
+++ b/crawler/.dockerignore
@@ -0,0 +1 @@
+data
diff --git a/crawler/Dockerfile b/crawler/Dockerfile
new file mode 100644
index 0000000..ed4dc8d
--- /dev/null
+++ b/crawler/Dockerfile
@@ -0,0 +1,53 @@
+FROM python:3.13-slim
+
+# Fail a RUN step when any command in a pipeline fails (e.g. curl | python3),
+# not only the last one.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Install system dependencies: compiler toolchain and Python headers,
+# OpenCV, GL and GLib libraries, and Tesseract OCR with English data.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    gcc \
+    libgl1 \
+    libglib2.0-0 \
+    libopencv-dev \
+    python3-dev \
+    tesseract-ocr \
+    tesseract-ocr-eng \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Poetry. The version is pinned so rebuilds are reproducible;
+# override with --build-arg POETRY_VERSION=<version>.
+ARG POETRY_VERSION=2.1.3
+ENV POETRY_HOME=/opt/poetry \
+    POETRY_VIRTUALENVS_IN_PROJECT=true \
+    POETRY_NO_INTERACTION=1
+
+# -f makes curl fail on HTTP errors instead of piping an error page to python3.
+RUN curl -fsSL https://install.python-poetry.org | python3 - && \
+    ln -s /opt/poetry/bin/poetry /usr/local/bin/poetry && \
+    poetry --version
+
+WORKDIR /app
+
+# Copy only the files needed for dependency installation so this layer stays
+# cached until the dependency manifests change.
+COPY pyproject.toml poetry.lock ./
+
+# Install the main dependency group (without the project itself) into /app/.venv.
+RUN poetry install --only main --no-root
+
+# Put the in-project virtualenv first on PATH.
+ENV PATH="/app/.venv/bin:$PATH"
+
+# Copy the application code
+COPY . .
+
+# Make the CLI and the entrypoint script executable; the entrypoint is executed
+# directly, so it needs the execute bit regardless of how it was checked in.
+RUN chmod +x /app/main.py /app/runall.sh
+
+# NOTE(review): no USER is set, so the container runs as root - confirm whether that is required.
+ENTRYPOINT ["/app/runall.sh"]