diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..3edb0b5
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,34 @@
+# Include any files or directories that you don't want to be copied to your
+# container here (e.g., local build artifacts, temporary files, etc.).
+#
+# For more help, visit the .dockerignore file reference guide at
+# https://docs.docker.com/engine/reference/builder/#dockerignore-file
+
+**/.DS_Store
+**/__pycache__
+**/.venv
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/bin
+**/charts
+**/docker-compose*
+**/compose*
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+LICENSE
+README.md
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..e20bc8c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,90 @@
+# syntax=docker/dockerfile:1
+
+# Comments are provided throughout this file to help you get started.
+# If you need more help, visit the Dockerfile reference guide at
+# https://docs.docker.com/engine/reference/builder/
+
+ARG PYTHON_VERSION=3.10.1
+FROM python:${PYTHON_VERSION}-slim AS base
+
+# Prevents Python from writing pyc files.
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Keeps Python from buffering stdout and stderr to avoid situations where
+# the application crashes without emitting any logs due to buffering.
+ENV PYTHONUNBUFFERED=1
+
+# Install required binary packages.
+RUN apt-get update && apt-get install -y \
+    imagemagick \
+    && rm -rf /var/lib/apt/lists/*
+
+# Allow ImageMagick to read and write PDFs. The policy.xml shipped with the
+# Debian package is expected to carry rights="none" for the PDF coder, which
+# blocks PDF conversion; flip it to read|write.
+RUN sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/g' \
+    /etc/ImageMagick-6/policy.xml
+
+# DEBUG. Only for debug purposes.
+# WARNING: DEBUG=1 installs sudo and (further down) gives appuser a well-known
+# password plus passwordless sudo, which defeats the non-privileged user.
+# It defaults to 1 to keep the current behaviour; build anything that leaves a
+# development machine with `--build-arg DEBUG=0`.
+ARG DEBUG=1
+RUN if [ "${DEBUG}" = "1" ]; then \
+        apt-get update && apt-get install -y \
+            curl \
+            procps \
+            sudo \
+            vim \
+        && rm -rf /var/lib/apt/lists/*; \
+    fi
+
+WORKDIR /app
+
+# Create a non-privileged user that the app will run under.
+# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user
+ARG UID=10001
+#RUN adduser \
+#    --disabled-password \
+#    --gecos "" \
+#    --home "/nonexistent" \
+#    --shell "/sbin/nologin" \
+#    --no-create-home \
+#    --uid "${UID}" \
+#    appuser
+RUN useradd -r -u ${UID} -s /sbin/nologin -d /nonexistent appuser
+
+# DEBUG. Only for debug purposes (see the DEBUG build argument above).
+RUN if [ "${DEBUG}" = "1" ]; then \
+        echo "appuser:12345678" | chpasswd \
+        && echo 'appuser ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers; \
+    fi
+
+# Upgrade pip
+RUN python -m pip install --no-cache-dir --upgrade pip
+
+# Download dependencies as a separate step to take advantage of Docker's caching.
+# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
+# Leverage a bind mount to requirements.txt to avoid having to copy it
+# into this layer.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,source=requirements.txt,target=requirements.txt \
+    python -m pip install -r requirements.txt
+
+# Switch to the non-privileged user to run the application.
+USER appuser
+
+# Copy the source code into the container.
+COPY --chown=appuser . .
+
+# Change file mode(s).
+RUN chmod 0744 cron.sh
+
+# Expose the port that the application listens on.
+EXPOSE 5000
+
+# Run the application. FLASK_APP is set with ENV and CMD uses the exec form so
+# no wrapper shell sits between Docker and flask.
+ENV FLASK_APP=serve.py
+CMD ["flask", "run", "--host=0.0.0.0"]
diff --git a/arxiv_daemon.py b/arxiv_daemon.py
index 6ce68c7..4239e79 100644
--- a/arxiv_daemon.py
+++ b/arxiv_daemon.py
@@ -30,7 +30,7 @@ if __name__ == '__main__':
     """
 
     # query string of papers to look for
-    q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO'
+    q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO+OR+cat:cs.AR+OR+cat:cs.DC'
 
     pdb = get_papers_db(flag='c')
     mdb = get_metas_db(flag='c')
diff --git a/compose.yaml b/compose.yaml
new file mode 100644
index 0000000..94e7d9c
--- /dev/null
+++ b/compose.yaml
@@ -0,0 +1,49 @@
+# Comments are provided throughout this file to help you get started.
+# If you need more help, visit the Docker compose reference guide at
+# https://docs.docker.com/compose/compose-file/
+
+# Here the instructions define your application as a service called "server".
+# This service is built from the Dockerfile in the current directory.
+# You can add other services your application may depend on here, such as a
+# database or a cache. For examples, see the Awesome Compose repository:
+# https://github.com/docker/awesome-compose
+services:
+  server:
+    build:
+      context: .
+    ports:
+      - 5000:5000
+
+# The commented out section below is an example of how to define a PostgreSQL
+# database that your application can use. `depends_on` tells Docker Compose to
+# start the database before your application. The `db-data` volume persists the
+# database data between container restarts. The `db-password` secret is used
+# to set the database password. You must create `db/password.txt` and add
+# a password of your choosing to it before running `docker compose up`.
+#     depends_on:
+#       db:
+#         condition: service_healthy
+#   db:
+#     image: postgres
+#     restart: always
+#     user: postgres
+#     secrets:
+#       - db-password
+#     volumes:
+#       - db-data:/var/lib/postgresql/data
+#     environment:
+#       - POSTGRES_DB=example
+#       - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
+#     expose:
+#       - 5432
+#     healthcheck:
+#       test: [ "CMD", "pg_isready" ]
+#       interval: 10s
+#       timeout: 5s
+#       retries: 5
+# volumes:
+#   db-data:
+# secrets:
+#   db-password:
+#     file: db/password.txt
+
diff --git a/cron.sh b/cron.sh
new file mode 100644
index 0000000..8ba2dc1
--- /dev/null
+++ b/cron.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+python3 /app/arxiv_daemon.py --num 100
+
+if [ $? -eq 0 ]; then
+    echo "New papers detected! Running compute.py"
+    python3 /app/compute.py
+else
+    echo "No new papers were added, skipping feature computation"
+fi
diff --git a/requirements.txt b/requirements.txt
index 304d2b4..b2ef35a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
-feedparser==6.0.8
-Flask==2.0.2
-numpy==1.21.4
-scikit-learn==1.0.1
-sqlitedict==1.7.0
+feedparser
+flask
+numpy
+requests
+scikit-learn
+sqlitedict