Merge branch 'feature/Docker' into 'wip/h3132'

Feature/docker

See merge request schihei/arxiv-sanity-lite!1
This commit is contained in:
2023-08-30 17:29:57 +00:00
6 changed files with 176 additions and 6 deletions
+34
View File
@@ -0,0 +1,34 @@
# Paths listed here are left out of the Docker build context, so they are never
# sent to the builder and can never be copied into the image.
#
# Pattern syntax reference:
# https://docs.docker.com/engine/reference/builder/#dockerignore-file
#
# There are no "!" exception rules in this file, so the order of the patterns
# below has no effect; they are grouped by purpose only.

# Operating-system, editor and IDE metadata.
**/.DS_Store
**/.classpath
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user

# Version-control data.
**/.git
**/.gitignore

# Python caches and local virtual environments.
**/__pycache__
**/.venv

# Build output and artifacts of other toolchains.
**/*.dbmdl
**/*.jfm
**/bin
**/obj
**/node_modules
**/npm-debug.log

# Container build and deployment definitions; not needed inside the image.
**/.dockerignore
**/Dockerfile*
**/docker-compose*
**/compose*
**/charts

# Local environment and development-only settings (may hold credentials).
**/.env
**/secrets.dev.yaml
**/values.dev.yaml

# Repository documentation.
LICENSE
README.md
+76
View File
@@ -0,0 +1,76 @@
# syntax=docker/dockerfile:1

# Image for the Flask application in serve.py, served on port 5000 as the
# non-privileged user "appuser".
#
# Dockerfile reference: https://docs.docker.com/engine/reference/builder/

ARG PYTHON_VERSION=3.10.1
FROM python:${PYTHON_VERSION}-slim AS base

# PYTHONDONTWRITEBYTECODE: prevents Python from writing pyc files.
# PYTHONUNBUFFERED: keeps Python from buffering stdout and stderr, so the
#   application cannot crash without emitting its logs.
# FLASK_APP: the module `flask run` serves. It is set here rather than via an
#   `export` inside CMD so that CMD can use exec form.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    FLASK_APP=serve.py

# Install ImageMagick and, in the same layer, relax its security policy so the
# PDF coder may read and write (the packaged policy sets rights="none").
# NOTE(review): --no-install-recommends is deliberately NOT used here. On Debian
# the PDF delegate (Ghostscript) is believed to be pulled in only as a
# "Recommends" of imagemagick; confirm before slimming this step down.
RUN apt-get update \
    && apt-get install -y \
        imagemagick \
    && rm -rf /var/lib/apt/lists/* \
    && sed -i 's/<policy domain="coder" rights="none" pattern="PDF" \/>/<policy domain="coder" rights="read|write" pattern="PDF" \/>/g' /etc/ImageMagick-6/policy.xml

WORKDIR /app

# Create a non-privileged user that the app will run under.
# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user
# The account has no password, no login shell and no home directory.
ARG UID=10001
RUN useradd --system --uid "${UID}" --shell /sbin/nologin --home-dir /nonexistent appuser

# DEBUG tooling is opt-in. Build with
#   docker build --build-arg INSTALL_DEBUG_TOOLS=true .
# to add curl, procps, sudo and vim and to grant appuser passwordless sudo.
# The default image contains none of this: passwordless root access and a
# fixed, publicly known password must not ship in a normal build.
# The sudo rule goes into a 0440 drop-in under /etc/sudoers.d instead of being
# appended to /etc/sudoers through an unchecked pipe.
ARG INSTALL_DEBUG_TOOLS=false
RUN if [ "${INSTALL_DEBUG_TOOLS}" = "true" ]; then \
        apt-get update \
        && apt-get install -y --no-install-recommends \
            curl \
            procps \
            sudo \
            vim \
        && rm -rf /var/lib/apt/lists/* \
        && echo 'appuser ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/appuser \
        && chmod 0440 /etc/sudoers.d/appuser; \
    fi

# Upgrade pip. The cache mount keeps pip's download cache on the build host
# instead of in an image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    python -m pip install --upgrade pip

# Download dependencies as a separate step to take advantage of Docker's caching.
# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
# Leverage a bind mount to requirements.txt to avoid having to copy it into
# this layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements.txt,target=requirements.txt \
    python -m pip install -r requirements.txt

# Switch to the non-privileged user to run the application.
USER appuser

# Copy the source code into the container, owned by appuser.
# NOTE(review): /app itself was created by WORKDIR while still root, so appuser
# may be unable to create new top-level entries in /app at runtime. Confirm
# whether the application writes there and, if so, chown /app or mount a volume.
COPY --chown=appuser . .

# Make cron.sh executable for its owner (rwxr--r--). This runs as appuser,
# which owns the file thanks to --chown above.
RUN chmod 0744 cron.sh

# Expose the port that the application listens on.
EXPOSE 5000

# Run the application. Exec form makes flask PID 1, so it receives the stop
# signal from `docker stop` directly instead of sitting behind `/bin/sh -c`.
CMD ["flask", "run", "--host=0.0.0.0"]
+1 -1
View File
@@ -30,7 +30,7 @@ if __name__ == '__main__':
"""
# query string of papers to look for
q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO'
q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO+OR+cat:cs.AR+OR+cat:cs.DC'
pdb = get_papers_db(flag='c')
mdb = get_metas_db(flag='c')
+49
View File
@@ -0,0 +1,49 @@
# Docker Compose definition for the application.
# If you need more help, visit the Docker Compose reference guide at
# https://docs.docker.com/compose/compose-file/

# The application is defined as a single service called "server", built from
# the Dockerfile in the current directory. Container port 5000 is published on
# host port 5000.
# You can add other services your application may depend on here, such as a
# database or a cache. For examples, see the Awesome Compose repository:
# https://github.com/docker/awesome-compose
services:
  server:
    build:
      context: .
    ports:
      # Quoted so the HOST:CONTAINER mapping is always read as a string.
      - "5000:5000"

# The commented out section below is an example of how to define a PostgreSQL
# database that your application can use. `depends_on` tells Docker Compose to
# start the database before your application. The `db-data` volume persists the
# database data between container restarts. The `db-password` secret is used
# to set the database password. You must create `db/password.txt` and add
# a password of your choosing to it before running `docker compose up`.
#     depends_on:
#       db:
#         condition: service_healthy
#   db:
#     image: postgres
#     restart: always
#     user: postgres
#     secrets:
#       - db-password
#     volumes:
#       - db-data:/var/lib/postgresql/data
#     environment:
#       - POSTGRES_DB=example
#       - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
#     expose:
#       - 5432
#     healthcheck:
#       test: [ "CMD", "pg_isready" ]
#       interval: 10s
#       timeout: 5s
#       retries: 5
# volumes:
#   db-data:
# secrets:
#   db-password:
#     file: db/password.txt
+10
View File
@@ -0,0 +1,10 @@
#!/bin/bash
# Periodic update job: run arxiv_daemon.py with --num 100 and, only when it
# exits with status 0 (which this script treats as "new papers were added"),
# run compute.py afterwards. Any non-zero status skips the computation.
if python3 /app/arxiv_daemon.py --num 100; then
    echo "New papers detected! Running compute.py"
    python3 /app/compute.py
else
    echo "No new papers were added, skipping feature computation"
fi
+6 -5
View File
@@ -1,5 +1,6 @@
# Previous contents (the 5 lines this change removes): exact version pins.
feedparser==6.0.8
Flask==2.0.2
numpy==1.21.4
scikit-learn==1.0.1
sqlitedict==1.7.0
# New contents (the 6 lines this change adds): the same packages without
# version pins, plus requests.
# NOTE(review): with no pins, each image build installs whatever versions are
# current at build time, so builds are not reproducible; consider re-pinning.
feedparser
flask
numpy
requests
scikit-learn
sqlitedict