Compare commits
25 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bc3491d8f8 | |||
| 2a3172850e | |||
| d79b0b5b5e | |||
| 02ee89dcc5 | |||
| 4c5fbe4786 | |||
| 696d81f49a | |||
| 695c8d1c22 | |||
| 514c2a929d | |||
| 169db54df2 | |||
| 6a4b1176c6 | |||
| f58cbcc98c | |||
| 9d3ad6a896 | |||
| d1c485240c | |||
| 781e9099cb | |||
| b17e98b3f4 | |||
| dd37f6689a | |||
| e324bb91b1 | |||
| 8dc5f4ef0c | |||
| 2e85c9075c | |||
| 6eb4cfc56b | |||
| f181ae609a | |||
| 381b4ba7ff | |||
| bee3df79f4 | |||
| 1aff234cf6 | |||
| aeb7ecf96a |
@@ -0,0 +1,34 @@
|
|||||||
|
# Include any files or directories that you don't want to be copied to your
|
||||||
|
# container here (e.g., local build artifacts, temporary files, etc.).
|
||||||
|
#
|
||||||
|
# For more help, visit the .dockerignore file reference guide at
|
||||||
|
# https://docs.docker.com/engine/reference/builder/#dockerignore-file
|
||||||
|
|
||||||
|
**/.DS_Store
|
||||||
|
**/__pycache__
|
||||||
|
**/.venv
|
||||||
|
**/.classpath
|
||||||
|
**/.dockerignore
|
||||||
|
**/.env
|
||||||
|
**/.git
|
||||||
|
**/.gitignore
|
||||||
|
**/.project
|
||||||
|
**/.settings
|
||||||
|
**/.toolstarget
|
||||||
|
**/.vs
|
||||||
|
**/.vscode
|
||||||
|
**/*.*proj.user
|
||||||
|
**/*.dbmdl
|
||||||
|
**/*.jfm
|
||||||
|
**/bin
|
||||||
|
**/charts
|
||||||
|
**/docker-compose*
|
||||||
|
**/compose*
|
||||||
|
**/Dockerfile*
|
||||||
|
**/node_modules
|
||||||
|
**/npm-debug.log
|
||||||
|
**/obj
|
||||||
|
**/secrets.dev.yaml
|
||||||
|
**/values.dev.yaml
|
||||||
|
LICENSE
|
||||||
|
README.md
|
||||||
+82
@@ -0,0 +1,82 @@
|
|||||||
|
# syntax=docker/dockerfile:1
|
||||||
|
|
||||||
|
# Comments are provided throughout this file to help you get started.
|
||||||
|
# If you need more help, visit the Dockerfile reference guide at
|
||||||
|
# https://docs.docker.com/engine/reference/builder/
|
||||||
|
|
||||||
|
ARG PYTHON_VERSION=3.10.1
|
||||||
|
FROM python:${PYTHON_VERSION}-slim as base
|
||||||
|
|
||||||
|
# Prevents Python from writing pyc files.
|
||||||
|
ENV PYTHONDONTWRITEBYTECODE=1
|
||||||
|
|
||||||
|
# Keeps Python from buffering stdout and stderr to avoid situations where
|
||||||
|
# the application crashes without emitting any logs due to buffering.
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
ENV ARXIV_QUERY=""
|
||||||
|
ENV SMTP_ADDRESS=""
|
||||||
|
ENV SMTP_PORT=""
|
||||||
|
ENV SMTP_USER_NAME=""
|
||||||
|
ENV SMTP_PASSWORD=""
|
||||||
|
|
||||||
|
# Install required binary packages.
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
imagemagick \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Add PDF processing to the ImageMagick policy.
|
||||||
|
RUN sed -i 's/<policy domain="coder" rights="none" pattern="PDF" \/>/<policy domain="coder" rights="read|write" pattern="PDF" \/>/g' /etc/ImageMagick-6/policy.xml
|
||||||
|
|
||||||
|
# DEBUG. Only for debug purposes.
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
curl \
|
||||||
|
procps \
|
||||||
|
sudo \
|
||||||
|
vim \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Create a non-privileged user that the app will run under.
|
||||||
|
# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user
|
||||||
|
ARG UID=10001
|
||||||
|
#RUN adduser \
|
||||||
|
# --disabled-password \
|
||||||
|
# --gecos "" \
|
||||||
|
# --home "/nonexistent" \
|
||||||
|
# --shell "/sbin/nologin" \
|
||||||
|
# --no-create-home \
|
||||||
|
# --uid "${UID}" \
|
||||||
|
# appuser
|
||||||
|
|
||||||
|
# DEBUG. Only for debug purposes.
|
||||||
|
RUN useradd -r -u ${UID} -s /sbin/nologin -d /nonexistent appuser
|
||||||
|
# RUN echo "appuser:12345678" | chpasswd
|
||||||
|
# RUN echo 'appuser ALL=(ALL) NOPASSWD:ALL' | tee -a /etc/sudoers
|
||||||
|
|
||||||
|
# Upgrade pip
|
||||||
|
RUN python -m pip install --upgrade pip
|
||||||
|
|
||||||
|
# Download dependencies as a separate step to take advantage of Docker's caching.
|
||||||
|
# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
|
||||||
|
# Leverage a bind mount to requirements.txt to avoid having to copy them into
|
||||||
|
# this layer.
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||||
|
--mount=type=bind,source=requirements.txt,target=requirements.txt \
|
||||||
|
python -m pip install -r requirements.txt
|
||||||
|
|
||||||
|
# Switch to the non-privileged user to run the application.
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
# Copy the source code into the container.
|
||||||
|
COPY --chown=appuser . .
|
||||||
|
|
||||||
|
# Change file mode(s).
|
||||||
|
RUN chmod 0744 cron.sh
|
||||||
|
|
||||||
|
# Expose the port that the application listens on.
|
||||||
|
EXPOSE 5000
|
||||||
|
|
||||||
|
# Run the application.
|
||||||
|
CMD export FLASK_APP=serve.py; flask run --host=0.0.0.0
|
||||||
+5
-1
@@ -9,6 +9,7 @@ import time
|
|||||||
import random
|
import random
|
||||||
import logging
|
import logging
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
from aslite.arxiv import get_response, parse_response
|
from aslite.arxiv import get_response, parse_response
|
||||||
from aslite.db import get_papers_db, get_metas_db
|
from aslite.db import get_papers_db, get_metas_db
|
||||||
@@ -30,7 +31,10 @@ if __name__ == '__main__':
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# query string of papers to look for
|
# query string of papers to look for
|
||||||
q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO'
|
q = os.environ.get('ARXIV_QUERY', '')
|
||||||
|
if not q:
|
||||||
|
print("No query string provided, will use default.")
|
||||||
|
q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO+OR+cat:cs.AR+OR+cat:cs.DC+cat:cs.SE'
|
||||||
|
|
||||||
pdb = get_papers_db(flag='c')
|
pdb = get_papers_db(flag='c')
|
||||||
mdb = get_metas_db(flag='c')
|
mdb = get_metas_db(flag='c')
|
||||||
|
|||||||
+5
-5
@@ -104,27 +104,27 @@ PAPERS_DB_FILE = os.path.join(DATA_DIR, 'papers.db')
|
|||||||
# stores account-relevant info, like which tags exist for which papers
|
# stores account-relevant info, like which tags exist for which papers
|
||||||
DICT_DB_FILE = os.path.join(DATA_DIR, 'dict.db')
|
DICT_DB_FILE = os.path.join(DATA_DIR, 'dict.db')
|
||||||
|
|
||||||
def get_papers_db(flag='r', autocommit=True):
|
def get_papers_db(flag='c', autocommit=True):
|
||||||
assert flag in ['r', 'c']
|
assert flag in ['r', 'c']
|
||||||
pdb = CompressedSqliteDict(PAPERS_DB_FILE, tablename='papers', flag=flag, autocommit=autocommit)
|
pdb = CompressedSqliteDict(PAPERS_DB_FILE, tablename='papers', flag=flag, autocommit=autocommit)
|
||||||
return pdb
|
return pdb
|
||||||
|
|
||||||
def get_metas_db(flag='r', autocommit=True):
|
def get_metas_db(flag='c', autocommit=True):
|
||||||
assert flag in ['r', 'c']
|
assert flag in ['r', 'c']
|
||||||
mdb = SqliteDict(PAPERS_DB_FILE, tablename='metas', flag=flag, autocommit=autocommit)
|
mdb = SqliteDict(PAPERS_DB_FILE, tablename='metas', flag=flag, autocommit=autocommit)
|
||||||
return mdb
|
return mdb
|
||||||
|
|
||||||
def get_tags_db(flag='r', autocommit=True):
|
def get_tags_db(flag='c', autocommit=True):
|
||||||
assert flag in ['r', 'c']
|
assert flag in ['r', 'c']
|
||||||
tdb = CompressedSqliteDict(DICT_DB_FILE, tablename='tags', flag=flag, autocommit=autocommit)
|
tdb = CompressedSqliteDict(DICT_DB_FILE, tablename='tags', flag=flag, autocommit=autocommit)
|
||||||
return tdb
|
return tdb
|
||||||
|
|
||||||
def get_last_active_db(flag='r', autocommit=True):
|
def get_last_active_db(flag='c', autocommit=True):
|
||||||
assert flag in ['r', 'c']
|
assert flag in ['r', 'c']
|
||||||
ladb = SqliteDict(DICT_DB_FILE, tablename='last_active', flag=flag, autocommit=autocommit)
|
ladb = SqliteDict(DICT_DB_FILE, tablename='last_active', flag=flag, autocommit=autocommit)
|
||||||
return ladb
|
return ladb
|
||||||
|
|
||||||
def get_email_db(flag='r', autocommit=True):
|
def get_email_db(flag='c', autocommit=True):
|
||||||
assert flag in ['r', 'c']
|
assert flag in ['r', 'c']
|
||||||
edb = SqliteDict(DICT_DB_FILE, tablename='email', flag=flag, autocommit=autocommit)
|
edb = SqliteDict(DICT_DB_FILE, tablename='email', flag=flag, autocommit=autocommit)
|
||||||
return edb
|
return edb
|
||||||
|
|||||||
@@ -0,0 +1,56 @@
|
|||||||
|
# Comments are provided throughout this file to help you get started.
|
||||||
|
# If you need more help, visit the Docker compose reference guide at
|
||||||
|
# https://docs.docker.com/compose/compose-file/
|
||||||
|
|
||||||
|
# Here the instructions define your application as a service called "server".
|
||||||
|
# This service is built from the Dockerfile in the current directory.
|
||||||
|
# You can add other services your application may depend on here, such as a
|
||||||
|
# database or a cache. For examples, see the Awesome Compose repository:
|
||||||
|
# https://github.com/docker/awesome-compose
|
||||||
|
services:
|
||||||
|
server:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
ports:
|
||||||
|
- 5000:5000
|
||||||
|
volumes:
|
||||||
|
- /Users/schihei/git/arxiv-sanity-lite/container-data/data/:/app/data
|
||||||
|
- /Users/schihei/git/arxiv-sanity-lite/container-data/static/thumb/:/app/static/thumb
|
||||||
|
environment:
|
||||||
|
- ARXIV_QUERY=cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO+OR+cat:cs.AR+OR+cat:cs.DC+cat.cs.SE
|
||||||
|
- SMTP_ADDRESS=mail.h3132.de
|
||||||
|
- SMTP_PORT=465
|
||||||
|
- SMTP_USER_NAME=arxiv@h3132.de
|
||||||
|
- SMTP_PASSWORD=__ap0gee9436!
|
||||||
|
# The commented out section below is an example of how to define a PostgreSQL
|
||||||
|
# database that your application can use. `depends_on` tells Docker Compose to
|
||||||
|
# start the database before your application. The `db-data` volume persists the
|
||||||
|
# database data between container restarts. The `db-password` secret is used
|
||||||
|
# to set the database password. You must create `db/password.txt` and add
|
||||||
|
# a password of your choosing to it before running `docker compose up`.
|
||||||
|
# depends_on:
|
||||||
|
# db:
|
||||||
|
# condition: service_healthy
|
||||||
|
# db:
|
||||||
|
# image: postgres
|
||||||
|
# restart: always
|
||||||
|
# user: postgres
|
||||||
|
# secrets:
|
||||||
|
# - db-password
|
||||||
|
# volumes:
|
||||||
|
# - db-data:/var/lib/postgresql/data
|
||||||
|
# environment:
|
||||||
|
# - POSTGRES_DB=example
|
||||||
|
# - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
|
||||||
|
# expose:
|
||||||
|
# - 5432
|
||||||
|
# healthcheck:
|
||||||
|
# test: [ "CMD", "pg_isready" ]
|
||||||
|
# interval: 10s
|
||||||
|
# timeout: 5s
|
||||||
|
# retries: 5
|
||||||
|
# volumes:
|
||||||
|
# db-data:
|
||||||
|
# secrets:
|
||||||
|
# db-password:
|
||||||
|
# file: db/password.txt
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
python3 /app/arxiv_daemon.py --num 2000
|
||||||
|
|
||||||
|
if [ $? -eq 0 ]; then
|
||||||
|
echo "New papers detected! Running compute.py"
|
||||||
|
python3 /app/compute.py
|
||||||
|
else
|
||||||
|
echo "No new papers were added, skipping feature computation"
|
||||||
|
fi
|
||||||
+6
-5
@@ -1,5 +1,6 @@
|
|||||||
feedparser==6.0.8
|
feedparser
|
||||||
Flask==2.0.2
|
flask
|
||||||
numpy==1.21.4
|
numpy
|
||||||
scikit-learn==1.0.1
|
requests
|
||||||
sqlitedict==1.7.0
|
scikit-learn
|
||||||
|
sqlitedict
|
||||||
|
|||||||
+50
-29
@@ -5,8 +5,8 @@ I run this script in a cron job to send out emails to the users with their
|
|||||||
recommendations. There's a bit of copy paste code here but I expect that
|
recommendations. There's a bit of copy paste code here but I expect that
|
||||||
the recommendations may become more complex in the future, so this is ok for now.
|
the recommendations may become more complex in the future, so this is ok for now.
|
||||||
|
|
||||||
You'll notice that the file sendgrid_api_key.txt is not in the repo, you'd have
|
You'll notice that the smtp password is not in the repo, you'd have
|
||||||
to manually register with sendgrid yourself, get an API key and put it in the file.
|
to set up an SMTP account yourself and provide its credentials via the SMTP_* environment variables.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@@ -17,8 +17,9 @@ import argparse
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn import svm
|
from sklearn import svm
|
||||||
|
|
||||||
import sendgrid
|
import smtplib
|
||||||
from sendgrid.helpers.mail import Email, To, Content, Mail
|
from email.mime.multipart import MIMEMultipart
|
||||||
|
from email.mime.text import MIMEText
|
||||||
|
|
||||||
from aslite.db import load_features
|
from aslite.db import load_features
|
||||||
from aslite.db import get_tags_db
|
from aslite.db import get_tags_db
|
||||||
@@ -37,6 +38,7 @@ template = """
|
|||||||
<style>
|
<style>
|
||||||
body {
|
body {
|
||||||
font-family: Arial, sans-serif;
|
font-family: Arial, sans-serif;
|
||||||
|
font-size: 14px;
|
||||||
}
|
}
|
||||||
.s {
|
.s {
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
@@ -51,25 +53,23 @@ body {
|
|||||||
margin-bottom: 10px;
|
margin-bottom: 10px;
|
||||||
}
|
}
|
||||||
.f {
|
.f {
|
||||||
color: #933;
|
color: #fb0007;
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
|
<div>Hi! Here are your <a href="https://arxiv.h3132.de">arxiv-sanity-lite</a> recommendations.</div>
|
||||||
<br><br>
|
<br>
|
||||||
<div>Hi! Here are your <a href="https://arxiv-sanity-lite.com">arxiv-sanity-lite</a> recommendations. __STATS__</div>
|
<div>__STATS__</div>
|
||||||
<br><br>
|
<br>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
__CONTENT__
|
__CONTENT__
|
||||||
</div>
|
</div>
|
||||||
|
<br>
|
||||||
<br><br>
|
|
||||||
<div>
|
<div>
|
||||||
To stop these emails remove your email in your <a href="https://arxiv-sanity-lite.com/profile">account</a> settings. (your account is __ACCOUNT__).
|
To stop these emails remove your email in your <a href="https://arxiv.h3132.de/profile">account</a> settings. (your account is __ACCOUNT__).
|
||||||
</div>
|
</div>
|
||||||
<div> <3, arxiv-sanity-lite. </div>
|
<div> <3, arxiv-sanity-lite. </div>
|
||||||
|
|
||||||
@@ -105,7 +105,7 @@ def calculate_recommendation(
|
|||||||
y[ptoi[pid]] = 1.0
|
y[ptoi[pid]] = 1.0
|
||||||
|
|
||||||
# classify
|
# classify
|
||||||
clf = svm.LinearSVC(class_weight='balanced', verbose=False, max_iter=10000, tol=1e-6, C=0.01)
|
clf = svm.LinearSVC(class_weight='balanced', verbose=False, max_iter=10000, tol=1e-6, C=0.01, dual=True)
|
||||||
clf.fit(x, y)
|
clf.fit(x, y)
|
||||||
s = clf.decision_function(x)
|
s = clf.decision_function(x)
|
||||||
sortix = np.argsort(-s)
|
sortix = np.argsort(-s)
|
||||||
@@ -159,15 +159,16 @@ def render_recommendations(user, tags, tag_pids, tag_scores):
|
|||||||
if len(summary) == 500:
|
if len(summary) == 500:
|
||||||
summary += '...'
|
summary += '...'
|
||||||
# create the url that will feature this paper on top and also show the most similar papers
|
# create the url that will feature this paper on top and also show the most similar papers
|
||||||
url = 'https://arxiv-sanity-lite.com/?rank=pid&pid=' + pid
|
url = 'https://arxiv.h3132.de/?rank=pid&pid=' + pid
|
||||||
parts.append(
|
parts.append(
|
||||||
"""
|
"""
|
||||||
<tr>
|
<tr>
|
||||||
<td valign="top"><div class="s">%.2f</div></td>
|
<td valign="top"><div class="s">%.2f</div></td>
|
||||||
<td>
|
<td>
|
||||||
<a href="%s">%s</a> <div class="f">(%s)</div>
|
<a href="%s">%s</a> <div class="f">(%s)</div>
|
||||||
<div class="a">%s</div>
|
<div class="a"><br>%s</div><br>
|
||||||
<div class="u">%s</div>
|
<div class="u">%s</div>
|
||||||
|
<hr size="1">
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
""" % (score, url, p['title'], max_source_tag[pid], authors, summary)
|
""" % (score, url, p['title'], max_source_tag[pid], authors, summary)
|
||||||
@@ -196,27 +197,47 @@ def render_recommendations(user, tags, tag_pids, tag_scores):
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# send the actual html via sendgrid
|
# send the actual html via smtp
|
||||||
|
|
||||||
def send_email(to, html):
|
def send_email(to, html):
|
||||||
|
|
||||||
# init the api
|
# init the api
|
||||||
assert os.path.isfile('sendgrid_api_key.txt')
|
# assert os.path.isfile('sendgrid_api_key.txt')
|
||||||
api_key = open('sendgrid_api_key.txt', 'r').read().strip()
|
# api_key = open('sendgrid_api_key.txt', 'r').read().strip()
|
||||||
sg = sendgrid.SendGridAPIClient(api_key=api_key)
|
# sg = sendgrid.SendGridAPIClient(api_key=api_key)
|
||||||
|
|
||||||
|
# Set up the necessary details from environment variables
|
||||||
|
smtp_server = os.environ.get('SMTP_ADDRESS', '')
|
||||||
|
port = os.environ.get('SMTP_PORT', '')
|
||||||
|
sender = os.environ.get('SMTP_USER_NAME', '')
|
||||||
|
password = os.environ.get('SMTP_PASSWORD', '')
|
||||||
|
receiver = to
|
||||||
|
|
||||||
|
# Create the message
|
||||||
|
msg = MIMEMultipart()
|
||||||
|
msg['Subject'] = tnow_str + " Arxiv Sanity Lite recommendations"
|
||||||
|
msg['From'] = sender
|
||||||
|
msg['To'] = to
|
||||||
|
msg_body = html
|
||||||
|
msg.attach(MIMEText(msg_body, 'html'))
|
||||||
|
|
||||||
# construct the email
|
|
||||||
from_email = Email("admin@arxiv-sanity-lite.com")
|
|
||||||
to_email = To(to)
|
|
||||||
subject = tnow_str + " Arxiv Sanity Lite recommendations"
|
|
||||||
content = Content("text/html", html)
|
|
||||||
mail = Mail(from_email, to_email, subject, content)
|
|
||||||
|
|
||||||
# hope for the best :)
|
# hope for the best :)
|
||||||
if not args.dry_run:
|
if not args.dry_run:
|
||||||
response = sg.client.mail.send.post(request_body=mail.get())
|
try:
|
||||||
print(response.status_code)
|
# Connect to the server
|
||||||
pass
|
server = smtplib.SMTP_SSL(smtp_server, port)
|
||||||
|
|
||||||
|
# Login to the email server
|
||||||
|
server.login(sender, password)
|
||||||
|
|
||||||
|
# Send the email
|
||||||
|
server.sendmail(sender, receiver, msg.as_string())
|
||||||
|
|
||||||
|
# Close the connection to the server
|
||||||
|
server.quit()
|
||||||
|
except Exception as e:
|
||||||
|
print('Something went wrong.', e)
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ ideas:
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
import math
|
||||||
from random import shuffle
|
from random import shuffle
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -214,6 +215,7 @@ def main():
|
|||||||
opt_q = request.args.get('q', '') # search request in the text box
|
opt_q = request.args.get('q', '') # search request in the text box
|
||||||
opt_tags = request.args.get('tags', default_tags) # tags to rank by if opt_rank == 'tag'
|
opt_tags = request.args.get('tags', default_tags) # tags to rank by if opt_rank == 'tag'
|
||||||
opt_pid = request.args.get('pid', '') # pid to find nearest neighbors to
|
opt_pid = request.args.get('pid', '') # pid to find nearest neighbors to
|
||||||
|
opt_category_filter = request.args.get('category_filter', '') # primary category to filter
|
||||||
opt_time_filter = request.args.get('time_filter', default_time_filter) # number of days to filter by
|
opt_time_filter = request.args.get('time_filter', default_time_filter) # number of days to filter by
|
||||||
opt_skip_have = request.args.get('skip_have', default_skip_have) # hide papers we already have?
|
opt_skip_have = request.args.get('skip_have', default_skip_have) # hide papers we already have?
|
||||||
opt_svm_c = request.args.get('svm_c', '') # svm C parameter
|
opt_svm_c = request.args.get('svm_c', '') # svm C parameter
|
||||||
@@ -245,6 +247,14 @@ def main():
|
|||||||
else:
|
else:
|
||||||
raise ValueError("opt_rank %s is not a thing" % (opt_rank, ))
|
raise ValueError("opt_rank %s is not a thing" % (opt_rank, ))
|
||||||
|
|
||||||
|
# filter by primary category
|
||||||
|
if opt_category_filter:
|
||||||
|
pdb = get_papers()
|
||||||
|
kv = {k:v for k,v in pdb.items()} # read all of metas to memory at once, for efficiency
|
||||||
|
keep = [i for i,pid in enumerate(pids) if (kv[pid]['arxiv_primary_category']['term'])
|
||||||
|
== opt_category_filter]
|
||||||
|
pids, scores = [pids[i] for i in keep], [scores[i] for i in keep]
|
||||||
|
|
||||||
# filter by time
|
# filter by time
|
||||||
if opt_time_filter:
|
if opt_time_filter:
|
||||||
mdb = get_metas()
|
mdb = get_metas()
|
||||||
@@ -262,6 +272,7 @@ def main():
|
|||||||
pids, scores = [pids[i] for i in keep], [scores[i] for i in keep]
|
pids, scores = [pids[i] for i in keep], [scores[i] for i in keep]
|
||||||
|
|
||||||
# crop the number of results to RET_NUM, and paginate
|
# crop the number of results to RET_NUM, and paginate
|
||||||
|
total_pages = math.ceil(len(pids) / RET_NUM)
|
||||||
try:
|
try:
|
||||||
page_number = max(1, int(opt_page_number))
|
page_number = max(1, int(opt_page_number))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -297,6 +308,7 @@ def main():
|
|||||||
context['gvars']['search_query'] = opt_q
|
context['gvars']['search_query'] = opt_q
|
||||||
context['gvars']['svm_c'] = str(C)
|
context['gvars']['svm_c'] = str(C)
|
||||||
context['gvars']['page_number'] = str(page_number)
|
context['gvars']['page_number'] = str(page_number)
|
||||||
|
context['gvars']['total_pages'] = str(total_pages)
|
||||||
return render_template('index.html', **context)
|
return render_template('index.html', **context)
|
||||||
|
|
||||||
@app.route('/inspect', methods=['GET'])
|
@app.route('/inspect', methods=['GET'])
|
||||||
|
|||||||
@@ -38,12 +38,12 @@ const Paper = props => {
|
|||||||
return (
|
return (
|
||||||
<div class='rel_paper'>
|
<div class='rel_paper'>
|
||||||
<div class="rel_score">{p.weight.toFixed(2)}</div>
|
<div class="rel_score">{p.weight.toFixed(2)}</div>
|
||||||
<div class='rel_title'><a href={'http://arxiv.org/abs/' + p.id}>{p.title}</a></div>
|
<div class='rel_title'><a href={'http://arxiv.org/abs/' + p.id} target="_blank">{p.title}</a></div>
|
||||||
<div class='rel_authors'>{p.authors}</div>
|
<div class='rel_authors'>{p.authors}</div>
|
||||||
<div class="rel_time">{p.time}</div>
|
<div class="rel_time">{p.time}</div>
|
||||||
<div class='rel_tags'>{p.tags}</div>
|
<div class='rel_tags'>{p.tags}</div>
|
||||||
{utag_controls}
|
{utag_controls}
|
||||||
{thumb_img}
|
<a href={'http://arxiv.org/pdf/' + p.id} target="_blank">{thumb_img}</a>
|
||||||
<div class='rel_abs'>{p.summary}</div>
|
<div class='rel_abs'>{p.summary}</div>
|
||||||
<div class='rel_more'><a href={similar_url}>similar</a></div>
|
<div class='rel_more'><a href={similar_url}>similar</a></div>
|
||||||
<div class='rel_inspect'><a href={inspect_url}>inspect</a></div>
|
<div class='rel_inspect'><a href={inspect_url}>inspect</a></div>
|
||||||
|
|||||||
+5
-1
@@ -119,7 +119,11 @@ body {
|
|||||||
text-decoration: none;
|
text-decoration: none;
|
||||||
}
|
}
|
||||||
.rel more {
|
.rel more {
|
||||||
font-size: 10px;
|
font-size
|
||||||
|
: 10px;
|
||||||
|
}
|
||||||
|
.rel_img { /* prevent thumbnail from increasing width (useful on mobile) */
|
||||||
|
overflow: hidden;
|
||||||
}
|
}
|
||||||
#sbox {
|
#sbox {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
|
|||||||
@@ -54,6 +54,10 @@ var move_page = function(int_offset) {
|
|||||||
<label for="pid">pid: </label>
|
<label for="pid">pid: </label>
|
||||||
<input name="pid" type="text" id="pid_field" value="{{ gvars.pid }}">
|
<input name="pid" type="text" id="pid_field" value="{{ gvars.pid }}">
|
||||||
|
|
||||||
|
<!-- current category, simply in a text field -->
|
||||||
|
<label for="category">category: </label>
|
||||||
|
<input name="category_filter" type="text" id="category_filter__field" value="{{ gvars.category_filter }}" size="8">
|
||||||
|
|
||||||
<!-- current time_filter, in a text field -->
|
<!-- current time_filter, in a text field -->
|
||||||
<label for="time_filter">time_filter (days): </label>
|
<label for="time_filter">time_filter (days): </label>
|
||||||
<input name="time_filter" type="text" id="time_filter_field" value="{{ gvars.time_filter }}">
|
<input name="time_filter" type="text" id="time_filter_field" value="{{ gvars.time_filter }}">
|
||||||
@@ -102,9 +106,13 @@ var move_page = function(int_offset) {
|
|||||||
|
|
||||||
<!-- links to previous and next pages -->
|
<!-- links to previous and next pages -->
|
||||||
<div id="pagination">
|
<div id="pagination">
|
||||||
|
{% if gvars.page_number|int > 1 %}
|
||||||
<span id="link-prev-page" onclick='move_page(-1);'>prev</span>
|
<span id="link-prev-page" onclick='move_page(-1);'>prev</span>
|
||||||
<span>current page: {{ gvars.page_number }} </span>
|
{% endif %}
|
||||||
|
<span>page {{ gvars.page_number }} of {{ gvars.total_pages }}</span>
|
||||||
|
{% if gvars.page_number|int < gvars.total_pages|int %}
|
||||||
<span id="link-next-page" onclick='move_page(1);'>next</span>
|
<span id="link-next-page" onclick='move_page(1);'>next</span>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user