add thumbnails for papers, which apparently ppl like
This commit is contained in:
@@ -87,6 +87,8 @@ def render_pid(pid):
|
|||||||
# render a single paper with just the information we need for the UI
|
# render a single paper with just the information we need for the UI
|
||||||
pdb = get_papers()
|
pdb = get_papers()
|
||||||
tags = get_tags()
|
tags = get_tags()
|
||||||
|
thumb_path = 'static/thumb/' + pid + '.jpg'
|
||||||
|
thumb_url = thumb_path if os.path.isfile(thumb_path) else ''
|
||||||
d = pdb[pid]
|
d = pdb[pid]
|
||||||
return dict(
|
return dict(
|
||||||
weight = 0.0,
|
weight = 0.0,
|
||||||
@@ -97,6 +99,7 @@ def render_pid(pid):
|
|||||||
tags = ', '.join(t['term'] for t in d['tags']),
|
tags = ', '.join(t['term'] for t in d['tags']),
|
||||||
utags = [t for t, pids in tags.items() if pid in pids],
|
utags = [t for t, pids in tags.items() if pid in pids],
|
||||||
summary = d['summary'],
|
summary = d['summary'],
|
||||||
|
thumb_url = thumb_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
def random_rank():
|
def random_rank():
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ const Paper = props => {
|
|||||||
const utags = p.utags.map((utxt, ix) => <UTag key={ix} tag={utxt} />);
|
const utags = p.utags.map((utxt, ix) => <UTag key={ix} tag={utxt} />);
|
||||||
const similar_url = "/?rank=pid&pid=" + p.id;
|
const similar_url = "/?rank=pid&pid=" + p.id;
|
||||||
const inspect_url = "/inspect?pid=" + p.id;
|
const inspect_url = "/inspect?pid=" + p.id;
|
||||||
|
const thumb_img = p.thumb_url === '' ? null : <div class='rel_img'><img src={p.thumb_url} /></div>;
|
||||||
// if the user is logged in then we can show add/sub buttons
|
// if the user is logged in then we can show add/sub buttons
|
||||||
let utag_controls = null;
|
let utag_controls = null;
|
||||||
if(user) {
|
if(user) {
|
||||||
@@ -43,6 +43,7 @@ const Paper = props => {
|
|||||||
<div class="rel_time">{p.time}</div>
|
<div class="rel_time">{p.time}</div>
|
||||||
<div class='rel_tags'>{p.tags}</div>
|
<div class='rel_tags'>{p.tags}</div>
|
||||||
{utag_controls}
|
{utag_controls}
|
||||||
|
{thumb_img}
|
||||||
<div class='rel_abs'>{p.summary}</div>
|
<div class='rel_abs'>{p.summary}</div>
|
||||||
<div class='rel_more'><a href={similar_url}>similar</a></div>
|
<div class='rel_more'><a href={similar_url}>similar</a></div>
|
||||||
<div class='rel_inspect'><a href={inspect_url}>inspect</a></div>
|
<div class='rel_inspect'><a href={inspect_url}>inspect</a></div>
|
||||||
|
|||||||
+2
-1
@@ -3,6 +3,7 @@ body {
|
|||||||
padding: 0;
|
padding: 0;
|
||||||
font-family: sans-serif;
|
font-family: sans-serif;
|
||||||
line-height: 1.2;
|
line-height: 1.2;
|
||||||
|
background-color: #eee;
|
||||||
}
|
}
|
||||||
#header {
|
#header {
|
||||||
height: 24px;
|
height: 24px;
|
||||||
@@ -50,7 +51,7 @@ body {
|
|||||||
.rel_paper {
|
.rel_paper {
|
||||||
margin-bottom: 10px;
|
margin-bottom: 10px;
|
||||||
padding: 10px;
|
padding: 10px;
|
||||||
background-color: #eee;
|
background-color: white;
|
||||||
border-radius: 5px;
|
border-radius: 5px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+104
@@ -0,0 +1,104 @@
|
|||||||
|
"""
|
||||||
|
Iterates over the current database and makes best effort to download the papers,
|
||||||
|
convert them to thumbnail images and save them to disk, for display in the UI.
|
||||||
|
Atm only runs the most recent 5K papers. Intended to be run as a cron job daily
|
||||||
|
or something like that.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
import requests
|
||||||
|
from subprocess import Popen
|
||||||
|
from aslite.db import get_papers_db, get_metas_db
|
||||||
|
|
||||||
|
# create the tmp directory if it does not exist, where we will do temporary work
|
||||||
|
TMP_DIR = 'tmp'
|
||||||
|
if not os.path.exists(TMP_DIR):
|
||||||
|
os.makedirs(TMP_DIR)
|
||||||
|
# create the thumb directory, where we will store the paper thumbnails
|
||||||
|
THUMB_DIR = os.path.join('static', 'thumb')
|
||||||
|
if not os.path.exists(THUMB_DIR):
|
||||||
|
os.makedirs(THUMB_DIR)
|
||||||
|
|
||||||
|
# open the database, determine which papers we'll try to get thumbs for
|
||||||
|
pdb = get_papers_db()
|
||||||
|
n = len(pdb)
|
||||||
|
mdb = get_metas_db()
|
||||||
|
metas = list(mdb.items())
|
||||||
|
metas.sort(key=lambda kv: kv[1]['_time'], reverse=True) # most recent papers first
|
||||||
|
keys = [k for k,v in metas[:5000]] # only the most recent papers
|
||||||
|
|
||||||
|
for i, key in enumerate(keys):
|
||||||
|
time.sleep(0.01) # for safety
|
||||||
|
|
||||||
|
# the path where we would store the thumbnail for this key
|
||||||
|
thumb_path = os.path.join(THUMB_DIR, key + '.jpg')
|
||||||
|
if os.path.exists(thumb_path):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# fetch the paper
|
||||||
|
p = pdb[key]
|
||||||
|
print("%d/%d: paper to process: %s" % (i, n, key))
|
||||||
|
|
||||||
|
# get the link to the pdf
|
||||||
|
url = p['link'].replace('abs', 'pdf')
|
||||||
|
|
||||||
|
# attempt to download the pdf
|
||||||
|
print("attempting to download pdf from: ", url)
|
||||||
|
try:
|
||||||
|
x = requests.get(url, timeout=10, allow_redirects=True)
|
||||||
|
with open(os.path.join(TMP_DIR, 'paper.pdf'), 'wb') as f:
|
||||||
|
f.write(x.content)
|
||||||
|
print("OK")
|
||||||
|
except Exception as e:
|
||||||
|
print("error downloading the pdf at url", url)
|
||||||
|
print(e)
|
||||||
|
continue
|
||||||
|
time.sleep(5 + random.uniform(0, 5)) # take a breather
|
||||||
|
|
||||||
|
# mv away the previous temporary files if they exist
|
||||||
|
if os.path.isfile(os.path.join(TMP_DIR, 'thumb-0.png')):
|
||||||
|
for i in range(8):
|
||||||
|
f1 = os.path.join(TMP_DIR, 'thumb-%d.png' % (i,))
|
||||||
|
f2 = os.path.join(TMP_DIR, 'thumbbuf-%d.png' % (i,))
|
||||||
|
if os.path.isfile(f1):
|
||||||
|
cmd = 'mv %s %s' % (f1, f2)
|
||||||
|
os.system(cmd)
|
||||||
|
|
||||||
|
# convert pdf to png images per page. spawn async because convert can unfortunately enter an infinite loop, have to handle this.
|
||||||
|
# this command will generate 8 independent images thumb-0.png ... thumb-7.png of the thumbnails
|
||||||
|
print("converting the pdf to png images")
|
||||||
|
pp = Popen(['convert', '%s[0-7]' % ('tmp/paper.pdf', ), '-thumbnail', 'x156', os.path.join(TMP_DIR, 'thumb.png')])
|
||||||
|
t0 = time.time()
|
||||||
|
while time.time() - t0 < 20: # give it 20 seconds deadline
|
||||||
|
ret = pp.poll()
|
||||||
|
if not (ret is None):
|
||||||
|
# process terminated
|
||||||
|
break
|
||||||
|
time.sleep(0.1)
|
||||||
|
ret = pp.poll()
|
||||||
|
if ret is None:
|
||||||
|
print("convert command did not terminate in 20 seconds, terminating.")
|
||||||
|
pp.terminate() # give up
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not os.path.isfile(os.path.join(TMP_DIR, 'thumb-0.png')):
|
||||||
|
# failed to render pdf, replace with missing image
|
||||||
|
#missing_thumb_path = os.path.join('static', 'missing.jpg')
|
||||||
|
#os.system('cp %s %s' % (missing_thumb_path, thumb_path))
|
||||||
|
#print("could not render pdf, creating a missing image placeholder")
|
||||||
|
print("could not render pdf, skipping")
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# otherwise concatenate the 8 images into one
|
||||||
|
cmd = "montage -mode concatenate -quality 80 -tile x1 %s %s" \
|
||||||
|
% (os.path.join(TMP_DIR, 'thumb-*.png'), thumb_path)
|
||||||
|
print(cmd)
|
||||||
|
os.system(cmd)
|
||||||
|
|
||||||
|
# remove the temporary paper.pdf file
|
||||||
|
tmp_pdf = os.path.join(TMP_DIR, 'paper.pdf')
|
||||||
|
if os.path.isfile(tmp_pdf):
|
||||||
|
os.remove(tmp_pdf)
|
||||||
|
|
||||||
Reference in New Issue
Block a user