first leet codes

2021-11-12 20:40:19 -08:00
parent 089adab199
commit 194b7f4b22
12 changed files with 820 additions and 0 deletions
@@ -0,0 +1,3 @@
+.DS_Store
+.ipynb_checkpoints
+__pycache__
@@ -0,0 +1,21 @@
+
+# arxiv-sanity-lite
+
+
+**(WIP)**
+
+A much lighter-weight arxiv-sanity re-write. Currently only runs locally on a single machine and doesn't actually exist as a website on the internet. However, the code is currently in a semi "feature-complete" state in the sense that I can personally run it locally on my computer and find it helpful to me. Basically I find the papers that look good and use the UI to tag them under any category of interest. Then the code recommends other similar papers for each tag based on SVM on tfidf vectors constructed from abstracts. So that's pretty cool, and may be useful to you as well!
+
+That said, the code was written quick & dirty style, so one currently has to read it and you're on your own wrt any support. But I hope to make it good and host it publicly in the future, deprecating the current bloated arxiv-sanity in favor of this format.
+
+
+#### To run
+
+- Periodically run arxiv_daemon.py to add recent papers from arxiv to the database.
+- Then run compute.py to calculate tfidf features on the paper abstracts and save those to database.
+- Finally run serve.py to start the server and access the frontend layer over the data, e.g.: `export FLASK_APP=serve.py; flask run`.
+
+
+#### License
+
+MIT
@@ -0,0 +1,83 @@
+"""
+This script is intended to wake up every 30 min or so (eg via cron),
+it checks for any new arxiv papers via the arxiv API and stashes
+them into a sqlite database papers.db
+"""
+
+import sys
+import time
+import random
+import logging
+import argparse
+
+from aslite.arxiv import get_response, parse_response
+from aslite.db import SqliteDict, CompressedSqliteDict
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description='Arxiv Daemon')
+    parser.add_argument('-n', '--num', type=int, default=100, help='how many papers to fetch')
+    parser.add_argument('-s', '--start', type=int, default=0, help='start at what index')
+    args = parser.parse_args()
+    print(args)
+    logging.basicConfig(level=logging.INFO, format='%(name)s %(levelname)s %(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
+
+    # query string of papers to look for
+    q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO'
+
+    # flag='c': default mode, open for read/write, creating the db/table if necessary.
+    pdb = CompressedSqliteDict('papers.db', tablename='papers', flag='c', autocommit=True)
+    mdb = SqliteDict('papers.db', tablename='metas', flag='c', autocommit=True)
+    prevn = len(pdb)
+
+    def store(p):
+        pdb[p['_id']] = p
+        mdb[p['_id']] = {'_time': p['_time']}
+
+    # fetch the latest papers
+    for k in range(args.start, args.start + args.num, 100):
+        logging.info('querying arxiv api for query %s at start_index %d' % (q, k))
+
+        # attempt to fetch a batch of papers from arxiv api
+        ntried = 0
+        while True:
+            try:
+                resp = get_response(search_query=q, start_index=k)
+                papers = parse_response(resp)
+                time.sleep(0.5)
+                if len(papers) == 100:
+                    break # otherwise we have to try again
+            except Exception as e:
+                print(e)
+                print("will try again in a bit...")
+                ntried += 1
+                if ntried > 1000:
+                    print("ok we tried 1,000 times, something is srsly wrong. exitting.")
+                    sys.exit()
+                time.sleep(2 + random.uniform(0, 4))
+
+        # process the batch of retrieved papers
+        nhad, nnew, nreplace = 0, 0, 0
+        for p in papers:
+            pid = p['_id']
+            if pid in pdb:
+                if p['_time'] > pdb[pid]['_time']:
+                    # replace, this one is newer
+                    store(p)
+                    nreplace += 1
+                else:
+                    # we already had this paper, nothing to do
+                    nhad += 1
+            else:
+                # new, simple store into database
+                store(p)
+                nnew += 1
+        prevn = len(pdb)
+
+        # print some diagnostic information
+        print(papers[0]['_time_str'])
+        print("k=%d, out of %d: had %d, replaced %d, new %d. now have: %d" %
+             (k, len(papers), nhad, nreplace, nnew, prevn))
+
+        # zzz
+        time.sleep(2 + random.uniform(0, 4))
@@ -0,0 +1,81 @@
+"""
+Utils for dealing with arxiv API and related processing
+"""
+
+import time
+import logging
+import urllib.request
+import feedparser
+from collections import OrderedDict
+
+logger = logging.getLogger(__name__)
+
+def get_response(search_query, start_index=0):
+    """ pings arxiv.org API to fetch a batch of 100 papers """
+    # fetch raw response
+    base_url = 'http://export.arxiv.org/api/query?'
+    add_url = 'search_query=%s&sortBy=lastUpdatedDate&start=%d&max_results=100' % (search_query, start_index)
+    #add_url = 'search_query=%s&sortBy=submittedDate&start=%d&max_results=100' % (search_query, start_index)
+    search_query = base_url + add_url
+    logger.info(f"Searching arxiv for {search_query}")
+    with urllib.request.urlopen(search_query) as url:
+        response = url.read()
+
+    if url.status != 200:
+        logger.error(f"arxiv did not return status 200 response")
+
+    return response
+
+def encode_feedparser_dict(d):
+    """ helper function to strip feedparser objects using a deep copy """
+    if isinstance(d, feedparser.FeedParserDict) or isinstance(d, dict):
+        return {k: encode_feedparser_dict(d[k]) for k in d.keys()}
+    elif isinstance(d, list):
+        return [encode_feedparser_dict(k) for k in d]
+    else:
+        return d
+
+def parse_arxiv_url(url):
+    """
+    examples is http://arxiv.org/abs/1512.08756v2
+    we want to extract the raw id (1512.08756) and the version (2)
+    """
+    ix = url.rfind('/')
+    assert ix >= 0, 'bad url: ' + url
+    idv = url[ix+1:] # extract just the id (and the version)
+    parts = idv.split('v')
+    assert len(parts) == 2, 'error splitting id and version in idv string: ' + idv
+    return idv, parts[0], int(parts[1])
+
+def parse_response(response):
+
+    out = []
+    parse = feedparser.parse(response)
+    for e in parse.entries:
+        j = encode_feedparser_dict(e)
+        # extract / parse id information
+        idv, rawid, version = parse_arxiv_url(j['id'])
+        j['_idv']= idv
+        j['_id'] = rawid
+        j['_version'] = version
+        j['_time'] = time.mktime(j['updated_parsed'])
+        j['_time_str'] = time.strftime('%b %d %Y', j['updated_parsed'])
+        # delete apparently spurious and redundant information
+        del j['summary_detail']
+        del j['title_detail']
+        out.append(j)
+
+    return out
+
+def filter_latest_version(idvs):
+    """
+    for each idv filter the list down to only the most recent version
+    """
+
+    pid_to_v = OrderedDict()
+    for idv in idvs:
+        pid, v = idv.split('v')
+        pid_to_v[pid] = max(int(v), pid_to_v.get(pid, 0))
+
+    filt = [f"{pid}v{v}" for pid, v in pid_to_v.items()]
+    return filt
@@ -0,0 +1,21 @@
+"""
+Database support functions
+"""
+
+import sqlite3, zlib, pickle
+from sqlitedict import SqliteDict
+
+# -----------------------------------------------------------------------------
+
+class CompressedSqliteDict(SqliteDict):
+    """ overrides the encode/decode methods to use zlib, so we get compressed storage """
+
+    def __init__(self, *args, **kwargs):
+
+        def encode(obj):
+            return sqlite3.Binary(zlib.compress(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)))
+
+        def decode(obj):
+            return pickle.loads(zlib.decompress(bytes(obj)))
+
+        super().__init__(*args, **kwargs, encode=encode, decode=decode)
@@ -0,0 +1,53 @@
+"""
+Extracts features from all paper abstracts.
+Saves them into one big features.p pickle file holding the numpy array
+of features for all the paper abstracts...
+"""
+
+import pickle
+import argparse
+
+import numpy as np
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+from aslite.db import SqliteDict, CompressedSqliteDict
+
+# -----------------------------------------------------------------------------
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description='Arxiv Computor')
+    parser.add_argument('-n', '--num', type=int, default=10000, help='number of tfidf features')
+    parser.add_argument('--min_df', type=int, default=5, help='min df')
+    parser.add_argument('--max_df', type=float, default=0.5, help='max df')
+    args = parser.parse_args()
+    print(args)
+
+    v = TfidfVectorizer(input='content',
+                        encoding='utf-8', decode_error='replace', strip_accents='unicode',
+                        lowercase=True, analyzer='word', stop_words='english',
+                        token_pattern=r'(?u)\b[a-zA-Z_][a-zA-Z0-9_]+\b',
+                        ngram_range=(1, 2), max_features=args.num,
+                        norm='l2', use_idf=True, smooth_idf=True, sublinear_tf=True,
+                        max_df=args.max_df, min_df=args.min_df)
+
+    pdb = CompressedSqliteDict('papers.db', tablename='papers', flag='r')
+
+    def make_corpus():
+        for p, d in pdb.items():
+            author_str = ' '.join([a['name'] for a in d['authors']])
+            yield ' '.join([d['title'], d['summary'], author_str])
+
+    print("training tfidf vectors...")
+    v.fit(make_corpus())
+
+    print("running inference...")
+    x = v.transform(make_corpus()).astype(np.float32)
+    print(x.shape)
+
+    print("saving to features.p")
+    features = {
+        'pids': list(pdb.keys()),
+        'x': x,
+    }
+    pickle.dump(features, open('features.p', 'wb' ))
@@ -0,0 +1,276 @@
+"""
+Flask server backend
+
+ideas:
+- allow delete of tags
+- unify all different pages into single search filter sort interface
+- special single-image search just for paper similarity
+"""
+
+import time
+import pickle
+from random import shuffle
+
+import numpy as np
+from sklearn import svm
+
+from flask import Flask, request, redirect, url_for
+from flask import render_template
+from flask import g # global session-level object
+
+from aslite.db import SqliteDict, CompressedSqliteDict
+
+# -----------------------------------------------------------------------------
+# TODO: user accounts / password login are necessary...
+
+# the flask application, all the routes below are registered on it
+app = Flask(__name__)
+RET_NUM = 100 # number of papers to return per page
+
+def get_tags():
+    if not hasattr(g, '_tags'):
+        user = 'root' # root for now, the only default user
+        print("reading tags for user %s" % (user, ))
+        with CompressedSqliteDict('dict.db', tablename='tags', flag='r') as dict_db:
+            tags_dict = dict_db[user] if user in dict_db else {}
+        g._tags = tags_dict
+    return g._tags
+
+def get_papers():
+    if not hasattr(g, '_pdb'):
+        g._pdb = CompressedSqliteDict('papers.db', tablename='papers', flag='r')
+    return g._pdb
+
+def get_metas():
+    if not hasattr(g, '_mdb'):
+        g._mdb = SqliteDict('papers.db', tablename='metas', flag='r')
+    return g._mdb
+
+def render_pids(pids):
+
+    pdb = get_papers()
+    tags = get_tags()
+
+    papers = []
+    for pid in pids:
+        d = pdb[pid]
+        ptags = [t for t, pids in tags.items() if pid in pids]
+        papers.append({
+            'weight': 0.0,
+            'id': d['_id'],
+            'title': d['title'],
+            'time': d['_time_str'],
+            'authors': ', '.join(a['name'] for a in d['authors']),
+            'tags': ', '.join(t['term'] for t in d['tags']),
+            'utags': ptags,
+            'summary': d['summary'],
+        })
+
+    return papers
+
+def random_rank():
+    pdb = get_papers()
+    pids = list(pdb.keys())
+    shuffle(pids)
+    scores = [0 for _ in pids]
+    return pids, scores
+
+def time_rank():
+    mdb = get_metas()
+    ms = sorted(mdb.items(), key=lambda kv: kv[1]['_time'], reverse=True)
+    tnow = time.time()
+    pids = [k for k, v in ms]
+    scores = [(tnow - v['_time'])/60/60/24 for k, v in ms] # time delta in days
+    return pids, scores
+
+def svm_rank(tags=None, pid=None):
+
+    # tag can be one tag or a few comma-separated tags or 'all' for all tags we have in db
+    # pid can be a specific paper id to set as positive for a kind of nearest neighbor search
+    assert (tags is not None) or (pid is not None)
+
+    # load all of the features
+    features = pickle.load(open('features.p', 'rb'))
+    x, pids = features['x'], features['pids']
+    n, d = x.shape
+    ptoi, itop = {}, {}
+    for i, p in enumerate(pids):
+        ptoi[p] = i
+        itop[i] = p
+
+    # construct the positive set
+    y = np.zeros(n, dtype=np.float32)
+    if pid is not None:
+        y[ptoi[pid]] = 1.0
+    elif tags is not None:
+        tags_db = get_tags()
+        tags_filter_to = tags_db.keys() if tags == 'all' else set(tags.split(','))
+        for tag, pids in tags_db.items():
+            if tag in tags_filter_to:
+                for pid in pids:
+                    y[ptoi[pid]] = 1.0
+
+    if y.sum() == 0:
+        return [], [] # there are no positives?
+
+    # classify
+    clf = svm.LinearSVC(class_weight='balanced', verbose=False, max_iter=10000, tol=1e-6, C=0.1)
+    clf.fit(x, y)
+    s = clf.decision_function(x)
+    sortix = np.argsort(-s)
+    pids = [itop[ix] for ix in sortix]
+    scores = [100*float(s[ix]) for ix in sortix]
+
+    return pids, scores
+
+def default_context(papers, **kwargs):
+    context = {}
+    # insert the papers
+    context['papers'] = papers
+    # fetch and insert the available tags
+    tags = get_tags()
+    context['tags'] = [{'name':t, 'n':len(pids)} for t, pids in tags.items()] + [{'name': 'all'}]
+    # various other globals
+    gvars = {}
+    gvars['search_query'] = ''
+    gvars['time_filter'] = ''
+    gvars['message'] = 'default_message'
+    context['gvars'] = gvars
+    return context
+
+# -----------------------------------------------------------------------------
+
+@app.teardown_request
+def close_connection(error=None):
+    # close any opened database connections
+    if hasattr(g, '_pdb'):
+        g._pdb.close()
+    if hasattr(g, '_mdb'):
+        g._mdb.close()
+
+# -----------------------------------------------------------------------------
+
+@app.route('/', methods=['GET'])
+def main():
+
+    # GET options
+    opt_rank = request.args.get('rank', 'time') # rank type. tags|pid|time|random
+    opt_tags = request.args.get('tags', 'all')  # tags to rank by if opt_rank == 'tag'
+    opt_pid = request.args.get('pid', None)  # pid to find nearest neighbors to
+    opt_time_filter = request.args.get('time_filter', '') # number of days to filter by
+    opt_skip_have = request.args.get('skip_have', 'no') # hide papers we already have?
+
+    # rank papers: by tags, by time, by random
+    if opt_rank in ['tags', 'pid']:
+        pids, scores = svm_rank(tags=opt_tags, pid=opt_pid)
+    elif opt_rank == 'time':
+        pids, scores = time_rank()
+    elif opt_rank == 'random':
+        pids, scores = random_rank()
+    else:
+        raise ValueError("opt_rank %s is not a thing" % (opt_rank, ))
+
+    # filter by time
+    if opt_time_filter:
+        pdb = get_papers()
+        tnow = time.time()
+        deltat = int(opt_time_filter)*60*60*24 # allowed time delta in seconds
+        keep = [i for i,pid in enumerate(pids) if (tnow - pdb[pid]['_time']) < deltat]
+        pids, scores = [pids[i] for i in keep], [scores[i] for i in keep]
+
+    # optionally hide papers we already have
+    if opt_skip_have == 'yes':
+        tags_db = get_tags()
+        have = set().union(*tags_db.values())
+        keep = [i for i,pid in enumerate(pids) if pid not in have]
+        pids, scores = [pids[i] for i in keep], [scores[i] for i in keep]
+
+    # crop
+    pids = pids[:min(len(pids), RET_NUM)]
+    papers = render_pids(pids)
+    for i, p  in enumerate(papers):
+        p['weight'] = float(scores[i])
+
+    context = default_context(papers)
+    context['gvars']['rank'] = opt_rank
+    context['gvars']['tags'] = opt_tags
+    context['gvars']['time_filter'] = opt_time_filter
+    return render_template('index.html', **context)
+
+
+@app.route("/search", methods=['GET'])
+def search():
+    q = request.args.get('q', '') # get the search request
+    if not q:
+        return redirect(url_for('main')) # if someone just hits enter with empty field
+    qs = q.lower().strip().split() # split by spaces
+
+    match = lambda s: sum(s.lower().count(qp) for qp in qs)
+    pairs = []
+    pdb = get_papers()
+    for pid, p in pdb.items():
+        score = 0.0
+        score += 5.0 * match(' '.join([a['name'] for a in p['authors']]))
+        score += 10.0 * match(p['title'])
+        score += 1.0 * match(p['summary'])
+        if score > 0:
+            pairs.append((score, pid))
+
+    pairs.sort(reverse=True)
+    pids = [p[1] for p in pairs]
+    pids = pids[:min(RET_NUM, len(pids))] # crop if needed
+
+    papers = render_pids(pids)
+    for i, p in enumerate(papers):
+        p['weight'] = pairs[i][0]
+
+    context = default_context(papers)
+    context['gvars']['search_query'] = q
+    return render_template('index.html', **context)
+
+
+@app.route('/add/<pid>/<tag>')
+def add(pid=None, tag=None):
+    user = 'root'
+    with CompressedSqliteDict('dict.db', tablename='tags', flag='c') as dict_db:
+
+        # create the user if we don't know about them yet with an empty library
+        if not user in dict_db:
+            dict_db[user] = {}
+
+        # fetch the user library object
+        d = dict_db[user]
+
+        # add the paper to the tag
+        if tag not in d:
+            d[tag] = set()
+        d[tag].add(pid)
+
+        # write back to database
+        dict_db[user] = d
+        dict_db.commit()
+
+    print("added paper %s to tag %s for user %s" % (pid, tag, user))
+    return "ok: " + str(d) # return back the user library for debugging atm
+
+@app.route('/del/<tag>')
+def delete_tag(tag=None):
+    user = 'root'
+    with CompressedSqliteDict('dict.db', tablename='tags', flag='c') as dict_db:
+
+        if user not in dict_db:
+            return "user does not have a library"
+
+        d = dict_db[user]
+
+        if tag not in d:
+            return "user does not have this tag"
+
+        # delete the tag
+        del d[tag]
+
+        # write back to database
+        dict_db[user] = d
+        dict_db.commit()
+
+    print("deleted tag %s for user %s" % (tag, user))
+    return "ok: " + str(d) # return back the user library for debugging atm
@@ -0,0 +1,86 @@
+'use strict';
+
+const UTag = props => {
+    const tag_name = props.tag;
+    const turl = "/?rank=tags&tags=" + tag_name;
+    return (
+        <div class='rel_utag'>
+            <a href={turl}>
+                {tag_name}
+            </a>
+        </div>
+    )
+}
+
+const Paper = props => {
+    const p = props.paper;
+    const adder = () => fetch("/add/" + p.id + "/" + prompt("tag name:"))
+                        .then(response => console.log(response.text()));
+    const utags = p.utags.map((utxt, ix) => <UTag key={ix} tag={utxt} />);
+    const similar_url = "/?rank=tags&pid=" + p.id;
+    return (
+    <div class='rel_paper'>
+        <div class="rel_add" onClick={adder}>+</div>
+        <div class="rel_score">{p.weight.toFixed(2)}</div>
+        <div class='rel_title'><a href={'http://arxiv.org/abs/' + p.id}>{p.title}</a></div>
+        <div class='rel_authors'>{p.authors}</div>
+        <div class="rel_time">{p.time}</div>
+        <div class='rel_tags'>{p.tags}</div>
+        <div class='rel_utags'>{utags}</div>
+        <div class='rel_abs'>{p.summary}</div>
+        <div class='rel_more'><a href={similar_url}>similar</a></div>
+    </div>
+    )
+}
+
+const PaperList = props => {
+    const lst = props.papers;
+    const plst = lst.map((jpaper, ix) => <Paper key={ix} paper={jpaper} />);
+    return (
+        <div>
+            <div id="paperList" class="rel_papers">
+                {plst}
+            </div>
+        </div>
+    )
+}
+
+const Tag = props => {
+    const t = props.tag;
+    const turl = "/?rank=tags&tags=" + t.name;
+    return (
+        <div class='rel_utag'>
+            <a href={turl}>
+                {t.n} {t.name}
+            </a>
+        </div>
+    )
+}
+
+const TagList = props => {
+    const lst = props.tags;
+    const tlst = lst.map((jtag, ix) => <Tag key={ix} tag={jtag} />);
+    const deleter = () => fetch("/del/" + prompt("delete tag name:"))
+                          .then(response => console.log(response.text()));
+    return (
+        <div>
+            <div class="rel_del" onClick={deleter}>-</div>
+            <div id="tagList" class="rel_utags">
+                {tlst}
+            </div>
+        </div>
+    )
+}
+
+const Opts = props => {
+    const g = props.gvars;
+    return (
+        <div>
+             time filter (days): <input type="text" value={g.time_filter} />
+        </div>
+    )
+}
+
+// mount the components into their containers; papers and tags are globals
+// that are expected to be defined by the page before this script runs
+ReactDOM.render(<PaperList papers={papers} />, document.getElementById('wrap'))
+ReactDOM.render(<TagList tags={tags} />, document.getElementById('tagwrap'))
+//ReactDOM.render(<Opts gvars={gvars} />, document.getElementById('cbox'))
@@ -0,0 +1,125 @@
+body {
+    margin: 0;
+    padding: 0;
+    font-family: sans-serif;
+    line-height: 1.2;
+}
+
+#header {
+    height: 24px;
+    background-color: #844;
+    color: white;
+    padding-top: 8px;
+    padding-left: 10px;
+    border-bottom: 1px solid #622;
+}
+#controls {
+    margin: 10px 40px 0 40px;
+    background-color: #eee;
+}
+#wrap {
+    margin: 10px 40px 0 40px;
+    font-size: 18px;
+}
+#tagwrap {
+    margin: 10px 40px 0 40px;
+    font-size: 18px;
+}
+
+.rel_title {
+    display: inline-block;
+}
+
+.rel_title a {
+    color: #844;
+}
+
+.rel_paper {
+    margin-bottom: 10px;
+    padding: 10px;
+    background-color: #eee;
+    border-radius: 5px;
+}
+
+.rel_score {
+    display: inline-block;
+    margin-right: 6px;
+    font-weight: bold;
+}
+.rel_authors {
+    font-style: italic;
+    font-size: 16px;
+}
+.rel_time {
+    color: #050;
+    display: inline-block;
+    font-size: 16px;
+}
+.rel_abs {
+    color: #333;
+    font-size: 14px;
+}
+.rel_tags {
+    color: #009;
+    font-size: 16px;
+    display: inline-block;
+    margin-left: 5px;
+}
+.rel_utag {
+    background-color: #009;
+    color: #fff;
+    font-size: 16px;
+    display: inline-block;
+    margin-right: 5px;
+    padding: 0 4px 0 4px;
+    border-radius: 4px;
+}
+.rel_add {
+    position: absolute;
+    width: 26px;
+    height: 26px;
+    background-color: #55f;
+    color: white;
+    left: 7px;
+    cursor: pointer;
+    text-align: center;
+    border-radius: 3px;
+}
+.rel_del {
+    position: absolute;
+    width: 22px;
+    height: 22px;
+    background-color: #f55;
+    color: white;
+    left: 7px;
+    cursor: pointer;
+    text-align: center;
+    border-radius: 3px;
+}
+.rel_utag a {
+    color: white;
+    text-decoration: none;
+}
+/* the "similar" link under each paper (class rel_more) */
+.rel_more {
+    font-size: 10px;
+}
+#sbox {
+    width: 100%;
+}
+/* the search field */
+#qfield {
+    width: 100%;
+    box-sizing: border-box;
+
+    height: 40px;
+    font-size: 22px;
+
+    border: solid 1px #999;
+    /* a color must not be quoted, a quoted value is invalid and gets ignored */
+    color: #333;
+
+    /* leaves room for the search icon in the background */
+    padding-left: 50px;
+
+    background-image: url('/static/search.png');
+    background-repeat: no-repeat;
+    background-position: left center;
+    outline: 0;
+}
@@ -0,0 +1,71 @@
+<!DOCTYPE HTML>
+<html>
+
+<head>
+<!-- meta info -->
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<title>arxiv-sanity</title>
+<!-- CSS -->
+<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
+<!-- Favicon -->
+<link rel="shortcut icon" type="image/png" href="{{ url_for('static', filename='favicon.png') }}" />
+
+<script>
+var papers = {{ papers | tojson }};
+var tags = {{ tags | tojson }};
+var gvars = {{ gvars | tojson }};
+</script>
+
+</head>
+
+<body>
+
+<div id="header">
+    arxiv-sanity
+</div>
+
+<div id="controls">
+    <div>
+        <div id="sbox">
+            <form action="/search" method="get">
+                <input name="q" type="text" id="qfield" value="{{ gvars.search_query }}">
+            </form>
+        </div>
+        <div id="cbox">
+            <!-- 
+                opt_rank = request.args.get('rank', 'tags') # rank type. tags|pid|time|random
+                opt_tags = request.args.get('tags', 'all')  # tags to rank by if opt_rank == 'tag'
+                opt_pid = request.args.get('pid', None)  # pid to find nearest neighbors to
+                opt_time_filter = request.args.get('time_filter', '') # number of days to filter by
+                opt_skip_have = request.args.get('skip_have', 'no') # hide papers we already have? 
+            -->
+            <a href="/?rank=tags&time_filter=7&skip_have=yes">recommend over last week</a><br>
+            <a href="/?rank=tags&time_filter=3&skip_have=yes">recommend over last 3 days</a><br>
+            <a href="/?rank=time">recent</a><br>
+            <a href="/?rank=random&time_filter=7">random last week</a><br>
+        </div>
+    </div>
+    <div>
+        
+    </div>
+    <div id="message">
+        {{gvars.message}}
+    </div>
+</div>
+
+<div id="tagwrap">
+</div>
+
+<div id="wrap">
+</div>
+
+<!-- React -->
+<script src="https://unpkg.com/react@16/umd/react.production.min.js" crossorigin></script>
+<script src="https://unpkg.com/react-dom@16/umd/react-dom.production.min.js" crossorigin></script>
+<!-- Babel for displaying JSX -->
+<script src="https://unpkg.com/babel-standalone@6/babel.min.js"></script>
+<!-- Load our React component -->
+<script src="{{ url_for('static', filename='paper_list.js') }}" type="text/babel"></script>
+
+</body>
+</html>