From 3d5bbb9851a4443b3641e4e2614f2ae1b23e488a Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Fri, 26 Nov 2021 09:51:48 -0800 Subject: [PATCH] optimization: read only metas if that's all that we need to complete the request. that said, i expected this to run much faster and it doesn't. i think the way i'm using sqlite here is not exactly efficient or appropriate and i have to re-think the database schema. e.g. getting random papers over the last 7 days should be almost instantaneous, but currently this request takes like 3 seconds. have to better understand why this is happening --- serve.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/serve.py b/serve.py index eec33d8..fcd1556 100644 --- a/serve.py +++ b/serve.py @@ -76,8 +76,8 @@ def render_pid(pid): ) def random_rank(): - pdb = get_papers() - pids = list(pdb.keys()) + mdb = get_metas() + pids = list(mdb.keys()) shuffle(pids) scores = [0 for _ in pids] return pids, scores @@ -186,16 +186,16 @@ def main(): # filter by time if opt_time_filter: - pdb = get_papers() + mdb = get_metas() tnow = time.time() deltat = int(opt_time_filter)*60*60*24 # allowed time delta in seconds - keep = [i for i,pid in enumerate(pids) if (tnow - pdb[pid]['_time']) < deltat] + keep = [i for i,pid in enumerate(pids) if (tnow - mdb[pid]['_time']) < deltat] pids, scores = [pids[i] for i in keep], [scores[i] for i in keep] # optionally hide papers we already have if opt_skip_have == 'yes': - tags_db = get_tags() - have = set().union(*tags_db.values()) + tags = get_tags() + have = set().union(*tags.values()) keep = [i for i,pid in enumerate(pids) if pid not in have] pids, scores = [pids[i] for i in keep], [scores[i] for i in keep]