Optimization: read only the metas if that's all we need to complete the request. That said, I expected this to run much faster, and it doesn't. I think the way I'm using SQLite here is not exactly efficient or appropriate, and I have to rethink the database schema. E.g., getting random papers over the last 7 days should be almost instantaneous, but currently this request takes about 3 seconds. I have to better understand why this is happening.

This commit is contained in:
Andrej Karpathy
2021-11-26 09:51:48 -08:00
parent b283c85c72
commit 3d5bbb9851
+6 -6
View File
@@ -76,8 +76,8 @@ def render_pid(pid):
) )
def random_rank(): def random_rank():
pdb = get_papers() mdb = get_metas()
pids = list(pdb.keys()) pids = list(mdb.keys())
shuffle(pids) shuffle(pids)
scores = [0 for _ in pids] scores = [0 for _ in pids]
return pids, scores return pids, scores
@@ -186,16 +186,16 @@ def main():
# filter by time # filter by time
if opt_time_filter: if opt_time_filter:
pdb = get_papers() mdb = get_metas()
tnow = time.time() tnow = time.time()
deltat = int(opt_time_filter)*60*60*24 # allowed time delta in seconds deltat = int(opt_time_filter)*60*60*24 # allowed time delta in seconds
keep = [i for i,pid in enumerate(pids) if (tnow - pdb[pid]['_time']) < deltat] keep = [i for i,pid in enumerate(pids) if (tnow - mdb[pid]['_time']) < deltat]
pids, scores = [pids[i] for i in keep], [scores[i] for i in keep] pids, scores = [pids[i] for i in keep], [scores[i] for i in keep]
# optionally hide papers we already have # optionally hide papers we already have
if opt_skip_have == 'yes': if opt_skip_have == 'yes':
tags_db = get_tags() tags = get_tags()
have = set().union(*tags_db.values()) have = set().union(*tags.values())
keep = [i for i,pid in enumerate(pids) if pid not in have] keep = [i for i,pid in enumerate(pids) if pid not in have]
pids, scores = [pids[i] for i in keep], [scores[i] for i in keep] pids, scores = [pids[i] for i in keep], [scores[i] for i in keep]