Speed up time filtering by 20x by loading all of the metas into memory once instead of querying them item by item in a massive loop.

This commit is contained in:
Andrej Karpathy
2021-11-26 10:22:52 -08:00
parent 3d5bbb9851
commit 3e2d1248d8
+2 -1
View File
@@ -187,9 +187,10 @@ def main():
# filter by time # filter by time
if opt_time_filter: if opt_time_filter:
mdb = get_metas() mdb = get_metas()
kv = {k:v for k,v in mdb.items()} # read all of metas to memory at once, for efficiency
tnow = time.time() tnow = time.time()
deltat = int(opt_time_filter)*60*60*24 # allowed time delta in seconds deltat = int(opt_time_filter)*60*60*24 # allowed time delta in seconds
keep = [i for i,pid in enumerate(pids) if (tnow - mdb[pid]['_time']) < deltat] keep = [i for i,pid in enumerate(pids) if (tnow - kv[pid]['_time']) < deltat]
pids, scores = [pids[i] for i in keep], [scores[i] for i in keep] pids, scores = [pids[i] for i in keep], [scores[i] for i in keep]
# optionally hide papers we already have # optionally hide papers we already have