minor logging changes

This commit is contained in:
Andrej Karpathy
2021-11-12 22:49:29 -08:00
parent 158ced647b
commit 7cbb90a480
2 changed files with 12 additions and 11 deletions
+11 -10
View File
@@ -15,18 +15,19 @@ from aslite.db import get_papers_db, get_metas_db
if __name__ == '__main__': if __name__ == '__main__':
logging.basicConfig(level=logging.INFO, format='%(name)s %(levelname)s %(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
parser = argparse.ArgumentParser(description='Arxiv Daemon') parser = argparse.ArgumentParser(description='Arxiv Daemon')
parser.add_argument('-n', '--num', type=int, default=100, help='how many papers to fetch') parser.add_argument('-n', '--num', type=int, default=100, help='how many papers to fetch')
parser.add_argument('-s', '--start', type=int, default=0, help='start at what index') parser.add_argument('-s', '--start', type=int, default=0, help='start at what index')
args = parser.parse_args() args = parser.parse_args()
print(args) print(args)
logging.basicConfig(level=logging.INFO, format='%(name)s %(levelname)s %(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
# query string of papers to look for # query string of papers to look for
q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO' q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO'
pdb = get_papers_db(flag='c', autocommit=True) pdb = get_papers_db(flag='c')
mdb = get_metas_db(flag='c', autocommit=True) mdb = get_metas_db(flag='c')
prevn = len(pdb) prevn = len(pdb)
def store(p): def store(p):
@@ -47,11 +48,11 @@ if __name__ == '__main__':
if len(papers) == 100: if len(papers) == 100:
break # otherwise we have to try again break # otherwise we have to try again
except Exception as e: except Exception as e:
print(e) logging.warning(e)
print("will try again in a bit...") logging.warning("will try again in a bit...")
ntried += 1 ntried += 1
if ntried > 1000: if ntried > 1000:
print("ok we tried 1,000 times, something is srsly wrong. exitting.") logging.error("ok we tried 1,000 times, something is srsly wrong. exitting.")
sys.exit() sys.exit()
time.sleep(2 + random.uniform(0, 4)) time.sleep(2 + random.uniform(0, 4))
@@ -73,10 +74,10 @@ if __name__ == '__main__':
nnew += 1 nnew += 1
prevn = len(pdb) prevn = len(pdb)
# print some diagnostic information # some diagnostic information on how things are coming along
print(papers[0]['_time_str']) logging.info(papers[0]['_time_str'])
print("k=%d, out of %d: had %d, replaced %d, new %d. now have: %d" % logging.info("k=%d, out of %d: had %d, replaced %d, new %d. now have: %d" %
(k, len(papers), nhad, nreplace, nnew, prevn)) (k, len(papers), nhad, nreplace, nnew, prevn))
# zzz # zzz
time.sleep(2 + random.uniform(0, 4)) time.sleep(1 + random.uniform(0, 3))
+1 -1
View File
@@ -17,7 +17,7 @@ def get_response(search_query, start_index=0):
add_url = 'search_query=%s&sortBy=lastUpdatedDate&start=%d&max_results=100' % (search_query, start_index) add_url = 'search_query=%s&sortBy=lastUpdatedDate&start=%d&max_results=100' % (search_query, start_index)
#add_url = 'search_query=%s&sortBy=submittedDate&start=%d&max_results=100' % (search_query, start_index) #add_url = 'search_query=%s&sortBy=submittedDate&start=%d&max_results=100' % (search_query, start_index)
search_query = base_url + add_url search_query = base_url + add_url
logger.info(f"Searching arxiv for {search_query}") logger.debug(f"Searching arxiv for {search_query}")
with urllib.request.urlopen(search_query) as url: with urllib.request.urlopen(search_query) as url:
response = url.read() response = url.read()