minor logging changes
This commit is contained in:
+11 lines added
-10 lines removed
@@ -15,18 +15,19 @@ from aslite.db import get_papers_db, get_metas_db
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format='%(name)s %(levelname)s %(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='Arxiv Daemon')
|
parser = argparse.ArgumentParser(description='Arxiv Daemon')
|
||||||
parser.add_argument('-n', '--num', type=int, default=100, help='how many papers to fetch')
|
parser.add_argument('-n', '--num', type=int, default=100, help='how many papers to fetch')
|
||||||
parser.add_argument('-s', '--start', type=int, default=0, help='start at what index')
|
parser.add_argument('-s', '--start', type=int, default=0, help='start at what index')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
print(args)
|
print(args)
|
||||||
logging.basicConfig(level=logging.INFO, format='%(name)s %(levelname)s %(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
|
|
||||||
|
|
||||||
# query string of papers to look for
|
# query string of papers to look for
|
||||||
q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO'
|
q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO'
|
||||||
|
|
||||||
pdb = get_papers_db(flag='c', autocommit=True)
|
pdb = get_papers_db(flag='c')
|
||||||
mdb = get_metas_db(flag='c', autocommit=True)
|
mdb = get_metas_db(flag='c')
|
||||||
prevn = len(pdb)
|
prevn = len(pdb)
|
||||||
|
|
||||||
def store(p):
|
def store(p):
|
||||||
@@ -47,11 +48,11 @@ if __name__ == '__main__':
|
|||||||
if len(papers) == 100:
|
if len(papers) == 100:
|
||||||
break # otherwise we have to try again
|
break # otherwise we have to try again
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
logging.warning(e)
|
||||||
print("will try again in a bit...")
|
logging.warning("will try again in a bit...")
|
||||||
ntried += 1
|
ntried += 1
|
||||||
if ntried > 1000:
|
if ntried > 1000:
|
||||||
print("ok we tried 1,000 times, something is srsly wrong. exitting.")
|
logging.error("ok we tried 1,000 times, something is srsly wrong. exitting.")
|
||||||
sys.exit()
|
sys.exit()
|
||||||
time.sleep(2 + random.uniform(0, 4))
|
time.sleep(2 + random.uniform(0, 4))
|
||||||
|
|
||||||
@@ -73,10 +74,10 @@ if __name__ == '__main__':
|
|||||||
nnew += 1
|
nnew += 1
|
||||||
prevn = len(pdb)
|
prevn = len(pdb)
|
||||||
|
|
||||||
# print some diagnostic information
|
# some diagnostic information on how things are coming along
|
||||||
print(papers[0]['_time_str'])
|
logging.info(papers[0]['_time_str'])
|
||||||
print("k=%d, out of %d: had %d, replaced %d, new %d. now have: %d" %
|
logging.info("k=%d, out of %d: had %d, replaced %d, new %d. now have: %d" %
|
||||||
(k, len(papers), nhad, nreplace, nnew, prevn))
|
(k, len(papers), nhad, nreplace, nnew, prevn))
|
||||||
|
|
||||||
# zzz
|
# zzz
|
||||||
time.sleep(2 + random.uniform(0, 4))
|
time.sleep(1 + random.uniform(0, 3))
|
||||||
|
|||||||
+1 line added
-1 line removed
@@ -17,7 +17,7 @@ def get_response(search_query, start_index=0):
|
|||||||
add_url = 'search_query=%s&sortBy=lastUpdatedDate&start=%d&max_results=100' % (search_query, start_index)
|
add_url = 'search_query=%s&sortBy=lastUpdatedDate&start=%d&max_results=100' % (search_query, start_index)
|
||||||
#add_url = 'search_query=%s&sortBy=submittedDate&start=%d&max_results=100' % (search_query, start_index)
|
#add_url = 'search_query=%s&sortBy=submittedDate&start=%d&max_results=100' % (search_query, start_index)
|
||||||
search_query = base_url + add_url
|
search_query = base_url + add_url
|
||||||
logger.info(f"Searching arxiv for {search_query}")
|
logger.debug(f"Searching arxiv for {search_query}")
|
||||||
with urllib.request.urlopen(search_query) as url:
|
with urllib.request.urlopen(search_query) as url:
|
||||||
response = url.read()
|
response = url.read()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user