when writing features do it safely and atomically
This commit is contained in:
+69
-3
@@ -5,11 +5,78 @@ Any of the file system I/O and the associated settings are in this single file.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sqlite3, zlib, pickle
|
||||
import sqlite3, zlib, pickle, tempfile
|
||||
from sqlitedict import SqliteDict
|
||||
from contextlib import contextmanager
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# global configuration
|
||||
|
||||
DATA_DIR = 'data'
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# utilities for safe writing of a pickle file
|
||||
|
||||
# Context managers for atomic writes courtesy of
|
||||
# http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
|
||||
@contextmanager
def _tempfile(*args, **kws):
    """ Context for temporary file.

    Will find a free temporary filename upon entering
    and will try to delete the file on leaving

    Parameters
    ----------
    *args, **kws : mixed
        forwarded unchanged to :code:`tempfile.mkstemp`
        (e.g. :code:`suffix`, :code:`prefix`, :code:`dir`)

    Yields
    ------
    name : string
        path of the temporary file, which already exists on disk and is closed
    """
    fd, name = tempfile.mkstemp(*args, **kws)
    # only the reserved name is needed; the caller reopens the file itself
    os.close(fd)
    try:
        yield name
    finally:
        try:
            os.remove(name)
        except FileNotFoundError:
            # the file was already moved away (or deleted) inside the context,
            # so there is nothing left to clean up; other OSErrors propagate
            pass


@contextmanager
def open_atomic(filepath, *args, **kwargs):
    """ Open temporary file object that atomically moves to destination upon
    exiting.

    The data is written to a temporary file created next to the destination
    and only moved over :code:`filepath` once the file has been closed without
    error. If the body of the :code:`with` block raises, the destination is
    left untouched and the temporary file is removed.

    Note: the temporary file is created by :code:`tempfile.mkstemp`, so the
    destination ends up with the permissions of that temporary file.

    Parameters
    ----------
    filepath : string
        the file path to be opened
    fsync : bool
        whether to force write the file to disk
    args, kwargs : mixed
        Any valid positional / keyword arguments for :code:`open`
        (the first positional argument is the mode, e.g. :code:`'wb'`)

    Yields
    ------
    f : file object
        the open temporary file to write to
    """
    fsync = kwargs.pop('fsync', False)

    # create the temp file in the destination directory so that the final move
    # stays on one filesystem
    with _tempfile(dir=os.path.dirname(filepath)) as tmppath:
        with open(tmppath, *args, **kwargs) as f:
            yield f
            if fsync:
                f.flush()
                os.fsync(f.fileno())
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename raises on Windows if the destination exists
        os.replace(tmppath, filepath)
|
||||
|
||||
def safe_pickle_dump(obj, fname):
    """
    pickle obj into fname without ever exposing a half-written file.

    writing the pickle in place could crash another process that reads the
    file at the same moment. instead the data goes to a temporary file which
    is moved over fname once writing is done.
    """
    with open_atomic(fname, 'wb') as tmp:
        # highest available binary protocol (what a protocol of -1 selects)
        pickle.dump(obj, tmp, pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
class CompressedSqliteDict(SqliteDict):
|
||||
@@ -62,8 +129,7 @@ FEATURES_FILE = os.path.join(DATA_DIR, 'features.p')
|
||||
|
||||
def save_features(features):
    """ takes the features dict and saves it to disk in a simple pickle file

    the file is written through safe_pickle_dump only, so a process reading
    FEATURES_FILE at the same time never sees a truncated or partial pickle
    """
    safe_pickle_dump(features, FEATURES_FILE)
|
||||
|
||||
def load_features():
|
||||
""" loads the features dict from disk """
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
Extracts tfidf features from all paper abstracts and saves them to disk.
|
||||
"""
|
||||
|
||||
import pickle
|
||||
import argparse
|
||||
|
||||
import numpy as np
|
||||
|
||||
Reference in New Issue
Block a user