when writing features do it safely and atomically
This commit is contained in:
+69
-3
@@ -5,11 +5,78 @@ Any of the file system I/O and the associated settings are in this single file.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sqlite3, zlib, pickle
|
import sqlite3, zlib, pickle, tempfile
|
||||||
from sqlitedict import SqliteDict
|
from sqlitedict import SqliteDict
|
||||||
|
from contextlib import contextmanager
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# global configuration
|
||||||
|
|
||||||
DATA_DIR = 'data'
|
DATA_DIR = 'data'
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# utilities for safe writing of a pickle file
|
||||||
|
|
||||||
|
# Context managers for atomic writes courtesy of
|
||||||
|
# http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
|
||||||
|
@contextmanager
def _tempfile(*args, **kws):
    """ Context for temporary file.

    Will find a free temporary filename upon entering
    and will try to delete the file on leaving

    Parameters
    ----------
    *args, **kws :
        forwarded unchanged to :code:`tempfile.mkstemp`
        (e.g. :code:`suffix`, :code:`prefix`, :code:`dir`)

    Yields
    ------
    name : string
        path of the freshly created, already closed, temporary file

    Raises
    ------
    OSError
        if removing the file on exit fails for any reason other than the
        file no longer existing
    """
    # local import so this block does not depend on a module-level import
    import errno

    fd, name = tempfile.mkstemp(*args, **kws)
    # only the name is handed out; the caller reopens the file itself
    os.close(fd)
    try:
        yield name
    finally:
        try:
            os.remove(name)
        except OSError as e:
            # a missing file is expected when the caller has already moved
            # or deleted it; anything else is a real error
            if e.errno != errno.ENOENT:
                raise
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def open_atomic(filepath, *args, **kwargs):
    """ Open temporary file object that atomically moves to destination upon
    exiting.

    Allows reading and writing to and from the same filename.

    The temporary file is created in the same directory as the destination.
    If the body of the :code:`with` block raises, the move is skipped, so the
    destination is left untouched.

    Parameters
    ----------
    filepath : string
        the file path to be opened
    fsync : bool
        whether to force write the file to disk
    args : mixed
        Any valid positional arguments for :code:`open` after the path
        (typically the mode)
    kwargs : mixed
        Any valid keyword arguments for :code:`open`

    Yields
    ------
    f : file object
        the open temporary file; its contents become :code:`filepath` on
        a clean exit
    """
    # fsync is our own option, it must not be passed on to open()
    fsync = kwargs.pop('fsync', False)

    with _tempfile(dir=os.path.dirname(filepath)) as tmppath:
        with open(tmppath, *args, **kwargs) as f:
            yield f
            if fsync:
                f.flush()
                os.fsync(f.fileno())
        # the file is closed at this point. os.replace overwrites an existing
        # destination on every platform, whereas os.rename raises on Windows
        # when the destination already exists
        os.replace(tmppath, filepath)
|
||||||
|
|
||||||
|
def safe_pickle_dump(obj, fname):
    """
    pickles obj into fname without exposing a half-written file: the data
    goes to a temporary file first, which is then moved over fname. this
    avoids a crash when another process reads the pickle file while it is
    still being written.
    """
    with open_atomic(fname, 'wb') as handle:
        # the highest binary protocol, which is what a protocol of -1 selects
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
class CompressedSqliteDict(SqliteDict):
|
class CompressedSqliteDict(SqliteDict):
|
||||||
@@ -62,8 +129,7 @@ FEATURES_FILE = os.path.join(DATA_DIR, 'features.p')
|
|||||||
|
|
||||||
def save_features(features):
    """
    takes the features dict and saves it to disk in a simple pickle file.
    the write goes through safe_pickle_dump (temporary file, then move), so
    a process reading FEATURES_FILE at the same time does not see a
    partially written pickle.
    """
    safe_pickle_dump(features, FEATURES_FILE)
|
|
||||||
|
|
||||||
def load_features():
|
def load_features():
|
||||||
""" loads the features dict from disk """
|
""" loads the features dict from disk """
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
Extracts tfidf features from all paper abstracts and saves them to disk.
|
Extracts tfidf features from all paper abstracts and saves them to disk.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pickle
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|||||||
Reference in New Issue
Block a user