minor bugfix in persistentData: dispatch on self.__class__ instead of PersistentDataStructure so subclasses work; add overwrite flag to setDataFromSubData; guard ProgressBarFancy against ZeroDivisionError

This commit is contained in:
cimatosa 2016-05-18 15:29:08 +02:00
parent 49cffb1ad4
commit bbbf46b10e
3 changed files with 156 additions and 43 deletions

View file

@ -107,7 +107,7 @@ class PersistentDataStructure(object):
if not make_dir:
full_name = join(self._dirname, fname)
if not os.path.exists(full_name):
open(full_name, 'a').close()
#open(full_name, 'a').close()
return fname
else:
full_name = join(self._dirname, '__'+fname)
@ -176,8 +176,6 @@ class PersistentDataStructure(object):
try:
if self.verbose > 1:
print("sub_data_keys:", self.sub_data_keys)
for key in self:
t, v = self.get_value_and_value_type(key)
if t == TYPE_SUB:
@ -322,11 +320,10 @@ class PersistentDataStructure(object):
"""
self.need_open()
if overwrite:
if (overwrite) and (key in self.db):
if self.verbose > 1:
print("overwrite True: del key")
if key in self.db:
self.__delitem__(key)
self.__delitem__(key)
if not key in self.db:
if _NP and isinstance(value, np.ndarray):
@ -350,6 +347,8 @@ class PersistentDataStructure(object):
def _setNPA(self, key, nparray):
d = {'fname': self._new_rand_file_name(end='.npy'),
'magic': MAGIC_SIGN_NPARRAY}
if self.verbose > 1:
print("set NPA (key)", key, " (fname)", d['fname'])
self.db[key] = d
self.db.commit()
@ -364,6 +363,8 @@ class PersistentDataStructure(object):
d = self.db[key]
assert d['magic'] == MAGIC_SIGN_NPARRAY
fname = d['fname']
if self.verbose > 1:
print("load NPA (key)", key, " (fname)", fname)
return self._loadNPA(fname)
@ -383,9 +384,13 @@ class PersistentDataStructure(object):
if not key in self.db:
d = {'name': self._new_rand_file_name(make_dir=True),
'magic': MAGIC_SIGN}
if self.verbose > 1:
print("newSubData (key)", key, " (name)", d['name'])
self.db[key] = d
self.db.commit()
return PersistentDataStructure(name = d['name'], path = os.path.join(self._dirname) , verbose = self.verbose)
return self.__class__(name = d['name'], path = os.path.join(self._dirname) , verbose = self.verbose)
else:
raise RuntimeError("can NOT create new SubData, key already found!")
@ -402,7 +407,7 @@ class PersistentDataStructure(object):
if self.verbose > 1:
print("return subData stored as key", key, "using name", sub_db_name)
return PersistentDataStructure(name = sub_db_name, path = os.path.join(self._dirname) , verbose = self.verbose)
return self.__class__(name = sub_db_name, path = os.path.join(self._dirname) , verbose = self.verbose)
elif t == TYPE_NPA:
if self.verbose > 1:
print("return nparray value")
@ -419,7 +424,7 @@ class PersistentDataStructure(object):
print("getData key does NOT exists -> create subData")
return self.newSubData(key)
def setDataFromSubData(self, key, subData):
def setDataFromSubData(self, key, subData, overwrite=False):
"""
set an entry of the PDS with data from an other PDS
@ -427,19 +432,31 @@ class PersistentDataStructure(object):
and rename them
"""
self.need_open()
if self.is_subdata(key): # check if key points to existing PDS
with self[key] as pds: #
name = pds._name # remember its name
dir_name = pds._dirname # and the directory where it's in
pds.erase() # remove the existing subData from hdd
else:
with self.newSubData(key) as new_sub_data: # create a new subData
name = new_sub_data._name # and remember name and directory
dir_name = new_sub_data._dirname
new_sub_data.erase()
shutil.copytree(src=subData._dirname, dst=dir_name)
os.rename(src=os.path.join(dir_name, subData._name+'.db'), dst=os.path.join(dir_name, name+'.db'))
if key in self.db:
if overwrite:
if self.verbose > 1:
print("overwrite True: del key")
self.__delitem__(key)
else:
raise RuntimeError("can NOT create new SubData from Data, key already found!")
d = {'name': self._new_rand_file_name(make_dir=True),
'magic': MAGIC_SIGN}
self.db[key] = d
self.db.commit()
if self.verbose > 1:
print("")
print("setDataFromSubData: orig SubData (name)", subData._name, "new SubData (key)", key, " (name)", d['name'])
dest_dir = os.path.join(self._dirname, '__'+d['name'])
os.removedirs(dest_dir)
shutil.copytree(src=subData._dirname, dst=dest_dir)
os.rename(src=os.path.join(dest_dir, subData._name+'.db'),
dst=os.path.join(dest_dir, d['name']+'.db'))
def mergeOtherPDS(self, other_db_name, other_db_path = './', update = 'error', status_interval=5):
"""
@ -466,9 +483,9 @@ class PersistentDataStructure(object):
else:
PB = progress.ProgressBarFancy
with PersistentDataStructure(name = other_db_name,
path = other_db_path,
verbose = self.verbose) as otherData:
with self.__class__(name = other_db_name,
path = other_db_path,
verbose = self.verbose) as otherData:
c = progress.UnsignedIntValue(val=0)
m = progress.UnsignedIntValue(val=len(otherData))
@ -496,7 +513,7 @@ class PersistentDataStructure(object):
self[k] = value
transfered += 1
finally:
if isinstance(value, PersistentDataStructure):
if isinstance(value, self.__class__):
value.close()
with c.get_lock():
@ -531,8 +548,8 @@ class PersistentDataStructure(object):
# implements '[]' operator setter
def __setitem__(self, key, value):
    """Store ``value`` under ``key``, overwriting any existing entry.

    If ``value`` is itself a PDS instance (checked against ``self.__class__``
    so that subclasses are dispatched correctly, which is the point of this
    commit's fix), its on-disk tree is copied in via ``setDataFromSubData``;
    any other value goes through the plain ``setData`` path.  Both calls pass
    ``overwrite=True`` because ``[]`` assignment semantics replace the old
    entry unconditionally.
    """
    if isinstance(value, self.__class__):
        self.setDataFromSubData(key, value, overwrite=True)
    else:
        self.setData(key, value, overwrite=True)

View file

@ -1006,7 +1006,10 @@ class ProgressBarFancy(Progress):
if width == 'auto':
width = get_terminal_width()
# deduce relative progress
p = count_value / max_count_value
try:
p = count_value / max_count_value
except ZeroDivisionError:
p = 1
if p < 1:
ps = " {:.1%} ".format(p)
else:

View file

@ -63,6 +63,7 @@ def test_pd():
assert data.getData(key) == 1
finally:
print()
data.erase()
def test_pd_bytes():
@ -91,8 +92,10 @@ def test_pd_bytes():
assert base_data[b2] == t1
finally:
print()
base_data.erase()
def test_directory_removal():
try:
with PDS(name='data', verbose=VERBOSE) as data:
@ -152,6 +155,8 @@ def test_mp_read_from_sqlite():
def test_from_existing_sub_data():
print()
print('test_from_existing_sub_data')
t1 = (3.4, 4.5, 5.6, 6.7, 7.8, 8.9)
t2 = (3.4, 4.5, 5.6, 6.7, 7.8, 8.9, 9,1)
@ -205,6 +210,7 @@ def test_from_existing_sub_data():
assert sub_data[200] == "sub2:t2"
finally:
print()
base_data.erase()
def test_remove_sub_data_and_check_len():
@ -384,7 +390,7 @@ def test_merge():
with PDS(name='d2', verbose=VERBOSE) as d2:
d2['2k1'] = 1
d2.mergeOtherPDS(other_db_name = "d1")
d2.mergeOtherPDS(other_db_name = "d1", status_interval=0)
try:
with PDS(name='d2', verbose=VERBOSE) as d2:
@ -411,7 +417,7 @@ def test_merge():
try:
with PDS(name='d2', verbose=VERBOSE) as d2:
d2.mergeOtherPDS(other_db_name = "d1", update='error')
d2.mergeOtherPDS(other_db_name = "d1", update='error', status_interval=0)
except KeyError as e:
print(e)
print("this is ok!")
@ -419,12 +425,12 @@ def test_merge():
with PDS(name='d2', verbose=VERBOSE) as d2:
d2['k1'] = 'k1'
d2.mergeOtherPDS(other_db_name = "d1", update='ignore')
d2.mergeOtherPDS(other_db_name = "d1", update='ignore', status_interval=0)
assert d2['k1'] == 'k1'
with PDS(name='d2', verbose=VERBOSE) as d2:
d2['k1'] = 'k1'
d2.mergeOtherPDS(other_db_name = "d1", update='update')
d2.mergeOtherPDS(other_db_name = "d1", update='update', status_interval=0)
assert d2['k1'] == 1
finally:
@ -433,19 +439,106 @@ def test_merge():
with PDS(name='d2', verbose=VERBOSE) as d2:
d2.erase()
def test_merge_fname_conflict():
    """Regression test: merging two PDS instances whose numpy files and
    sub-databases use IDENTICAL (deterministic) file names must not clobber
    each other's data — the merge has to re-assign fresh random names.

    Requires module-level names ``PDS``, ``pd``, ``np``, ``os``, ``VERBOSE``.
    """
    class PDS_det_fname(PDS):
        # Subclass that forces deterministic on-disk names so that both
        # databases are guaranteed to collide on file/dir names.
        def newNPA(self, key, nparray):
            # always store the array as 'det_fname.npy' (no randomization)
            d = {'fname': 'det_fname.npy',
                 'magic': pd.MAGIC_SIGN_NPARRAY}
            self.db[key] = d
            self.db.commit()
            full_name = os.path.join(self._dirname, d['fname'])
            np.save(full_name, nparray)
            return True

        def newSubData(self, key):
            # always name sub-databases 'subDB', 'subDB2', ... (no randomization)
            self.need_open()
            dirname = 'subDB'
            i = 2
            while os.path.exists(os.path.join(self._dirname, '__' + dirname)):
                dirname = 'subDB{}'.format(i)
                i += 1
            print(self._dirname, dirname)
            full_name = os.path.join(self._dirname, '__' + dirname)
            os.mkdir(full_name)
            if not key in self.db:
                d = {'name': dirname,
                     'magic': pd.MAGIC_SIGN}
                self.db[key] = d
                self.db.commit()
                return self.__class__(name=d['name'], path=os.path.join(self._dirname), verbose=self.verbose)
            else:
                raise RuntimeError("can NOT create new SubData, key already found!")

    a = np.random.rand(5)
    b = np.random.rand(5)

    # populate source database d1 with a colliding array name and sub-db name
    with PDS_det_fname(name='d1', verbose=VERBOSE) as d1:
        d1.newNPA('aa', a)
        with d1.newSubData('sub1') as sub1:
            sub1['s1'] = 11
            sub1.newNPA('a', a)

    # populate destination database d2 with the SAME deterministic names
    with PDS_det_fname(name='d2', verbose=VERBOSE) as d2:
        d2['2k1'] = 1
        d2.newNPA('2aa', b)
        with d2.newSubData('sub2') as sub2:
            sub2['s2'] = 22
            sub2.newNPA('a2', b)
        assert np.all(d2['2aa'] == b)
        d2.mergeOtherPDS(other_db_name="d1", update='error', status_interval=0)
        # merging must not have overwritten d2's own array
        assert np.all(d2['2aa'] == b)

    try:
        # both trees still hold their deterministic files after the merge
        assert os.path.exists(os.path.join(d1._path, '__d1', '__subDB'))
        assert os.path.exists(os.path.join(d1._path, '__d1', 'det_fname.npy'))
        assert os.path.exists(os.path.join(d1._path, '__d2', '__subDB'))
        assert os.path.exists(os.path.join(d1._path, '__d2', 'det_fname.npy'))

        # all merged values survived without clobbering each other
        with PDS_det_fname(name='d2', verbose=VERBOSE) as d2:
            assert d2['2k1'] == 1
            assert np.all(d2['2aa'] == b)
            assert np.all(d2['aa'] == a)
            assert d2.has_key('sub1')
            with d2['sub1'] as sub1:
                assert sub1['s1'] == 11
                assert np.all(sub1['a'] == a)
            assert d2.has_key('sub2')
            with d2['sub2'] as sub2:
                assert sub2['s2'] == 22
                assert np.all(sub2['a2'] == b)
    finally:
        # clean up both databases regardless of assertion outcome
        with PDS(name='d1', verbose=VERBOSE) as d1:
            d1.erase()
        with PDS(name='d2', verbose=VERBOSE) as d2:
            d2.erase()
if __name__ == "__main__":
    # Run the full test suite when executed as a script (the commit
    # re-enabled all tests that were previously commented out and added
    # the two merge tests).
    test_pd()
    test_pd_bytes()
    test_directory_removal()
    test_mp_read_from_sqlite()
    test_from_existing_sub_data()
    test_remove_sub_data_and_check_len()
    test_show_stat()
    test_len()
    test_clear()
    test_not_in()
    test_npa()
    test_merge()
    test_merge_fname_conflict()
    pass