whitespace

This commit is contained in:
Valentin Boettcher 2021-12-14 16:42:25 +01:00
parent f5a5710500
commit b47b23ce87

View file

@ -5,11 +5,11 @@ from __future__ import division, print_function
This module is intended to generate a binary representation of a Python object
where it is guaranteed that the same objects will result in the same binary where it is guaranteed that the same objects will result in the same binary
representation. representation.
Not all Python objects are supported. Here is the list of supported types:
- special built-in constants: True, False, None
- integer - integer
- float (64bit) - float (64bit)
- complex (128bit) - complex (128bit)
- np.ndarray - np.ndarray
@ -17,20 +17,20 @@ from __future__ import division, print_function
- tuple - tuple
- dictionary - dictionary
- namedtuple (new since version 0x80; before that, it also needs the 'classes' lookup when loaded)
For any nested combination of these objects it is also guaranteed that the For any nested combination of these objects it is also guaranteed that the
original objects can be restored without any extra information. original objects can be restored without any extra information.
Additionally Additionally
- 'getstate' (objects that implement __getstate__ and return a state that can be dumped as well) - 'getstate' (objects that implement __getstate__ and return a state that can be dumped as well)
can be dumped. To restore these objects, the load function needs a lookup, given by the argument 'classes',
which maps the object's class name (obj.__class__.__name__) to the actual class definition (the class object).
Of course for these objects the __setstate__ method needs to be implemented. Of course for these objects the __setstate__ method needs to be implemented.
NOTE: the tests pass on Python 2.7 and Python 3.4 so far, but it has not yet been tested whether the binary representation
is the same among different Python versions (it should be, though!)
""" """
@ -55,7 +55,7 @@ _STR = 0x04
_BYTES = 0x05 # only for python3, as bytes and str are equivalent in python2 _BYTES = 0x05 # only for python3, as bytes and str are equivalent in python2
_INT = 0x06 _INT = 0x06
_TUPLE = 0x07 _TUPLE = 0x07
_NAMEDTUPLE = 0x08 _NAMEDTUPLE = 0x08
_NPARRAY = 0x09 _NPARRAY = 0x09
_LIST = 0x0a _LIST = 0x0a
_GETSTATE = 0x0b # only used when __bfkey__ is not present _GETSTATE = 0x0b # only used when __bfkey__ is not present
@ -93,7 +93,7 @@ def char_eq_byte(ch, b):
def byte_eq_byte(b1, b2):
    """Return whether the two byte values ``b1`` and ``b2`` compare equal.

    The comparison is a plain ``==``; neither argument is converted first.
    """
    return b1 == b2
if version_info.major > 2: if version_info.major > 2:
@ -122,12 +122,12 @@ int_to_bytes = lambda i: i.to_bytes(ceil(i.bit_length() / 8), 'big')
bytes_to_int = lambda ba: int.from_bytes(ba, 'big') bytes_to_int = lambda ba: int.from_bytes(ba, 'big')
try: try:
int_to_bytes(2**77) int_to_bytes(2**77)
except AttributeError: except AttributeError:
int_to_bytes = __int_to_bytes int_to_bytes = __int_to_bytes
__b_tmp = int_to_bytes(2**77) __b_tmp = int_to_bytes(2**77)
try: try:
bytes_to_int(__b_tmp) bytes_to_int(__b_tmp)
except AttributeError: except AttributeError:
@ -187,7 +187,7 @@ def _dump_spec(ob):
elif ob == None: elif ob == None:
b = init_BYTES([_SPEC, char_to_byte('N')]) b = init_BYTES([_SPEC, char_to_byte('N')])
else: else:
raise RuntimeError("object is not of 'special' kind!") raise RuntimeError("object is not of 'special' kind!")
return b return b
def _load_spec(b): def _load_spec(b):
@ -197,27 +197,27 @@ def _load_spec(b):
elif b[1] == char_to_byte('F'): elif b[1] == char_to_byte('F'):
return False, 2 return False, 2
elif b[1] == char_to_byte('N'): elif b[1] == char_to_byte('N'):
return None, 2 return None, 2
else: else:
raise BFLoadError("internal error (unknown code for 'special' {})".format(b[1])) raise BFLoadError("internal error (unknown code for 'special' {})".format(b[1]))
def _dump_int_32(ob):
    """Serialize ``ob`` as a signed 32-bit big-endian integer.

    The result is the buffer created by ``init_BYTES([_INT_32])`` followed
    by the 4 bytes produced by ``struct.pack('>i', ob)``.

    Raises ``struct.error`` if ``ob`` cannot be packed with the '>i' format.
    """
    b = init_BYTES([_INT_32])
    payload = struct.pack('>i', ob)
    b += payload
    return b
def _load_int_32(b):
    """Decode a signed 32-bit big-endian integer from the start of ``b``.

    ``b[0]`` must match the ``_INT_32`` identifier (checked with an assert);
    the value is unpacked from ``b[1:5]``.

    Returns a pair ``(value, 5)`` where 5 is the number of bytes consumed
    (one identifier byte plus four payload bytes).
    """
    assert comp_id(b[0], _INT_32)
    (value,) = struct.unpack('>i', b[1:5])
    return value, 5
def _dump_int(ob): def _dump_int(ob):
if ob < 0: if ob < 0:
b = init_BYTES([_INT_NEG]) b = init_BYTES([_INT_NEG])
ob *= -1 ob *= -1
else: else:
b = init_BYTES([_INT]) b = init_BYTES([_INT])
ib = int_to_bytes(ob) ib = int_to_bytes(ob)
num_bytes = len(ib) num_bytes = len(ib)
b += struct.pack('>I', num_bytes) b += struct.pack('>I', num_bytes)
@ -234,7 +234,7 @@ def _load_int(b):
num_bytes = struct.unpack('>I', b[1:5])[0] num_bytes = struct.unpack('>I', b[1:5])[0]
i = m*bytes_to_int(b[5:5+num_bytes]) i = m*bytes_to_int(b[5:5+num_bytes])
return i, num_bytes + 5 return i, num_bytes + 5
def _dump_float(ob): def _dump_float(ob):
b = init_BYTES([_FLOAT]) b = init_BYTES([_FLOAT])
b += struct.pack('>d', ob) b += struct.pack('>d', ob)
@ -256,9 +256,9 @@ def _load_complex(b):
re = struct.unpack('>d', b[1:9])[0] re = struct.unpack('>d', b[1:9])[0]
im = struct.unpack('>d', b[9:17])[0] im = struct.unpack('>d', b[9:17])[0]
return re + 1j*im, 13 return re + 1j*im, 13
def _dump_str(ob): def _dump_str(ob):
b = init_BYTES([_STR]) b = init_BYTES([_STR])
str_bytes = str_to_bytes(ob) str_bytes = str_to_bytes(ob)
num_bytes = len(str_bytes) num_bytes = len(str_bytes)
b += struct.pack('>I', num_bytes) b += struct.pack('>I', num_bytes)
@ -284,15 +284,15 @@ def _load_bytes(b):
b_ = b[5:5+num_bytes] b_ = b[5:5+num_bytes]
return b_, 5+num_bytes return b_, 5+num_bytes
def _dump_tuple(t):
    """Serialize the tuple ``t``.

    Layout: the buffer created by ``init_BYTES([_TUPLE])``, the number of
    elements packed as an unsigned 32-bit big-endian integer ('>I'), then
    the ``_dump`` output of every element in order.
    """
    b = init_BYTES([_TUPLE])
    b += struct.pack('>I', len(t))
    for item in t:
        b += _dump(item)
    return b
def _load_tuple(b, classes): def _load_tuple(b, classes):
assert comp_id(b[0], _TUPLE) assert comp_id(b[0], _TUPLE)
size = struct.unpack('>I', b[1:5])[0] size = struct.unpack('>I', b[1:5])[0]
idx = 5 idx = 5
@ -305,17 +305,17 @@ def _load_tuple(b, classes):
def _dump_namedtuple(t):
    """Serialize the namedtuple ``t`` together with its class and field names.

    Layout: the buffer created by ``init_BYTES([_NAMEDTUPLE])``, the number
    of fields packed as '>I', the dumped class name
    (``t.__class__.__name__``), then for every position the dumped field
    name followed by the dumped value.
    """
    b = init_BYTES([_NAMEDTUPLE])
    b += struct.pack('>I', len(t))
    b += _dump(t.__class__.__name__)
    field_names = t._fields
    for i, value in enumerate(t):
        # Index into _fields so a mismatch between fields and values still
        # raises instead of silently writing a truncated record.
        b += _dump(field_names[i])
        b += _dump(value)
    return b
def _load_namedtuple(b, classes): def _load_namedtuple(b, classes):
assert comp_id(b[0], _NAMEDTUPLE) assert comp_id(b[0], _NAMEDTUPLE)
size = struct.unpack('>I', b[1:5])[0] size = struct.unpack('>I', b[1:5])[0]
class_name, len_ob = _load_str(b[5:]) class_name, len_ob = _load_str(b[5:])
@ -326,26 +326,26 @@ def _load_namedtuple(b, classes):
ob, len_ob = _load(b[idx:], classes) ob, len_ob = _load(b[idx:], classes)
fields.append(ob) fields.append(ob)
idx += len_ob idx += len_ob
ob, len_ob = _load(b[idx:], classes) ob, len_ob = _load(b[idx:], classes)
t.append(ob) t.append(ob)
idx += len_ob idx += len_ob
np_class = namedtuple(class_name, fields) np_class = namedtuple(class_name, fields)
np_obj = np_class(*t) np_obj = np_class(*t)
return np_obj, idx return np_obj, idx
def _dump_list(t):
    """Serialize the list ``t``.

    Layout: the buffer created by ``init_BYTES([_LIST])``, the number of
    elements packed as an unsigned 32-bit big-endian integer ('>I'), then
    the ``_dump`` output of every element in order.
    """
    b = init_BYTES([_LIST])
    b += struct.pack('>I', len(t))
    for item in t:
        b += _dump(item)
    return b
def _load_list(b, classes): def _load_list(b, classes):
assert comp_id(b[0], _LIST) assert comp_id(b[0], _LIST)
size = struct.unpack('>I', b[1:5])[0] size = struct.unpack('>I', b[1:5])[0]
idx = 5 idx = 5
t = [] t = []
@ -362,7 +362,7 @@ def _dump_np_array(np_array):
b += struct.pack('>I', size) b += struct.pack('>I', size)
b += nparray_bytes b += nparray_bytes
return b return b
def _load_np_array(b): def _load_np_array(b):
assert comp_id(b[0], _NPARRAY) assert comp_id(b[0], _NPARRAY)
size = struct.unpack('>I', b[1:5])[0] size = struct.unpack('>I', b[1:5])[0]
@ -384,12 +384,12 @@ def _load_bfkey(b, classes):
return (obj_type, bfkey), l_obj_type+l_state+1 return (obj_type, bfkey), l_obj_type+l_state+1
def _dump_getstate(ob):
    """Serialize an object through its ``__getstate__`` method.

    Layout: the buffer created by ``init_BYTES([_GETSTATE])``, the dumped
    class name (``ob.__class__.__name__`` converted with ``str``), then the
    dumped state returned by ``ob.__getstate__()``.

    The state itself must be something ``_dump`` can handle.
    """
    b = init_BYTES([_GETSTATE])
    # Fetch the state before anything is dumped, as the original did.
    state = ob.__getstate__()
    class_name = str(ob.__class__.__name__)
    b += _dump(class_name)
    b += _dump(state)
    return b
def _load_getstate(b, classes): def _load_getstate(b, classes):
@ -405,7 +405,7 @@ def _load_getstate(b, classes):
return obj, l_obj_type+l_state+1 return obj, l_obj_type+l_state+1
def _dump_dict(ob): def _dump_dict(ob):
b = init_BYTES([_DICT]) b = init_BYTES([_DICT])
keys = ob.keys() keys = ob.keys()
bin_keys = [] bin_keys = []
for k in keys: for k in keys:
@ -472,12 +472,12 @@ def _dump(ob):
elif isinstance(ob, str): elif isinstance(ob, str):
return _dump_str(ob) return _dump_str(ob)
elif isinstance(ob, bytes): elif isinstance(ob, bytes):
return _dump_bytes(ob) return _dump_bytes(ob)
elif isinstance(ob, tuple): elif isinstance(ob, tuple):
if hasattr(ob, '_fields'): if hasattr(ob, '_fields'):
return _dump_namedtuple(ob) return _dump_namedtuple(ob)
else: else:
return _dump_tuple(ob) return _dump_tuple(ob)
elif isinstance(ob, list): elif isinstance(ob, list):
return _dump_list(ob) return _dump_list(ob)
elif isinstance(ob, np.ndarray): elif isinstance(ob, np.ndarray):
@ -492,17 +492,17 @@ def _dump(ob):
return _dump_scipy_csc_matrix(ob) return _dump_scipy_csc_matrix(ob)
else: else:
raise TypeError("unsupported type for dump '{}' ({})".format(type(ob), ob)) raise TypeError("unsupported type for dump '{}' ({})".format(type(ob), ob))
def _load(b, classes): def _load(b, classes):
identifier = b[0] identifier = b[0]
if isinstance(identifier, str): if isinstance(identifier, str):
identifier = ord(identifier) identifier = ord(identifier)
if identifier == _SPEC: if identifier == _SPEC:
return _load_spec(b) return _load_spec(b)
elif identifier == _INT_32: elif identifier == _INT_32:
return _load_int_32(b) return _load_int_32(b)
elif (identifier == _INT) or (identifier == _INT_NEG): elif (identifier == _INT) or (identifier == _INT_NEG):
return _load_int(b) return _load_int(b)
elif identifier == _FLOAT: elif identifier == _FLOAT:
return _load_float(b) return _load_float(b)
elif identifier == _COMPLEX: elif identifier == _COMPLEX:
@ -510,13 +510,13 @@ def _load(b, classes):
elif identifier == _STR: elif identifier == _STR:
return _load_str(b) return _load_str(b)
elif identifier == _BYTES: elif identifier == _BYTES:
return _load_bytes(b) return _load_bytes(b)
elif identifier == _TUPLE: elif identifier == _TUPLE:
return _load_tuple(b, classes) return _load_tuple(b, classes)
elif identifier == _NAMEDTUPLE: elif identifier == _NAMEDTUPLE:
return _load_namedtuple(b, classes) return _load_namedtuple(b, classes)
elif identifier == _LIST: elif identifier == _LIST:
return _load_list(b, classes) return _load_list(b, classes)
elif identifier == _NPARRAY: elif identifier == _NPARRAY:
return _load_np_array(b) return _load_np_array(b)
elif identifier == _DICT: elif identifier == _DICT:
@ -529,7 +529,7 @@ def _load(b, classes):
return _load_scipy_csc_matrix(b) return _load_scipy_csc_matrix(b)
else: else:
raise BFLoadError("internal error (unknown identifier '{}')".format(hex(identifier))) raise BFLoadError("internal error (unknown identifier '{}')".format(hex(identifier)))
def dump(ob, vers=_VERS): def dump(ob, vers=_VERS):
""" """
returns the binary footprint of the object 'ob' as bytes returns the binary footprint of the object 'ob' as bytes
@ -544,7 +544,7 @@ def dump(ob, vers=_VERS):
res = _dump(ob) res = _dump(ob)
finally: finally:
_dump = __dump_tmp _dump = __dump_tmp
return res return res
def load(b, classes={}): def load(b, classes={}):
@ -564,12 +564,12 @@ def load(b, classes={}):
res = _load(b, classes)[0] res = _load(b, classes)[0]
finally: finally:
_load = __load_tmp _load = __load_tmp
return res return res
else: else:
raise BFLoadError("internal error (unknown version tag {})".format(vers)) raise BFLoadError("internal error (unknown version tag {})".format(vers))
################################################################## ##################################################################
#### ####
#### VERY FIRST VERSION -- NO VERSION TAG #### VERY FIRST VERSION -- NO VERSION TAG
@ -578,7 +578,7 @@ def load(b, classes={}):
# #
# so the first two bytes must correspond to an identifier which are assumed # so the first two bytes must correspond to an identifier which are assumed
# to be < 128 = 0x80 # to be < 128 = 0x80
def _load_namedtuple_00(b, classes): def _load_namedtuple_00(b, classes):
""" """
need to explicitly know the named tuple class for reconstruction need to explicitly know the named tuple class for reconstruction
@ -598,32 +598,32 @@ def _load_namedtuple_00(b, classes):
np_class = classes[class_name] np_class = classes[class_name]
except KeyError: except KeyError:
raise BFUnkownClassError(class_name) raise BFUnkownClassError(class_name)
obj = np_class(*t) obj = np_class(*t)
return obj, idx return obj, idx
def _dump_namedtuple_00(t):
    """Serialize the namedtuple ``t`` without its field names.

    Layout: the buffer created by ``init_BYTES([_NAMEDTUPLE])``, the number
    of values packed as '>I', the dumped class name
    (``t.__class__.__name__``), then the dumped values in order. Field
    names are not written.
    """
    b = init_BYTES([_NAMEDTUPLE])
    b += struct.pack('>I', len(t))
    b += _dump(t.__class__.__name__)
    for value in t:
        b += _dump(value)
    return b
def _load_00(b, classes): def _load_00(b, classes):
identifier = b[0] identifier = b[0]
if isinstance(identifier, str): if isinstance(identifier, str):
identifier = ord(identifier) identifier = ord(identifier)
if identifier == _SPEC: if identifier == _SPEC:
return _load_spec(b) return _load_spec(b)
elif identifier == _INT_32: elif identifier == _INT_32:
return _load_int_32(b) return _load_int_32(b)
elif (identifier == _INT) or (identifier == _INT_NEG): elif (identifier == _INT) or (identifier == _INT_NEG):
return _load_int(b) return _load_int(b)
elif identifier == _FLOAT: elif identifier == _FLOAT:
return _load_float(b) return _load_float(b)
elif identifier == _COMPLEX: elif identifier == _COMPLEX:
@ -631,22 +631,22 @@ def _load_00(b, classes):
elif identifier == _STR: elif identifier == _STR:
return _load_str(b) return _load_str(b)
elif identifier == _BYTES: elif identifier == _BYTES:
return _load_bytes(b) return _load_bytes(b)
elif identifier == _TUPLE: elif identifier == _TUPLE:
return _load_tuple(b, classes) return _load_tuple(b, classes)
elif identifier == _NAMEDTUPLE: elif identifier == _NAMEDTUPLE:
return _load_namedtuple_00(b, classes) return _load_namedtuple_00(b, classes)
elif identifier == _LIST: elif identifier == _LIST:
return _load_list(b, classes) return _load_list(b, classes)
elif identifier == _NPARRAY: elif identifier == _NPARRAY:
return _load_np_array(b) return _load_np_array(b)
elif identifier == _DICT: elif identifier == _DICT:
return _load_dict(b, classes) return _load_dict(b, classes)
elif identifier == _GETSTATE: elif identifier == _GETSTATE:
return _load_getstate(b, classes) return _load_getstate(b, classes)
else: else:
raise BFLoadError("unknown identifier '{}'".format(hex(identifier))) raise BFLoadError("unknown identifier '{}'".format(hex(identifier)))
def _dump_00(ob): def _dump_00(ob):
if isinstance(ob, _spec_types): if isinstance(ob, _spec_types):
return _dump_spec(ob) return _dump_spec(ob)
@ -662,19 +662,19 @@ def _dump_00(ob):
elif isinstance(ob, str): elif isinstance(ob, str):
return _dump_str(ob) return _dump_str(ob)
elif isinstance(ob, bytes): elif isinstance(ob, bytes):
return _dump_bytes(ob) return _dump_bytes(ob)
elif isinstance(ob, tuple): elif isinstance(ob, tuple):
if hasattr(ob, '_fields'): if hasattr(ob, '_fields'):
return _dump_namedtuple_00(ob) return _dump_namedtuple_00(ob)
else: else:
return _dump_tuple(ob) return _dump_tuple(ob)
elif isinstance(ob, list): elif isinstance(ob, list):
return _dump_list(ob) return _dump_list(ob)
elif isinstance(ob, np.ndarray): elif isinstance(ob, np.ndarray):
return _dump_np_array(ob) return _dump_np_array(ob)
elif isinstance(ob, dict): elif isinstance(ob, dict):
return _dump_dict(ob) return _dump_dict(ob)
elif hasattr(ob, '__getstate__'): elif hasattr(ob, '__getstate__'):
return _dump_getstate(ob) return _dump_getstate(ob)
else: else:
raise RuntimeError("unsupported type for dump '{}'".format(type(ob))) raise RuntimeError("unsupported type for dump '{}'".format(type(ob)))