Use bytes / str (Python 3.4 / 2.7) as data storage, so the binary footprint is now hashable

2025-03-06 02:11:39 -05:00 · 2015-11-11 21:59:42 +01:00 · 2015-11-11 21:59:42 +01:00 · 009d7241c8
commit 009d7241c8
parent 6ea441c83c
2 changed files with 73 additions and 47 deletions
--- a/jobmanager/binfootprint.py
+++ b/jobmanager/binfootprint.py
@ -27,10 +27,10 @@ __min_int32 = -2147483648
 def __int_to_bytes(i):
    m = 0xff
    assert i >= 0
-    ba = bytearray()
+    ba = str()
    while i > 0:
        b = i & m
-        ba += bytearray([b])
+        ba += str(bytearray([b]))
        i = i >> 8
    return ba[::-1]

@ -38,68 +38,81 @@ def __bytes_to_int(ba):
    i = 0
    for b in ba:
        i = i << 8
-        i += b
+        i += ord(b)
    return i

-BYTES_CLASS = bytearray
+def char_eq_byte(ch, b):
+    return ord(ch) == b
+
+def byte_eq_byte(b1, b2):
+    return b1 == b2
+
+

 if sys.version_info.major > 2:
-    str_to_bytes = lambda s: BYTES_CLASS(s, 'utf8')
+    BIN_TYPE = bytes
+    str_to_bytes = lambda s: bytes(s, 'utf8')
    bytes_to_str = lambda b: str(b, 'utf8')
    LONG_TYPE    = int
    int_to_bytes = lambda i: i.to_bytes(ceil(i.bit_length() / 8), 'big')  
    bytes_to_int = lambda ba: int.from_bytes(ba, 'big')
    np_load      = lambda ba: np.loads(ba)
+    init_BYTES   = lambda b: bytes(b)
+    comp_id      = byte_eq_byte
+    char_to_byte = lambda ch: ord(ch)
 else:
-
+    BIN_TYPE = str
    str_to_bytes = lambda s: s
    bytes_to_str = lambda b: str(b)
    LONG_TYPE    = long
    int_to_bytes = __int_to_bytes
    bytes_to_int = __bytes_to_int
    np_load      = lambda ba: np.loads(str(ba))
+    init_BYTES   = lambda b: str(bytearray(b))
+    comp_id      = char_eq_byte
+    char_to_byte = lambda ch: ch

 class BFLoadError(Exception):
    pass

 def _dump_spec(ob):
    if ob == True:
-        b = BYTES_CLASS([_SPEC, ord('T')])
+        b = init_BYTES([_SPEC, char_to_byte('T')])
    elif ob == False:
-        b = BYTES_CLASS([_SPEC, ord('F')])
+        b = init_BYTES([_SPEC, char_to_byte('F')])
    elif ob == None:
-        b = BYTES_CLASS([_SPEC, ord('N')])
+        b = init_BYTES([_SPEC, char_to_byte('N')])
    else:
        raise RuntimeError("object is not of 'special' kind!")        
    return b

 def _load_spec(b):
-    assert b[0] == _SPEC
-    if b[1] == ord('T'):
+    assert comp_id(b[0], _SPEC)
+    if b[1] == char_to_byte('T'):
        return True, 2
-    elif b[1] == ord('F'):
+    elif b[1] == char_to_byte('F'):
        return False, 2
-    elif b[1] == ord('N'):
+    elif b[1] == char_to_byte('N'):
        return None, 2   
    else:
        raise BFLoadError("unknown code for 'special' {}".format(b[1]))
    
 def _dump_int_32(ob):
-    b = BYTES_CLASS([_INT_32])
+    b = init_BYTES([_INT_32])
    b += struct.pack('>i', ob)
    return b
    
 def _load_int_32(b):
-    assert b[0] == _INT_32
+    assert comp_id(b[0], _INT_32)
    i = struct.unpack('>i', b[1:5])[0]
    return i, 5
    
 def _dump_int(ob):
    if ob < 0:
-        b = BYTES_CLASS([_INT_NEG])
+        b = init_BYTES([_INT_NEG])
        ob *= -1
    else:
-        b = BYTES_CLASS([_INT])
+        b = init_BYTES([_INT])
        
    ib = int_to_bytes(ob)
    num_bytes = len(ib)
@ -108,9 +121,9 @@ def _dump_int(ob):
    return b

 def _load_int(b):
-    if b[0] == _INT:
+    if comp_id(b[0], _INT):
        m = 1
-    elif b[0] == _INT_NEG:
+    elif comp_id(b[0], _INT_NEG):
        m = -1
    else:
        assert False
@ -119,29 +132,29 @@ def _load_int(b):
    return i, num_bytes + 5
    
 def _dump_float(ob):
-    b = BYTES_CLASS([_FLOAT])
+    b = init_BYTES([_FLOAT])
    b += struct.pack('>d', ob)
    return b

 def _load_float(b):
-    assert b[0] == _FLOAT
+    assert comp_id(b[0],_FLOAT)
    f = struct.unpack('>d', b[1:9])[0]
    return f, 9

 def _dump_complex(ob):
-    b = BYTES_CLASS([_COMPLEX])
+    b = init_BYTES([_COMPLEX])
    b += struct.pack('>d', ob.real)
    b += struct.pack('>d', ob.imag)
    return b

 def _load_complex(b):
-    assert b[0] == _COMPLEX
+    assert comp_id(b[0], _COMPLEX)
    re = struct.unpack('>d', b[1:9])[0]
    im = struct.unpack('>d', b[9:17])[0]
    return re + 1j*im, 13
    
 def _dump_str(ob):
-    b = BYTES_CLASS([_STR])    
+    b = init_BYTES([_STR])    
    str_bytes = str_to_bytes(ob)
    num_bytes = len(str_bytes)
    b += struct.pack('>I', num_bytes)
@ -149,26 +162,29 @@ def _dump_str(ob):
    return b

 def _load_str(b):
-    assert b[0] == _STR
+    assert comp_id(b[0], _STR)
    num_bytes = struct.unpack('>I', b[1:5])[0]
    s = bytes_to_str(b[5:5+num_bytes])
    return s, 5+num_bytes

 def _dump_bytes(ob):
-    b = BYTES_CLASS([_BYTES])
+    b = init_BYTES([_BYTES])
    num_bytes = len(ob)
    b += struct.pack('>I', num_bytes)
-    b += ob
+    if isinstance(b, str):
+        b += str(ob)
+    else:
+        b += ob
    return b

 def _load_bytes(b):
-    assert b[0] == _BYTES
+    assert comp_id(b[0], _BYTES)
    num_bytes = struct.unpack('>I', b[1:5])[0]
    b_ = b[5:5+num_bytes]
    return b_, 5+num_bytes

 def _dump_tuple(t):   
-    b = BYTES_CLASS([_TUPLE])
+    b = init_BYTES([_TUPLE])
    size = len(t)
    b += struct.pack('>I', size)    
    for ti in t:
@ -176,7 +192,7 @@ def _dump_tuple(t):
    return b

 def _load_tuple(b):    
-    assert b[0] == _TUPLE
+    assert comp_id(b[0], _TUPLE)
    size = struct.unpack('>I', b[1:5])[0]
    idx = 5
    t = []
@ -187,7 +203,7 @@ def _load_tuple(b):
    return tuple(t), idx

 def _dump_namedtuple(t):   
-    b = BYTES_CLASS([_NAMEDTUPLE])
+    b = init_BYTES([_NAMEDTUPLE])
    size = len(t)
    b += struct.pack('>I', size)
    b += _dump(t.__class__.__name__)    
@ -196,7 +212,7 @@ def _dump_namedtuple(t):
    return b

 def _load_namedtuple(b):    
-    assert b[0] == _NAMEDTUPLE
+    assert comp_id(b[0], _NAMEDTUPLE)
    size = struct.unpack('>I', b[1:5])[0]
    class_name, len_ob = _load(b[5:])
    idx = 5 + len_ob
@ -208,7 +224,7 @@ def _load_namedtuple(b):
    return (class_name, tuple(t)), idx

 def _dump_list(t):
-    b = BYTES_CLASS([_LIST])
+    b = init_BYTES([_LIST])
    size = len(t)
    b += struct.pack('>I', size)    
    for ti in t:
@ -216,7 +232,7 @@ def _dump_list(t):
    return b

 def _load_list(b):    
-    assert b[0] == _LIST    
+    assert comp_id(b[0], _LIST)    
    size = struct.unpack('>I', b[1:5])[0]
    idx = 5
    t = []
@ -227,7 +243,7 @@ def _load_list(b):
    return t, idx

 def _dump_np_array(np_array):
-    b = BYTES_CLASS([_NPARRAY])
+    b = init_BYTES([_NPARRAY])
    nparray_bytes = np.ndarray.dumps(np_array)
    size  = len(nparray_bytes)
    b += struct.pack('>I', size)
@ -235,13 +251,13 @@ def _dump_np_array(np_array):
    return b
    
 def _load_np_array(b):
-    assert b[0] == _NPARRAY
+    assert comp_id(b[0], _NPARRAY)
    size = struct.unpack('>I', b[1:5])[0]
    npa = np_load(b[5: size+5])
    return npa, size+5

 def _dump_getstate(ob):
-    b = BYTES_CLASS([_GETSTATE]) 
+    b = init_BYTES([_GETSTATE]) 
    state = ob.__getstate__()
    obj_type = ob.__class__.__name__
    b += _dump(str(obj_type))
@ -250,13 +266,13 @@ def _dump_getstate(ob):
    return b

 def _load_getstate(b):
-    assert b[0] == _GETSTATE
+    assert comp_id(b[0], _GETSTATE)
    obj_type, l_obj_type = _load(b[1:])
    state, l_state = _load(b[l_obj_type+1:])
    return (obj_type, state), l_obj_type+l_state+1

 def _dump_dict(ob):
-    b = BYTES_CLASS([_DICT]) 
+    b = init_BYTES([_DICT]) 
    keys = ob.keys()
    bin_keys = []
    for k in keys:
@ -265,7 +281,7 @@ def _dump_dict(ob):
    return b

 def _load_dict(b):
-    assert b[0] == _DICT
+    assert comp_id(b[0], _DICT)
    sorted_keys_value, l = _load_list(b[1:])
    res_dict = {}
    for i in range(len(sorted_keys_value)):
@ -289,7 +305,7 @@ def _dump(ob):
        return _dump_complex(ob)
    elif isinstance(ob, str):
        return _dump_str(ob)
-    elif isinstance(ob, (bytearray, bytes)):
+    elif isinstance(ob, bytes):
        return _dump_bytes(ob)    
    elif isinstance(ob, tuple):
        if hasattr(ob, '_fields'):
@ -309,6 +325,8 @@ def _dump(ob):
    
 def _load(b):
    identifier = b[0]
+    if isinstance(identifier, str):
+        identifier = ord(identifier)    
    if identifier == _SPEC:
        return _load_spec(b)
    elif identifier == _INT_32:
@ -336,19 +354,17 @@ def _load(b):
    elif identifier == _GETSTATE:
        return _load_getstate(b)    
    else:
-        if isinstance(identifier, str):
-            identifier = ord(identifier)
        raise BFLoadError("unknown identifier '{}'".format(hex(identifier)))
    
 def dump(ob):
    """
-        returns the binary footprint of the object 'ob' as BYTES_CLASS
+        returns the binary footprint of the object 'ob' as bytes
    """
    return _dump(ob)

 def load(b):
    """
-        reconstruct the object from the binary footprint given an BYTES_CLASS 'ba'
+        reconstruct the object from the binary footprint given an bytes 'ba'
    """
    return _load(b)[0]
        
--- a/tests/test_binfootprint.py
+++ b/tests/test_binfootprint.py
@ -20,17 +20,21 @@ import warnings
 warnings.filterwarnings('error')

 def test_atom():
-    atoms = [12345678, 3.141, 'hallo Welt', 'öäüß', True, False, None, 2**65, -3**65, bytearray([54,234,2]), b'\xff\fe\03']
+    atoms = [12345678, 3.141, 'hallo Welt', 'öäüß', True, False, None, 2**65, -3**65, b'\xff\fe\03']
    
    for atom in atoms:
        bin_atom = bfp.dump(atom)
+        assert type(bin_atom) is bfp.BIN_TYPE
        atom_prime = bfp.load(bin_atom)
        bin_ob_prime = bfp.dump(atom_prime)
        assert bin_atom == bin_ob_prime
        
+        hash(bin_atom)
+        
 def test_tuple():
    t = (12345678, 3.141, 'hallo Welt', 'öäüß', True, False, None, (3, tuple(), (4,5,None), 'test'))
    bin_tuple = bfp._dump_tuple(t)
+    assert type(bin_tuple) is bfp.BIN_TYPE
    t_prime = bfp._load_tuple(bin_tuple)[0]
    assert t == t_prime
    bin_ob_prime = bfp._dump(t_prime)
@ -39,6 +43,7 @@ def test_tuple():
 def test_nparray():
    ob = np.random.randn(3,53,2)
    bin_ob = bfp._dump(ob)
+    assert type(bin_ob) is bfp.BIN_TYPE
    ob_prime = bfp._load(bin_ob)[0]
    assert np.all(ob == ob_prime)
    bin_ob_prime = bfp._dump(ob_prime)
@ -56,6 +61,7 @@ def test_nparray():
 def test_list():
    ob = [1,2,3]
    bin_ob = bfp._dump(ob)
+    assert type(bin_ob) is bfp.BIN_TYPE
    ob_prime = bfp._load(bin_ob)[0]
    assert np.all(ob == ob_prime)
    bin_ob_prime = bfp._dump(ob_prime)
@ -82,6 +88,7 @@ def test_getstate():
    
    ob = T(4)
    bin_ob = bfp._dump(ob)
+    assert type(bin_ob) is bfp.BIN_TYPE
    ob_prime_state = bfp._load(bin_ob)[0]
    ob_prime = T.__new__(T)
    ob_prime.__setstate__(ob_prime_state[1])
@ -96,6 +103,7 @@ def test_named_tuple():
    obj = obj_type(12345678, 3.141, 'hallo Welt')
    
    bin_obj = bfp._dump(obj)
+    assert type(bin_obj) is bfp.BIN_TYPE
    obj_prime = bfp._load(bin_obj)[0]
    obj_prime_name, obj_prime_data = obj_prime
        
@ -110,12 +118,14 @@ def test_named_tuple():
 def test_complex():
    z = 3+4j
    bf = bfp.dump(z)
+    assert type(bf) is bfp.BIN_TYPE
    zr = bfp.load(bf)
    assert zr == z    
    
 def test_dict():
    a = {'a':1, 5:5, 3+4j:'l', False: b'ab4+#'}
    bf = bfp.dump(a)
+    assert type(bf) is bfp.BIN_TYPE
    a_restored = bfp.load(bf)
    
    for k in a: