mirror of https://github.com/vale981/jobmanager
synced 2025-03-05 09:51:38 -05:00

commit 4dfa906017 (parent 521a29320e): save work in progress

5 changed files with 190 additions and 320 deletions

@@ -49,6 +49,7 @@ import warnings
 import binfootprint as bf
 import progress
 import logging
+import psutil
 
 # try:
 #     from logging.handlers import QueueHandler

@@ -299,7 +300,13 @@ class JobManager_Client(object):
 
     def connect(self):
         if self.manager_objects is None:
-            self.manager_objects = self.create_manager_objects()
+            try:
+                self.manager_objects = self.create_manager_objects()
+            except Exception as e:
+                log.critical("creating manager objects failed due to {}".format(type(e)))
+                log.info(traceback.format_exc())
+                raise
+
         else:
             log.info("already connected (at least shared object are available)")
 
@@ -465,7 +472,7 @@ class JobManager_Client(object):
             # job_q.get failed -> server down?
             except Exception as e:
                 log.error("Error when calling 'job_q_get'")
-                handle_unexpected_queue_error(e, log)
+                handle_unexpected_queue_error(e)
                 break
 
             # try to process the retrieved argument

@@ -504,7 +511,7 @@ class JobManager_Client(object):
             # fail_q.put failed -> server down?
             except Exception as e:
                 log.error('sending arg to fail_q failed')
-                handle_unexpected_queue_error(e, log)
+                handle_unexpected_queue_error(e)
                 break
             else:
                 log.debug('sending arg to fail_q was successful')

@@ -525,8 +532,8 @@ class JobManager_Client(object):
 
             except Exception as e:
                 log.error('sending result to result_q failed due to %s', type(e))
-                emergency_dump(arg, res, emergency_dump_path, log)
-                handle_unexpected_queue_error(e, log)
+                emergency_dump(arg, res, emergency_dump_path)
+                handle_unexpected_queue_error(e)
                 break
 
             del res

@@ -550,7 +557,7 @@ class JobManager_Client(object):
             # fail_q.put failed -> server down?
             except Exception as e:
                 log.error("put arg back to job_q failed due to %s", type(e))
-                JobManager_Client._handle_unexpected_queue_error(e, log)
+                handle_unexpected_queue_error(e)
             else:
                 log.debug("putting arg back to job_q was successful")
 

@@ -568,6 +575,10 @@ class JobManager_Client(object):
             log.info(stat)
 
         log.debug("JobManager_Client.__worker_func at end (PID %s)", os.getpid())
+        # log.debug("trigger sys.exit(0)")
+        # raise SystemExit
+        # log.debug("sys.exit(0) was triggered")
+
 
     def start(self):
         """

@@ -670,9 +681,15 @@ class JobManager_Client(object):
 
         for p in self.procs:
             log.debug("join %s PID %s", p, p.pid)
-            while p.is_alive():
-                log.debug("still alive %s PID %s", p, p.pid)
-                p.join(timeout=self.interval)
+            p.join()
+            # while p.is_alive():
+            #     log.debug("still alive %s PID %s", p, p.pid)
+            #     p.join(timeout=self.interval)
+            #     _proc = psutil.Process(p.pid)
+            #     log.debug(str(p.exitcode))
+            #     log.debug(str(_proc.connections()))
+            #     log.debug(str(_proc.children(recursive=True)))
+            #     log.debug(str(_proc.status()))
 
             log.debug("process %s PID %s was joined", p, p.pid)
 
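For orientation, the hunk above swaps a polling join for a plain blocking join (the old variant is kept as comments). A minimal sketch of the two strategies, independent of the jobmanager classes:

    import multiprocessing as mp

    def join_blocking(p: mp.Process):
        p.join()                        # block until the child process exits

    def join_polling(p: mp.Process, interval: float = 1.0):
        # wake up every 'interval' seconds, e.g. to log that the child is
        # still alive, instead of blocking indefinitely
        while p.is_alive():
            p.join(timeout=interval)
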
@@ -926,6 +943,9 @@ class JobManager_Server(object):
         """
         # will only be False when _shutdown was started in subprocess
 
+        self.__stop_SyncManager()
+        log.debug("SyncManager stopped!")
+
         # do user defined final processing
         self.process_final_result()
         log.debug("process_final_result done!")

@@ -952,8 +972,7 @@ class JobManager_Server(object):
 
         self.show_statistics()
 
-        self.__stop_SyncManager()
-        log.debug("SyncManager stopped!")
+
         log.info("JobManager_Server was successfully shut down")
 
     def show_statistics(self):

@@ -1333,28 +1352,28 @@ def call_connect_python3(connect, dest, reconnect_wait=2, reconnect_tries=3):
             log.error(traceback.format_stack()[-3].strip())
 
             if type(e) is ConnectionResetError: # ... when the destination hangs up on us
-                c = handler_connection_reset(dest, c, reconnect_wait, reconnect_tries, log)
+                c = handler_connection_reset(dest, c, reconnect_wait, reconnect_tries)
             elif type(e) is ConnectionRefusedError: # ... when the destination refuses our connection
-                handler_connection_refused(e, dest, log)
+                handler_connection_refused(e, dest)
             elif type(e) is AuthenticationError : # ... when the destination refuses our connection due authkey missmatch
-                handler_authentication_error(e, dest, log)
+                handler_authentication_error(e, dest)
             elif type(e) is RemoteError: # ... when the destination send us an error message
                 if 'KeyError' in e.args[0]:
-                    handler_remote_key_error(e, dest, log)
+                    handler_remote_key_error(e, dest)
                 elif 'ValueError: unsupported pickle protocol:' in e.args[0]:
-                    handler_remote_value_error(e, dest, log)
+                    handler_remote_value_error(e, dest)
                 else:
-                    handler_remote_error(e, dest, log)
+                    handler_remote_error(e, dest)
             elif type(e) is ValueError:
-                handler_value_error(e, log)
+                handler_value_error(e)
             else: # any other exception
-                handler_unexpected_error(e, log)
+                handler_unexpected_error(e)
 
         else: # no exception
             log.debug("connection to %s successfully established".format(dest))
             return True
 
-def call_connect_python2(connect, dest, verbose=1, reconnect_wait=2, reconnect_tries=3):
+def call_connect_python2(connect, dest, reconnect_wait=2, reconnect_tries=3):
     c = 0
     while True:
         try: # here we try re establish the connection

@@ -1369,24 +1388,24 @@ def call_connect_python2(connect, dest, verbose=1, reconnect_wait=2, reconnect_t
                 log.error("caught %s with args %s", type(e), e.args)
                 err_code = e.args[0]
                 if err_code == errno.ECONNRESET: # ... when the destination hangs up on us
-                    c = handler_connection_reset(dest, c, reconnect_wait, reconnect_tries, verbose)
+                    c = handler_connection_reset(dest, c, reconnect_wait, reconnect_tries)
                 elif err_code == errno.ECONNREFUSED: # ... when the destination refuses our connection
-                    handler_connection_refused(e, dest, verbose)
+                    handler_connection_refused(e, dest)
                 else:
-                    handler_unexpected_error(e, verbose)
+                    handler_unexpected_error(e)
             elif type(e) is AuthenticationError : # ... when the destination refuses our connection due authkey missmatch
-                handler_authentication_error(e, dest, verbose)
+                handler_authentication_error(e, dest)
             elif type(e) is RemoteError: # ... when the destination send us an error message
                 if 'KeyError' in e.args[0]:
-                    handler_remote_key_error(e, verbose, dest)
+                    handler_remote_key_error(e, dest)
                 elif 'ValueError: unsupported pickle protocol:' in e.args[0]:
-                    handler_remote_value_error(e, verbose, dest)
+                    handler_remote_value_error(e, dest)
                 else:
-                    handler_remote_error(e, verbose, dest)
+                    handler_remote_error(e, dest)
             elif type(e) is ValueError:
-                handler_value_error(e, verbose)
+                handler_value_error(e)
             else: # any other exception
-                handler_unexpected_error(e, verbose)
+                handler_unexpected_error(e)
 
         else: # no exception
             log.debug("connection to %s successfully established", dest)

@@ -1435,24 +1454,24 @@ def getDateForFileName(includePID = False):
         name += "_{}".format(os.getpid())
     return name
 
-def handler_authentication_error(e, dest, log):
+def handler_authentication_error(e, dest):
     log.error("authentication error")
     log.info("Authkey specified does not match the authkey at destination side!")
     raise e
 
-def handler_broken_pipe_error(e, log):
+def handler_broken_pipe_error(e):
     log.error("broken pip error")
     log.info("This usually means that an established connection was closed\n")
     log.info("does not exists anymore, probably the server went down")
     raise e
 
-def handler_connection_refused(e, dest, log):
+def handler_connection_refused(e, dest):
     log.error("connection refused error")
     log.info("This usually means that no matching Manager object was instanciated at destination side!")
     log.info("Either there is no Manager running at all, or it is listening to another port.")
     raise JMConnectionRefusedError(e)
 
-def handler_connection_reset(dest, c, reconnect_wait, reconnect_tries, log):
+def handler_connection_reset(dest, c, reconnect_wait, reconnect_tries):
     log.error("connection reset error")
     log.info("During 'connect' this error might be due to firewall settings"+
              "or other TPC connections controlling mechanisms!")

@@ -1465,33 +1484,33 @@ def handler_connection_reset(dest, c, reconnect_wait, reconnect_tries, log):
         time.sleep(reconnect_wait)
     return c
 
-def handler_eof_error(e, log):
+def handler_eof_error(e):
     log.error("EOF error")
     log.info("This usually means that server did not replay, although the connection is still there.\n"+
              "This is due to the fact that the connection is in 'timewait' status for about 60s\n"+
              "after the server went down inappropriately.")
     raise e
 
-def handler_remote_error(e, dest, log):
+def handler_remote_error(e, dest):
     log.error("remote error")
     log.info("The server %s send an RemoteError message!\n%s", dest, e.args[0])
     raise RemoteError(e.args[0])
 
-def handler_remote_key_error(e, dest, log):
+def handler_remote_key_error(e, dest):
     log.error("remote key error")
     log.info("'KeyError' detected in RemoteError message from server %s!\n"+
              "This hints to the fact that the actual instace of the shared object on the server side has changed,\n"+
              "for example due to a server restart you need to reinstanciate the proxy object.", dest)
     raise RemoteKeyError(e.args[0])
 
-def handler_remote_value_error(e, dest, log):
+def handler_remote_value_error(e, dest):
     log.error("remote value error")
     log.info("'ValueError' due to 'unsupported pickle protocol' detected in RemoteError from server %s!\n"+
              "You might have tried to connect to a SERVER running with an OLDER python version.\n"+
              "At this stage (and probably for ever) this should be avoided!", dest)
     raise RemoteValueError(e.args[0])
 
-def handler_value_error(e, log):
+def handler_value_error(e):
     log.error("value error")
     if 'unsupported pickle protocol' in e.args[0]:
         log.info("'ValueError' due to 'unsupported pickle protocol'!\n"

@@ -1499,16 +1518,16 @@ def handler_value_error(e, log):
                  "At this stage (and probably for ever) this should be avoided.\n")
     raise e
 
-def handler_unexpected_error(e, log):
+def handler_unexpected_error(e):
     log.error("unexpected error of type %s and args %s", type(e), e.args)
     raise e
 
-def handle_unexpected_queue_error(e, log):
+def handle_unexpected_queue_error(e):
     log.error("unexpected error of type %s and args %s\n"+
               "I guess the server went down, can't do anything, terminate now!", type(e), e.args)
     log.debug(traceback.print_exc())
 
-def emergency_dump(arg, res, path, log):
+def emergency_dump(arg, res, path):
     now = datetime.now().isoformat()
     pid = os.getpid()
     fname = "{}_pid_{}".format(now, pid)
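Taken together, the handler hunks above drop the explicit 'log' parameter and rely on the module-level logger instead. A minimal sketch of that pattern (assuming, as the new signatures suggest, a logger defined once at module scope):

    import logging

    log = logging.getLogger(__name__)

    def handler_unexpected_error(e):
        # report via the module's own logger instead of a passed-in one
        log.error("unexpected error of type %s and args %s", type(e), e.args)
        raise e
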
@@ -1566,7 +1585,6 @@ def proxy_operation_decorator_python3(proxy, operation, reconnect_wait=2, reconn
 
         try:
             res = o(*args, **kwargs)
-
         except queue.Empty as e:
             log.info("operation '%s' -> %s FAILED due to '%s'", operation, dest, type(e))
             raise e

@@ -1586,11 +1604,11 @@ def proxy_operation_decorator_python3(proxy, operation, reconnect_wait=2, reconn
                 time.sleep(reconnect_wait)
                 continue
             elif type(e) is BrokenPipeError:
-                handler_broken_pipe_error(e, log)
+                handler_broken_pipe_error(e)
             elif type(e) is EOFError:
-                handler_eof_error(e, log)
+                handler_eof_error(e)
             else:
-                handler_unexpected_error(e, log)
+                handler_unexpected_error(e)
         else: # SUCCESS -> return True
             log.debug("operation '%s' successfully executed", operation)
             return res
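The decorator hunks in this region all follow one retry shape: attempt the proxy operation, treat queue.Empty as expected control flow, and reconnect with a delay on connection-type failures. A condensed sketch of that loop (not the repo's exact code; the names here are illustrative):

    import queue
    import time

    def retry_operation(o, reconnect, reconnect_wait=2, reconnect_tries=3):
        tries = 0
        while True:
            try:
                return o()
            except queue.Empty:
                raise                    # an empty queue is a normal outcome, not a failure
            except (BrokenPipeError, EOFError):
                tries += 1
                if tries > reconnect_tries:
                    raise                # give up after the configured number of attempts
                reconnect()              # try to re-establish the connection
                time.sleep(reconnect_wait)
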
@@ -1627,11 +1645,14 @@ def proxy_operation_decorator_python2(proxy, operation, reconnect_wait=2, reconn
                 raise e
             time.sleep(reconnect_wait)
             continue
 
         log.debug("execute operation '%s' -> %s", operation, dest)
 
         try:
             res = o(*args, **kwargs)
+        except queue.Empty as e:
+            log.info("operation '%s' -> %s FAILED due to '%s'", operation, dest, type(e))
+            raise e
         except Exception as e:
             log.warning("operation '%s' -> %s FAILED due to '%s'", operation, dest, type(e))
             log.debug("show traceback.format_stack()[-3]\n%s", traceback.format_stack()[-3].strip())

@@ -1652,11 +1673,11 @@ def proxy_operation_decorator_python2(proxy, operation, reconnect_wait=2, reconn
                     time.sleep(reconnect_wait)
                     continue
                 else:
-                    handler_unexpected_error(e, log)
+                    handler_unexpected_error(e)
             elif type(e) is EOFError:
-                handler_eof_error(e, log)
+                handler_eof_error(e)
             else:
-                handler_unexpected_error(e, log)
+                handler_unexpected_error(e)
         else: # SUCCESS -> return True
             log.debug("operation '%s' successfully executed", operation)
             return res

@@ -1,67 +1,9 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import binfootprint
+
 from .jobmanager import JobManager_Server
-import pickle
-
-def recursive_scan_for_instance(obj, type, explicit_exclude = None ):
-    """
-    try to do some recursive check to see whether 'obj' is of type
-    'type' or contains items of 'type' type.
-
-    if obj is a mapping (like dict) this will only check
-    for item iterated over via
-
-        for item in obj
-
-    which corresponds to the keys in the dict case.
-
-    The explicit_exclude argument may be a tuple of types for
-    some explicit checking in the sense that if obj is an
-    instance of one of the type given by explicit_exclude
-    we know it is NOT an instance of type.
-    """
-
-    # check this object for type
-    if isinstance(obj, type):
-        return True
-
-    # check for some explicit types in order to conclude
-    # that obj is not of type
-    # see dict example
-    if explicit_exclude is not None:
-        if isinstance(obj, explicit_exclude):
-            return False
-
-
-    # if not of desired type, try to iterate and check each item for type
-    try:
-        for i in obj:
-            # return True, if object is of type or contains type
-            if recursive_scan_for_instance(i, type) == True:
-                return True
-    except:
-        pass
-
-    # either object is not iterable and not of type, or each item is not of type -> return False
-    return False
-
-def recursive_scan_for_dict_instance(obj):
-    # here we explicitly check against the 'str' class
-    # as it is iterable, but can not contain an dict as item, only characters
-    return recursive_scan_for_instance(obj, type=dict, explicit_exclude=(str, ))
-
-def data_as_binary_key(data):
-    # since the hash value of a string is randomly seeded each time python
-    # is started -> the order of the entries in a dict is not guaranteed
-    # and therefore its binary representation may vary
-    # this forbids to use dicts as a key for persistent data structures
-    if recursive_scan_for_dict_instance(data):
-        raise RuntimeError("data used as 'key' must not include dictionaries!")
-
-    # protocol 2 ensures backwards compatibility
-    # (at least here) down to Python 2.3
-    return pickle.dumps(data, protocol=2)
-
 class PersistentData_Server(JobManager_Server):
     def __init__(self,

@@ -91,13 +33,13 @@ class PersistentData_Server(JobManager_Server):
         return True, if a new data set was added (key not already in pds)
         otherwise false
         """
-        key = data_as_binary_key(arg.id)
+        key = binfootprint.dump(arg.id)
         has_key = key in self.pds
         self.pds[key] = (arg, result)
         return not has_key
 
     def put_arg(self, a):
-        a_bin = data_as_binary_key(a.id)
+        a_bin = binfootprint.dump(a.id)
         if self.overwrite or (not a_bin in self.pds):
            JobManager_Server.put_arg(self, a)
            return True
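The motivation for this swap is spelled out in the removed comments: pickle-based keys are fragile because dict iteration order depends on per-run hash randomization, so the same value could serialize to different bytes. binfootprint is built to produce a stable binary footprint instead. A minimal sketch of the property the server now relies on:

    import binfootprint

    # the same value always maps to the same key bytes, within and across runs
    key_a = binfootprint.dump([1, 'a', (2.5, 'b')])
    key_b = binfootprint.dump([1, 'a', (2.5, 'b')])
    assert key_a == key_b
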
@@ -8,6 +8,7 @@ OUTFILE='pytest_out'
 PYTHON="python"
 PYTHON2_7="python2.7"
 PYTHON3_4="python3.4"
+PYTHON3_5="python3.5"
 
 PYLIST=( $PYTHON )
 

@@ -17,7 +18,7 @@ while getopts ":p:ahn" opt; do
     case $opt in
         a)
             echo "run all!" >&2
-            PYLIST=( $PYTHON2_7 $PYTHON3_4 )
+            PYLIST=( $PYTHON2_7 $PYTHON3_4 $PYTHON3_5 )
             ;;
         p)
             if [ "$OPTARG" = "2.7" ]; then

@@ -23,7 +23,7 @@ progress.log.setLevel(logging.ERROR)
 
 from jobmanager.jobmanager import log as jm_log
 
-jm_log.setLevel(logging.INFO)
+jm_log.setLevel(logging.WARNING)
 
 import warnings
 warnings.filterwarnings('error')

@@ -186,20 +186,24 @@ def test_jobmanager_basic():
     p_client = None
 
     try:
-
         # start a server
         p_server = mp.Process(target=start_server, args=(n,False))
         p_server.start()
+        time.sleep(0.5)
+        # server needs to be running
         assert p_server.is_alive()
 
-
         # start client
         p_client = mp.Process(target=start_client)
         p_client.start()
 
         p_client.join(3)
+        # client should have processed all
         assert not p_client.is_alive(), "the client did not terminate on time!"
+        # client must not throw an exception
+        assert p_client.exitcode == 0, "the client raised an exception"
         p_server.join(3)
+        # server should have come down
         assert not p_server.is_alive(), "the server did not terminate on time!"
 
         print("[+] client and server terminated")

@@ -207,84 +211,67 @@ def test_jobmanager_basic():
         fname = 'jobmanager.dump'
         with open(fname, 'rb') as f:
             data = jobmanager.JobManager_Server.static_load(f)
 
         final_res_args_set = {a[0] for a in data['final_result']}
 
         set_ref = set(range(1,n))
 
         intersect = set_ref - final_res_args_set
 
         assert len(intersect) == 0, "final result does not contain all arguments!"
         print("[+] all arguments found in final_results")
     except:
         if p_server is not None:
-            os.kill(p_server.pid, signal.SIGINT)
+            p_server.terminate()
         if p_client is not None:
-            os.kill(p_client.pid, signal.SIGINT)
+            p_client.terminate()
         raise
 
 
 def test_jobmanager_server_signals():
     """
     start a server (no client), shutdown, check dump
     """
     global PORT
-    PORT += 1
-    print("## TEST SIGTERM ##")
-    p_server = mp.Process(target=start_server, args=(30,))
-    p_server.start()
-    time.sleep(1)
-    print(" send SIGTERM")
-    os.kill(p_server.pid, signal.SIGTERM)
-    assert p_server.is_alive()
-    print("[+] still alive (assume shut down takes some time)")
-    p_server.join(15)
-    assert not p_server.is_alive(), "timeout for server shutdown reached"
-    print("[+] now terminated (timeout of 15s not reached)")
+    timeout = 5
+    n = 15
+    sigs = [('SIGTERM', signal.SIGTERM), ('SIGINT', signal.SIGINT)]
 
-    fname = 'jobmanager.dump'
-    with open(fname, 'rb') as f:
-        data = jobmanager.JobManager_Server.static_load(f)
-
-    args_set = set(data['args_dict'].keys())
-    args_ref = range(1,30)
-    ref_set = set()
-    for a in args_ref:
-        ref_set.add(binfootprint.dump(a))
-
-    assert len(args_set) == len(ref_set)
-    assert len(ref_set - args_set) == 0
-    print("[+] args_set from dump contains all arguments")
-
-    PORT += 1
-    print("## TEST SIGINT ##")
-    p_server = mp.Process(target=start_server, args=(30,))
-    p_server.start()
-    time.sleep(1)
-    print(" send SIGINT")
-    os.kill(p_server.pid, signal.SIGINT)
-    assert p_server.is_alive()
-    print("[+] still alive (assume shut down takes some time)")
-    p_server.join(15)
-    assert not p_server.is_alive(), "timeout for server shutdown reached"
-    print("[+] now terminated (timeout of 15s not reached)")
-
-    fname = 'jobmanager.dump'
-    with open(fname, 'rb') as f:
-        data = jobmanager.JobManager_Server.static_load(f)
-
-    args_set = set(data['args_dict'].keys())
-    args_ref = range(1,30)
-    ref_set = set()
-    for a in args_ref:
-        ref_set.add(binfootprint.dump(a))
-
-    assert len(args_set) == len(ref_set)
-    assert len(ref_set - args_set) == 0
-    print("[+] args_set from dump contains all arguments")
+    for signame, sig in sigs:
+        PORT += 1
+        p_server = None
+        try:
+            print("## TEST {} ##".format(signame))
+            p_server = mp.Process(target=start_server, args=(n,))
+            p_server.start()
+            time.sleep(0.5)
+            assert p_server.is_alive()
+            print(" send {}".format(signame))
+            os.kill(p_server.pid, sig)
+            print("[+] still alive (assume shut down takes some time)")
+            p_server.join(timeout)
+            assert not p_server.is_alive(), "timeout for server shutdown reached"
+            print("[+] now terminated (timeout of {}s not reached)".format(timeout))
+
+            fname = 'jobmanager.dump'
+            with open(fname, 'rb') as f:
+                data = jobmanager.JobManager_Server.static_load(f)
+
+            args_set = set(data['args_dict'].keys())
+            args_ref = range(1,n)
+            ref_set = set()
+            for a in args_ref:
+                ref_set.add(binfootprint.dump(a))
+
+            assert len(args_set) == len(ref_set)
+            assert len(ref_set - args_set) == 0
+            print("[+] args_set from dump contains all arguments")
+        except:
+            if p_server is not None:
+                p_server.terminate()
+            raise
 
 def test_shutdown_server_while_client_running():
     """
-    start server with 1000 elements in queue
+    start server with 100 elements in queue
 
     start client

@@ -296,61 +283,66 @@ def test_shutdown_server_while_client_running():
     all arguments given
     """
     global PORT
-    PORT += 1
 
+    n = 100
+    timeout = 10
 
-    n = 1000
+    sigs = [('SIGTERM', signal.SIGTERM), ('SIGINT', signal.SIGINT)]
+    sigs = [('SIGTERM', signal.SIGTERM)]
 
-    p_server = mp.Process(target=start_server, args=(n,))
-    p_server.start()
+    for signame, sig in sigs:
+        PORT += 1
 
-    time.sleep(1)
-
-    p_client = mp.Process(target=start_client)
-    p_client.start()
-
-    time.sleep(2)
-
-    os.kill(p_server.pid, signal.SIGTERM)
-
-    p_server.join(200)
-    p_client.join(200)
-
-    try:
-        assert not p_server.is_alive()
-    except:
-        p_server.terminate()
-        raise
-
-    try:
-        assert not p_client.is_alive()
-    except:
-        p_client.terminate()
-        raise
+        p_server = None
+        p_client = None
 
-    fname = 'jobmanager.dump'
-    with open(fname, 'rb') as f:
-        data = jobmanager.JobManager_Server.static_load(f)
-
-    args_set = set(data['args_dict'].keys())
-    final_result = data['final_result']
-
-    final_res_args = {binfootprint.dump(a[0]) for a in final_result}
+        try:
+            p_server = mp.Process(target=start_server, args=(n,))
+            p_server.start()
+            time.sleep(0.5)
+            assert p_server.is_alive()
+
+            p_client = mp.Process(target=start_client)
+            p_client.start()
+            time.sleep(4)
+            assert p_client.is_alive()
+
+            print(" send {} to server".format(signame))
+            os.kill(p_server.pid, sig)
+
+            p_server.join(timeout)
+            assert not p_server.is_alive(), "server did not shut down on time"
+            p_client.join(timeout)
+            assert not p_client.is_alive(), "client did not shut down on time"
+
+            fname = 'jobmanager.dump'
+            with open(fname, 'rb') as f:
+                data = jobmanager.JobManager_Server.static_load(f)
 
-    args_ref = range(1,n)
-    set_ref = set()
-    for a in args_ref:
-        set_ref.add(binfootprint.dump(a))
-
-    set_recover = set(args_set) | set(final_res_args)
-
-    intersec_set = set_ref-set_recover
-
-    if len(intersec_set) == 0:
-        print("[+] no arguments lost!")
-
-    assert len(intersec_set) == 0, "NOT all arguments found in dump!"
+            args_set = set(data['args_dict'].keys())
+            final_result = data['final_result']
+
+            final_res_args = {binfootprint.dump(a[0]) for a in final_result}
+
+            args_ref = range(1,n)
+            set_ref = set()
+            for a in args_ref:
+                set_ref.add(binfootprint.dump(a))
+
+            set_recover = set(args_set) | set(final_res_args)
+
+            intersec_set = set_ref-set_recover
+
+            if len(intersec_set) == 0:
+                print("[+] no arguments lost!")
+
+            assert len(intersec_set) == 0, "NOT all arguments found in dump!"
+        except:
+            if p_server is not None:
+                p_server.terminate()
+            if p_client is not None:
+                p_client.terminate()
+            raise
 
 def test_shutdown_client():
     shutdown_client(signal.SIGTERM)
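The two test hunks above converge on one pattern: drive the server and client as real processes, deliver a signal, then bound the wait and assert a clean exit. Distilled into a sketch using only the standard library (the target function stands in for whatever the test wants to run):

    import multiprocessing as mp
    import os
    import signal
    import time

    def check_graceful_shutdown(target, sig=signal.SIGTERM, timeout=5):
        p = mp.Process(target=target)
        p.start()
        time.sleep(0.5)                  # give the process time to come up
        os.kill(p.pid, sig)
        p.join(timeout)                  # bounded wait instead of a bare join()
        try:
            assert not p.is_alive(), "no shutdown within {}s".format(timeout)
        finally:
            if p.is_alive():
                p.terminate()            # never leak the child on failure
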
@@ -752,7 +744,9 @@ def test_hum_size():
     assert humanize_size(1024**3) == '1.00TB'
     assert humanize_size(1024**4) == '1024.00TB'
 
 if __name__ == "__main__":
+    jm_log.setLevel(logging.DEBUG)
+    progress.log.setLevel(logging.DEBUG)
     if len(sys.argv) > 1:
         pass
     else:

@@ -763,10 +757,10 @@ if __name__ == "__main__":
         # test_Signal_to_sys_exit,
         # test_Signal_to_terminate_process_list,
 
-        test_jobmanager_basic,
-        # test_jobmanager_server_signals,
-        # test_shutdown_server_while_client_running,
-        # test_shutdown_client,
+        # test_jobmanager_basic,
+        # test_jobmanager_server_signals,
+        test_shutdown_server_while_client_running,
+        # test_shutdown_client,
         # test_check_fail,
         # test_jobmanager_read_old_stat,
         # test_client_status,

@@ -7,95 +7,7 @@ from os.path import abspath, dirname, split
 sys.path = [split(dirname(abspath(__file__)))[0]] + sys.path
 
 import jobmanager as jm
 
-def bin_list():
-    import pickle
-    a = ['cvbnm', 'qwert', 'asdfg']
-    print(pickle.dumps(a))
-
-def bin_dict():
-    import pickle
-    a = {'cvbnm': 1, 'qwert': 2, 'asdfg': 3}
-    print(pickle.dumps(a))
-
-def see_bin_data():
-    import multiprocessing as mp
-    mp.set_start_method('spawn')
-
-    for i in range(10):
-        p = mp.Process(target = bin_list)
-        p.start()
-        p.join()
-
-    for i in range(10):
-        p = mp.Process(target = bin_dict)
-        p.start()
-        p.join()
-
-def test_recursive_type_scan():
-    d = {'a': 1, 'b':2}
-    l = [2, 3]
-    t = ('3','4')
-    i = 1
-    f = 3.4
-    s = 's'
-
-    a1 = [l, t, d]
-    a2 = [l, t]
-
-    a3 = (d, d, d)
-    a4 = (i, l, a1)
-
-
-    ###
-    # GENERAL DICT SCAN
-    ###
-    assert jm.servers.recursive_scan_for_instance(obj=d, type=dict) == True
-
-    assert jm.servers.recursive_scan_for_instance(obj=l, type=dict) == False
-    assert jm.servers.recursive_scan_for_instance(obj=t, type=dict) == False
-    assert jm.servers.recursive_scan_for_instance(obj=i, type=dict) == False
-    assert jm.servers.recursive_scan_for_instance(obj=f, type=dict) == False
-    assert jm.servers.recursive_scan_for_instance(obj=s, type=dict) == False
-
-    assert jm.servers.recursive_scan_for_instance(obj=a1, type=dict) == True
-    assert jm.servers.recursive_scan_for_instance(obj=a2, type=dict) == False
-    assert jm.servers.recursive_scan_for_instance(obj=a3, type=dict) == True
-    assert jm.servers.recursive_scan_for_instance(obj=a4, type=dict) == True
-
-    ###
-    # SPECIFIC DICT SCAN
-    ###
-    assert jm.servers.recursive_scan_for_dict_instance(obj=d) == True
-
-    assert jm.servers.recursive_scan_for_dict_instance(obj=l) == False
-    assert jm.servers.recursive_scan_for_dict_instance(obj=t) == False
-    assert jm.servers.recursive_scan_for_dict_instance(obj=i) == False
-    assert jm.servers.recursive_scan_for_dict_instance(obj=f) == False
-    assert jm.servers.recursive_scan_for_dict_instance(obj=s) == False
-
-    assert jm.servers.recursive_scan_for_dict_instance(obj=a1) == True
-    assert jm.servers.recursive_scan_for_dict_instance(obj=a2) == False
-    assert jm.servers.recursive_scan_for_dict_instance(obj=a3) == True
-    assert jm.servers.recursive_scan_for_dict_instance(obj=a4) == True
-
-    ###
-    # INT SCAN
-    ###
-
-    assert jm.servers.recursive_scan_for_instance(obj = i, type=int) == True
-    assert jm.servers.recursive_scan_for_instance(obj = l, type=int) == True
-    assert jm.servers.recursive_scan_for_instance(obj = a1, type=int) == True
-    assert jm.servers.recursive_scan_for_instance(obj = a2, type=int) == True
-    assert jm.servers.recursive_scan_for_instance(obj = a4, type=int) == True
-
-    print(jm.servers.recursive_scan_for_instance(obj = d, type=int))
-    print(jm.servers.recursive_scan_for_instance(obj = a3, type=int))
-
-    for i in d:
-        print(i)
-
-
 if __name__ == "__main__":
-    test_recursive_type_scan()
-    # see_bin_data()
     pass