From 724a140795ca9869359ca1d554abf48324eb1d92 Mon Sep 17 00:00:00 2001 From: gjoliver Date: Wed, 17 Nov 2021 10:27:00 -0800 Subject: [PATCH] [rllib] Make sure json can serialize result dict (#20439) We may have fields in the result dict that are not native Python floats, or are None. Make sure our results are json serializable. --- python/requirements/ml/requirements_rllib.txt | 2 +- release/rllib_tests/app_config.yaml | 2 ++ rllib/utils/test_utils.py | 9 +++++---- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt index e26422385..a9b500e40 100644 --- a/python/requirements/ml/requirements_rllib.txt +++ b/python/requirements/ml/requirements_rllib.txt @@ -15,7 +15,7 @@ pettingzoo==1.11.1 pymunk==6.0.0 supersuit==2.6.6 # For testing in MuJoCo-like envs (in PyBullet). -pybullet==3.1.7 +pybullet==3.2.0 # For tests on RecSim and Kaggle envs. recsim==0.2.4 tensorflow_estimator==2.6.0 diff --git a/release/rllib_tests/app_config.yaml b/release/rllib_tests/app_config.yaml index 18dd76969..5029c76d1 100755 --- a/release/rllib_tests/app_config.yaml +++ b/release/rllib_tests/app_config.yaml @@ -14,6 +14,8 @@ post_build_cmds: - pip uninstall -y ray || true - pip3 install -U {{ env["RAY_WHEELS"] | default("ray") }} - {{ env["RAY_WHEELS_SANITY_CHECK"] | default("echo No Ray wheels sanity check") }} + # TODO(jungong): remove once nightly image gets upgraded. + - pip install -U pybullet==3.2.0 # Clone the rl-experiments repo for offline-RL files. - git clone https://github.com/ray-project/rl-experiments.git - cp rl-experiments/halfcheetah-sac/2021-09-06/halfcheetah_expert_sac.zip ~/. diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py index c0716d003..ef57e2782 100644 --- a/rllib/utils/test_utils.py +++ b/rllib/utils/test_utils.py @@ -759,8 +759,9 @@ def run_learning_tests_from_yaml( # Record performance. 
stats[experiment] = { - "episode_reward_mean": episode_reward_mean, - "throughput": throughput, + "episode_reward_mean": float(episode_reward_mean), + "throughput": (float(throughput) + if throughput is not None else 0.0), } print(f" ... Desired reward={desired_reward}; " @@ -787,9 +788,9 @@ def run_learning_tests_from_yaml( # Create results dict and write it to disk. result = { - "time_taken": time_taken, + "time_taken": float(time_taken), "trial_states": dict(Counter([trial.status for trial in all_trials])), - "last_update": time.time(), + "last_update": float(time.time()), "stats": stats, "passed": [k for k, exp in checks.items() if exp["passed"]], "failures": {