From 724a140795ca9869359ca1d554abf48324eb1d92 Mon Sep 17 00:00:00 2001
From: gjoliver <jungong@anyscale.com>
Date: Wed, 17 Nov 2021 10:27:00 -0800
Subject: [PATCH] [rllib] Make sure json can serialize result dict (#20439)

We may have fields in the result dict that are not plain Python floats
or are None.
Make sure our results are JSON-serializable.
---
 python/requirements/ml/requirements_rllib.txt | 2 +-
 release/rllib_tests/app_config.yaml           | 2 ++
 rllib/utils/test_utils.py                     | 9 +++++----
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt
index e26422385..a9b500e40 100644
--- a/python/requirements/ml/requirements_rllib.txt
+++ b/python/requirements/ml/requirements_rllib.txt
@@ -15,7 +15,7 @@ pettingzoo==1.11.1
 pymunk==6.0.0
 supersuit==2.6.6
 # For testing in MuJoCo-like envs (in PyBullet).
-pybullet==3.1.7
+pybullet==3.2.0
 # For tests on RecSim and Kaggle envs.
 recsim==0.2.4
 tensorflow_estimator==2.6.0
diff --git a/release/rllib_tests/app_config.yaml b/release/rllib_tests/app_config.yaml
index 18dd76969..5029c76d1 100755
--- a/release/rllib_tests/app_config.yaml
+++ b/release/rllib_tests/app_config.yaml
@@ -14,6 +14,8 @@ post_build_cmds:
   - pip uninstall -y ray || true
   - pip3 install -U {{ env["RAY_WHEELS"] | default("ray") }}
   - {{ env["RAY_WHEELS_SANITY_CHECK"] | default("echo No Ray wheels sanity check") }}
+  # TODO(jungong): remove once nightly image gets upgraded.
+  - pip install -U pybullet==3.2.0
   # Clone the rl-experiments repo for offline-RL files.
   - git clone https://github.com/ray-project/rl-experiments.git
   - cp rl-experiments/halfcheetah-sac/2021-09-06/halfcheetah_expert_sac.zip ~/.
diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py
index c0716d003..ef57e2782 100644
--- a/rllib/utils/test_utils.py
+++ b/rllib/utils/test_utils.py
@@ -759,8 +759,9 @@ def run_learning_tests_from_yaml(
 
                 # Record performance.
                 stats[experiment] = {
-                    "episode_reward_mean": episode_reward_mean,
-                    "throughput": throughput,
+                    "episode_reward_mean": float(episode_reward_mean),
+                    "throughput": (float(throughput)
+                                   if throughput is not None else 0.0),
                 }
 
                 print(f" ... Desired reward={desired_reward}; "
@@ -787,9 +788,9 @@ def run_learning_tests_from_yaml(
 
     # Create results dict and write it to disk.
     result = {
-        "time_taken": time_taken,
+        "time_taken": float(time_taken),
         "trial_states": dict(Counter([trial.status for trial in all_trials])),
-        "last_update": time.time(),
+        "last_update": float(time.time()),
         "stats": stats,
         "passed": [k for k, exp in checks.items() if exp["passed"]],
         "failures": {