[rllib] Make sure json can serialize result dict (#20439)

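We may have fields in the result dict that are not plain Python floats or that are None. Cast the numeric fields to float (and replace a None throughput with 0.0) to make sure our results are JSON-serializable.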
2025-03-05 10:01:43 -05:00 · 2021-11-17 10:27:00 -08:00 · 2021-11-17 10:27:00 -08:00 · 724a140795
commit 724a140795
parent 03aec4e04a
3 changed files with 8 additions and 5 deletions
--- a/python/requirements/ml/requirements_rllib.txt
+++ b/python/requirements/ml/requirements_rllib.txt
@@ -15,7 +15,7 @@ pettingzoo==1.11.1
 pymunk==6.0.0
 supersuit==2.6.6
 # For testing in MuJoCo-like envs (in PyBullet).
-pybullet==3.1.7
+pybullet==3.2.0
 # For tests on RecSim and Kaggle envs.
 recsim==0.2.4
 tensorflow_estimator==2.6.0
--- a/release/rllib_tests/app_config.yaml
+++ b/release/rllib_tests/app_config.yaml
@@ -14,6 +14,8 @@ post_build_cmds:
  - pip uninstall -y ray || true
  - pip3 install -U {{ env["RAY_WHEELS"] | default("ray") }}
  - {{ env["RAY_WHEELS_SANITY_CHECK"] | default("echo No Ray wheels sanity check") }}
+  # TODO(jungong): remove once nightly image gets upgraded.
+  - pip install -U pybullet==3.2.0
  # Clone the rl-experiments repo for offline-RL files.
  - git clone https://github.com/ray-project/rl-experiments.git
  - cp rl-experiments/halfcheetah-sac/2021-09-06/halfcheetah_expert_sac.zip ~/.
--- a/rllib/utils/test_utils.py
+++ b/rllib/utils/test_utils.py
@@ -759,8 +759,9 @@ def run_learning_tests_from_yaml(

                # Record performance.
                stats[experiment] = {
-                    "episode_reward_mean": episode_reward_mean,
-                    "throughput": throughput,
+                    "episode_reward_mean": float(episode_reward_mean),
+                    "throughput": (float(throughput)
+                                   if throughput is not None else 0.0),
                }

                print(f" ... Desired reward={desired_reward}; "
@@ -787,9 +788,9 @@ def run_learning_tests_from_yaml(

    # Create results dict and write it to disk.
    result = {
-        "time_taken": time_taken,
+        "time_taken": float(time_taken),
        "trial_states": dict(Counter([trial.status for trial in all_trials])),
-        "last_update": time.time(),
+        "last_update": float(time.time()),
        "stats": stats,
        "passed": [k for k, exp in checks.items() if exp["passed"]],
        "failures": {