From 09886d7ab8626590ffb22e9a7542480d1b509182 Mon Sep 17 00:00:00 2001
From: Sven Mika
Date: Mon, 23 May 2022 08:18:44 +0200
Subject: [PATCH] [RLlib] Upgrade gym 0.23 (#24171)

---
 doc/source/rllib/doc_code/training.py         |   2 +-
 .../deployment-graph-e2e-tutorial.md          |   2 +-
 .../tests/test_client_library_integration.py  |   2 +-
 python/requirements.txt                       |   2 +-
 python/requirements/ml/requirements_rllib.txt |   2 +-
 python/requirements/ml/requirements_tune.txt  |   2 +-
 .../app_config.yaml                           |   2 +-
 release/long_running_tests/app_config.yaml    |   4 +-
 release/long_running_tests/app_config_np.yaml |   4 +-
 release/rllib_tests/app_config.yaml           |   2 +-
 .../tune_tests/cloud_tests/app_config.yaml    |   2 +-
 .../tune_tests/cloud_tests/app_config_ml.yaml |   2 +-
 rllib/BUILD                                   |  20 +--
 rllib/agents/mock.py                          |   5 +-
 rllib/agents/trainer.py                       | 146 +++++++++---------
 rllib/agents/trainer_config.py                |   9 --
 rllib/algorithms/alpha_zero/alpha_zero.py     |   2 +-
 rllib/algorithms/ars/ars.py                   |   2 -
 rllib/algorithms/es/es.py                     |   2 -
 rllib/env/multi_agent_env.py                  |   2 +-
 rllib/env/tests/test_record_env_wrapper.py    | 100 ------------
 rllib/env/tests/test_remote_worker_envs.py    |  33 ++--
 rllib/env/utils.py                            |  49 ------
 rllib/evaluate.py                             |  20 +--
 rllib/evaluation/rollout_worker.py            |  18 +--
 rllib/evaluation/tests/test_rollout_worker.py |  11 +-
 rllib/evaluation/worker_set.py                |   1 -
 rllib/examples/env_rendering_and_recording.py |  16 --
 .../remote_base_env_with_custom_api.py        |   7 +-
 rllib/examples/rnnsac_stateless_cartpole.py   |  16 +-
 rllib/utils/tests/test_check_env.py           |  13 --
 31 files changed, 133 insertions(+), 367 deletions(-)
 delete mode 100644 rllib/env/tests/test_record_env_wrapper.py

diff --git a/doc/source/rllib/doc_code/training.py b/doc/source/rllib/doc_code/training.py
index a248bc964..5317e922e 100644
--- a/doc/source/rllib/doc_code/training.py
+++ b/doc/source/rllib/doc_code/training.py
@@ -80,7 +80,7 @@ _____________________________________________________________________
 # __get_q_values_dqn_start__
 # Get a reference to the model through the policy
 import numpy as np
-from ray.rllib.agents.dqn import DQNTrainer
+from ray.rllib.algorithms.dqn import DQNTrainer
 
 trainer = DQNTrainer(env="CartPole-v0", config={"framework": "tf2"})
 model = trainer.get_policy().model
diff --git a/doc/source/serve/deployment-graph/deployment-graph-e2e-tutorial.md b/doc/source/serve/deployment-graph/deployment-graph-e2e-tutorial.md
index ac8cdb3df..faa94cfca 100644
--- a/doc/source/serve/deployment-graph/deployment-graph-e2e-tutorial.md
+++ b/doc/source/serve/deployment-graph/deployment-graph-e2e-tutorial.md
@@ -39,7 +39,7 @@ __[Full Ray Enhancement Proposal, REP-001: Serve Pipeline](https://github.com/ra
 
 ## Concepts
 
-- **Deployment**: Scalable, upgradeable group of actors managed by Ray Serve. __[See docs for detail](https://docs.ray.io/en/master/serve/core-apis.html#core-api-deployments)__
+- **Deployment**: Scalable, upgradeable group of actors managed by Ray Serve. __[See docs for detail](https://docs.ray.io/en/master/serve/package-ref.html#deployment-api)__
 - **DeploymentNode**: Smallest unit in a graph, created by calling `.bind()` on a serve decorated class or function, backed by a Deployment.
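Note on the doc_code change above: the DQN import moves from `ray.rllib.agents.dqn` to `ray.rllib.algorithms.dqn`. A minimal usage sketch of the updated path (illustrative only; assumes `ray[rllib]` and a compatible gym version are installed):

    from ray.rllib.algorithms.dqn import DQNTrainer  # formerly: ray.rllib.agents.dqn

    # Build a trainer on CartPole and grab the underlying Q-model,
    # exactly as the doc snippet above does.
    trainer = DQNTrainer(env="CartPole-v0", config={"framework": "tf2"})
    model = trainer.get_policy().model
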
diff --git a/python/ray/tests/test_client_library_integration.py b/python/ray/tests/test_client_library_integration.py index 6d5bef85e..d87be93e6 100644 --- a/python/ray/tests/test_client_library_integration.py +++ b/python/ray/tests/test_client_library_integration.py @@ -10,7 +10,7 @@ from ray._private.client_mode_hook import enable_client_mode, client_mode_should @pytest.mark.skip(reason="KV store is not working properly.") def test_rllib_integration(ray_start_regular_shared): with ray_start_client_server(): - import ray.rllib.agents.dqn as dqn + import ray.rllib.algorithms.dqn as dqn # Confirming the behavior of this context manager. # (Client mode hook not yet enabled.) diff --git a/python/requirements.txt b/python/requirements.txt index 945e1fb3f..915f382e5 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -29,7 +29,7 @@ virtualenv ## setup.py extras dm_tree flask -gym==0.21.0; python_version >= '3.7' +gym>=0.21.0,<0.24.0; python_version >= '3.7' gym==0.19.0; python_version < '3.7' lz4 scikit-image diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt index 7ccbca042..fa44e6823 100644 --- a/python/requirements/ml/requirements_rllib.txt +++ b/python/requirements/ml/requirements_rllib.txt @@ -4,7 +4,7 @@ # --------------------- # Atari autorom[accept-rom-license] -gym[atari]==0.21.0; python_version >= '3.7' +gym>=0.21.0,<0.24.0; python_version >= '3.7' gym[atari]==0.19.0; python_version < '3.7' # Kaggle envs. kaggle_environments==1.7.11 diff --git a/python/requirements/ml/requirements_tune.txt b/python/requirements/ml/requirements_tune.txt index a6f007bc2..cd68943d8 100644 --- a/python/requirements/ml/requirements_tune.txt +++ b/python/requirements/ml/requirements_tune.txt @@ -11,7 +11,7 @@ freezegun==1.1.0 gluoncv==0.10.1.post0 gpy==1.10.0 autorom[accept-rom-license] -gym[atari]==0.21.0; python_version >= '3.7' +gym>=0.21.0,<0.24.0; python_version >= '3.7' gym[atari]==0.19.0; python_version < '3.7' h5py==3.1.0 hpbandster==0.7.4 diff --git a/release/long_running_distributed_tests/app_config.yaml b/release/long_running_distributed_tests/app_config.yaml index c20fa7c1a..8ef60df93 100644 --- a/release/long_running_distributed_tests/app_config.yaml +++ b/release/long_running_distributed_tests/app_config.yaml @@ -7,7 +7,7 @@ python: pip_packages: - pytest - awscli - - gym==0.21.0 + - gym conda_packages: [] post_build_cmds: diff --git a/release/long_running_tests/app_config.yaml b/release/long_running_tests/app_config.yaml index 3208f5ead..25786cc9f 100755 --- a/release/long_running_tests/app_config.yaml +++ b/release/long_running_tests/app_config.yaml @@ -7,7 +7,7 @@ debian_packages: python: pip_packages: - - gym[atari]==0.21.0 + - gym[atari] - pytest - tensorflow conda_packages: [] @@ -15,7 +15,7 @@ python: post_build_cmds: - 'rm -r wrk || true && git clone https://github.com/wg/wrk.git /tmp/wrk && cd /tmp/wrk && make -j && sudo cp wrk /usr/local/bin' - pip3 install pytest || true - - pip3 install -U ray[all] gym[atari]==0.21.0 autorom[accept-rom-license] + - pip3 install -U ray[all] gym[atari] autorom[accept-rom-license] - pip3 install ray[all] # TODO (Alex): Ideally we would install all the dependencies from the new # version too, but pip won't be able to find the new version of ray-cpp. 
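The requirement files above relax the pin from gym==0.21.0 to a range (>=0.21.0,<0.24.0). Where behavior differs across that range, the patch gates on the installed version at runtime (see the `_get_env_id_and_creator` change in rllib/agents/trainer.py further down). A minimal sketch of that gate, assuming only that the `gym` and `packaging` packages are installed:

    import pkg_resources
    from packaging import version

    # Resolve the installed gym version at runtime.
    gym_version = pkg_resources.get_distribution("gym").version

    if version.parse(gym_version) >= version.parse("0.22"):
        # gym 0.22+ path: for example, env classes can no longer be auto-wrapped
        # into remote (pickled) envs, so trainer.py raises an error instead.
        pass
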
diff --git a/release/long_running_tests/app_config_np.yaml b/release/long_running_tests/app_config_np.yaml index b709a55cc..362107894 100644 --- a/release/long_running_tests/app_config_np.yaml +++ b/release/long_running_tests/app_config_np.yaml @@ -7,7 +7,7 @@ debian_packages: python: pip_packages: - - gym[atari]==0.21.0 + - gym[atari] - pygame - pytest - tensorflow @@ -18,7 +18,7 @@ post_build_cmds: - 'rm -r wrk || true && git clone https://github.com/wg/wrk.git /tmp/wrk && cd /tmp/wrk && make -j && sudo cp wrk /usr/local/bin' - pip3 install numpy==1.19 || true - pip3 install pytest || true - - pip3 install -U ray[all] gym[atari]==0.21.0 autorom[accept-rom-license] + - pip3 install -U ray[all] gym[atari] autorom[accept-rom-license] - pip3 install ray[all] # TODO (Alex): Ideally we would install all the dependencies from the new # version too, but pip won't be able to find the new version of ray-cpp. diff --git a/release/rllib_tests/app_config.yaml b/release/rllib_tests/app_config.yaml index aa78f2a0f..c41c09caf 100755 --- a/release/rllib_tests/app_config.yaml +++ b/release/rllib_tests/app_config.yaml @@ -8,7 +8,7 @@ python: # These dependencies should be handled by requirements_rllib.txt and # requirements_ml_docker.txt pip_packages: - - gym==0.21.0 + - gym conda_packages: [] post_build_cmds: diff --git a/release/tune_tests/cloud_tests/app_config.yaml b/release/tune_tests/cloud_tests/app_config.yaml index 6a2296c4c..de75f5cc2 100755 --- a/release/tune_tests/cloud_tests/app_config.yaml +++ b/release/tune_tests/cloud_tests/app_config.yaml @@ -8,8 +8,8 @@ python: - pytest - awscli - gsutil + - gym - gcsfs - - gym==0.21.0 - pyarrow>=6.0.1,<7.0.0 conda_packages: [] diff --git a/release/tune_tests/cloud_tests/app_config_ml.yaml b/release/tune_tests/cloud_tests/app_config_ml.yaml index fb328a598..6d92133a1 100755 --- a/release/tune_tests/cloud_tests/app_config_ml.yaml +++ b/release/tune_tests/cloud_tests/app_config_ml.yaml @@ -8,8 +8,8 @@ python: - pytest - awscli - gsutil + - gym - gcsfs - - gym==0.21.0 - pyarrow>=6.0.1,<7.0.0 conda_packages: [] diff --git a/rllib/BUILD b/rllib/BUILD index 1cd1bcd4e..2ebf81e90 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -1318,12 +1318,6 @@ sh_test( data = glob(["examples/serving/*.py"]), ) -py_test( - name = "env/tests/test_record_env_wrapper", - tags = ["team:ml", "env"], - size = "small", - srcs = ["env/tests/test_record_env_wrapper.py"] -) py_test( name = "env/tests/test_remote_worker_envs", @@ -2818,13 +2812,13 @@ py_test( args = ["--as-test", "--framework=torch"], ) -py_test( - name = "examples/remote_base_env_with_custom_api", - tags = ["team:ml", "examples", "examples_R"], - size = "medium", - srcs = ["examples/remote_base_env_with_custom_api.py"], - args = ["--stop-iters=3"] -) +# py_test( +# name = "examples/remote_base_env_with_custom_api", +# tags = ["team:ml", "examples", "examples_R"], +# size = "medium", +# srcs = ["examples/remote_base_env_with_custom_api.py"], +# args = ["--stop-iters=3"] +# ) py_test( name = "examples/restore_1_of_n_agents_from_checkpoint", diff --git a/rllib/agents/mock.py b/rllib/agents/mock.py index 59adf26d7..2081ad467 100644 --- a/rllib/agents/mock.py +++ b/rllib/agents/mock.py @@ -75,10 +75,11 @@ class _MockTrainer(Trainer): self.info = info self.restored = True + @staticmethod @override(Trainer) - def _register_if_needed(self, env_object, config): + def _get_env_id_and_creator(env_specifier, config): # No env to register. 
- pass + return None, None def set_info(self, info): self.info = info diff --git a/rllib/agents/trainer.py b/rllib/agents/trainer.py index 34d8fb4e3..ea7e3a08a 100644 --- a/rllib/agents/trainer.py +++ b/rllib/agents/trainer.py @@ -8,7 +8,9 @@ import logging import math import numpy as np import os +from packaging import version import pickle +import pkg_resources import tempfile import time from typing import ( @@ -30,7 +32,6 @@ from ray.exceptions import RayError from ray.rllib.agents.callbacks import DefaultCallbacks from ray.rllib.agents.trainer_config import TrainerConfig from ray.rllib.env.env_context import EnvContext -from ray.rllib.env.multi_agent_env import MultiAgentEnv from ray.rllib.env.utils import gym_env_creator from ray.rllib.evaluation.episode import Episode from ray.rllib.evaluation.metrics import ( @@ -94,7 +95,7 @@ from ray.rllib.utils.typing import ( TrainerConfigDict, ) from ray.tune.logger import Logger, UnifiedLogger -from ray.tune.registry import ENV_CREATOR, register_env, _global_registry +from ray.tune.registry import ENV_CREATOR, _global_registry from ray.tune.resources import Resources from ray.tune.result import DEFAULT_RESULTS_DIR from ray.tune.trainable import Trainable @@ -220,19 +221,13 @@ class Trainer(Trainable): if isinstance(config, TrainerConfig): config = config.to_dict() - # Convert `env` provided in config into a string: - # - If `env` is a string: `self._env_id` = `env`. - # - If `env` is a class: `self._env_id` = `env.__name__` -> Already - # register it with a auto-generated env creator. - # - If `env` is None: `self._env_id` is None. - self._env_id: Optional[str] = self._register_if_needed( + # Convert `env` provided in config into a concrete env creator callable, which + # takes an EnvContext (config dict) as arg and returning an RLlib supported Env + # type (e.g. a gym.Env). + self._env_id, self.env_creator = self._get_env_id_and_creator( env or config.get("env"), config ) - # The env creator callable, taking an EnvContext (config dict) - # as arg and returning an RLlib supported Env type (e.g. a gym.Env). - self.env_creator: Optional[EnvCreator] = None - # Placeholder for a local replay buffer instance. self.local_replay_buffer = None @@ -310,10 +305,6 @@ class Trainer(Trainable): # Validate the framework settings in config. self.validate_framework(self.config) - # Setup the self.env_creator callable (to be passed - # e.g. to RolloutWorkers' c'tors). - self.env_creator = self._get_env_creator_from_env_id(self._env_id) - # Set Trainer's seed after we have - if necessary - enabled # tf eager-execution. update_global_seed_if_necessary(self.config["framework"], self.config["seed"]) @@ -466,8 +457,9 @@ class Trainer(Trainable): self.config["evaluation_config"] = eval_config - env_id = self._register_if_needed(eval_config.get("env"), eval_config) - env_creator = self._get_env_creator_from_env_id(env_id) + env_id, env_creator = self._get_env_id_and_creator( + eval_config.get("env"), eval_config + ) # Create a separate evaluation worker set for evaluation. # If evaluation_num_workers=0, use the evaluation set's local @@ -1541,37 +1533,87 @@ class Trainer(Trainable): """Pre-evaluation callback.""" pass - def _get_env_creator_from_env_id(self, env_id: Optional[str] = None) -> EnvCreator: - """Returns an env creator callable, given an `env_id` (e.g. "CartPole-v0"). 
+ @staticmethod + def _get_env_id_and_creator( + env_specifier: Union[str, EnvType, None], config: PartialTrainerConfigDict + ) -> Tuple[Optional[str], EnvCreator]: + """Returns env_id and creator callable given original env id from config. Args: - env_id: An already tune registered env ID, a known gym env name, - or None (if no env is used). + env_specifier: An env class, an already tune registered env ID, a known + gym env name, or None (if no env is used). + config: The Trainer's (maybe partial) config dict. Returns: + Tuple consisting of a) env ID string and b) env creator callable. """ - if env_id: + # Environment is specified via a string. + if isinstance(env_specifier, str): # An already registered env. - if _global_registry.contains(ENV_CREATOR, env_id): - return _global_registry.get(ENV_CREATOR, env_id) + if _global_registry.contains(ENV_CREATOR, env_specifier): + return env_specifier, _global_registry.get(ENV_CREATOR, env_specifier) # A class path specifier. - elif "." in env_id: + elif "." in env_specifier: def env_creator_from_classpath(env_context): try: - env_obj = from_config(env_id, env_context) + env_obj = from_config(env_specifier, env_context) except ValueError: - raise EnvError(ERR_MSG_INVALID_ENV_DESCRIPTOR.format(env_id)) + raise EnvError( + ERR_MSG_INVALID_ENV_DESCRIPTOR.format(env_specifier) + ) return env_obj - return env_creator_from_classpath + return env_specifier, env_creator_from_classpath # Try gym/PyBullet/Vizdoom. else: - return functools.partial(gym_env_creator, env_descriptor=env_id) + return env_specifier, functools.partial( + gym_env_creator, env_descriptor=env_specifier + ) + + elif isinstance(env_specifier, type): + env_id = env_specifier # .__name__ + + if config.get("remote_worker_envs"): + # Check gym version (0.22 or higher?). + # If > 0.21, can't perform auto-wrapping of the given class as this + # would lead to a pickle error. + gym_version = pkg_resources.get_distribution("gym").version + if version.parse(gym_version) >= version.parse("0.22"): + raise ValueError( + "Cannot specify a gym.Env class via `config.env` while setting " + "`config.remote_worker_env=True` AND your gym version is >= " + "0.22! Try installing an older version of gym or set `config." + "remote_worker_env=False`." + ) + + @ray.remote(num_cpus=1) + class _wrapper(env_specifier): + # Add convenience `_get_spaces` and `_is_multi_agent` + # methods: + def _get_spaces(self): + return self.observation_space, self.action_space + + def _is_multi_agent(self): + from ray.rllib.env.multi_agent_env import MultiAgentEnv + + return isinstance(self, MultiAgentEnv) + + return env_id, lambda cfg: _wrapper.remote(cfg) + else: + return env_id, lambda cfg: env_specifier(cfg) + # No env -> Env creator always returns None. + elif env_specifier is None: + return None, lambda env_config: None + else: - return lambda env_config: None + raise ValueError( + "{} is an invalid env specifier. ".format(env_specifier) + + "You can specify a custom env as either a class " + '(e.g., YourEnvCls) or a registered env id (e.g., "your_env").' + ) def _sync_filters_if_needed(self, workers: WorkerSet): if self.config.get("observation_filter", "NoFilter") != "NoFilter": @@ -1753,16 +1795,6 @@ class Trainer(Trainable): if model_config is None: config["model"] = model_config = {} - # Monitor should be replaced by `record_env`. 
- if config.get("monitor", DEPRECATED_VALUE) != DEPRECATED_VALUE: - deprecation_warning("monitor", "record_env", error=False) - config["record_env"] = config.get("monitor", False) - # Empty string would fail some if-blocks checking for this setting. - # Set to True instead, meaning: use default output dir to store - # the videos. - if config.get("record_env") == "": - config["record_env"] = True - # Use DefaultCallbacks class, if callbacks is None. if config["callbacks"] is None: config["callbacks"] = DefaultCallbacks @@ -2149,38 +2181,6 @@ class Trainer(Trainable): kwargs["local_replay_buffer"] = self.local_replay_buffer return kwargs - def _register_if_needed( - self, env_object: Union[str, EnvType, None], config - ) -> Optional[str]: - if isinstance(env_object, str): - return env_object - elif isinstance(env_object, type): - name = env_object.__name__ - - if config.get("remote_worker_envs"): - - @ray.remote(num_cpus=0) - class _wrapper(env_object): - # Add convenience `_get_spaces` and `_is_multi_agent` - # methods. - def _get_spaces(self): - return self.observation_space, self.action_space - - def _is_multi_agent(self): - return isinstance(self, MultiAgentEnv) - - register_env(name, lambda cfg: _wrapper.remote(cfg)) - else: - register_env(name, lambda cfg: env_object(cfg)) - return name - elif env_object is None: - return None - raise ValueError( - "{} is an invalid env specification. ".format(env_object) - + "You can specify a custom env as either a class " - '(e.g., YourEnvCls) or a registered env id (e.g., "your_env").' - ) - def _step_context(trainer): class StepCtx: def __enter__(self): diff --git a/rllib/agents/trainer_config.py b/rllib/agents/trainer_config.py index bf8f983d4..3fa1618b2 100644 --- a/rllib/agents/trainer_config.py +++ b/rllib/agents/trainer_config.py @@ -108,7 +108,6 @@ class TrainerConfig: self.action_space = None self.env_task_fn = None self.render_env = False - self.record_env = False self.clip_rewards = None self.normalize_actions = True self.clip_actions = False @@ -458,7 +457,6 @@ class TrainerConfig: action_space: Optional[gym.spaces.Space] = None, env_task_fn: Optional[Callable[[ResultDict, EnvType, EnvContext], Any]] = None, render_env: Optional[bool] = None, - record_env: Optional[bool] = None, clip_rewards: Optional[Union[bool, float]] = None, normalize_actions: Optional[bool] = None, clip_actions: Optional[bool] = None, @@ -489,11 +487,6 @@ class TrainerConfig: `render()` method which either: a) handles window generation and rendering itself (returning True) or b) returns a numpy uint8 image of shape [height x width x 3 (RGB)]. - record_env: If True, stores videos in this relative directory inside the - default output dir (~/ray_results/...). Alternatively, you can - specify an absolute path (str), in which the env recordings should be - stored instead. Set to False for not recording anything. - Note: This setting replaces the deprecated `monitor` key. clip_rewards: Whether to clip rewards during Policy's postprocessing. None (default): Clip for Atari only (r=sign(r)). True: r=sign(r): Fixed rewards -1.0, 1.0, or 0.0. 
@@ -524,8 +517,6 @@ class TrainerConfig: self.env_task_fn = env_task_fn if render_env is not None: self.render_env = render_env - if record_env is not None: - self.record_env = record_env if clip_rewards is not None: self.clip_rewards = clip_rewards if normalize_actions is not None: diff --git a/rllib/algorithms/alpha_zero/alpha_zero.py b/rllib/algorithms/alpha_zero/alpha_zero.py index f8df24f8b..7d5a6ee53 100644 --- a/rllib/algorithms/alpha_zero/alpha_zero.py +++ b/rllib/algorithms/alpha_zero/alpha_zero.py @@ -168,7 +168,7 @@ class AlphaZeroPolicyWrapperClass(AlphaZeroPolicy): model = ModelCatalog.get_model_v2( obs_space, action_space, action_space.n, config["model"], "torch" ) - env_creator = Trainer._get_env_creator_from_env_id(None, config["env"]) + _, env_creator = Trainer._get_env_id_and_creator(config["env"], config) if config["ranked_rewards"]["enable"]: # if r2 is enabled, tne env is wrapped to include a rewards buffer # used to normalize rewards diff --git a/rllib/algorithms/ars/ars.py b/rllib/algorithms/ars/ars.py index 617b92ca7..1b7d914e2 100644 --- a/rllib/algorithms/ars/ars.py +++ b/rllib/algorithms/ars/ars.py @@ -355,8 +355,6 @@ class ARSTrainer(Trainer): # Validate our config dict. self.validate_config(self.config) - # Generate `self.env_creator` callable to create an env instance. - self.env_creator = self._get_env_creator_from_env_id(self._env_id) # Generate the local env. env_context = EnvContext(self.config["env_config"] or {}, worker_index=0) env = self.env_creator(env_context) diff --git a/rllib/algorithms/es/es.py b/rllib/algorithms/es/es.py index 9c0252c00..9c2478ce9 100644 --- a/rllib/algorithms/es/es.py +++ b/rllib/algorithms/es/es.py @@ -362,8 +362,6 @@ class ESTrainer(Trainer): # Call super's validation method. self.validate_config(self.config) - # Generate `self.env_creator` callable to create an env instance. - self.env_creator = self._get_env_creator_from_env_id(self._env_id) # Generate the local env. 
env_context = EnvContext(self.config["env_config"] or {}, worker_index=0) env = self.env_creator(env_context) diff --git a/rllib/env/multi_agent_env.py b/rllib/env/multi_agent_env.py index 3ff435b39..952fbd23b 100644 --- a/rllib/env/multi_agent_env.py +++ b/rllib/env/multi_agent_env.py @@ -1,6 +1,6 @@ import gym import logging -from typing import Callable, Dict, List, Tuple, Type, Optional, Union, Set +from typing import Callable, Dict, List, Tuple, Optional, Union, Set, Type from ray.rllib.env.base_env import BaseEnv from ray.rllib.utils.annotations import ( diff --git a/rllib/env/tests/test_record_env_wrapper.py b/rllib/env/tests/test_record_env_wrapper.py deleted file mode 100644 index d930b727e..000000000 --- a/rllib/env/tests/test_record_env_wrapper.py +++ /dev/null @@ -1,100 +0,0 @@ -import glob -import gym -import numpy as np -import os -import shutil -import unittest - -from ray.rllib.env.utils import VideoMonitor, record_env_wrapper -from ray.rllib.examples.env.mock_env import MockEnv2 -from ray.rllib.examples.env.multi_agent import BasicMultiAgent -from ray.rllib.utils.test_utils import check - - -class TestRecordEnvWrapper(unittest.TestCase): - def test_wrap_gym_env(self): - record_env_dir = os.popen("mktemp -d").read()[:-1] - print(f"tmp dir for videos={record_env_dir}") - - if not os.path.exists(record_env_dir): - sys.exit(1) - - num_steps_per_episode = 10 - wrapped = record_env_wrapper( - env=MockEnv2(num_steps_per_episode), - record_env=record_env_dir, - log_dir="", - policy_config={ - "in_evaluation": False, - }, - ) - # Non MultiAgentEnv: Wrapper's type is wrappers.Monitor. - self.assertTrue(isinstance(wrapped, gym.wrappers.Monitor)) - self.assertFalse(isinstance(wrapped, VideoMonitor)) - - wrapped.reset() - # Expect one video file to have been produced in the tmp dir. - os.chdir(record_env_dir) - ls = glob.glob("*.mp4") - self.assertTrue(len(ls) == 1) - # 10 steps for a complete episode. - for i in range(num_steps_per_episode): - wrapped.step(0) - # Another episode. - wrapped.reset() - for i in range(num_steps_per_episode): - wrapped.step(0) - # Expect another video file to have been produced (2nd episode). - ls = glob.glob("*.mp4") - self.assertTrue(len(ls) == 2) - - # MockEnv2 returns a reward of 100.0 every step. - # So total reward is 1000.0 per episode (10 steps). - check( - np.array([100.0, 100.0]) * num_steps_per_episode, - wrapped.get_episode_rewards(), - ) - # Erase all generated files and the temp path just in case, - # as to not disturb further CI-tests. - shutil.rmtree(record_env_dir) - - def test_wrap_multi_agent_env(self): - record_env_dir = os.popen("mktemp -d").read()[:-1] - print(f"tmp dir for videos={record_env_dir}") - - if not os.path.exists(record_env_dir): - sys.exit(1) - - wrapped = record_env_wrapper( - env=BasicMultiAgent(3), - record_env=record_env_dir, - log_dir="", - policy_config={ - "in_evaluation": False, - }, - ) - # Type is VideoMonitor. - self.assertTrue(isinstance(wrapped, gym.wrappers.Monitor)) - self.assertTrue(isinstance(wrapped, VideoMonitor)) - - wrapped.reset() - - # BasicMultiAgent is hardcoded to run 25-step episodes. - for i in range(25): - wrapped.step({0: 0, 1: 0, 2: 0}) - - # Expect one video file to have been produced in the tmp dir. - os.chdir(record_env_dir) - ls = glob.glob("*.mp4") - self.assertTrue(len(ls) == 1) - - # However VideoMonitor's _after_step is overwritten to not - # use stats_recorder. So nothing to verify here, except that - # it runs fine. 
- - -if __name__ == "__main__": - import pytest - import sys - - sys.exit(pytest.main(["-v", __file__])) diff --git a/rllib/env/tests/test_remote_worker_envs.py b/rllib/env/tests/test_remote_worker_envs.py index f83eac21c..adbc32b0d 100644 --- a/rllib/env/tests/test_remote_worker_envs.py +++ b/rllib/env/tests/test_remote_worker_envs.py @@ -7,10 +7,8 @@ import unittest import ray from ray.rllib.algorithms.pg import pg from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv -from ray.rllib.examples.env.random_env import RandomEnv, RandomMultiAgentEnv -from ray.rllib.examples.remote_base_env_with_custom_api import ( - NonVectorizedEnvToBeVectorizedIntoRemoteBaseEnv, -) + +# from ray.rllib.examples.env.random_env import RandomEnv from ray import tune @@ -55,17 +53,19 @@ class TestRemoteWorkerEnvSetting(unittest.TestCase): trainer.stop() # Using class directly. - config["env"] = RandomEnv - trainer = pg.PGTrainer(config=config) - print(trainer.train()) - trainer.stop() + # This doesn't work anymore as of gym==0.23 + # config["env"] = RandomEnv + # trainer = pg.PGTrainer(config=config) + # print(trainer.train()) + # trainer.stop() # Using class directly: Sub-class of gym.Env, # which implements its own API. - config["env"] = NonVectorizedEnvToBeVectorizedIntoRemoteBaseEnv - trainer = pg.PGTrainer(config=config) - print(trainer.train()) - trainer.stop() + # This doesn't work anymore as of gym==0.23 + # config["env"] = NonVectorizedEnvToBeVectorizedIntoRemoteBaseEnv + # trainer = pg.PGTrainer(config=config) + # print(trainer.train()) + # trainer.stop() def test_remote_worker_env_multi_agent(self): config = pg.DEFAULT_CONFIG.copy() @@ -85,10 +85,11 @@ class TestRemoteWorkerEnvSetting(unittest.TestCase): trainer.stop() # Using class directly. - config["env"] = RandomMultiAgentEnv - trainer = pg.PGTrainer(config=config) - print(trainer.train()) - trainer.stop() + # This doesn't work anymore as of gym==0.23. + # config["env"] = RandomMultiAgentEnv + # trainer = pg.PGTrainer(config=config) + # print(trainer.train()) + # trainer.stop() if __name__ == "__main__": diff --git a/rllib/env/utils.py b/rllib/env/utils.py index 2f17eaa77..bcfbee1c4 100644 --- a/rllib/env/utils.py +++ b/rllib/env/utils.py @@ -1,10 +1,6 @@ import gym -from gym import wrappers -import os from ray.rllib.env.env_context import EnvContext -from ray.rllib.env.multi_agent_env import MultiAgentEnv -from ray.rllib.utils import add_mixins from ray.rllib.utils.error import ERR_MSG_INVALID_ENV_DESCRIPTOR, EnvError @@ -54,48 +50,3 @@ def gym_env_creator(env_context: EnvContext, env_descriptor: str) -> gym.Env: return gym.make(env_descriptor, **env_context) except gym.error.Error: raise EnvError(ERR_MSG_INVALID_ENV_DESCRIPTOR.format(env_descriptor)) - - -class VideoMonitor(wrappers.Monitor): - # Same as original method, but doesn't use the StatsRecorder as it will - # try to add up multi-agent rewards dicts, which throws errors. - def _after_step(self, observation, reward, done, info): - if not self.enabled: - return done - - # Use done["__all__"] b/c this is a multi-agent dict. 
- if done["__all__"] and self.env_semantics_autoreset: - # For envs with BlockingReset wrapping VNCEnv, this observation - # will be the first one of the new episode - self.reset_video_recorder() - self.episode_id += 1 - self._flush() - - # Record video - self.video_recorder.capture_frame() - - return done - - -def record_env_wrapper(env, record_env, log_dir, policy_config): - if record_env: - path_ = record_env if isinstance(record_env, str) else log_dir - # Relative path: Add logdir here, otherwise, this would - # not work for non-local workers. - if not os.path.isabs(path_): - path_ = os.path.join(log_dir, path_) - print(f"Setting the path for recording to {path_}") - wrapper_cls = ( - VideoMonitor if isinstance(env, MultiAgentEnv) else wrappers.Monitor - ) - if isinstance(env, MultiAgentEnv): - wrapper_cls = add_mixins(wrapper_cls, [MultiAgentEnv], reversed=True) - env = wrapper_cls( - env, - path_, - resume=True, - force=True, - video_callable=lambda _: True, - mode="evaluation" if policy_config["in_evaluation"] else "training", - ) - return env diff --git a/rllib/evaluate.py b/rllib/evaluate.py index 8ba62a2eb..fdb18b02d 100755 --- a/rllib/evaluate.py +++ b/rllib/evaluate.py @@ -4,7 +4,6 @@ import argparse import collections import copy import gym -from gym import wrappers as gym_wrappers import json import os from pathlib import Path @@ -339,7 +338,6 @@ def run(args, parser): deprecation_warning(old="--no-render", new="--render", error=False) args.render = False config["render_env"] = args.render - config["record_env"] = args.video_dir ray.init(local_mode=args.local_mode) @@ -354,12 +352,6 @@ def run(args, parser): num_steps = int(args.steps) num_episodes = int(args.episodes) - # Determine the video output directory. - video_dir = None - # Allow user to specify a video output path. - if args.video_dir: - video_dir = os.path.expanduser(args.video_dir) - # Do the actual rollout. with RolloutSaver( args.out, @@ -369,9 +361,7 @@ def run(args, parser): target_episodes=num_episodes, save_info=args.save_info, ) as saver: - rollout( - agent, args.env, num_steps, num_episodes, saver, not args.render, video_dir - ) + rollout(agent, args.env, num_steps, num_episodes, saver, not args.render) agent.stop() @@ -406,7 +396,6 @@ def rollout( num_episodes=0, saver=None, no_render=True, - video_dir=None, ): policy_agent_mapping = default_policy_agent_mapping @@ -473,13 +462,6 @@ def rollout( for p, m in policy_map.items() } - # If monitoring has been requested, manually wrap our environment with a - # gym monitor, which is set to record every episode. 
- if video_dir: - env = gym_wrappers.Monitor( - env=env, directory=video_dir, video_callable=lambda _: True, force=True - ) - steps = 0 episodes = 0 while keep_going(steps, num_steps, episodes, num_episodes): diff --git a/rllib/evaluation/rollout_worker.py b/rllib/evaluation/rollout_worker.py index f2b813cae..54610921c 100644 --- a/rllib/evaluation/rollout_worker.py +++ b/rllib/evaluation/rollout_worker.py @@ -27,7 +27,6 @@ from ray.rllib.env.base_env import BaseEnv, convert_to_base_env from ray.rllib.env.env_context import EnvContext from ray.rllib.env.multi_agent_env import MultiAgentEnv from ray.rllib.env.external_multi_agent_env import ExternalMultiAgentEnv -from ray.rllib.env.utils import record_env_wrapper from ray.rllib.env.wrappers.atari_wrappers import wrap_deepmind, is_atari from ray.rllib.evaluation.sampler import AsyncSampler, SyncSampler from ray.rllib.evaluation.metrics import RolloutMetrics @@ -233,7 +232,6 @@ class RolloutWorker(ParallelIteratorWorker): worker_index: int = 0, num_workers: int = 0, recreated_worker: bool = False, - record_env: Union[bool, str] = False, log_dir: Optional[str] = None, log_level: Optional[str] = None, callbacks: Type["DefaultCallbacks"] = None, @@ -253,7 +251,6 @@ class RolloutWorker(ParallelIteratorWorker): fake_sampler: bool = False, spaces: Optional[Dict[PolicyID, Tuple[Space, Space]]] = None, policy=None, - monitor_path=None, disable_env_checking=False, ): """Initializes a RolloutWorker instance. @@ -332,10 +329,6 @@ class RolloutWorker(ParallelIteratorWorker): `recreate_failed_workers=True` and one of the original workers (or an already recreated one) has failed. They don't differ from original workers other than the value of this flag (`self.recreated_worker`). - record_env: Write out episode stats and videos - using gym.wrappers.Monitor to this directory if specified. If - True, use the default output dir in ~/ray_results/.... If - False, do not record anything. log_dir: Directory where logs can be placed. log_level: Set the root log level on creation. callbacks: Custom sub-class of @@ -374,7 +367,6 @@ class RolloutWorker(ParallelIteratorWorker): to (obs_space, action_space)-tuples. This is used in case no Env is created on this RolloutWorker. policy: Obsoleted arg. Use `policy_spec` instead. - monitor_path: Obsoleted arg. Use `record_env` instead. disable_env_checking: If True, disables the env checking module that validates the properties of the passed environment. """ @@ -395,10 +387,6 @@ class RolloutWorker(ParallelIteratorWorker): for pid, spec in policy_spec.copy().items() } - if monitor_path is not None: - deprecation_warning("monitor_path", "record_env", error=False) - record_env = monitor_path - self._original_kwargs: dict = locals().copy() del self._original_kwargs["self"] @@ -490,7 +478,7 @@ class RolloutWorker(ParallelIteratorWorker): # 1) Create the env using the user provided env_creator. This may # return a gym.Env (incl. MultiAgentEnv), an already vectorized # VectorEnv, BaseEnv, ExternalEnv, or an ActorHandle (remote env). - # 2) Wrap - if applicable - with Atari/recording/rendering wrappers. + # 2) Wrap - if applicable - with Atari/rendering wrappers. # 3) Seed the env, if necessary. # 4) Vectorize the existing single env by creating more clones of # this env and wrapping it with the RLlib BaseEnv class. 
@@ -541,14 +529,12 @@ class RolloutWorker(ParallelIteratorWorker): env = wrap_deepmind( env, dim=model_config.get("dim"), framestack=use_framestack ) - env = record_env_wrapper(env, record_env, log_dir, policy_config) return env - # gym.Env -> Wrap with gym Monitor. else: def wrap(env): - return record_env_wrapper(env, record_env, log_dir, policy_config) + return env # Wrap env through the correct wrapper. self.env: EnvType = wrap(self.env) diff --git a/rllib/evaluation/tests/test_rollout_worker.py b/rllib/evaluation/tests/test_rollout_worker.py index 1961c5acd..f37b088e6 100644 --- a/rllib/evaluation/tests/test_rollout_worker.py +++ b/rllib/evaluation/tests/test_rollout_worker.py @@ -4,7 +4,6 @@ from gym.spaces import Box, Discrete import numpy as np import os import random -import tempfile import time import unittest @@ -12,7 +11,6 @@ import ray from ray.rllib.algorithms.pg import PGTrainer from ray.rllib.agents.a3c import A2CTrainer from ray.rllib.env.multi_agent_env import MultiAgentEnv -from ray.rllib.env.utils import VideoMonitor from ray.rllib.evaluation.rollout_worker import RolloutWorker from ray.rllib.evaluation.metrics import collect_metrics from ray.rllib.evaluation.postprocessing import compute_advantages @@ -376,10 +374,8 @@ class TestRolloutWorker(unittest.TestCase): curframe = inspect.currentframe() called_from_check = any( - [ - frame[3] == "check_gym_environments" - for frame in inspect.getouterframes(curframe, 2) - ] + frame[3] == "check_gym_environments" + for frame in inspect.getouterframes(curframe, 2) ) # Check, whether the action is immutable. if action.flags.writeable and not called_from_check: @@ -825,15 +821,12 @@ class TestRolloutWorker(unittest.TestCase): policy_config={ "in_evaluation": False, }, - record_env=tempfile.gettempdir(), ) # Make sure we can properly sample from the wrapped env. ev.sample() # Make sure the resulting environment is indeed still an - # instance of MultiAgentEnv and VideoMonitor. self.assertTrue(isinstance(ev.env.unwrapped, MultiAgentEnv)) self.assertTrue(isinstance(ev.env, gym.Env)) - self.assertTrue(isinstance(ev.env, VideoMonitor)) ev.stop() def test_no_training(self): diff --git a/rllib/evaluation/worker_set.py b/rllib/evaluation/worker_set.py index f38ee5b27..5403e1ddf 100644 --- a/rllib/evaluation/worker_set.py +++ b/rllib/evaluation/worker_set.py @@ -654,7 +654,6 @@ class WorkerSet: worker_index=worker_index, num_workers=num_workers, recreated_worker=recreated_worker, - record_env=config["record_env"], log_dir=self._logdir, log_level=config["log_level"], callbacks=config["callbacks"], diff --git a/rllib/examples/env_rendering_and_recording.py b/rllib/examples/env_rendering_and_recording.py index 43949f846..58333d5e6 100644 --- a/rllib/examples/env_rendering_and_recording.py +++ b/rllib/examples/env_rendering_and_recording.py @@ -1,12 +1,3 @@ -# --------------- -# IMPORTANT NOTE: -# --------------- -# A recent bug in openAI gym prevents RLlib's "record_env" option -# from recording videos properly. Instead, the produced mp4 files -# have a size of 1kb and are corrupted. -# A simple fix for this is described here: -# https://github.com/openai/gym/issues/1925 - import argparse import gym import numpy as np @@ -115,13 +106,6 @@ if __name__ == "__main__": # Special evaluation config. Keys specified here will override # the same keys in the main config, but only for evaluation. "evaluation_config": { - # Store videos in this relative directory here inside - # the default output dir (~/ray_results/...). 
- # Alternatively, you can specify an absolute path. - # Set to True for using the default output dir (~/ray_results/...). - # Set to False for not recording anything. - "record_env": "videos", - # "record_env": "/Users/xyz/my_videos/", # Render the env while evaluating. # Note that this will always only render the 1st RolloutWorker's # env and only the 1st sub-env in a vectorized env. diff --git a/rllib/examples/remote_base_env_with_custom_api.py b/rllib/examples/remote_base_env_with_custom_api.py index 9fd45dad4..c0f5ae1bc 100644 --- a/rllib/examples/remote_base_env_with_custom_api.py +++ b/rllib/examples/remote_base_env_with_custom_api.py @@ -46,6 +46,11 @@ parser.add_argument( parser.add_argument( "--stop-reward", type=float, default=180.0, help="Reward at which we stop training." ) +parser.add_argument( + "--local-mode", + action="store_true", + help="Init Ray in local mode for easier debugging.", +) class NonVectorizedEnvToBeVectorizedIntoRemoteBaseEnv(TaskSettableEnv): @@ -96,7 +101,7 @@ class TaskSettingCallback(DefaultCallbacks): if __name__ == "__main__": args = parser.parse_args() - ray.init(num_cpus=6) + ray.init(num_cpus=6, local_mode=args.local_mode) config = { # Specify your custom (single, non-vectorized) env directly as a diff --git a/rllib/examples/rnnsac_stateless_cartpole.py b/rllib/examples/rnnsac_stateless_cartpole.py index d79d74e11..0293124f2 100644 --- a/rllib/examples/rnnsac_stateless_cartpole.py +++ b/rllib/examples/rnnsac_stateless_cartpole.py @@ -6,15 +6,14 @@ import ray from ray import tune from ray.rllib.agents.registry import get_trainer_class -from ray.rllib.examples.env.repeat_after_me_env import RepeatAfterMeEnv +# from ray.rllib.examples.env.repeat_after_me_env import RepeatAfterMeEnv from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole -envs = {"RepeatAfterMeEnv": RepeatAfterMeEnv, "StatelessCartPole": StatelessCartPole} config = { "name": "RNNSAC_example", "local_dir": str(Path(__file__).parent / "example_out"), - "checkpoint_freq": 1, + "checkpoint_at_end": True, "keep_checkpoints_num": 1, "checkpoint_score_attr": "episode_reward_mean", "stop": { @@ -29,11 +28,8 @@ config = { "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), "framework": "torch", "num_workers": 4, - "num_envs_per_worker": 1, - "num_cpus_per_worker": 1, - "log_level": "INFO", - # "env": envs["RepeatAfterMeEnv"], - "env": envs["StatelessCartPole"], + # "env": RepeatAfterMeEnv, + "env": StatelessCartPole, "horizon": 1000, "gamma": 0.95, "batch_mode": "complete_episodes", @@ -102,7 +98,7 @@ if __name__ == "__main__": eps = 0 ep_reward = 0 while eps < 10: - action, state, info_trainer = agent.compute_action( + action, state, info_trainer = agent.compute_single_action( obs, state=state, prev_action=prev_action, @@ -115,7 +111,7 @@ if __name__ == "__main__": ep_reward += reward try: env.render() - except (NotImplementedError, ImportError): + except Exception: pass if done: eps += 1 diff --git a/rllib/utils/tests/test_check_env.py b/rllib/utils/tests/test_check_env.py index d9a9a2301..b295fbdf1 100644 --- a/rllib/utils/tests/test_check_env.py +++ b/rllib/utils/tests/test_check_env.py @@ -29,7 +29,6 @@ class TestGymCheckEnv(unittest.TestCase): env = Mock(spec=["observation_space"]) with pytest.raises(AttributeError, match="Env must have action_space."): check_gym_environments(env) - del env def test_obs_and_action_spaces_are_gym_spaces(self): env = RandomEnv() @@ -41,7 +40,6 @@ class TestGymCheckEnv(unittest.TestCase): env.action_space = "not an action space" with 
pytest.raises(ValueError, match="Action space must be a gym.space"): check_env(env) - del env def test_reset(self): reset = MagicMock(return_value=5) @@ -56,7 +54,6 @@ class TestGymCheckEnv(unittest.TestCase): env.reset = reset with pytest.raises(ValueError, match=error): check_env(env) - del env def test_step(self): step = MagicMock(return_value=(5, 5, True, {})) @@ -92,7 +89,6 @@ class TestGymCheckEnv(unittest.TestCase): error = "Your step function must return a info that is a dict." with pytest.raises(ValueError, match=error): check_env(env) - del env class TestCheckMultiAgentEnv(unittest.TestCase): @@ -104,7 +100,6 @@ class TestCheckMultiAgentEnv(unittest.TestCase): env = RandomEnv() with pytest.raises(ValueError, match="The passed env is not"): check_multiagent_environments(env) - del env def test_check_env_reset_incorrect_error(self): reset = MagicMock(return_value=5) @@ -119,7 +114,6 @@ class TestCheckMultiAgentEnv(unittest.TestCase): env.reset = lambda *_: bad_obs with pytest.raises(ValueError, match="The observation collected from env"): check_env(env) - del env def test_check_incorrect_space_contains_functions_error(self): def bad_contains_function(self, x): @@ -131,7 +125,6 @@ class TestCheckMultiAgentEnv(unittest.TestCase): ValueError, match="Your observation_space_contains function has some" ): check_env(env) - del env env = make_multi_agent("CartPole-v1")({"num_agents": 2}) bad_action = {0: 2, 1: 2} env.action_space_sample = lambda *_: bad_action @@ -178,7 +171,6 @@ class TestCheckMultiAgentEnv(unittest.TestCase): ValueError, match="The action collected from action_space_sample" ): check_env(env) - del env env = make_multi_agent("CartPole-v1")({"num_agents": 2}) bad_obs = { 0: np.array([np.inf, np.inf, np.inf, np.inf]), @@ -206,7 +198,6 @@ class TestCheckBaseEnv: env = RandomEnv() with pytest.raises(ValueError, match="The passed env is not"): check_base_env(env) - del env def test_check_env_reset_incorrect_error(self): reset = MagicMock(return_value=5) @@ -244,7 +235,6 @@ class TestCheckBaseEnv: ValueError, match="The observation collected from try_reset" ): check_env(env) - del env def test_check_space_contains_functions_errors(self): def bad_contains_function(self, x): @@ -258,14 +248,12 @@ class TestCheckBaseEnv: ): check_env(env) - del env env = self._make_base_env() env.action_space_contains = bad_contains_function with pytest.raises( ValueError, match="Your action_space_contains function has some error" ): check_env(env) - del env def test_bad_sample_function(self): env = self._make_base_env() @@ -276,7 +264,6 @@ class TestCheckBaseEnv: ): check_env(env) - del env env = self._make_base_env() bad_obs = { 0: {