[RLlib] Fix all example scripts to run on GPUs. (#11105)

Sven Mika 2020-10-02 23:07:44 +02:00 committed by GitHub
parent 5a42ed1848
commit c17169dc11
56 changed files with 221 additions and 98 deletions

View file

@ -27,14 +27,7 @@ APEX_DDPG_DEFAULT_CONFIG = DDPGTrainer.merge_trainer_configs(
},
)
def validate_config(config):
if config.get("framework") == "tfe":
raise ValueError("APEX_DDPG does not support tf-eager yet!")
ApexDDPGTrainer = DDPGTrainer.with_updates(
name="APEX_DDPG",
default_config=APEX_DDPG_DEFAULT_CONFIG,
validate_config=validate_config,
execution_plan=apex_execution_plan)

View file

@ -251,7 +251,10 @@ class KLCoeffMixin:
self.kl_coeff_val = config["kl_coeff"]
# The current KL value (as tf Variable for in-graph operations).
self.kl_coeff = get_variable(
float(self.kl_coeff_val), tf_name="kl_coeff", trainable=False)
float(self.kl_coeff_val),
tf_name="kl_coeff",
trainable=False,
framework=config["framework"])
# Constant target value.
self.kl_target = config["kl_target"]
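
A hedged sketch of the call the hunk above ends up making, assuming `get_variable` is importable from `ray.rllib.utils.framework` (values here are illustrative): the same scalar becomes a tf.Variable under the TF frameworks and a framework-appropriate value under torch, so both code paths work with GPU training.

from ray.rllib.utils.framework import get_variable

# Illustrative values only; mirrors the updated call above.
kl_coeff = get_variable(
    0.2,                     # initial KL coefficient
    tf_name="kl_coeff",      # name used only when a tf.Variable is created
    trainable=False,         # updated manually, not by the optimizer
    framework="torch")       # or "tf", "tf2", "tfe"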

View file

@ -37,7 +37,7 @@ FAKE_BATCH = {
class TestPPO(unittest.TestCase):
@classmethod
def setUpClass(cls):
ray.init(local_mode=True)
ray.init()
@classmethod
def tearDownClass(cls):

View file

@ -166,7 +166,8 @@ class TestSAC(unittest.TestCase):
# Set all weights (of all nets) to fixed values.
if weights_dict is None:
assert fw in ["tf", "tfe"] # Start with the tf vars-dict.
# Start with the tf vars-dict.
assert fw in ["tf2", "tf", "tfe"]
weights_dict = policy.get_weights()
if fw == "tfe":
log_alpha = weights_dict[10]

View file

@ -1,4 +1,5 @@
import argparse
import os
import ray
from ray import tune
@ -42,6 +43,8 @@ if __name__ == "__main__":
"repeat_delay": 2,
},
"gamma": 0.99,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", 0)),
"num_workers": 0,
"num_envs_per_worker": 20,
"entropy_coeff": 0.001,

View file

@ -11,6 +11,7 @@ This examples shows both.
"""
import argparse
import os
import ray
from ray import tune
@ -44,7 +45,8 @@ if __name__ == "__main__":
config = {
"env": CorrelatedActionsEnv,
"gamma": 0.5,
"num_gpus": 0,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"model": {
"custom_model": "autoregressive_model",
"custom_action_dist": "binary_autoreg_dist",
@ -58,7 +60,7 @@ if __name__ == "__main__":
"episode_reward_mean": args.stop_reward,
}
results = tune.run(args.run, stop=stop, config=config)
results = tune.run(args.run, stop=stop, config=config, verbose=1)
if args.as_test:
check_learning_achieved(results, args.stop_reward)
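
The `num_gpus`-from-environment idiom above is the heart of this commit and recurs in nearly every example script that follows; a minimal, self-contained sketch of the pattern (CartPole and PPO chosen only for illustration):

import os
import ray
from ray import tune

ray.init()
config = {
    "env": "CartPole-v0",
    # Use the GPU only when the launcher exports RLLIB_NUM_GPUS > 0, so the
    # same script runs unchanged on CPU-only CI machines and on GPU boxes.
    "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
    "num_workers": 0,
    "framework": "tf",
}
tune.run("PPO", config=config, stop={"training_iteration": 1}, verbose=1)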

View file

@ -1,6 +1,7 @@
"""Example of using a custom model with batch norm."""
import argparse
import os
import ray
from ray import tune
@ -32,6 +33,8 @@ if __name__ == "__main__":
"model": {
"custom_model": "bn_model",
},
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"framework": "torch" if args.torch else "tf",
}
@ -42,7 +45,7 @@ if __name__ == "__main__":
"episode_reward_mean": args.stop_reward,
}
results = tune.run(args.run, stop=stop, config=config)
results = tune.run(args.run, stop=stop, config=config, verbose=1)
if args.as_test:
check_learning_achieved(results, args.stop_reward)

View file

@ -1,4 +1,5 @@
import argparse
import os
from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole
from ray.rllib.utils.test_utils import check_learning_achieved
@ -35,8 +36,11 @@ if __name__ == "__main__":
}
config = dict(
configs[args.run], **{
configs[args.run],
**{
"env": StatelessCartPole,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"model": {
"use_lstm": True,
"lstm_use_prev_action_reward": args.use_prev_action_reward,

View file

@ -16,6 +16,7 @@ modifies the environment.
import argparse
import numpy as np
from gym.spaces import Discrete
import os
import ray
from ray import tune
@ -90,7 +91,7 @@ def centralized_critic_postprocessing(policy,
sample_batch[OPPONENT_OBS], policy.device),
convert_to_torch_tensor(
sample_batch[OPPONENT_ACTION], policy.device)) \
.detach().numpy()
.cpu().detach().numpy()
else:
sample_batch[SampleBatch.VF_PREDS] = policy.compute_central_vf(
sample_batch[SampleBatch.CUR_OBS], sample_batch[OPPONENT_OBS],
@ -137,14 +138,22 @@ def loss_with_central_critic(policy, model, dist_class, train_batch):
return loss
def setup_mixins(policy, obs_space, action_space, config):
# copied from PPO
def setup_tf_mixins(policy, obs_space, action_space, config):
# Copied from PPOTFPolicy (w/o ValueNetworkMixin).
KLCoeffMixin.__init__(policy, config)
EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"],
config["entropy_coeff_schedule"])
LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
def setup_torch_mixins(policy, obs_space, action_space, config):
# Copied from PPOTorchPolicy (w/o ValueNetworkMixin).
TorchKLCoeffMixin.__init__(policy, config)
TorchEntropyCoeffSchedule.__init__(policy, config["entropy_coeff"],
config["entropy_coeff_schedule"])
TorchLR.__init__(policy, config["lr"], config["lr_schedule"])
def central_vf_stats(policy, train_batch, grads):
# Report the explained variance of the central value function.
return {
@ -158,7 +167,7 @@ CCPPOTFPolicy = PPOTFPolicy.with_updates(
name="CCPPOTFPolicy",
postprocess_fn=centralized_critic_postprocessing,
loss_fn=loss_with_central_critic,
before_loss_init=setup_mixins,
before_loss_init=setup_tf_mixins,
grad_stats_fn=central_vf_stats,
mixins=[
LearningRateSchedule, EntropyCoeffSchedule, KLCoeffMixin,
@ -169,7 +178,7 @@ CCPPOTorchPolicy = PPOTorchPolicy.with_updates(
name="CCPPOTorchPolicy",
postprocess_fn=centralized_critic_postprocessing,
loss_fn=loss_with_central_critic,
before_init=setup_mixins,
before_init=setup_torch_mixins,
mixins=[
TorchLR, TorchEntropyCoeffSchedule, TorchKLCoeffMixin,
CentralizedValueMixin
@ -188,7 +197,7 @@ CCTrainer = PPOTrainer.with_updates(
)
if __name__ == "__main__":
ray.init(local_mode=True)
ray.init()
args = parser.parse_args()
ModelCatalog.register_custom_model(
@ -198,6 +207,8 @@ if __name__ == "__main__":
config = {
"env": TwoStepGame,
"batch_mode": "complete_episodes",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"multiagent": {
"policies": {
@ -222,7 +233,7 @@ if __name__ == "__main__":
"episode_reward_mean": args.stop_reward,
}
results = tune.run(CCTrainer, config=config, stop=stop)
results = tune.run(CCTrainer, config=config, stop=stop, verbose=1)
if args.as_test:
check_learning_achieved(results, args.stop_reward)
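
The `.cpu().detach().numpy()` change above is the standard recipe for pulling a possibly CUDA-resident torch tensor back into numpy; a minimal sketch outside of RLlib:

import torch

def to_numpy(t: torch.Tensor):
    # .cpu() copies the tensor to host memory (a no-op if it is already
    # there); calling .numpy() directly on a CUDA tensor raises an error.
    return t.cpu().detach().numpy()

device = "cuda" if torch.cuda.is_available() else "cpu"
print(to_numpy(torch.ones(3, device=device)))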

View file

@ -12,6 +12,7 @@ modifies the policy to add a centralized value function.
import numpy as np
from gym.spaces import Dict, Discrete
import argparse
import os
from ray import tune
from ray.rllib.agents.callbacks import DefaultCallbacks
@ -87,6 +88,8 @@ if __name__ == "__main__":
"env": TwoStepGame,
"batch_mode": "complete_episodes",
"callbacks": FillInActions,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"multiagent": {
"policies": {

View file

@ -8,7 +8,9 @@ For PyTorch / TF eager mode, use the --torch and --eager flags.
"""
import argparse
import os
import ray
from ray import tune
from ray.rllib.models import ModelCatalog
from ray.rllib.examples.env.simple_rpg import SimpleRPG
@ -17,9 +19,10 @@ from ray.rllib.examples.models.simple_rpg_model import CustomTorchRPGModel, \
parser = argparse.ArgumentParser()
parser.add_argument(
"--framework", choices=["tf", "tfe", "torch"], default="tf")
"--framework", choices=["tf2", "tf", "tfe", "torch"], default="tf2")
if __name__ == "__main__":
ray.init()
args = parser.parse_args()
if args.framework == "torch":
ModelCatalog.register_custom_model("my_model", CustomTorchRPGModel)
@ -31,6 +34,8 @@ if __name__ == "__main__":
"env": SimpleRPG,
"rollout_fragment_length": 1,
"train_batch_size": 2,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"model": {
"custom_model": "my_model",

View file

@ -11,6 +11,7 @@ import argparse
import gym
from gym.spaces import Discrete, Box
import numpy as np
import os
import ray
from ray import tune
@ -114,6 +115,8 @@ if __name__ == "__main__":
"env_config": {
"corridor_length": 5,
},
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"model": {
"custom_model": "my_model",
},

View file

@ -67,6 +67,7 @@ Result for PG_SimpleCorridor_0de4e686:
"""
import argparse
import os
import ray
from ray import tune
@ -137,7 +138,9 @@ if __name__ == "__main__":
"corridor_length": 10,
},
"horizon": 20,
"log_level": "INFO",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
# Training rollouts will be collected using just the learner
# process, but evaluation will be done in parallel with two

View file

@ -5,6 +5,7 @@ for running perf microbenchmarks.
"""
import argparse
import os
import ray
import ray.tune as tune
@ -32,7 +33,8 @@ if __name__ == "__main__":
"model": {
"custom_model": "fast_model"
},
"num_gpus": 0,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 2,
"num_envs_per_worker": 10,
"num_data_loader_buffers": 1,
@ -40,7 +42,7 @@ if __name__ == "__main__":
"broadcast_interval": 50,
"rollout_fragment_length": 100,
"train_batch_size": sample_from(
lambda spec: 1000 * max(1, spec.config.num_gpus)),
lambda spec: 1000 * max(1, spec.config.num_gpus or 1)),
"fake_sampler": True,
"framework": "torch" if args.torch else "tf",
}
@ -50,6 +52,6 @@ if __name__ == "__main__":
"timesteps_total": args.stop_timesteps,
}
tune.run("IMPALA", config=config, stop=stop)
tune.run("IMPALA", config=config, stop=stop, verbose=1)
ray.shutdown()

View file

@ -1,6 +1,7 @@
"""Example of using a custom ModelV2 Keras-style model."""
import argparse
import os
import ray
from ray import tune
@ -119,11 +120,12 @@ if __name__ == "__main__":
args.run,
stop={"episode_reward_mean": args.stop},
config=dict(
extra_config, **{
"log_level": "INFO",
extra_config,
**{
"env": "BreakoutNoFrameskip-v4"
if args.use_vision_network else "CartPole-v0",
"num_gpus": 0,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"callbacks": {
"on_train_result": check_has_custom_metric,
},

View file

@ -50,6 +50,8 @@ if __name__ == "__main__":
config = {
"env": "CartPole-v0",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"model": {
"custom_model": "custom_loss",
@ -64,4 +66,4 @@ if __name__ == "__main__":
"training_iteration": args.stop_iters,
}
tune.run("PG", config=config, stop=stop)
tune.run("PG", config=config, stop=stop, verbose=1)

View file

@ -7,14 +7,19 @@ custom metric.
from typing import Dict
import argparse
import numpy as np
import os
import ray
from ray import tune
from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.env import BaseEnv
from ray.rllib.evaluation import MultiAgentEpisode, RolloutWorker
from ray.rllib.policy import Policy
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.evaluation import MultiAgentEpisode, RolloutWorker
from ray.rllib.agents.callbacks import DefaultCallbacks
parser = argparse.ArgumentParser()
parser.add_argument("--torch", action="store_true")
parser.add_argument("--stop-iters", type=int, default=2000)
class MyCallbacks(DefaultCallbacks):
@ -65,8 +70,6 @@ class MyCallbacks(DefaultCallbacks):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--stop-iters", type=int, default=2000)
args = parser.parse_args()
ray.init()
@ -79,7 +82,9 @@ if __name__ == "__main__":
"env": "CartPole-v0",
"num_envs_per_worker": 2,
"callbacks": MyCallbacks,
"framework": "tf",
"framework": "torch" if args.torch else "tf",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
}).trials
# verify custom metrics for integration tests

View file

@ -2,6 +2,7 @@
import argparse
import numpy as np
import os
import ray
from ray import tune
@ -73,6 +74,8 @@ if __name__ == "__main__":
"on_postprocess_traj": on_postprocess_traj,
},
"framework": "tf",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
}).trials
# verify custom metrics for integration tests

View file

@ -1,6 +1,7 @@
"""Example of using a custom RNN keras model."""
import argparse
import os
import ray
from ray import tune
@ -37,6 +38,8 @@ if __name__ == "__main__":
"repeat_delay": 2,
},
"gamma": 0.9,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"num_envs_per_worker": 20,
"entropy_coeff": 0.001,

View file

@ -1,4 +1,5 @@
import argparse
import os
import ray
from ray import tune
@ -50,6 +51,8 @@ if __name__ == "__main__":
stop={"training_iteration": args.stop_iters},
config={
"env": "CartPole-v0",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 2,
"framework": "tf",
})

View file

@ -1,4 +1,5 @@
import argparse
import os
import ray
from ray import tune
@ -36,6 +37,8 @@ if __name__ == "__main__":
stop={"training_iteration": args.stop_iters},
config={
"env": "CartPole-v0",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 2,
"framework": "torch",
})

View file

@ -6,6 +6,7 @@ This example shows:
You can visualize experiment results in ~/ray_results using TensorBoard.
"""
import argparse
import os
import ray
from ray import tune
@ -43,6 +44,8 @@ if __name__ == "__main__":
args = parser.parse_args()
config = {
"lr": 0.01,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"framework": "torch" if args.torch else "tf",
}

View file

@ -1,4 +1,5 @@
import argparse
import os
import random
import ray
@ -58,12 +59,14 @@ MyTrainer = build_trainer(
)
if __name__ == "__main__":
ray.init()
ray.init(local_mode=True)
args = parser.parse_args()
ModelCatalog.register_custom_model("eager_model", EagerModel)
config = {
"env": "CartPole-v0",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"model": {
"custom_model": "eager_model"
@ -76,7 +79,7 @@ if __name__ == "__main__":
"episode_reward_mean": args.stop_reward,
}
results = tune.run(MyTrainer, stop=stop, config=config)
results = tune.run(MyTrainer, stop=stop, config=config, verbose=1)
if args.as_test:
check_learning_achieved(results, args.stop_reward)

View file

@ -25,6 +25,7 @@ using --flat in this example.
import argparse
from gym.spaces import Discrete, Tuple
import logging
import os
import ray
from ray import tune
@ -75,7 +76,6 @@ if __name__ == "__main__":
config = {
"env": HierarchicalWindyMazeEnv,
"num_workers": 0,
"log_level": "INFO",
"entropy_coeff": 0.01,
"multiagent": {
"policies": {
@ -94,6 +94,8 @@ if __name__ == "__main__":
"policy_mapping_fn": function(policy_mapping_fn),
},
"framework": "torch" if args.torch else "tf",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
}
results = tune.run("PPO", stop=stop, config=config, verbose=1)

View file

@ -5,8 +5,9 @@
import argparse
from gym.spaces import Discrete, Box
import numpy as np
import os
from ray.rllib.agents.ppo import PPOTrainer
from ray import tune
from ray.rllib.examples.env.random_env import RandomEnv
from ray.rllib.examples.models.mobilenet_v2_with_lstm_models import \
MobileV2PlusRNNModel, TorchMobileV2PlusRNNModel
@ -21,6 +22,9 @@ cnn_shape_torch = (3, 224, 224)
parser = argparse.ArgumentParser()
parser.add_argument("--torch", action="store_true")
parser.add_argument("--stop-iters", type=int, default=200)
parser.add_argument("--stop-reward", type=float, default=0.0)
parser.add_argument("--stop-timesteps", type=int, default=100000)
if __name__ == "__main__":
args = parser.parse_args()
@ -30,8 +34,15 @@ if __name__ == "__main__":
"my_model", TorchMobileV2PlusRNNModel
if args.torch else MobileV2PlusRNNModel)
stop = {
"training_iteration": args.stop_iters,
"timesteps_total": args.stop_timesteps,
"episode_reward_mean": args.stop_reward,
}
# Configure our Trainer.
config = {
"env": RandomEnv,
"framework": "torch" if args.torch else "tf",
"model": {
"custom_model": "my_model",
@ -42,6 +53,8 @@ if __name__ == "__main__":
"max_seq_len": 20,
},
"vf_share_layers": True,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0, # no parallelism
"env_config": {
"action_space": Discrete(2),
@ -54,5 +67,4 @@ if __name__ == "__main__":
},
}
trainer = PPOTrainer(config=config, env=RandomEnv)
print(trainer.train())
tune.run("PPO", config=config, stop=stop, verbose=1)

View file

@ -131,8 +131,8 @@ class TorchBinaryAutoregressiveDistribution(TorchDistributionWrapper):
def _a1_distribution(self):
BATCH = self.inputs.shape[0]
a1_logits, _ = self.model.action_module(self.inputs,
torch.zeros((BATCH, 1)))
zeros = torch.zeros((BATCH, 1)).to(self.inputs.device)
a1_logits, _ = self.model.action_module(self.inputs, zeros)
a1_dist = TorchCategorical(a1_logits)
return a1_dist

View file

@ -116,7 +116,7 @@ class TorchCustomLossModel(TorchModelV2, nn.Module):
# Define a secondary loss by building a graph copy with weight sharing.
obs = restore_original_dimensions(
torch.from_numpy(batch["obs"]).float(),
torch.from_numpy(batch["obs"]).float().to(policy_loss[0].device),
self.obs_space,
tensorlib="torch")
logits, _ = self.forward({"obs": obs}, [], None)
@ -130,8 +130,8 @@ class TorchCustomLossModel(TorchModelV2, nn.Module):
# Compute the IL loss.
action_dist = TorchCategorical(logits, self.model_config)
imitation_loss = torch.mean(
-action_dist.logp(torch.from_numpy(batch["actions"])))
imitation_loss = torch.mean(-action_dist.logp(
torch.from_numpy(batch["actions"]).to(policy_loss[0].device)))
self.imitation_loss_metric = imitation_loss.item()
self.policy_loss_metric = np.mean([l.item() for l in policy_loss])

View file

@ -57,8 +57,8 @@ class TorchFastModel(TorchModelV2, nn.Module):
model_config, name)
nn.Module.__init__(self)
self.bias = torch.tensor(
[0.0], dtype=torch.float32, requires_grad=True)
self.bias = nn.Parameter(
torch.tensor([0.0], dtype=torch.float32, requires_grad=True))
# Only needed to give some params to the optimizer (even though,
# they are never used anywhere).
@ -67,8 +67,9 @@ class TorchFastModel(TorchModelV2, nn.Module):
@override(ModelV2)
def forward(self, input_dict, state, seq_lens):
self._output = self.bias + \
torch.zeros(size=(input_dict["obs"].shape[0], self.num_outputs))
self._output = self.bias + torch.zeros(
size=(input_dict["obs"].shape[0], self.num_outputs)).to(
self.bias.device)
return self._output, []
@override(ModelV2)
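
Two GPU fixes appear in the hunk above: the bias is wrapped in `nn.Parameter` so it is registered with the module (and therefore moved by `model.to(device)` and visible to the optimizer), and tensors created in `forward` are allocated on the parameter's device. A hedged standalone sketch (the `BiasOnly` module is hypothetical, not an RLlib class):

import torch
import torch.nn as nn

class BiasOnly(nn.Module):
    def __init__(self, num_outputs: int):
        super().__init__()
        # A bare tensor attribute would be left behind on the CPU when the
        # model is moved; nn.Parameter registers it with the module.
        self.bias = nn.Parameter(torch.zeros(1))
        self.num_outputs = num_outputs

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        # Allocate new tensors on whatever device the parameters live on.
        zeros = torch.zeros(obs.shape[0], self.num_outputs,
                            device=self.bias.device)
        return self.bias + zeros

model = BiasOnly(4).to("cuda" if torch.cuda.is_available() else "cpu")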

View file

@ -89,14 +89,15 @@ class MobileV2PlusRNNModel(RecurrentNetwork):
return tf.reshape(self._value_out, [-1])
class TorchMobileV2PlusRNNModel(TorchRNN):
class TorchMobileV2PlusRNNModel(TorchRNN, nn.Module):
"""A conv. + recurrent torch net example using a pre-trained MobileNet."""
def __init__(self, obs_space, action_space, num_outputs, model_config,
name, cnn_shape):
super().__init__(obs_space, action_space, num_outputs, model_config,
name)
TorchRNN.__init__(self, obs_space, action_space, num_outputs,
model_config, name)
nn.Module.__init__(self)
self.lstm_state_size = 16
self.cnn_shape = list(cnn_shape)
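
The model above now inherits from both the RLlib recurrent base class and `nn.Module`, with each `__init__` called explicitly; a small generic sketch of that pattern (`Base` and `MyRNNModel` are illustrative names, not RLlib classes):

import torch.nn as nn

class Base:
    """Stand-in for a framework base class that is not an nn.Module."""
    def __init__(self, name: str):
        self.name = name

class MyRNNModel(Base, nn.Module):
    def __init__(self, name: str):
        # Call both initializers explicitly; nn.Module.__init__ must run
        # before any parameters or submodules are assigned.
        Base.__init__(self, name)
        nn.Module.__init__(self)
        self.rnn = nn.LSTM(input_size=8, hidden_size=16, batch_first=True)

model = MyRNNModel("example")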

View file

@ -125,12 +125,13 @@ class TorchSharedWeightsModel(TorchModelV2, nn.Module):
activation_fn=None,
initializer=torch.nn.init.xavier_uniform_,
)
self._global_shared_layer = TORCH_GLOBAL_SHARED_LAYER
self._output = None
@override(ModelV2)
def forward(self, input_dict, state, seq_lens):
out = self.first_layer(input_dict["obs"])
self._output = TORCH_GLOBAL_SHARED_LAYER(out)
self._output = self._global_shared_layer(out)
model_out = self.last_layer(self._output)
return model_out, []
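
Storing the globally shared layer on `self` (instead of calling the module-level object directly in `forward`) presumably registers it as a submodule, so `.parameters()` and `.to(device)` pick up the shared weights as well; a hedged sketch of the idea:

import torch.nn as nn

# One layer instance shared by several models (weight sharing).
SHARED_LAYER = nn.Linear(8, 8)

class SharedWeightsModel(nn.Module):
    def __init__(self):
        super().__init__()
        # Assigning the shared module to an attribute registers it, so a
        # later model.to("cuda") also moves the shared weights.
        self._shared = SHARED_LAYER
        self.head = nn.Linear(8, 2)

    def forward(self, x):
        return self.head(self._shared(x))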

View file

@ -11,6 +11,7 @@ execution, set the TF_TIMELINE_DIR environment variable.
import argparse
import gym
import os
import random
import ray
@ -75,6 +76,8 @@ if __name__ == "__main__":
"num_agents": args.num_agents,
},
"simple_optimizer": args.simple,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_sgd_iter": 10,
"multiagent": {
"policies": policies,

View file

@ -15,6 +15,7 @@ Result for PG_multi_cartpole_0:
import argparse
import gym
import os
import ray
from ray import tune
@ -60,6 +61,8 @@ if __name__ == "__main__":
lambda agent_id: ["pg_policy", "random"][agent_id % 2]),
},
"framework": "torch" if args.torch else "tf",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
}
results = tune.run("PG", config=config, stop=stop, verbose=1)

View file

@ -10,6 +10,7 @@ For a simpler example, see also: multiagent_cartpole.py
import argparse
import gym
import os
import ray
from ray.rllib.agents.dqn import DQNTrainer, DQNTFPolicy, DQNTorchPolicy
@ -38,9 +39,9 @@ if __name__ == "__main__":
# Simple environment with 4 independent cartpole entities
register_env("multi_agent_cartpole",
lambda _: MultiAgentCartPole({"num_agents": 4}))
single_env = gym.make("CartPole-v0")
obs_space = single_env.observation_space
act_space = single_env.action_space
single_dummy_env = gym.make("CartPole-v0")
obs_space = single_dummy_env.observation_space
act_space = single_dummy_env.action_space
# You can also have multiple policies per trainer, but here we just
# show one each for PPO and DQN.
@ -69,6 +70,8 @@ if __name__ == "__main__":
# disable filters, otherwise we would need to synchronize those
# as well to the DQN agent
"observation_filter": "NoFilter",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"framework": "torch" if args.torch else "tf",
})
@ -82,6 +85,8 @@ if __name__ == "__main__":
},
"gamma": 0.95,
"n_step": 3,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"framework": "torch" if args.torch or args.mixed_torch_tf else "tf"
})

View file

@ -1,5 +1,6 @@
import argparse
from gym.spaces import Dict, Tuple, Box, Discrete
import os
import ray
import ray.tune as tune
@ -40,6 +41,8 @@ if __name__ == "__main__":
"gamma": 0.0, # No history in Env (bandit problem).
"lr": 0.0005,
"num_envs_per_worker": 20,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_sgd_iter": 4,
"num_workers": 0,
"vf_loss_coeff": 0.01,

View file

@ -15,6 +15,7 @@ Working configurations are given below.
"""
import argparse
import os
import ray
from ray import tune
@ -55,14 +56,18 @@ if __name__ == "__main__":
else:
cfg = {}
config = dict({
"env": "pa_cartpole",
"model": {
"custom_model": "pa_model",
config = dict(
{
"env": "pa_cartpole",
"model": {
"custom_model": "pa_model",
},
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"framework": "torch" if args.torch else "tf",
},
"num_workers": 0,
"framework": "torch" if args.torch else "tf",
}, **cfg)
**cfg)
stop = {
"training_iteration": args.stop_iters,

View file

@ -1,15 +1,13 @@
from copy import deepcopy
import ray
try:
from ray.rllib.agents.agent import get_agent_class
except ImportError:
from ray.rllib.agents.registry import get_agent_class
from ray.tune.registry import register_env
from ray.rllib.env import PettingZooEnv
from numpy import float32
import os
from pettingzoo.butterfly import pistonball_v0
from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
from numpy import float32
import ray
from ray.rllib.agents.registry import get_agent_class
from ray.rllib.env import PettingZooEnv
from ray.tune.registry import register_env
if __name__ == "__main__":
"""For this script, you need:
@ -37,7 +35,7 @@ if __name__ == "__main__":
config = deepcopy(get_agent_class(alg_name)._default_config)
# 2. Set environment config. This will be passed to
# the env_creator function via the register env lambda below
# the env_creator function via the register env lambda below.
config["env_config"] = {"local_ratio": 0.5}
# 3. Register env
@ -58,6 +56,8 @@ if __name__ == "__main__":
"policy_mapping_fn": lambda agent_id: "av"
}
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
config["num_gpus"] = int(os.environ.get("RLLIB_NUM_GPUS", "0"))
config["log_level"] = "DEBUG"
config["num_workers"] = 1
# Fragment length, collected at once from each worker and for each agent!

View file

@ -9,6 +9,7 @@ This demonstrates running the following policies in competition:
import argparse
from gym.spaces import Discrete
import os
import random
from ray import tune
@ -63,6 +64,8 @@ def run_heuristic_vs_learned(args, use_lstm=False, trainer="PG"):
config = {
"env": RockPaperScissors,
"gamma": 0.9,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"num_envs_per_worker": 4,
"rollout_fragment_length": 10,

View file

@ -8,6 +8,7 @@ collection and policy optimization.
import argparse
import gym
import numpy as np
import os
import ray
from ray import tune
@ -32,6 +33,7 @@ class CustomPolicy(Policy):
def __init__(self, observation_space, action_space, config):
super().__init__(observation_space, action_space, config)
self.config["framework"] = None
# example parameter
self.w = 1.0
@ -107,7 +109,8 @@ if __name__ == "__main__":
tune.run(
training_workflow,
resources_per_trial={
"gpu": 1 if args.gpu else 0,
"gpu": 1 if args.gpu
or int(os.environ.get("RLLIB_FORCE_NUM_GPUS", 0)) else 0,
"cpu": 1,
"extra_cpu": args.num_workers,
},
@ -115,4 +118,5 @@ if __name__ == "__main__":
"num_workers": args.num_workers,
"num_iters": args.num_iters,
},
verbose=1,
)
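
For this function-trainable workflow (which does not go through RLlib's `num_gpus`), the GPU is requested via Tune's `resources_per_trial`, gated on the `RLLIB_FORCE_NUM_GPUS` env var; a trimmed-down sketch (the `training_workflow` body here is only a placeholder):

import os
from ray import tune

def training_workflow(config):
    # Placeholder for the custom sampling / policy-optimization loop.
    tune.report(episode_reward_mean=0.0)

tune.run(
    training_workflow,
    resources_per_trial={
        # Reserve one GPU for the trial iff the env var asks for it.
        "gpu": 1 if int(os.environ.get("RLLIB_FORCE_NUM_GPUS", 0)) else 0,
        "cpu": 1,
    },
    verbose=1,
)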

View file

@ -69,7 +69,7 @@ parser.add_argument(
if __name__ == "__main__":
args = parser.parse_args()
ray.init(local_mode=True)
ray.init()
# Create a fake-env for the server. This env will never be used (neither
# for sampling, nor for evaluation) and its obs/action Spaces do not

View file

@ -10,6 +10,7 @@ See also: centralized_critic.py for centralized critic PPO on this game.
import argparse
from gym.spaces import Tuple, MultiDiscrete, Dict, Discrete
import os
import ray
from ray import tune
@ -77,6 +78,8 @@ if __name__ == "__main__":
"policy_mapping_fn": lambda x: "pol1" if x == 0 else "pol2",
},
"framework": "torch" if args.torch else "tf",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
}
group = False
elif args.run == "QMIX":
@ -93,11 +96,17 @@ if __name__ == "__main__":
"separate_state_space": True,
"one_hot_state_encoding": True
},
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"framework": "torch" if args.torch else "tf",
}
group = True
else:
config = {"framework": "torch" if args.torch else "tf"}
config = {
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"framework": "torch" if args.torch else "tf",
}
group = False
ray.init(num_cpus=args.num_cpus or None)

View file

@ -7,6 +7,7 @@ via a custom training workflow.
import argparse
import gym
import os
import ray
from ray import tune
@ -139,6 +140,8 @@ if __name__ == "__main__":
"policy_mapping_fn": policy_mapping_fn,
"policies_to_train": ["dqn_policy", "ppo_policy"],
},
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"framework": "torch" if args.torch else "tf",
}

View file

@ -21,6 +21,7 @@ $ python unity3d_env_local.py --env 3DBall --stop-reward [..] [--torch]?
"""
import argparse
import os
import ray
from ray import tune
@ -99,6 +100,8 @@ if __name__ == "__main__":
"gamma": 0.99,
"sgd_minibatch_size": 256,
"train_batch_size": 4000,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_sgd_iter": 20,
"rollout_fragment_length": 200,
"clip_param": 0.2,

View file

@ -307,7 +307,7 @@ class ModelCatalog:
model_cls = ModelCatalog._wrap_if_needed(model_cls,
model_interface)
if framework in ["tf", "tfe"]:
if framework in ["tf2", "tf", "tfe"]:
# Track and warn if vars were created but not registered.
created = set()
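
Many of the remaining hunks simply widen framework checks from ["tf", "tfe"] to ["tf2", "tf", "tfe"] so that TF2-eager runs take the TensorFlow code paths too; a trivial sketch of the convention (the helper name is made up):

def uses_tf(framework: str) -> bool:
    # "tf" (graph mode), "tf2" and "tfe" (eager variants) should all hit
    # the TensorFlow branches; "torch" takes the PyTorch branches.
    return framework in ("tf2", "tf", "tfe")

assert uses_tf("tf2") and not uses_tf("torch")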

View file

@ -423,7 +423,7 @@ class TestDistributions(unittest.TestCase):
def test_gumbel_softmax(self):
"""Tests the GumbelSoftmax ActionDistribution (tf + eager only)."""
for fw, sess in framework_iterator(
frameworks=["tf", "tfe"], session=True):
frameworks=("tf2", "tf", "tfe"), session=True):
batch_size = 1000
num_categories = 5
input_space = Box(-1.0, 1.0, shape=(batch_size, num_categories))

View file

@ -200,7 +200,7 @@ def build_eager_tf_policy(name,
class eager_policy_cls(base):
def __init__(self, observation_space, action_space, config):
assert tf.executing_eagerly()
self.framework = "tfe"
self.framework = config.get("framework", "tfe")
Policy.__init__(self, observation_space, action_space, config)
self._is_training = False
self._loss_initialized = False

View file

@ -88,7 +88,7 @@ class SampleBatch:
@staticmethod
@PublicAPI
def concat_samples(samples: List[Dict[str, TensorType]]) -> \
def concat_samples(samples: List["SampleBatch"]) -> \
Union["SampleBatch", "MultiAgentBatch"]:
"""Concatenates n data dicts or MultiAgentBatches.

View file

@ -392,7 +392,7 @@ class TorchPolicy(Policy):
grad_info["allreduce_latency"] += time.time() - start
# Step the optimizer
# Step the optimizers.
for i, opt in enumerate(self._optimizers):
opt.step()

View file

@ -66,7 +66,8 @@ def build_torch_policy(
mixins: Optional[List[type]] = None,
view_requirements_fn: Optional[Callable[[], Dict[
str, ViewRequirement]]] = None,
get_batch_divisibility_req: Optional[Callable[[Policy], int]] = None):
get_batch_divisibility_req: Optional[Callable[[Policy], int]] = None
) -> Type[TorchPolicy]:
"""Helper function for creating a torch policy class at runtime.
Args:
@ -167,7 +168,8 @@ def build_torch_policy(
sample batches. If None, will assume a value of 1.
Returns:
type: TorchPolicy child class constructed from the specified args.
Type[TorchPolicy]: TorchPolicy child class constructed from the
specified args.
"""
original_kwargs = locals().copy()

View file

@ -96,7 +96,7 @@ def ckpt_restore_test(alg_name, tfe=False):
if optim_state:
s2 = alg2.get_policy().get_state().get("_optimizer_variables")
# Tf -> Compare states 1:1.
if fw in ["tf", "tfe"]:
if fw in ["tf2", "tf", "tfe"]:
check(s2, optim_state)
# For torch, optimizers have state_dicts with keys=params,
# which are different for the two models (ignore these

View file

@ -59,22 +59,22 @@ __all__ = [
"add_mixins",
"check",
"check_compute_single_action",
"deep_update",
"deprecation_warning",
"fc",
"force_list",
"force_tuple",
"framework_iterator",
"lstm",
"one_hot",
"relu",
"sigmoid",
"softmax",
"deep_update",
"merge_dicts",
"one_hot",
"override",
"relu",
"renamed_function",
"renamed_agent",
"renamed_class",
"sigmoid",
"softmax",
"try_import_tf",
"try_import_tfp",
"try_import_torch",

View file

@ -211,7 +211,7 @@ class Curiosity(Exploration):
})
phi, next_phi = torch.chunk(phis, 2)
actions_tensor = torch.from_numpy(
sample_batch[SampleBatch.ACTIONS]).long()
sample_batch[SampleBatch.ACTIONS]).long().to(policy.device)
# Predict next phi with forward model.
predicted_next_phi = self.model._curiosity_forward_fcnet(

View file

@ -57,7 +57,7 @@ class EpsilonGreedy(Exploration):
0, framework=framework, tf_name="timestep")
# Build the tf-info-op.
if self.framework in ["tf", "tfe"]:
if self.framework in ["tf2", "tf", "tfe"]:
self._tf_info_op = self.get_info()
@override(Exploration)
@ -68,7 +68,7 @@ class EpsilonGreedy(Exploration):
explore: bool = True):
q_values = action_distribution.inputs
if self.framework in ["tf", "tfe"]:
if self.framework in ["tf2", "tf", "tfe"]:
return self._get_tf_exploration_action_op(q_values, explore,
timestep)
else:

View file

@ -72,7 +72,7 @@ class GaussianNoise(Exploration):
0, framework=self.framework, tf_name="timestep")
# Build the tf-info-op.
if self.framework in ["tf", "tfe"]:
if self.framework in ["tf2", "tf", "tfe"]:
self._tf_info_op = self.get_info()
@override(Exploration)

View file

@ -291,7 +291,7 @@ class ParameterNoise(Exploration):
"""Samples new noise and stores it in `self.noise`."""
if self.framework == "tf":
tf_sess.run(self.tf_sample_new_noise_op)
elif self.framework == "tfe":
elif self.framework in ["tfe", "tf2"]:
self._tf_sample_new_noise_op()
else:
for i in range(len(self.noise)):
@ -340,7 +340,7 @@ class ParameterNoise(Exploration):
# Add stored noise to the model's parameters.
if self.framework == "tf":
tf_sess.run(self.tf_add_stored_noise_op)
elif self.framework == "tfe":
elif self.framework in ["tf2", "tfe"]:
self._tf_add_stored_noise_op()
else:
for i in range(len(self.noise)):
@ -378,7 +378,7 @@ class ParameterNoise(Exploration):
# Removes the stored noise from the model's parameters.
if self.framework == "tf":
tf_sess.run(self.tf_remove_noise_op)
elif self.framework == "tfe":
elif self.framework in ["tf2", "tfe"]:
self._tf_remove_noise_op()
else:
for var, noise in zip(self.model_variables, self.noise):

View file

@ -46,7 +46,7 @@ class Random(Exploration):
timestep: Union[int, TensorType],
explore: bool = True):
# Instantiate the distribution object.
if self.framework in ["tf", "tfe"]:
if self.framework in ["tf2", "tf", "tfe"]:
return self.get_tf_exploration_action_op(action_distribution,
explore)
else:

View file

@ -190,7 +190,7 @@ def get_variable(value,
any: A framework-specific variable (tf.Variable, torch.tensor, or
python primitive).
"""
if framework in ["tf", "tfe"]:
if framework in ["tf2", "tf", "tfe"]:
import tensorflow as tf
dtype = dtype or getattr(
value, "dtype", tf.float32