diff --git a/doc/source/data/_examples/doc_code/quick_start.py b/doc/source/data/_examples/doc_code/quick_start.py
index d68f1d08c..913c8b2b1 100644
--- a/doc/source/data/_examples/doc_code/quick_start.py
+++ b/doc/source/data/_examples/doc_code/quick_start.py
@@ -1,5 +1,5 @@
 # flake8: noqa
-# yapf: disable
+# fmt: off
 # __data_setup_begin__
diff --git a/doc/source/ray-core/_examples/doc_code/runtime_env_example.py b/doc/source/ray-core/_examples/doc_code/runtime_env_example.py
index c761ae278..9cfd207c9 100644
--- a/doc/source/ray-core/_examples/doc_code/runtime_env_example.py
+++ b/doc/source/ray-core/_examples/doc_code/runtime_env_example.py
@@ -8,7 +8,7 @@ in the documentation.
 """
 import ray
-# yapf: disable
+# fmt: off
 # __runtime_env_conda_def_start__
 runtime_env = {
diff --git a/doc/source/ray-core/_examples/doc_code/tf_example.py b/doc/source/ray-core/_examples/doc_code/tf_example.py
index 97e1a68f0..b5ac9cdfd 100644
--- a/doc/source/ray-core/_examples/doc_code/tf_example.py
+++ b/doc/source/ray-core/_examples/doc_code/tf_example.py
@@ -7,7 +7,7 @@ but we put comments right after code blocks to prevent large white spaces
 in the documentation.
 """
-# yapf: disable
+# fmt: off
 # __tf_model_start__
@@ -28,9 +28,9 @@ def create_keras_model():
 metrics=[keras.metrics.categorical_accuracy])
 return model
 # __tf_model_end__
-# yapf: enable
+# fmt: on
-# yapf: disable
+# fmt: off
 # __ray_start__
 import ray
 import numpy as np
@@ -65,17 +65,17 @@ class Network(object):
 # Note that for simplicity this does not handle the optimizer state.
 self.model.set_weights(weights)
 # __ray_end__
-# yapf: enable
+# fmt: on
-# yapf: disable
+# fmt: off
 # __actor_start__
 NetworkActor = Network.remote()
 result_object_ref = NetworkActor.train.remote()
 ray.get(result_object_ref)
 # __actor_end__
-# yapf: enable
+# fmt: on
-# yapf: disable
+# fmt: off
 # __weight_average_start__
 NetworkActor2 = Network.remote()
 NetworkActor2.train.remote()
diff --git a/doc/source/ray-core/_examples/doc_code/torch_example.py b/doc/source/ray-core/_examples/doc_code/torch_example.py
index d55569a17..5964d2a64 100644
--- a/doc/source/ray-core/_examples/doc_code/torch_example.py
+++ b/doc/source/ray-core/_examples/doc_code/torch_example.py
@@ -6,7 +6,7 @@ It ignores yapf because yapf doesn't allow comments right after code blocks,
 but we put comments right after code blocks to prevent large white spaces
 in the documentation.
 """
-# yapf: disable
+# fmt: off
 # __torch_model_start__
 import argparse
@@ -35,9 +35,9 @@ class Model(nn.Module):
 # __torch_model_end__
-# yapf: enable
+# fmt: on
-# yapf: disable
+# fmt: off
 # __torch_helper_start__
 from filelock import FileLock
 from torchvision import datasets, transforms
@@ -112,9 +112,9 @@ def dataset_creator(use_cuda, data_dir):
 # __torch_helper_end__
-# yapf: enable
+# fmt: on
-# yapf: disable
+# fmt: off
 # __torch_net_start__
 import torch.optim as optim
@@ -155,9 +155,9 @@ args = parser.parse_args()
 net = Network(data_dir=args.data_dir)
 net.train()
 # __torch_net_end__
-# yapf: enable
+# fmt: on
-# yapf: disable
+# fmt: off
 # __torch_ray_start__
 import ray
@@ -167,18 +167,18 @@ RemoteNetwork = ray.remote(Network)
 # Use the below instead of `ray.remote(network)` to leverage the GPU.
 # RemoteNetwork = ray.remote(num_gpus=1)(Network)
 # __torch_ray_end__
-# yapf: enable
+# fmt: on
-# yapf: disable
+# fmt: off
 # __torch_actor_start__
 NetworkActor = RemoteNetwork.remote()
 NetworkActor2 = RemoteNetwork.remote()
 ray.get([NetworkActor.train.remote(), NetworkActor2.train.remote()])
 # __torch_actor_end__
-# yapf: enable
+# fmt: on
-# yapf: disable
+# fmt: off
 # __weight_average_start__
 weights = ray.get(
 [NetworkActor.get_weights.remote(),
diff --git a/doc/source/serve/_examples/doc_code/quick_start.py b/doc/source/serve/_examples/doc_code/quick_start.py
index 99034741c..30397d55b 100644
--- a/doc/source/serve/_examples/doc_code/quick_start.py
+++ b/doc/source/serve/_examples/doc_code/quick_start.py
@@ -1,5 +1,5 @@
 # flake8: noqa
-# yapf: disable
+# fmt: off
 # __serve_example_begin__
 import requests
diff --git a/python/ray/serve/examples/doc/tutorial_batch.py b/python/ray/serve/examples/doc/tutorial_batch.py
index c7ef2ad6e..972c43e5d 100644
--- a/python/ray/serve/examples/doc/tutorial_batch.py
+++ b/python/ray/serve/examples/doc/tutorial_batch.py
@@ -1,4 +1,4 @@
-# yapf: disable
+# fmt: off
 # __doc_import_begin__
 from typing import List
 import time
@@ -10,7 +10,7 @@ from starlette.requests import Request
 import ray
 from ray import serve
 # __doc_import_end__
-# yapf: enable
+# fmt: on
 # __doc_define_servable_begin__
diff --git a/python/ray/serve/examples/doc/tutorial_pytorch.py b/python/ray/serve/examples/doc/tutorial_pytorch.py
index fe3f38276..38ef70108 100644
--- a/python/ray/serve/examples/doc/tutorial_pytorch.py
+++ b/python/ray/serve/examples/doc/tutorial_pytorch.py
@@ -1,4 +1,4 @@
-# yapf: disable
+# fmt: off
 import ray
 # __doc_import_begin__
 from ray import serve
@@ -11,7 +11,7 @@ import torch
 from torchvision import transforms
 from torchvision.models import resnet18
 # __doc_import_end__
-# yapf: enable
+# fmt: on
 # __doc_define_servable_begin__
diff --git a/python/ray/serve/examples/doc/tutorial_sklearn.py b/python/ray/serve/examples/doc/tutorial_sklearn.py
index 3c3ad1b69..6c6fe4fb7 100644
--- a/python/ray/serve/examples/doc/tutorial_sklearn.py
+++ b/python/ray/serve/examples/doc/tutorial_sklearn.py
@@ -1,4 +1,4 @@
-# yapf: disable
+# fmt: off
 import ray
 # __doc_import_begin__
 from ray import serve
@@ -14,7 +14,7 @@ from sklearn.datasets import load_iris
 from sklearn.ensemble import GradientBoostingClassifier
 from sklearn.metrics import mean_squared_error
 # __doc_import_end__
-# yapf: enable
+# fmt: on
 # __doc_train_model_begin__
 # Load data
diff --git a/python/ray/serve/examples/doc/tutorial_tensorflow.py b/python/ray/serve/examples/doc/tutorial_tensorflow.py
index 7497b66b6..2964d57b8 100644
--- a/python/ray/serve/examples/doc/tutorial_tensorflow.py
+++ b/python/ray/serve/examples/doc/tutorial_tensorflow.py
@@ -1,4 +1,4 @@
-# yapf: disable
+# fmt: off
 import ray
 # __doc_import_begin__
 from ray import serve
@@ -8,7 +8,7 @@ import tempfile
 import numpy as np
 import requests
 # __doc_import_end__
-# yapf: enable
+# fmt: on
 # __doc_train_model_begin__
 TRAINED_MODEL_PATH = os.path.join(tempfile.gettempdir(), "mnist_model.h5")
diff --git a/python/ray/tests/test_resource_demand_scheduler.py b/python/ray/tests/test_resource_demand_scheduler.py
index 48b010b48..69e718b57 100644
--- a/python/ray/tests/test_resource_demand_scheduler.py
+++ b/python/ray/tests/test_resource_demand_scheduler.py
@@ -289,7 +289,7 @@ def test_add_min_workers_nodes():
 # Formatting is disabled to prevent Black from erroring while formatting
 # this file. See https://github.com/ray-project/ray/issues/21313 for more
 # information.
- # yapf: disable
+ # fmt: off
 assert _add_min_workers_nodes([], {}, types, None, None, None) == \
@@ -336,7 +336,7 @@ def test_add_min_workers_nodes():
 }, {
 "gpubla": 10
 })
- # yapf: enable
+ # fmt: on
 def test_get_nodes_to_launch_with_min_workers():
diff --git a/python/ray/train/examples/tensorflow_quick_start.py b/python/ray/train/examples/tensorflow_quick_start.py
index f1111510f..090785313 100644
--- a/python/ray/train/examples/tensorflow_quick_start.py
+++ b/python/ray/train/examples/tensorflow_quick_start.py
@@ -1,5 +1,5 @@
 # flake8: noqa
-# yapf: disable
+# fmt: off
 # __tf_setup_begin__
diff --git a/python/ray/train/examples/torch_quick_start.py b/python/ray/train/examples/torch_quick_start.py
index 27f784071..8911a37c7 100644
--- a/python/ray/train/examples/torch_quick_start.py
+++ b/python/ray/train/examples/torch_quick_start.py
@@ -1,5 +1,5 @@
 # flake8: noqa
-# yapf: disable
+# fmt: off
 # __torch_setup_begin__
 import torch
diff --git a/python/ray/tune/examples/cifar10_pytorch.py b/python/ray/tune/examples/cifar10_pytorch.py
index 2abfa25d9..5cad9100a 100644
--- a/python/ray/tune/examples/cifar10_pytorch.py
+++ b/python/ray/tune/examples/cifar10_pytorch.py
@@ -1,5 +1,5 @@
 # flake8: noqa
-# yapf: disable
+# fmt: off
 # __import_begin__
 from functools import partial
diff --git a/python/ray/tune/examples/mnist_pytorch_lightning.py b/python/ray/tune/examples/mnist_pytorch_lightning.py
index 1fb720e3e..e48c12e28 100644
--- a/python/ray/tune/examples/mnist_pytorch_lightning.py
+++ b/python/ray/tune/examples/mnist_pytorch_lightning.py
@@ -1,5 +1,5 @@
 # flake8: noqa
-# yapf: disable
+# fmt: off
 # __import_lightning_begin__
 import math
diff --git a/python/ray/tune/examples/mnist_pytorch_trainable.py b/python/ray/tune/examples/mnist_pytorch_trainable.py
index c734cd7bd..45257ed2b 100644
--- a/python/ray/tune/examples/mnist_pytorch_trainable.py
+++ b/python/ray/tune/examples/mnist_pytorch_trainable.py
@@ -28,7 +28,7 @@ parser.add_argument(
 # Below comments are for documentation purposes only.
-# yapf: disable
+# fmt: off
 # __trainable_example_begin__
 class TrainMNIST(tune.Trainable):
 def setup(self, config):
@@ -57,7 +57,7 @@ class TrainMNIST(tune.Trainable):
 # __trainable_example_end__
-# yapf: enable
+# fmt: on
 if __name__ == "__main__":
 args = parser.parse_args()
diff --git a/python/ray/tune/examples/pbt_convnet_example.py b/python/ray/tune/examples/pbt_convnet_example.py
index ee62a044a..48074e76c 100644
--- a/python/ray/tune/examples/pbt_convnet_example.py
+++ b/python/ray/tune/examples/pbt_convnet_example.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # flake8: noqa
-# yapf: disable
+# fmt: off
 # __tutorial_imports_begin__
 import argparse
diff --git a/python/ray/tune/result.py b/python/ray/tune/result.py
index d18d57eb6..42d8661e2 100644
--- a/python/ray/tune/result.py
+++ b/python/ray/tune/result.py
@@ -1,6 +1,6 @@
 import os
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 # (Optional/Auto-filled) training is terminated. Filled only if not provided.
 DONE = "done"
@@ -60,7 +60,7 @@ TIME_TOTAL_S = "time_total_s"
 # (Auto-filled) The index of this training iteration.
 TRAINING_ITERATION = "training_iteration"
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 DEFAULT_EXPERIMENT_INFO_KEYS = ("trainable_name", EXPERIMENT_TAG, TRIAL_ID)
diff --git a/python/ray/tune/tests/ext_pytorch.py b/python/ray/tune/tests/ext_pytorch.py
index 7f0abf989..b8864f9dc 100644
--- a/python/ray/tune/tests/ext_pytorch.py
+++ b/python/ray/tune/tests/ext_pytorch.py
@@ -1,5 +1,5 @@
 # flake8: noqa
-# yapf: disable
+# fmt: off
 # External PyTorch tutorial (https://github.com/pytorch/tutorials/pull/1066)
 # If this script fails, fix it and submit a PR to pytorch/tutorials.
diff --git a/python/ray/tune/tests/tutorial.py b/python/ray/tune/tests/tutorial.py
index 42dd8b1b6..5f829982e 100644
--- a/python/ray/tune/tests/tutorial.py
+++ b/python/ray/tune/tests/tutorial.py
@@ -1,7 +1,7 @@
 # flake8: noqa
 # Original Code: https://github.com/pytorch/examples/blob/master/mnist/main.py
-# yapf: disable
+# fmt: off
 # __tutorial_imports_begin__
 import numpy as np
 import torch
@@ -14,10 +14,10 @@ import torch.nn.functional as F
 from ray import tune
 from ray.tune.schedulers import ASHAScheduler
 # __tutorial_imports_end__
-# yapf: enable
+# fmt: on
-# yapf: disable
+# fmt: off
 # __model_def_begin__
 class ConvNet(nn.Module):
 def __init__(self):
@@ -33,9 +33,9 @@ class ConvNet(nn.Module):
 x = self.fc(x)
 return F.log_softmax(x, dim=1)
 # __model_def_end__
-# yapf: enable
+# fmt: on
-# yapf: disable
+# fmt: off
 # __train_def_begin__
 # Change these values if you want the training to run quicker or slower.
@@ -111,7 +111,7 @@ def train_mnist(config):
 # This saves the model to the trial directory
 torch.save(model.state_dict(), "./model.pth")
 # __train_func_end__
-# yapf: enable
+# fmt: on
 # __eval_func_begin__
 search_space = {
@@ -145,14 +145,14 @@ analysis = tune.run(
 dfs = analysis.trial_dataframes
 # __run_scheduler_end__
-# yapf: disable
+# fmt: off
 # __plot_scheduler_begin__
 # Plot by epoch
 ax = None # This plots everything on the same plot
 for d in dfs.values():
 ax = d.mean_accuracy.plot(ax=ax, legend=False)
 # __plot_scheduler_end__
-# yapf: enable
+# fmt: on
 # __run_searchalg_begin__
 from hyperopt import hp
diff --git a/python/ray/util/sgd/torch/examples/raysgd_torch_signatures.py b/python/ray/util/sgd/torch/examples/raysgd_torch_signatures.py
index 1bde9a541..4ed3c0006 100644
--- a/python/ray/util/sgd/torch/examples/raysgd_torch_signatures.py
+++ b/python/ray/util/sgd/torch/examples/raysgd_torch_signatures.py
@@ -5,7 +5,7 @@ It ignores yapf because yapf doesn't allow comments right after code blocks,
 but we put comments right after code blocks to prevent large white spaces
 in the documentation.
 """
-# yapf: disable
+# fmt: off
 # __torch_operator_start__
 import torch
diff --git a/python/ray/util/sgd/torch/examples/train_example.py b/python/ray/util/sgd/torch/examples/train_example.py
index bf68f205c..ede9d864c 100644
--- a/python/ray/util/sgd/torch/examples/train_example.py
+++ b/python/ray/util/sgd/torch/examples/train_example.py
@@ -5,7 +5,7 @@ but we put comments right after code blocks to prevent large white spaces
 in the documentation.
""" -# yapf: disable +# fmt: off # __torch_train_example__ import argparse import numpy as np diff --git a/python/ray/util/sgd/torch/examples/tune_example.py b/python/ray/util/sgd/torch/examples/tune_example.py index 1900489a6..8fafcc8a3 100644 --- a/python/ray/util/sgd/torch/examples/tune_example.py +++ b/python/ray/util/sgd/torch/examples/tune_example.py @@ -1,4 +1,4 @@ -# yapf: disable +# fmt: off """ This file holds code for a Distributed Pytorch + Tune page in the docs. diff --git a/python/ray/worker.pyi b/python/ray/worker.pyi index 8954f1936..10d44429c 100644 --- a/python/ray/worker.pyi +++ b/python/ray/worker.pyi @@ -1,4 +1,4 @@ -# yapf: disable +# fmt: off from typing import Any, Callable, Generic, Optional, TypeVar, Union, overload, Sequence, List from ray._raylet import ObjectRef diff --git a/python/ray/workflow/api.pyi b/python/ray/workflow/api.pyi index c0c5db7c3..e09eafc9e 100644 --- a/python/ray/workflow/api.pyi +++ b/python/ray/workflow/api.pyi @@ -1,4 +1,4 @@ -# yapf: disable +# fmt: off from typing import Callable, Generic, Optional, TypeVar, Union, overload, Any from types import FunctionType diff --git a/rllib/agents/a3c/a3c.py b/rllib/agents/a3c/a3c.py index 7fd573faf..2b3807b78 100644 --- a/rllib/agents/a3c/a3c.py +++ b/rllib/agents/a3c/a3c.py @@ -27,7 +27,7 @@ from ray.util.iter import LocalIterator logger = logging.getLogger(__name__) -# yapf: disable +# fmt: off # __sphinx_doc_begin__ DEFAULT_CONFIG = with_common_config({ # Should use a critic as a baseline (otherwise don't use value baseline; @@ -68,7 +68,7 @@ DEFAULT_CONFIG = with_common_config({ "_disable_execution_plan_api": True, }) # __sphinx_doc_end__ -# yapf: enable +# fmt: on class A3CTrainer(Trainer): diff --git a/rllib/agents/ars/ars.py b/rllib/agents/ars/ars.py index a6f08e8a9..f76d39847 100644 --- a/rllib/agents/ars/ars.py +++ b/rllib/agents/ars/ars.py @@ -35,7 +35,7 @@ Result = namedtuple( ], ) -# yapf: disable +# fmt: off # __sphinx_doc_begin__ DEFAULT_CONFIG = with_common_config({ "action_noise_std": 0.0, @@ -59,7 +59,7 @@ DEFAULT_CONFIG = with_common_config({ }, }) # __sphinx_doc_end__ -# yapf: enable +# fmt: on @ray.remote diff --git a/rllib/agents/bandit/bandit.py b/rllib/agents/bandit/bandit.py index d812fa4e1..1968fa97e 100644 --- a/rllib/agents/bandit/bandit.py +++ b/rllib/agents/bandit/bandit.py @@ -9,7 +9,7 @@ from ray.rllib.utils.typing import TrainerConfigDict logger = logging.getLogger(__name__) -# yapf: disable +# fmt: off # __sphinx_doc_begin__ DEFAULT_CONFIG = with_common_config({ # No remote workers by default. 
@@ -26,7 +26,7 @@ DEFAULT_CONFIG = with_common_config({
 "timesteps_per_iteration": 100,
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class BanditLinTSTrainer(Trainer):
diff --git a/rllib/agents/cql/cql.py b/rllib/agents/cql/cql.py
index 3a2a212a4..5382d66ee 100644
--- a/rllib/agents/cql/cql.py
+++ b/rllib/agents/cql/cql.py
@@ -26,7 +26,7 @@ tf1, tf, tfv = try_import_tf()
 tfp = try_import_tfp()
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 CQL_DEFAULT_CONFIG = merge_dicts(
 SAC_CONFIG, {
@@ -55,7 +55,7 @@ CQL_DEFAULT_CONFIG = merge_dicts(
 },
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class CQLTrainer(SACTrainer):
diff --git a/rllib/agents/ddpg/ddpg.py b/rllib/agents/ddpg/ddpg.py
index a1b7ae09a..5a74fa0b5 100644
--- a/rllib/agents/ddpg/ddpg.py
+++ b/rllib/agents/ddpg/ddpg.py
@@ -11,7 +11,7 @@ from ray.rllib.utils.typing import TrainerConfigDict
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 # === Twin Delayed DDPG (TD3) and Soft Actor-Critic (SAC) tricks ===
@@ -175,7 +175,7 @@ DEFAULT_CONFIG = with_common_config({
 "min_time_s_per_reporting": 1,
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class DDPGTrainer(SimpleQTrainer):
diff --git a/rllib/agents/dqn/apex.py b/rllib/agents/dqn/apex.py
index 49b65495d..1867e2e34 100644
--- a/rllib/agents/dqn/apex.py
+++ b/rllib/agents/dqn/apex.py
@@ -47,7 +47,7 @@ from ray.tune.trainable import Trainable
 from ray.tune.utils.placement_groups import PlacementGroupFactory
 from ray.util.iter import LocalIterator
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 APEX_DEFAULT_CONFIG = merge_dicts(
 # See also the options in dqn.py, which are also supported.
@@ -92,7 +92,7 @@ APEX_DEFAULT_CONFIG = merge_dicts(
 },
 )
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 # Update worker weights as they finish generating experiences.
diff --git a/rllib/agents/dqn/dqn.py b/rllib/agents/dqn/dqn.py
index dc933ec1b..7533a33ac 100644
--- a/rllib/agents/dqn/dqn.py
+++ b/rllib/agents/dqn/dqn.py
@@ -38,7 +38,7 @@ from ray.util.iter import LocalIterator
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = Trainer.merge_trainer_configs(
 SIMPLEQ_DEFAULT_CONFIG,
@@ -106,7 +106,7 @@ DEFAULT_CONFIG = Trainer.merge_trainer_configs(
 _allow_unknown_configs=True,
 )
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 def calculate_rr_weights(config: TrainerConfigDict) -> List[float]:
diff --git a/rllib/agents/dqn/r2d2.py b/rllib/agents/dqn/r2d2.py
index f51b5bf3b..566dec720 100644
--- a/rllib/agents/dqn/r2d2.py
+++ b/rllib/agents/dqn/r2d2.py
@@ -11,7 +11,7 @@ from ray.rllib.utils.typing import TrainerConfigDict
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 R2D2_DEFAULT_CONFIG = Trainer.merge_trainer_configs(
 DQN_DEFAULT_CONFIG, # See keys in impala.py, which are also supported.
@@ -70,7 +70,7 @@ R2D2_DEFAULT_CONFIG = Trainer.merge_trainer_configs(
 _allow_unknown_configs=True,
 )
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 # Build an R2D2 trainer, which uses the framework specific Policy
diff --git a/rllib/agents/dqn/simple_q.py b/rllib/agents/dqn/simple_q.py
index 9fea74590..1e0656b9b 100644
--- a/rllib/agents/dqn/simple_q.py
+++ b/rllib/agents/dqn/simple_q.py
@@ -31,7 +31,7 @@ from ray.rllib.utils.typing import TrainerConfigDict
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 # === Exploration Settings ===
@@ -110,7 +110,7 @@ DEFAULT_CONFIG = with_common_config({
 "min_time_s_per_reporting": 1,
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class SimpleQTrainer(Trainer):
diff --git a/rllib/agents/dreamer/dreamer.py b/rllib/agents/dreamer/dreamer.py
index 50c26dbee..4b35b71f8 100644
--- a/rllib/agents/dreamer/dreamer.py
+++ b/rllib/agents/dreamer/dreamer.py
@@ -17,7 +17,7 @@ from ray.rllib.utils.typing import SampleBatchType, TrainerConfigDict
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 # PlaNET Model LR
@@ -78,7 +78,7 @@ DEFAULT_CONFIG = with_common_config({
 }
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class EpisodicBuffer(object):
diff --git a/rllib/agents/es/es.py b/rllib/agents/es/es.py
index d41ba1a29..66c1cf532 100644
--- a/rllib/agents/es/es.py
+++ b/rllib/agents/es/es.py
@@ -33,7 +33,7 @@ Result = namedtuple(
 ],
 )
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 "action_noise_std": 0.01,
@@ -58,7 +58,7 @@ DEFAULT_CONFIG = with_common_config({
 },
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 @ray.remote
diff --git a/rllib/agents/impala/impala.py b/rllib/agents/impala/impala.py
index 66200afd1..c431d1ef4 100644
--- a/rllib/agents/impala/impala.py
+++ b/rllib/agents/impala/impala.py
@@ -25,7 +25,7 @@ from ray.tune.utils.placement_groups import PlacementGroupFactory
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 # V-trace params (see vtrace_tf/torch.py).
@@ -127,7 +127,7 @@ DEFAULT_CONFIG = with_common_config({
 "num_data_loader_buffers": DEPRECATED_VALUE,
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 def make_learner_thread(local_worker, config):
diff --git a/rllib/agents/maml/maml.py b/rllib/agents/maml/maml.py
index 19f51d7d1..793b5c756 100644
--- a/rllib/agents/maml/maml.py
+++ b/rllib/agents/maml/maml.py
@@ -27,7 +27,7 @@ from ray.util.iter import from_actors, LocalIterator
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 # If true, use the Generalized Advantage Estimator (GAE)
@@ -80,7 +80,7 @@ DEFAULT_CONFIG = with_common_config({
 "vf_share_layers": DEPRECATED_VALUE,
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 # @mluo: TODO
diff --git a/rllib/agents/marwil/bc.py b/rllib/agents/marwil/bc.py
index 367a58f02..c74d42079 100644
--- a/rllib/agents/marwil/bc.py
+++ b/rllib/agents/marwil/bc.py
@@ -5,7 +5,7 @@ from ray.rllib.agents.marwil.marwil import (
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.typing import TrainerConfigDict
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 BC_DEFAULT_CONFIG = MARWILTrainer.merge_trainer_configs(
 MARWIL_CONFIG, {
@@ -19,7 +19,7 @@ BC_DEFAULT_CONFIG = MARWILTrainer.merge_trainer_configs(
 "input_evaluation": [],
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class BCTrainer(MARWILTrainer):
diff --git a/rllib/agents/marwil/marwil.py b/rllib/agents/marwil/marwil.py
index a2984a778..7a7c3583a 100644
--- a/rllib/agents/marwil/marwil.py
+++ b/rllib/agents/marwil/marwil.py
@@ -14,7 +14,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.typing import TrainerConfigDict
 from ray.util.iter import LocalIterator
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 # === Input settings ===
@@ -73,7 +73,7 @@ DEFAULT_CONFIG = with_common_config({
 "num_workers": 0,
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class MARWILTrainer(Trainer):
diff --git a/rllib/agents/mbmpo/mbmpo.py b/rllib/agents/mbmpo/mbmpo.py
index 9af1a6de3..858462f4d 100644
--- a/rllib/agents/mbmpo/mbmpo.py
+++ b/rllib/agents/mbmpo/mbmpo.py
@@ -35,7 +35,7 @@ from ray.util.iter import from_actors, LocalIterator
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 # Adds the following updates to the (base) `Trainer` config in
@@ -115,7 +115,7 @@ DEFAULT_CONFIG = with_common_config({
 "vf_share_layers": DEPRECATED_VALUE,
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 # Select Metric Keys for MAML Stats Tracing
 METRICS_KEYS = ["episode_reward_mean", "episode_reward_min", "episode_reward_max"]
diff --git a/rllib/agents/pg/default_config.py b/rllib/agents/pg/default_config.py
index bb5db73b2..346132287 100644
--- a/rllib/agents/pg/default_config.py
+++ b/rllib/agents/pg/default_config.py
@@ -1,6 +1,6 @@
 from ray.rllib.agents.trainer import with_common_config
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 # Add the following (PG-specific) updates to the (base) `Trainer` config in
@@ -20,4 +20,4 @@ DEFAULT_CONFIG = with_common_config({
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
diff --git a/rllib/agents/ppo/appo.py b/rllib/agents/ppo/appo.py
index 3bbcd9f50..86b6a9e54 100644
--- a/rllib/agents/ppo/appo.py
+++ b/rllib/agents/ppo/appo.py
@@ -25,7 +25,7 @@ from ray.rllib.execution.common import (
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.typing import PartialTrainerConfigDict, TrainerConfigDict
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 # Adds the following updates to the `IMPALATrainer` config in
@@ -85,7 +85,7 @@ DEFAULT_CONFIG = impala.ImpalaTrainer.merge_trainer_configs(
 )
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class UpdateTargetAndKL:
diff --git a/rllib/agents/ppo/ddppo.py b/rllib/agents/ppo/ddppo.py
index e959b2cbd..77f453c24 100644
--- a/rllib/agents/ppo/ddppo.py
+++ b/rllib/agents/ppo/ddppo.py
@@ -43,7 +43,7 @@ from ray.util.iter import LocalIterator
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 # Adds the following updates to the `PPOTrainer` config in
@@ -93,7 +93,7 @@ DEFAULT_CONFIG = Trainer.merge_trainer_configs(
 )
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class DDPPOTrainer(PPOTrainer):
diff --git a/rllib/agents/ppo/ppo.py b/rllib/agents/ppo/ppo.py
index 016414042..b89b72469 100644
--- a/rllib/agents/ppo/ppo.py
+++ b/rllib/agents/ppo/ppo.py
@@ -34,7 +34,7 @@ from ray.util.iter import LocalIterator
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 # Adds the following updates to the (base) `Trainer` config in
@@ -101,7 +101,7 @@ DEFAULT_CONFIG = with_common_config({
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class UpdateKL:
diff --git a/rllib/agents/qmix/qmix.py b/rllib/agents/qmix/qmix.py
index bdd5baee4..47b6befc7 100644
--- a/rllib/agents/qmix/qmix.py
+++ b/rllib/agents/qmix/qmix.py
@@ -18,7 +18,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.typing import TrainerConfigDict
 from ray.util.iter import LocalIterator
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 # === QMix ===
@@ -107,7 +107,7 @@ DEFAULT_CONFIG = with_common_config({
 "framework": "torch",
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class QMixTrainer(SimpleQTrainer):
diff --git a/rllib/agents/sac/sac.py b/rllib/agents/sac/sac.py
index bf968b75f..eb455c2d8 100644
--- a/rllib/agents/sac/sac.py
+++ b/rllib/agents/sac/sac.py
@@ -26,7 +26,7 @@ OPTIMIZER_SHARED_CONFIGS = [
 "learning_starts",
 ]
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 # Adds the following updates to the (base) `Trainer` config in
@@ -173,7 +173,7 @@ DEFAULT_CONFIG = with_common_config({
 "_use_beta_distribution": False,
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 class SACTrainer(DQNTrainer):
diff --git a/rllib/agents/slateq/slateq.py b/rllib/agents/slateq/slateq.py
index c8785284a..0df74b231 100644
--- a/rllib/agents/slateq/slateq.py
+++ b/rllib/agents/slateq/slateq.py
@@ -46,7 +46,7 @@ ALL_SLATEQ_STRATEGIES = [
 "QL",
 ]
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 # === Model ===
@@ -144,7 +144,7 @@ DEFAULT_CONFIG = with_common_config({
 "double_q": True,
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 def calculate_round_robin_weights(config: TrainerConfigDict) -> List[float]:
diff --git a/rllib/agents/trainer.py b/rllib/agents/trainer.py
index 37c822b11..43890208b 100644
--- a/rllib/agents/trainer.py
+++ b/rllib/agents/trainer.py
@@ -117,7 +117,7 @@ logger = logging.getLogger(__name__)
 # times in a row since that would indicate a persistent cluster issue.
 MAX_WORKER_FAILURE_RETRIES = 3
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 COMMON_CONFIG: TrainerConfigDict = {
 # === Settings for Rollout Worker processes ===
@@ -650,7 +650,7 @@ COMMON_CONFIG: TrainerConfigDict = {
 "collect_metrics_timeout": DEPRECATED_VALUE,
 }
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 @DeveloperAPI
diff --git a/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py b/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py
index 8a6c8d581..3ccad8bf3 100644
--- a/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py
+++ b/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py
@@ -48,7 +48,7 @@ class AlphaZeroDefaultCallbacks(DefaultCallbacks):
 episode.user_data["initial_state"] = state
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 # Size of batches collected from each worker
@@ -121,7 +121,7 @@ DEFAULT_CONFIG = with_common_config({
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 def alpha_zero_loss(policy, model, dist_class, train_batch):
diff --git a/rllib/contrib/maddpg/maddpg.py b/rllib/contrib/maddpg/maddpg.py
index b96a4e37d..84af12803 100644
--- a/rllib/contrib/maddpg/maddpg.py
+++ b/rllib/contrib/maddpg/maddpg.py
@@ -25,7 +25,7 @@ from ray.rllib.utils.typing import TrainerConfigDict
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
 # === Framework to run the algorithm ===
@@ -123,7 +123,7 @@ DEFAULT_CONFIG = with_common_config({
 "min_time_s_per_reporting": 0,
 })
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 def before_learn_on_batch(multi_agent_batch, policies, train_batch_size):
diff --git a/rllib/contrib/random_agent/random_agent.py b/rllib/contrib/random_agent/random_agent.py
index c756c727c..ed55acb8d 100644
--- a/rllib/contrib/random_agent/random_agent.py
+++ b/rllib/contrib/random_agent/random_agent.py
@@ -5,7 +5,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.typing import TrainerConfigDict
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 class RandomAgent(Trainer):
 """Trainer that produces random actions and never learns."""
diff --git a/rllib/env/multi_agent_env.py b/rllib/env/multi_agent_env.py
index 02fae28bc..ed21d9236 100644
--- a/rllib/env/multi_agent_env.py
+++ b/rllib/env/multi_agent_env.py
@@ -233,7 +233,7 @@ class MultiAgentEnv(gym.Env):
 # By default, do nothing.
 pass
- # yapf: disable
+ # fmt: off
 # __grouping_doc_begin__
 @ExperimentalAPI
 def with_agent_groups(
@@ -279,7 +279,7 @@ class MultiAgentEnv(gym.Env):
 return GroupAgentsWrapper(self, groups, obs_space, act_space)
 # __grouping_doc_end__
- # yapf: enable
+ # fmt: on
 @PublicAPI
 def to_base_env(
diff --git a/rllib/evaluation/collectors/sample_collector.py b/rllib/evaluation/collectors/sample_collector.py
index 333188c94..e0b301e0f 100644
--- a/rllib/evaluation/collectors/sample_collector.py
+++ b/rllib/evaluation/collectors/sample_collector.py
@@ -13,7 +13,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 class SampleCollector(metaclass=ABCMeta):
 """Collects samples for all policies and agents from a multi-agent env.
diff --git a/rllib/models/catalog.py b/rllib/models/catalog.py
index 1dd43cb6a..775a3367e 100644
--- a/rllib/models/catalog.py
+++ b/rllib/models/catalog.py
@@ -48,7 +48,7 @@ torch, _ = try_import_torch()
 logger = logging.getLogger(__name__)
-# yapf: disable
+# fmt: off
 # __sphinx_doc_begin__
 MODEL_DEFAULTS: ModelConfigDict = {
 # Experimental flag.
@@ -188,7 +188,7 @@ MODEL_DEFAULTS: ModelConfigDict = {
 "lstm_use_prev_action_reward": DEPRECATED_VALUE,
 }
 # __sphinx_doc_end__
-# yapf: enable
+# fmt: on
 @PublicAPI
diff --git a/rllib/utils/exploration/exploration.py b/rllib/utils/exploration/exploration.py
index edd24393c..346c96c20 100644
--- a/rllib/utils/exploration/exploration.py
+++ b/rllib/utils/exploration/exploration.py
@@ -80,7 +80,7 @@ class Exploration:
 """
 pass
- # yapf: disable
+ # fmt: off
 # __sphinx_doc_begin_get_exploration_action__
 @DeveloperAPI
@@ -112,7 +112,7 @@ class Exploration:
 pass
 # __sphinx_doc_end_get_exploration_action__
- # yapf: enable
+ # fmt: on
 @DeveloperAPI
 def on_episode_start(
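
The change is purely mechanical: every `# yapf: disable` / `# yapf: enable` pair becomes Black's `# fmt: off` / `# fmt: on`, so the hand-formatted regions between the markers keep their layout under the new formatter. A minimal sketch of how the Black markers behave (illustrative only; this snippet is not taken from any file in this diff):

# fmt: off
IDENTITY = [
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
]
# fmt: on

# Black reformats everything outside the markers as usual but leaves the
# manually aligned matrix above untouched, the same way yapf treated code
# between "# yapf: disable" and "# yapf: enable".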