From d354161528612b2d430e47e0aced5b3b0a22628a Mon Sep 17 00:00:00 2001 From: Amog Kamsetty Date: Wed, 22 Sep 2021 18:49:41 -0700 Subject: [PATCH] [SGD] Link `ray.sgd` namespace to `ray.util.sgd.v2` (#18732) * wip * add symlink * update * remove from init * no require tune * try fix * change * * import * fix docs * address comment --- doc/source/raysgd/raysgd.rst | 4 +-- doc/source/raysgd/v2/api.rst | 29 ++++++++++---------- doc/source/raysgd/v2/raysgd.rst | 4 +-- doc/source/raysgd/v2/user_guide.rst | 32 +++++++++++----------- python/ray/setup-dev.py | 1 + python/ray/sgd/__init__.py | 2 ++ python/ray/util/sgd/torch/torch_trainer.py | 19 +++++++++++-- 7 files changed, 55 insertions(+), 36 deletions(-) create mode 100644 python/ray/sgd/__init__.py diff --git a/doc/source/raysgd/raysgd.rst b/doc/source/raysgd/raysgd.rst index 245179623..87696e68d 100644 --- a/doc/source/raysgd/raysgd.rst +++ b/doc/source/raysgd/raysgd.rst @@ -5,8 +5,8 @@ RaySGD: Distributed Training Wrappers ===================================== -.. tip:: We are rolling out a lighter-weight version of RaySGD. See the - documentation :ref:`here `. +.. warning:: This is an older version of Ray SGD. A newer, more light-weight version of Ray SGD is in alpha as of Ray 1.7. + See the documentation :ref:`here `. RaySGD is a lightweight library for distributed deep learning, providing thin wrappers around PyTorch and TensorFlow native modules for data parallel training. diff --git a/doc/source/raysgd/v2/api.rst b/doc/source/raysgd/v2/api.rst index 382a8d8fc..bb4a871cb 100644 --- a/doc/source/raysgd/v2/api.rst +++ b/doc/source/raysgd/v2/api.rst @@ -1,3 +1,4 @@ + .. _sgd-api: RaySGD API @@ -8,7 +9,7 @@ RaySGD API Trainer ------- -.. autoclass:: ray.util.sgd.v2.Trainer +.. autoclass:: ray.sgd.Trainer :members: .. _sgd-api-iterator: @@ -16,7 +17,7 @@ Trainer SGDIterator ~~~~~~~~~~~ -.. autoclass:: ray.util.sgd.v2.SGDIterator +.. autoclass:: ray.sgd.SGDIterator :members: .. 
_sgd-api-backend-config: @@ -24,35 +25,35 @@ SGDIterator BackendConfig ------------- -.. autoclass:: ray.util.sgd.v2.BackendConfig +.. autoclass:: ray.sgd.BackendConfig .. _sgd-api-torch-config: TorchConfig ~~~~~~~~~~~ -.. autoclass:: ray.util.sgd.v2.TorchConfig +.. autoclass:: ray.sgd.TorchConfig .. _sgd-api-tensorflow-config: TensorflowConfig ~~~~~~~~~~~~~~~~ -.. autoclass:: ray.util.sgd.v2.TensorflowConfig +.. autoclass:: ray.sgd.TensorflowConfig .. _sgd-api-horovod-config: HorovodConfig ~~~~~~~~~~~~~ -.. autoclass:: ray.util.sgd.v2.HorovodConfig +.. autoclass:: ray.sgd.HorovodConfig .. _sgd-api-callback: SGDCallback ----------- -.. autoclass:: ray.util.sgd.v2.SGDCallback +.. autoclass:: ray.sgd.SGDCallback :members: .. _sgd-api-json-logger-callback: @@ -60,21 +61,21 @@ SGDCallback JsonLoggerCallback ~~~~~~~~~~~~~~~~~~ -.. autoclass:: ray.util.sgd.v2.callbacks.JsonLoggerCallback +.. autoclass:: ray.sgd.JsonLoggerCallback .. _sgd-api-tbx-logger-callback: TBXLoggerCallback ~~~~~~~~~~~~~~~~~ -.. autoclass:: ray.util.sgd.v2.callbacks.TBXLoggerCallback +.. autoclass:: ray.sgd.TBXLoggerCallback .. _sgd-api-checkpoint-strategy: CheckpointStrategy ------------------ -.. autoclass:: ray.util.sgd.v2.CheckpointStrategy +.. autoclass:: ray.sgd.CheckpointStrategy Training Function Utilities --------------------------- @@ -82,19 +83,19 @@ Training Function Utilities sgd.report ~~~~~~~~~~ -.. autofunction:: ray.util.sgd.v2.report +.. autofunction:: ray.sgd.report sgd.load_checkpoint ~~~~~~~~~~~~~~~~~~~ -.. autofunction:: ray.util.sgd.v2.load_checkpoint +.. autofunction:: ray.sgd.load_checkpoint sgd.save_checkpoint ~~~~~~~~~~~~~~~~~~~ -.. autofunction:: ray.util.sgd.v2.save_checkpoint +.. autofunction:: ray.sgd.save_checkpoint sgd.world_rank ~~~~~~~~~~~~~~ -.. autofunction:: ray.util.sgd.v2.world_rank \ No newline at end of file +.. 
autofunction:: ray.sgd.world_rank \ No newline at end of file diff --git a/doc/source/raysgd/v2/raysgd.rst b/doc/source/raysgd/v2/raysgd.rst index 374603134..02111cdae 100644 --- a/doc/source/raysgd/v2/raysgd.rst +++ b/doc/source/raysgd/v2/raysgd.rst @@ -146,7 +146,7 @@ system. Let's take following simple examples: .. code-block:: python - from ray.util.sgd.v2 import Trainer + from ray.sgd import Trainer trainer = Trainer(backend="torch", num_workers=4) trainer.start() @@ -246,7 +246,7 @@ system. Let's take following simple examples: .. code-block:: python - from ray.util.sgd.v2 import Trainer + from ray.sgd import Trainer trainer = Trainer(backend="tensorflow", num_workers=4) trainer.start() diff --git a/doc/source/raysgd/v2/user_guide.rst b/doc/source/raysgd/v2/user_guide.rst index 88b528390..9b2d223f3 100644 --- a/doc/source/raysgd/v2/user_guide.rst +++ b/doc/source/raysgd/v2/user_guide.rst @@ -213,7 +213,7 @@ configurations. As an example: .. code-block:: python - from ray.util.sgd.v2 import Trainer + from ray.sgd import Trainer def train_func(config): results = [] @@ -340,8 +340,8 @@ You can plug all of these into RaySGD with the following interface: .. code-block:: python - from ray.util.sgd import v2 as sgd - from ray.util.sgd.v2 import SGDCallback, Trainer + from ray import sgd + from ray.sgd import SGDCallback, Trainer from typing import List, Dict class PrintingCallback(SGDCallback): @@ -395,7 +395,7 @@ A simple example for creating a callback that will print out results: .. code-block:: python - from ray.util.sgd.v2 import SGDCallback + from ray.sgd import SGDCallback class PrintingCallback(SGDCallback): def handle_result(self, results: List[Dict], **info): @@ -422,8 +422,8 @@ Here is an example: ..
code-block:: python - from ray.util.sgd import v2 as sgd - from ray.util.sgd.v2 import SGDCallback, Trainer + from ray import sgd + from ray.sgd import SGDCallback, Trainer from typing import List, Dict import torch @@ -477,8 +477,8 @@ The latest saved checkpoint can be accessed through the ``Trainer``'s .. code-block:: python - from ray.util.sgd import v2 as sgd - from ray.util.sgd.v2 import Trainer + from ray import sgd + from ray.sgd import Trainer def train_func(config): model = 0 # This should be replaced with a real model. @@ -519,8 +519,8 @@ As an example, to disable writing checkpoints to disk: .. code-block:: python :emphasize-lines: 8,12 - from ray.util.sgd import v2 as sgd - from ray.util.sgd.v2 import CheckpointStrategy, Trainer + from ray import sgd + from ray.sgd import CheckpointStrategy, Trainer def train_func(): for epoch in range(3): @@ -550,8 +550,8 @@ Checkpoints can be loaded into the training function in 2 steps: .. code-block:: python - from ray.util.sgd import v2 as sgd - from ray.util.sgd.v2 import Trainer + from ray import sgd + from ray.sgd import Trainer def train_func(config): checkpoint = sgd.load_checkpoint() or {} @@ -662,8 +662,8 @@ produce an object ("Trainable") that will be passed to Ray Tune. .. code-block:: python - from ray.util.sgd import v2 as sgd - from ray.util.sgd.v2 import Trainer + from ray import sgd + from ray.sgd import Trainer def train_func(config): # In this example, nothing is expected to change over epochs, @@ -704,8 +704,8 @@ A couple caveats: ..
code-block:: python from ray import tune - from ray.util.sgd import v2 as sgd - from ray.util.sgd.v2 import Trainer + from ray import sgd + from ray.sgd import Trainer def train_func(config): # In this example, nothing is expected to change over epochs, diff --git a/python/ray/setup-dev.py b/python/ray/setup-dev.py index 9873a2cc6..81a0a0aca 100755 --- a/python/ray/setup-dev.py +++ b/python/ray/setup-dev.py @@ -74,6 +74,7 @@ if __name__ == "__main__": do_link("rllib", force=args.yes, local_path="../../../rllib") do_link("tune", force=args.yes) + do_link("sgd", force=args.yes) do_link("autoscaler", force=args.yes) do_link("ray_operator", force=args.yes) do_link("cloudpickle", force=args.yes) diff --git a/python/ray/sgd/__init__.py b/python/ray/sgd/__init__.py new file mode 100644 index 000000000..c5d4677aa --- /dev/null +++ b/python/ray/sgd/__init__.py @@ -0,0 +1,2 @@ +from ray.util.sgd.v2 import * # noqa: F401, F403 +from ray.util.sgd.v2.callbacks import JsonLoggerCallback, TBXLoggerCallback # noqa: E501, F401, F403 diff --git a/python/ray/util/sgd/torch/torch_trainer.py b/python/ray/util/sgd/torch/torch_trainer.py index 24258290f..d5bda476e 100644 --- a/python/ray/util/sgd/torch/torch_trainer.py +++ b/python/ray/util/sgd/torch/torch_trainer.py @@ -10,8 +10,6 @@ import torch import torch.distributed as dist import ray -from ray.tune import PlacementGroupFactory, Trainable -from ray.tune.utils.util import merge_dicts from ray.util import log_once from ray.util.annotations import PublicAPI from ray.util.sgd.torch.worker_group import LocalWorkerGroup, \ @@ -20,6 +18,20 @@ from ray.util.sgd.utils import NUM_SAMPLES, BATCH_SIZE from ray.util.sgd.torch.constants import VALID_SCHEDULER_STEP, NCCL_TIMEOUT_S from ray.util.sgd.data import Dataset +try: + from ray.tune import Trainable + from ray.tune import PlacementGroupFactory + from ray.tune.utils.util import merge_dicts + TUNE_INSTALLED = True +except ImportError: + TUNE_INSTALLED = False + Trainable = PlacementGroupFactory
= object + + def noop(): + return + + merge_dicts = noop + logger = logging.getLogger(__name__) @@ -652,6 +664,9 @@ class TorchTrainer: training epoch for each tune iteration. """ + if not TUNE_INSTALLED: + raise RuntimeError("Please install `ray[tune]` to use the Tune " + "integration.") if override_tune_step is not None: callback_args = inspect.signature(override_tune_step) if not len(callback_args.parameters) == 2: