[AIR/Train] Move ray.air.train to ray.train (#25570)

Amog Kamsetty 2022-06-08 21:34:18 -07:00 committed by GitHub
parent 836b08597f
commit 1316a2d05e
100 changed files with 426 additions and 387 deletions
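
The change is mechanical but wide-ranging: every AIR trainer integration moves from the ray.air.train.integrations.* namespace into the matching ray.train.* subpackage, and the abstract base class ray.air.trainer.Trainer becomes ray.train.trainer.BaseTrainer. A before/after summary of the import changes, using paths taken directly from the diffs below:

# Before this commit
from ray.air.trainer import Trainer
from ray.air.train.integrations.torch import TorchTrainer
from ray.air.train.integrations.xgboost import XGBoostTrainer

# After this commit
from ray.train.trainer import BaseTrainer
from ray.train.torch import TorchTrainer
from ray.train.xgboost import XGBoostTrainer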


@ -7,7 +7,7 @@
- rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/ci.sh build
- pip install -Ur ./python/requirements_ml_docker.txt
- ./ci/env/env_info.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only python/ray/train/...
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only,-ray_air python/ray/train/...
- label: ":tv: :database: :steam_locomotive: Datasets Train Integration GPU Tests and Examples (Python 3.7)"
conditions: ["RAY_CI_TRAIN_AFFECTED"]


@ -4,6 +4,7 @@
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- DATA_PROCESSING_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,-needs_credentials python/ray/air/...
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air,-gpu_only,-gpu,-needs_credentials python/ray/train/...
- label: ":brain: RLlib: Learning discr. actions TF2-static-graph"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
@ -300,14 +301,14 @@
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- TRAIN_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu_only,-minimal,-tune python/ray/train/...
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu_only,-minimal,-tune,-ray_air python/ray/train/...
- label: ":steam_locomotive: :octopus: Train + Tune tests and examples"
conditions: ["RAY_CI_TRAIN_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=tune,-gpu_only python/ray/train/...
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=tune,-gpu_only,-ray_air python/ray/train/...
- label: ":octopus: Tune/Modin/Dask tests and examples. Python 3.7"
conditions: ["RAY_CI_TUNE_AFFECTED"]


@ -443,11 +443,12 @@
- ./ci/env/install-dependencies.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=soft_imports python/ray/tune/...
# Test to see if Train can be used without torch, tf, etc. installed
- label: ":steam_locomotive: Train minimal install"
conditions: ["RAY_CI_TRAIN_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- ./ci/env/install-minimal.sh
- TRAIN_MINIMAL_INSTALL=1 ./ci/env/install-minimal.sh
- ./ci/env/env_info.sh
- python ./ci/env/check_minimal_install.py
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=minimal python/ray/train/...


@ -18,6 +18,9 @@ DEFAULT_BLACKLIST = [
"opencensus",
"prometheus_client",
"smart_open",
"torch",
"tensorflow",
"jax",
]
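
check_minimal_install.py (invoked in the minimal-install CI job above) verifies that blacklisted packages are absent, which is presumably why the ML frameworks are added here now that Train's minimal tests run in that job. A minimal sketch of such a check, assuming it tests importability:

import importlib.util

DEFAULT_BLACKLIST = ["opencensus", "prometheus_client", "smart_open", "torch", "tensorflow", "jax"]

for module_name in DEFAULT_BLACKLIST:
    # find_spec returns None when a package is not installed, which is
    # the expected state in a minimal environment.
    assert importlib.util.find_spec(module_name) is None, f"{module_name} must not be installed"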


@ -35,3 +35,9 @@ eval "${WORKSPACE_DIR}/ci/ci.sh build"
python -m pip install -U \
pytest==5.4.3 \
numpy
# Train requirements.
# TODO: make this dynamic
if [ "${TRAIN_MINIMAL_INSTALL-}" = 1 ]; then
python -m pip install -U "ray[tune]"
fi


@ -90,7 +90,7 @@ trainer.fit()
# __config_1__
import ray
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
from ray.air.config import DatasetConfig
train_ds = ray.data.range_tensor(1000)
@ -118,7 +118,7 @@ print(my_trainer.get_dataset_config())
# __config_2__
import ray
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
from ray.air.config import DatasetConfig
train_ds = ray.data.range_tensor(1000)
@ -144,7 +144,7 @@ print(my_trainer.get_dataset_config())
import ray
from ray import train
from ray.data import Dataset
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
from ray.air.config import DatasetConfig
@ -175,7 +175,7 @@ my_trainer.fit()
import ray
from ray import train
from ray.data import DatasetPipeline
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
from ray.air.config import DatasetConfig
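
Each of these snippets goes on to pass the dataset and per-dataset configs into the trainer. A sketch of the elided construction, with an assumed placeholder training loop (the real doc example consumes train.get_dataset_shard("train")):

def train_loop_per_worker():
    pass  # placeholder training loop

my_trainer = TorchTrainer(
    train_loop_per_worker,
    scaling_config={"num_workers": 2},
    datasets={"train": train_ds},
    dataset_config={"train": DatasetConfig(split=True)},
)
print(my_trainer.get_dataset_config())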


@ -21,7 +21,7 @@ preprocessor = StandardScaler(columns=columns_to_scale)
# __air_preprocessors_end__
# __air_trainer_start__
from ray.air.train.integrations.xgboost import XGBoostTrainer
from ray.train.xgboost import XGBoostTrainer
num_workers = 2
use_gpu = False


@ -47,7 +47,7 @@ print(batch_transformed)
# __trainer_start__
import ray
from ray.air.train.integrations.xgboost import XGBoostTrainer
from ray.train.xgboost import XGBoostTrainer
from ray.air.preprocessors import MinMaxScaler
train_dataset = ray.data.from_items([{"x": x, "y": 2 * x} for x in range(0, 32, 3)])


@ -28,7 +28,7 @@ import torch
from torch import nn
from torch.utils.data import DataLoader
import ray.train as train
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
# Define model
class NeuralNetwork(nn.Module):


@ -18,7 +18,7 @@ from tensorflow.keras.callbacks import Callback
import ray.train as train
from ray.train.tensorflow import prepare_dataset_shard
from ray.air.train.integrations.tensorflow import TensorflowTrainer
from ray.train.tensorflow import TensorflowTrainer
def build_model() -> tf.keras.Model:


@ -25,7 +25,7 @@ preprocessor = StandardScaler(columns=columns_to_scale)
# __air_xgb_train_start__
from ray.air.train.integrations.xgboost import XGBoostTrainer
from ray.train.xgboost import XGBoostTrainer
# XGBoost specific params
params = {


@ -72,7 +72,7 @@
"import ray\n",
"from ray import tune\n",
"from ray.air import RunConfig\n",
"from ray.air.train.integrations.xgboost import XGBoostTrainer\n",
"from ray.train.xgboost import XGBoostTrainer\n",
"from ray.tune.tune_config import TuneConfig\n",
"from ray.tune.tuner import Tuner"
]


@ -797,7 +797,7 @@
},
"outputs": [],
"source": [
"from ray.air.train.integrations.huggingface import HuggingFaceTrainer\n",
"from ray.train.huggingface import HuggingFaceTrainer\n",
"from ray.air import RunConfig\n",
"from ray.tune.integration.mlflow import MLflowLoggerCallback\n",
"\n",
@ -1458,7 +1458,7 @@
},
"outputs": [],
"source": [
"from ray.air.train.integrations.huggingface import load_checkpoint\n",
"from ray.train.huggingface import load_checkpoint\n",
"\n",
"hf_trainer = load_checkpoint(\n",
" checkpoint=result.checkpoint,\n",


@ -53,7 +53,7 @@
"from ray.air.predictors.integrations.lightgbm import LightGBMPredictor\n",
"from ray.air.preprocessors.chain import Chain\n",
"from ray.air.preprocessors.encoder import Categorizer\n",
"from ray.air.train.integrations.lightgbm import LightGBMTrainer\n",
"from ray.train.lightgbm import LightGBMTrainer\n",
"from ray.data.dataset import Dataset\n",
"from ray.air.result import Result\n",
"from ray.air.preprocessors import StandardScaler\n",


@ -62,7 +62,7 @@
"from ray.air import Checkpoint\n",
"from ray.air.config import RunConfig\n",
"from ray.air.predictors.integrations.rl.rl_predictor import RLPredictor\n",
"from ray.air.train.integrations.rl.rl_trainer import RLTrainer\n",
"from ray.train.rl.rl_trainer import RLTrainer\n",
"from ray.air.result import Result\n",
"from ray.rllib.agents.marwil import BCTrainer\n",
"from ray.tune.tuner import Tuner"


@ -62,7 +62,7 @@
"from ray.air import Checkpoint\n",
"from ray.air.config import RunConfig\n",
"from ray.air.predictors.integrations.rl.rl_predictor import RLPredictor\n",
"from ray.air.train.integrations.rl.rl_trainer import RLTrainer\n",
"from ray.train.rl.rl_trainer import RLTrainer\n",
"from ray.air.result import Result\n",
"from ray.rllib.agents.marwil import BCTrainer\n",
"from ray.tune.tuner import Tuner"


@ -52,7 +52,7 @@
"\n",
"from ray.air.checkpoint import Checkpoint\n",
"from ray.air.config import RunConfig\n",
"from ray.air.train.integrations.rl.rl_trainer import RLTrainer\n",
"from ray.train.rl.rl_trainer import RLTrainer\n",
"from ray.air.predictors.integrations.rl.rl_predictor import RLPredictor\n",
"from ray.air.result import Result\n",
"from ray.serve.model_wrappers import ModelWrapperDeployment\n",


@ -58,7 +58,7 @@
"from ray.air.predictors.integrations.sklearn import SklearnPredictor\n",
"from ray.air.preprocessors import Chain, OrdinalEncoder, StandardScaler\n",
"from ray.air.result import Result\n",
"from ray.air.train.integrations.sklearn import SklearnTrainer\n",
"from ray.train.sklearn import SklearnTrainer\n",
"\n",
"\n",
"from sklearn.datasets import load_breast_cancer\n",


@ -748,7 +748,7 @@
}
],
"source": [
"from ray.air.train.integrations.tensorflow import TensorflowTrainer\n",
"from ray.train.tensorflow import TensorflowTrainer\n",
"\n",
"trainer = TensorflowTrainer(\n",
" train_loop_per_worker=train_loop_per_worker,\n",


@ -410,7 +410,7 @@
}
],
"source": [
"from ray.air.train.integrations.torch import TorchTrainer\n",
"from ray.train.torch import TorchTrainer\n",
"\n",
"trainer = TorchTrainer(\n",
" train_loop_per_worker=train_loop_per_worker,\n",


@ -1235,7 +1235,7 @@
}
],
"source": [
"from ray.air.train.integrations.torch import TorchTrainer\n",
"from ray.train.torch import TorchTrainer\n",
"from ray.air.predictors.integrations.torch import TorchPredictor\n",
"from ray.air import Checkpoint\n",
"from ray import serve\n",


@ -48,7 +48,7 @@
"\n",
"from ray.air import RunConfig\n",
"from ray.air.result import Result\n",
"from ray.air.train.integrations.xgboost import XGBoostTrainer\n",
"from ray.train.xgboost import XGBoostTrainer\n",
"from ray.tune.integration.comet import CometLoggerCallback\n",
"from sklearn.datasets import load_breast_cancer"
]


@ -48,7 +48,7 @@
"\n",
"from ray.air import RunConfig\n",
"from ray.air.result import Result\n",
"from ray.air.train.integrations.xgboost import XGBoostTrainer\n",
"from ray.train.xgboost import XGBoostTrainer\n",
"from ray.tune.integration.wandb import WandbLoggerCallback\n",
"from sklearn.datasets import load_breast_cancer"
]


@ -66,7 +66,7 @@
"import ray\n",
"from ray.air.batch_predictor import BatchPredictor\n",
"from ray.air.predictors.integrations.xgboost import XGBoostPredictor\n",
"from ray.air.train.integrations.xgboost import XGBoostTrainer\n",
"from ray.train.xgboost import XGBoostTrainer\n",
"from ray.data.dataset import Dataset\n",
"from ray.air.result import Result\n",
"from ray.air.preprocessors import StandardScaler\n",


@ -29,42 +29,42 @@ Preprocessors
Trainer
~~~~~~~
.. autoclass:: ray.air.trainer.Trainer
.. autoclass:: ray.train.trainer.BaseTrainer
:members:
.. automodule:: ray.air.train.integrations.xgboost
.. automodule:: ray.train.xgboost
:members:
:show-inheritance:
.. automodule:: ray.air.train.integrations.lightgbm
.. automodule:: ray.train.lightgbm
:members:
:show-inheritance:
.. automodule:: ray.air.train.integrations.tensorflow
.. automodule:: ray.train.tensorflow
:members:
:show-inheritance:
.. automodule:: ray.air.train.integrations.torch
.. automodule:: ray.train.torch
:members:
:show-inheritance:
.. automodule:: ray.air.train.integrations.horovod
.. automodule:: ray.train.horovod
:members:
:show-inheritance:
.. automodule:: ray.air.train.integrations.huggingface
.. automodule:: ray.train.huggingface
:members:
:show-inheritance:
.. automodule:: ray.air.train.integrations.sklearn
.. automodule:: ray.train.sklearn
:members:
:show-inheritance:
.. autoclass:: ray.air.train.data_parallel_trainer.DataParallelTrainer
.. autoclass:: ray.train.data_parallel_trainer.DataParallelTrainer
:members:
:show-inheritance:
.. autoclass:: ray.air.train.gbdt_trainer.GBDTTrainer
.. autoclass:: ray.train.gbdt_trainer.GBDTTrainer
:members:
:show-inheritance:


@ -63,7 +63,7 @@ The same logic is applicable to other integrations as well.
Trainer
~~~~~~~
The journey of the ``Preprocessor`` starts with the :class:`Trainer <ray.air.trainer.Trainer>`.
The journey of the ``Preprocessor`` starts with the :class:`Trainer <ray.train.trainer.BaseTrainer>`.
If the ``Trainer`` is instantiated with a ``Preprocessor``, then the following logic will be executed when ``Trainer.fit()`` is called:
#. If a ``"train"`` ``Dataset`` is passed in, then the ``Preprocessor`` will call ``fit()`` on it.
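
A concrete sketch of that hand-off, assembled from imports that appear elsewhere in this commit (column names and parameter values are illustrative):

import ray
from ray.air.preprocessors import StandardScaler
from ray.train.xgboost import XGBoostTrainer

train_dataset = ray.data.from_items([{"x": x, "y": 2 * x} for x in range(0, 32, 3)])
preprocessor = StandardScaler(columns=["x"])

trainer = XGBoostTrainer(
    label_column="y",
    params={"objective": "reg:squarederror"},
    scaling_config={"num_workers": 2},
    datasets={"train": train_dataset},
    preprocessor=preprocessor,  # fit on the "train" dataset when trainer.fit() runs
)
result = trainer.fit()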


@ -31,6 +31,7 @@ TorchConfig
~~~~~~~~~~~
.. autoclass:: ray.train.torch.TorchConfig
:noindex:
.. _train-api-tensorflow-config:
@ -38,6 +39,7 @@ TensorflowConfig
~~~~~~~~~~~~~~~~
.. autoclass:: ray.train.tensorflow.TensorflowConfig
:noindex:
.. _train-api-horovod-config:
@ -45,6 +47,7 @@ HorovodConfig
~~~~~~~~~~~~~
.. autoclass:: ray.train.horovod.HorovodConfig
:noindex:
.. _train-api-backend-interfaces:
@ -170,6 +173,7 @@ train.torch.prepare_model
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: ray.train.torch.prepare_model
:noindex:
.. _train-api-torch-prepare-data-loader:
@ -177,17 +181,20 @@ train.torch.prepare_data_loader
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: ray.train.torch.prepare_data_loader
:noindex:
train.torch.prepare_optimizer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: ray.train.torch.prepare_optimizer
:noindex:
train.torch.backward
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: ray.train.torch.backward
:noindex:
.. _train-api-torch-get-device:
@ -195,11 +202,13 @@ train.torch.get_device
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: ray.train.torch.get_device
:noindex:
train.torch.enable_reproducibility
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: ray.train.torch.enable_reproducibility
:noindex:
.. _train-api-torch-worker-profiler:
@ -207,12 +216,14 @@ train.torch.accelerate
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: ray.train.torch.accelerate
:noindex:
train.torch.TorchWorkerProfiler
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ray.train.torch.TorchWorkerProfiler
:members:
:noindex:
.. _train-api-tensorflow-utils:
@ -222,4 +233,5 @@ TensorFlow Training Function Utilities
train.tensorflow.prepare_dataset_shard
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: ray.train.tensorflow.prepare_dataset_shard
.. autofunction:: ray.train.tensorflow.prepare_dataset_shard
:noindex:


@ -42,7 +42,7 @@
"import ray\n",
"from ray import train\n",
"from ray import tune\n",
"from ray.air.train.integrations.horovod import HorovodTrainer\n",
"from ray.train.horovod import HorovodTrainer\n",
"from ray.tune.tune_config import TuneConfig\n",
"from ray.tune.tuner import Tuner\n",
"\n",


@ -177,14 +177,6 @@ py_test(
deps = [":ml_lib"]
)
py_test(
name = "test_data_parallel_trainer",
size = "medium",
srcs = ["tests/test_data_parallel_trainer.py"],
tags = ["team:ml", "exclusive"],
deps = [":ml_lib"]
)
py_test(
name = "test_dataset_config",
size = "medium",
@ -201,14 +193,6 @@ py_test(
deps = [":ml_lib"]
)
py_test(
name = "test_horovod_trainer",
size = "large",
srcs = ["tests/test_horovod_trainer.py"],
tags = ["team:ml", "exclusive"],
deps = [":ml_lib"]
)
py_test(
name = "test_huggingface_predictor",
size = "medium",
@ -217,14 +201,6 @@ py_test(
deps = [":ml_lib"]
)
py_test(
name = "test_huggingface_trainer",
size = "medium",
srcs = ["tests/test_huggingface_trainer.py"],
tags = ["team:ml", "exclusive"],
deps = [":ml_lib"]
)
py_test(
name = "test_lightgbm_predictor",
size = "small",
@ -233,14 +209,6 @@ py_test(
deps = [":ml_lib"]
)
py_test(
name = "test_lightgbm_trainer",
size = "medium",
srcs = ["tests/test_lightgbm_trainer.py"],
tags = ["team:ml", "exclusive"],
deps = [":ml_lib"]
)
py_test(
name = "test_predictor",
size = "small",
@ -273,14 +241,6 @@ py_test(
deps = [":ml_lib"]
)
py_test(
name = "test_sklearn_trainer",
size = "medium",
srcs = ["tests/test_sklearn_trainer.py"],
tags = ["team:ml", "exclusive"],
deps = [":ml_lib"]
)
py_test(
name = "test_tensorflow_predictor",
size = "small",
@ -289,14 +249,6 @@ py_test(
deps = [":ml_lib"]
)
py_test(
name = "test_tensorflow_trainer",
size = "medium",
srcs = ["tests/test_tensorflow_trainer.py"],
tags = ["team:ml", "exclusive"],
deps = [":ml_lib"]
)
py_test(
name = "test_tensorflow_utils",
size = "small",
@ -313,14 +265,6 @@ py_test(
deps = [":ml_lib"]
)
py_test(
name = "test_torch_trainer",
size = "medium",
srcs = ["tests/test_torch_trainer.py"],
tags = ["team:ml", "exclusive"],
deps = [":ml_lib"]
)
py_test(
name = "test_torch_utils",
size = "small",
@ -329,14 +273,6 @@ py_test(
deps = [":ml_lib"]
)
py_test(
name = "test_trainer",
size = "medium",
srcs = ["tests/test_trainer.py"],
tags = ["team:ml", "exclusive"],
deps = [":ml_lib"]
)
py_test(
name = "test_xgboost_predictor",
size = "small",
@ -345,14 +281,6 @@ py_test(
deps = [":ml_lib"]
)
py_test(
name = "test_xgboost_trainer",
size = "medium",
srcs = ["tests/test_xgboost_trainer.py"],
tags = ["team:ml", "exclusive"],
deps = [":ml_lib"]
)
# This is a dummy test dependency that causes the above tests to be
# re-run if any of these files changes.
py_library(


@ -4,14 +4,14 @@ PREPROCESSOR_KEY = "_preprocessor"
# Key to denote the model in the checkpoint dict.
MODEL_KEY = "model"
# Key to denote which dataset is the evaluation dataset.
# Only used in trainers which do not support multiple
# evaluation datasets.
EVALUATION_DATASET_KEY = "evaluation"
# Key to denote which dataset is the training dataset.
# This is the dataset that the preprocessor is fit on.
TRAIN_DATASET_KEY = "train"
# Key to denote all user-specified auxiliary datasets in DatasetConfig.
WILDCARD_KEY = "*"
# Key to denote which dataset is the evaluation dataset.
# Only used in trainers which do not support multiple
# evaluation datasets.
EVALUATION_DATASET_KEY = "evaluation"


@ -5,11 +5,11 @@
# __custom_trainer_begin__
import torch
from ray.air.trainer import Trainer
from ray.train.trainer import BaseTrainer
from ray import tune
class MyPytorchTrainer(Trainer):
class MyPytorchTrainer(BaseTrainer):
def setup(self):
self.model = torch.nn.Linear(1, 1)
self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1)


@ -10,7 +10,7 @@ from torchvision import datasets, transforms
import ray
from ray import train
from ray.air.train.integrations.horovod import HorovodTrainer
from ray.train.horovod import HorovodTrainer
def metric_average(val, name):


@ -5,7 +5,7 @@ import torch
import ray
from ray import train
from ray import tune
from ray.air.train.integrations.horovod import HorovodTrainer
from ray.train.horovod import HorovodTrainer
from ray.tune.tune_config import TuneConfig
from ray.tune.tuner import Tuner


@ -17,7 +17,7 @@ import torch
import ray
import ray.data
from ray.air.train.integrations.huggingface import HuggingFaceTrainer
from ray.train.huggingface import HuggingFaceTrainer
from ray.air.predictors.integrations.huggingface import HuggingFacePredictor
from ray.air.batch_predictor import BatchPredictor


@ -8,7 +8,7 @@ from torchvision import datasets
from torchvision.transforms import ToTensor
import ray.train as train
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
# Download training data from open datasets.
training_data = datasets.FashionMNIST(


@ -12,7 +12,7 @@ from ray.air import train_test_split
from ray.air.batch_predictor import BatchPredictor
from ray.air.predictors.integrations.torch import TorchPredictor
from ray.air.result import Result
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
def get_datasets(a=5, b=10, size=1000, split=0.8) -> Tuple[Dataset]:


@ -4,7 +4,7 @@ import numpy as np
import torch
import torch.nn as nn
import ray.train as train
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
class LinearDataset(torch.utils.data.Dataset):


@ -2,7 +2,7 @@ import argparse
import ray
from ray import tune
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
from ray.tune.tune_config import TuneConfig
from ray.tune.tuner import Tuner


@ -13,7 +13,7 @@ from torch_geometric.loader import NeighborSampler
from torch_geometric.nn import SAGEConv
from ray import train
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
from torch_geometric.transforms import RandomNodeSplit


@ -10,7 +10,7 @@ import ray
import ray.train as train
from ray.data import Dataset
from ray.train.tensorflow import prepare_dataset_shard
from ray.air.train.integrations.tensorflow import TensorflowTrainer
from ray.train.tensorflow import TensorflowTrainer
from ray.air.predictors.integrations.tensorflow import TensorflowPredictor
from ray.air.result import Result


@ -11,7 +11,7 @@ import tensorflow as tf
from tensorflow.keras.callbacks import Callback
import ray.train as train
from ray.air.train.integrations.tensorflow import TensorflowTrainer
from ray.train.tensorflow import TensorflowTrainer
class TrainCheckpointReportCallback(Callback):


@ -2,7 +2,7 @@ import argparse
import ray
from ray import tune
from ray.air.train.integrations.tensorflow import TensorflowTrainer
from ray.train.tensorflow import TensorflowTrainer
from ray.air.examples.tf.tensorflow_mnist_example import train_func
from ray.tune.tune_config import TuneConfig


@ -6,7 +6,7 @@ import lightgbm
from ray.air.checkpoint import Checkpoint
from ray.air.predictor import Predictor, DataBatchType
from ray.air.train.integrations.lightgbm import load_checkpoint
from ray.train.lightgbm import load_checkpoint
if TYPE_CHECKING:
from ray.air.preprocessor import Preprocessor


@ -4,7 +4,7 @@ import numpy as np
import pandas as pd
from ray.air.checkpoint import Checkpoint
from ray.air.predictor import Predictor, DataBatchType
from ray.air.train.integrations.rl import load_checkpoint
from ray.train.rl import load_checkpoint
from ray.rllib.policy.policy import Policy
from ray.rllib.utils.typing import EnvType


@ -6,7 +6,7 @@ from joblib import parallel_backend
from ray.air.checkpoint import Checkpoint
from ray.air.predictor import Predictor, DataBatchType
from ray.air.train.integrations.sklearn import load_checkpoint
from ray.train.sklearn import load_checkpoint
from ray.air._internal.sklearn_utils import set_cpu_params
from ray.util.joblib import register_ray


@ -5,7 +5,7 @@ import tensorflow as tf
from ray.air.predictor import Predictor, DataBatchType
from ray.air.checkpoint import Checkpoint
from ray.air.train.data_parallel_trainer import _load_checkpoint
from ray.train.data_parallel_trainer import _load_checkpoint
from ray.air._internal.tensorflow_utils import convert_pandas_to_tf_tensor
if TYPE_CHECKING:


@ -6,7 +6,7 @@ import torch
from ray.air.predictor import Predictor, DataBatchType
from ray.air.checkpoint import Checkpoint
from ray.air.train.integrations.torch import load_checkpoint
from ray.train.torch import load_checkpoint
from ray.air._internal.torch_utils import convert_pandas_to_torch_tensor
if TYPE_CHECKING:


@ -6,7 +6,7 @@ import xgboost
from ray.air.checkpoint import Checkpoint
from ray.air.predictor import Predictor, DataBatchType
from ray.air.train.integrations.xgboost import load_checkpoint
from ray.train.xgboost import load_checkpoint
if TYPE_CHECKING:
from ray.air.preprocessor import Preprocessor


@ -3,7 +3,7 @@ import pytest
import ray
from ray.air import Checkpoint
from ray.air.config import ScalingConfigDataClass
from ray.air.trainer import Trainer
from ray.train import BaseTrainer
from ray.air.preprocessor import Preprocessor
from ray.air._internal.config import (
ensure_only_allowed_dataclass_keys_updated,
@ -11,7 +11,7 @@ from ray.air._internal.config import (
)
class DummyTrainer(Trainer):
class DummyTrainer(BaseTrainer):
def training_loop(self) -> None:
pass


@ -7,7 +7,7 @@ from ray.data import Dataset, DatasetPipeline
from ray.air.config import DatasetConfig
from ray import train
from ray.air.train.data_parallel_trainer import DataParallelTrainer
from ray.train.data_parallel_trainer import DataParallelTrainer
from ray.air.preprocessors import BatchMapper


@ -9,7 +9,7 @@ import tempfile
from ray.air.predictors.integrations.rl.rl_predictor import RLPredictor
from ray.air.preprocessor import Preprocessor
from ray.air.checkpoint import Checkpoint
from ray.air.train.integrations.rl import RLTrainer
from ray.train.rl import RLTrainer
from ray.rllib.agents import Trainer
from ray.rllib.policy import Policy


@ -1,4 +0,0 @@
from ray.air.constants import TRAIN_DATASET_KEY
from ray.air.trainer import Trainer
__all__ = ["TRAIN_DATASET_KEY", "Trainer"]


@ -1,122 +0,0 @@
from typing import Optional, Dict, List, Union, TYPE_CHECKING
from ray.actor import ActorHandle
from ray.air.config import DatasetConfig
if TYPE_CHECKING:
from ray.data import Dataset, DatasetPipeline
from ray.air.preprocessor import Preprocessor
class _DataParallelIngestSpec:
"""Implements the execution of DatasetConfig preprocessing and ingest."""
def __init__(self, dataset_config: Dict[str, DatasetConfig]):
"""Construct an ingest spec.
Args:
dataset_config: The merged default + user config dict for the trainer
with all defaults filled in.
"""
self.dataset_config = dataset_config
self.preprocessed_datasets: Optional[Dict[str, "Dataset"]] = None
self.preprocessor: Optional["Preprocessor"] = None
def preprocess_datasets(
self, prep: "Preprocessor", datasets: Dict[str, "Dataset"]
) -> Dict[str, "Dataset"]:
"""Preprocess the given datasets.
This will be called prior to `get_dataset_shards()`.
Args:
prep: The preprocessor to fit, if needed.
datasets: The datasets to fit and transform.
Returns:
Dict of transformed datasets.
"""
if prep:
ds_to_fit = None
for k, conf in self.dataset_config.items():
if k not in datasets:
assert not conf.required, "Missing dataset post-validation"
continue
if conf.fit:
ds_to_fit = datasets[k]
if ds_to_fit:
prep.fit(ds_to_fit)
new_datasets = {}
for key, dataset in datasets.items():
conf = self._config(key)
if conf.transform:
if conf.use_stream_api and conf.stream_window_size > 0:
# In windowed mode, the preprocessor is applied in a streaming way.
new_datasets[key] = dataset
else:
# A window size of infinity is treated the same as bulk mode.
new_datasets[key] = prep.transform(dataset)
else:
new_datasets[key] = dataset
else:
new_datasets = datasets
self.preprocessed_datasets = new_datasets
self.preprocessor = prep
return new_datasets
def get_dataset_shards(
self, training_worker_handles: List[ActorHandle]
) -> List[Dict[str, Union["Dataset", "DatasetPipeline"]]]:
"""Get the shards to pass to training workers.
Note: this has to match the signature of DatasetSpec in legacy train.
Args:
training_worker_handles: Actor handles of the workers, which can be used
for locality-aware splitting.
Returns:
List of dataset shard dicts, one for each training worker.
"""
dataset_dict_splits = [{} for _ in range(len(training_worker_handles))]
for key, dataset in self.preprocessed_datasets.items():
config = self._config(key)
if config.use_stream_api:
if config.stream_window_size > 0:
dataset = dataset.window(
bytes_per_window=config.stream_window_size
).repeat()
# In windowed mode, we re-apply the preprocessor on each iteration.
if self.preprocessor:
prep = self.preprocessor.transform_batch
dataset = dataset.map_batches(prep, batch_format="pandas")
else:
# If the window size is infinity, the preprocessor is cached and
# we don't need to re-apply it each time.
dataset = dataset.repeat()
if config.global_shuffle:
dataset = dataset.random_shuffle_each_window()
if config.split:
dataset_splits = dataset.split(
len(training_worker_handles),
equal=True,
locality_hints=training_worker_handles,
)
else:
dataset_splits = [dataset] * len(training_worker_handles)
for i in range(len(dataset_splits)):
dataset_dict_splits[i][key] = dataset_splits[i]
return dataset_dict_splits
def _config(self, key: str) -> "DatasetConfig":
"""Get the dataset config for the given dataset name."""
if key in self.dataset_config:
return self.dataset_config[key]
return self.dataset_config["*"]


@ -1,3 +0,0 @@
from ray.air.train.integrations.horovod.horovod_trainer import HorovodTrainer
__all__ = ["HorovodTrainer"]


@ -1,3 +0,0 @@
from ray.air.train.integrations.rl.rl_trainer import RLTrainer, load_checkpoint
__all__ = ["RLTrainer", "load_checkpoint"]


@ -1,6 +0,0 @@
from ray.air.train.integrations.tensorflow.tensorflow_trainer import (
TensorflowTrainer,
load_checkpoint,
)
__all__ = ["TensorflowTrainer", "load_checkpoint"]


@ -1,3 +0,0 @@
from ray.air.train.integrations.torch.torch_trainer import TorchTrainer, load_checkpoint
__all__ = ["TorchTrainer", "load_checkpoint"]


@ -9,7 +9,7 @@ import ray
from ray import train
from ray.air.preprocessors import Chain, BatchMapper
from ray.air.config import DatasetConfig
from ray.air.train.data_parallel_trainer import DataParallelTrainer
from ray.train.data_parallel_trainer import DataParallelTrainer
from ray.util.annotations import DeveloperAPI


@ -112,6 +112,14 @@ py_test(
deps = [":train_lib"]
)
py_test(
name = "test_base_trainer",
size = "medium",
srcs = ["tests/test_base_trainer.py"],
tags = ["team:ml", "exclusive", "ray_air"],
deps = [":train_lib"]
)
py_test(
name = "test_callbacks",
size = "medium",
@ -120,6 +128,14 @@ py_test(
deps = [":train_lib"]
)
py_test(
name = "test_data_parallel_trainer",
size = "medium",
srcs = ["tests/test_data_parallel_trainer.py"],
tags = ["team:ml", "exclusive", "ray_air"],
deps = [":train_lib"]
)
py_test(
name = "test_examples",
size = "large",
@ -137,18 +153,34 @@ py_test(
)
py_test(
name = "test_minimal",
size = "small",
srcs = ["tests/test_minimal.py"],
tags = ["team:ml", "exclusive", "minimal"],
name = "test_horovod_trainer",
size = "large",
srcs = ["tests/test_horovod_trainer.py"],
tags = ["team:ml", "exclusive", "ray_air"],
deps = [":train_lib"]
)
py_test(
name = "test_session",
name = "test_huggingface_trainer",
size = "medium",
srcs = ["tests/test_huggingface_trainer.py"],
tags = ["team:ml", "exclusive", "ray_air"],
deps = [":train_lib"]
)
py_test(
name = "test_lightgbm_trainer",
size = "medium",
srcs = ["tests/test_lightgbm_trainer.py"],
tags = ["team:ml", "exclusive", "ray_air"],
deps = [":train_lib"]
)
py_test(
name = "test_minimal",
size = "small",
srcs = ["tests/test_session.py"],
tags = ["team:ml", "exclusive"],
srcs = ["tests/test_minimal.py"],
tags = ["team:ml", "exclusive", "minimal"],
deps = [":train_lib"]
)
@ -160,6 +192,22 @@ py_test(
deps = [":train_lib"]
)
py_test(
name = "test_session",
size = "small",
srcs = ["tests/test_session.py"],
tags = ["team:ml", "exclusive"],
deps = [":train_lib"]
)
py_test(
name = "test_sklearn_trainer",
size = "medium",
srcs = ["tests/test_sklearn_trainer.py"],
tags = ["team:ml", "exclusive", "ray_air"],
deps = [":train_lib"]
)
py_test(
name = "test_trainer",
size = "large",
@ -168,6 +216,22 @@ py_test(
deps = [":train_lib"]
)
py_test(
name = "test_tensorflow_trainer",
size = "medium",
srcs = ["tests/test_tensorflow_trainer.py"],
tags = ["team:ml", "exclusive", "ray_air"],
deps = [":train_lib"]
)
py_test(
name = "test_torch_trainer",
size = "medium",
srcs = ["tests/test_torch_trainer.py"],
tags = ["team:ml", "exclusive", "ray_air"],
deps = [":train_lib"]
)
py_test(
name = "test_tune",
size = "medium",
@ -184,7 +248,6 @@ py_test(
deps = [":train_lib"]
)
py_test(
name = "test_worker_group",
size = "medium",
@ -193,7 +256,13 @@ py_test(
deps = [":train_lib"]
)
py_test(
name = "test_xgboost_trainer",
size = "medium",
srcs = ["tests/test_xgboost_trainer.py"],
tags = ["team:ml", "exclusive", "ray_air"],
deps = [":train_lib"]
)
# This is a dummy test dependency that causes the above tests to be
# re-run if any of these files changes.


@ -12,6 +12,9 @@ from ray.train.train_loop_utils import (
from ray.train.trainer import Trainer, TrainingIterator
from ray.util.ml_utils.checkpoint_manager import CheckpointStrategy
from ray.train.base_trainer import BaseTrainer
from ray.train.constants import TRAIN_DATASET_KEY
from ray._private.usage import usage_lib
usage_lib.record_library_usage("train")
@ -29,4 +32,6 @@ __all__ = [
"Trainer",
"world_rank",
"world_size",
"BaseTrainer",
"TRAIN_DATASET_KEY",
]
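
With these re-exports, both symbols now resolve from the top-level package as well:

from ray.train import BaseTrainer, TRAIN_DATASET_KEY

assert TRAIN_DATASET_KEY == "train"  # value defined in ray.air.constants above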


@ -3,8 +3,11 @@ from typing import Optional, Union, Dict, Callable, List, TYPE_CHECKING
from ray.actor import ActorHandle
from ray.air.config import DatasetConfig
if TYPE_CHECKING:
from ray.data import Dataset, DatasetPipeline
from ray.air.preprocessor import Preprocessor
RayDataset = Union["Dataset", "DatasetPipeline"]
@ -91,3 +94,117 @@ class RayDatasetSpec:
f"the number of training workers: {len(training_worker_handles)}"
)
return splits
class DataParallelIngestSpec:
"""Implements the execution of DatasetConfig preprocessing and ingest."""
def __init__(self, dataset_config: Dict[str, DatasetConfig]):
"""Construct an ingest spec.
Args:
dataset_config: The merged default + user config dict for the trainer
with all defaults filled in.
"""
self.dataset_config = dataset_config
self.preprocessed_datasets: Optional[Dict[str, "Dataset"]] = None
self.preprocessor: Optional["Preprocessor"] = None
def preprocess_datasets(
self, prep: "Preprocessor", datasets: Dict[str, "Dataset"]
) -> Dict[str, "Dataset"]:
"""Preprocess the given datasets.
This will be called prior to `get_dataset_shards()`.
Args:
prep: The preprocessor to fit, if needed.
datasets: The datasets to fit and transform.
Returns:
Dict of transformed datasets.
"""
if prep:
ds_to_fit = None
for k, conf in self.dataset_config.items():
if k not in datasets:
assert not conf.required, "Missing dataset post-validation"
continue
if conf.fit:
ds_to_fit = datasets[k]
if ds_to_fit:
prep.fit(ds_to_fit)
new_datasets = {}
for key, dataset in datasets.items():
conf = self._config(key)
if conf.transform:
if conf.use_stream_api and conf.stream_window_size > 0:
# In windowed mode, the preprocessor is applied in a streaming way.
new_datasets[key] = dataset
else:
# A window size of infinity is treated the same as bulk mode.
new_datasets[key] = prep.transform(dataset)
else:
new_datasets[key] = dataset
else:
new_datasets = datasets
self.preprocessed_datasets = new_datasets
self.preprocessor = prep
return new_datasets
def get_dataset_shards(
self, training_worker_handles: List[ActorHandle]
) -> List[Dict[str, Union["Dataset", "DatasetPipeline"]]]:
"""Get the shards to pass to training workers.
Note: this has to match the signature of DatasetSpec in legacy train.
Args:
training_worker_handles: Actor handles of the workers, which can be used
for locality-aware splitting.
Returns:
List of dataset shard dicts, one for each training worker.
"""
dataset_dict_splits = [{} for _ in range(len(training_worker_handles))]
for key, dataset in self.preprocessed_datasets.items():
config = self._config(key)
if config.use_stream_api:
if config.stream_window_size > 0:
dataset = dataset.window(
bytes_per_window=config.stream_window_size
).repeat()
# In windowed mode, we re-apply the preprocessor on each iteration.
if self.preprocessor:
prep = self.preprocessor.transform_batch
dataset = dataset.map_batches(prep, batch_format="pandas")
else:
# If the window size is infinity, the preprocessor is cached and
# we don't need to re-apply it each time.
dataset = dataset.repeat()
if config.global_shuffle:
dataset = dataset.random_shuffle_each_window()
if config.split:
dataset_splits = dataset.split(
len(training_worker_handles),
equal=True,
locality_hints=training_worker_handles,
)
else:
dataset_splits = [dataset] * len(training_worker_handles)
for i in range(len(dataset_splits)):
dataset_dict_splits[i][key] = dataset_splits[i]
return dataset_dict_splits
def _config(self, key: str) -> "DatasetConfig":
"""Get the dataset config for the given dataset name."""
if key in self.dataset_config:
return self.dataset_config[key]
return self.dataset_config["*"]
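
The moved class also drops its leading underscore. Its only in-tree consumer in this commit is DataParallelTrainer, which constructs it from the validated dataset config (see the diff further below); roughly, with illustrative attribute names:

spec = DataParallelIngestSpec(dataset_config=self._dataset_config)
preprocessed = spec.preprocess_datasets(self.preprocessor, self.datasets)
shards = spec.get_dataset_shards(training_worker_handles)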


@ -6,13 +6,12 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type, Uni
import ray
from ray.util import PublicAPI
from ray.air.checkpoint import Checkpoint
from ray.air.constants import TRAIN_DATASET_KEY
from ray.train.constants import TRAIN_DATASET_KEY
from ray.air.config import (
RunConfig,
ScalingConfig,
ScalingConfigDataClass,
)
from ray.air.preprocessor import Preprocessor
from ray.air.result import Result
from ray.air._internal.config import (
ensure_only_allowed_dataclass_keys_updated,
@ -26,6 +25,7 @@ from ray.util.ml_utils.dict import merge_dicts
if TYPE_CHECKING:
from ray.data import Dataset
from ray.air.preprocessor import Preprocessor
# A type representing either a ray.data.Dataset or a function that returns a
# ray.data.Dataset and accepts no arguments.
@ -43,10 +43,10 @@ class TrainingFailedError(RuntimeError):
@DeveloperAPI
class Trainer(abc.ABC):
class BaseTrainer(abc.ABC):
"""Defines interface for distributed training on Ray.
Note: The base ``Trainer`` class cannot be instantiated directly. Only
Note: The base ``BaseTrainer`` class cannot be instantiated directly. Only
one of its subclasses can be used.
How does a trainer work?
@ -68,18 +68,18 @@ class Trainer(abc.ABC):
**How do I create a new Trainer?**
Subclass ``ray.train.Trainer``, and override the ``training_loop``
Subclass ``ray.train.BaseTrainer``, and override the ``training_loop``
method, and optionally ``setup``.
.. code-block:: python
import torch
from ray.air.train import Trainer
from ray.train import BaseTrainer
from ray import tune
class MyPytorchTrainer(Trainer):
class MyPytorchTrainer(BaseTrainer):
def setup(self):
self.model = torch.nn.Linear(1, 1)
self.optimizer = torch.optim.SGD(
@ -148,7 +148,7 @@ class Trainer(abc.ABC):
scaling_config: Optional[ScalingConfig] = None,
run_config: Optional[RunConfig] = None,
datasets: Optional[Dict[str, GenDataset]] = None,
preprocessor: Optional[Preprocessor] = None,
preprocessor: Optional["Preprocessor"] = None,
resume_from_checkpoint: Optional[Checkpoint] = None,
):
@ -162,7 +162,7 @@ class Trainer(abc.ABC):
def __new__(cls, *args, **kwargs):
"""Store the init args as attributes so this can be merged with Tune hparams."""
trainer = super(Trainer, cls).__new__(cls)
trainer = super(BaseTrainer, cls).__new__(cls)
parameters = inspect.signature(cls.__init__).parameters
parameters = list(parameters.keys())
# Remove self.
@ -301,9 +301,9 @@ class Trainer(abc.ABC):
Example:
.. code-block: python
from ray.air.trainer import Trainer
from ray.train.trainer import BaseTrainer
class MyTrainer(Trainer):
class MyTrainer(BaseTrainer):
def training_loop(self):
for epoch_idx in range(5):
...
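
Putting the renamed pieces together, a minimal custom trainer under the new paths, mirroring the MyPytorchTrainer doc example earlier in this commit (the reported metric is illustrative):

import torch
from ray import tune
from ray.train.trainer import BaseTrainer

class MyPytorchTrainer(BaseTrainer):
    def setup(self):
        self.model = torch.nn.Linear(1, 1)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1)

    def training_loop(self):
        # One report per epoch, as in the BaseTrainer docstring example.
        for epoch_idx in range(5):
            tune.report(epoch=epoch_idx)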


@ -6,6 +6,14 @@ try:
except ImportError:
TUNE_INSTALLED = False
from ray.air.constants import ( # noqa: F401
EVALUATION_DATASET_KEY,
MODEL_KEY,
PREPROCESSOR_KEY,
TRAIN_DATASET_KEY,
WILDCARD_KEY,
)
# Autofilled train.report() metrics. Keys should be consistent with Tune.
TIMESTAMP = "_timestamp"
TIME_THIS_ITER_S = "_time_this_iter_s"


@ -14,17 +14,16 @@ from typing import (
import ray
from ray import tune
from ray.air.constants import (
MODEL_KEY,
PREPROCESSOR_KEY,
from ray.air.constants import MODEL_KEY, PREPROCESSOR_KEY
from ray.train.constants import (
TRAIN_DATASET_KEY,
WILDCARD_KEY,
)
from ray.air.trainer import Trainer
from ray.train.trainer import BaseTrainer
from ray.air.config import ScalingConfig, RunConfig, DatasetConfig
from ray.air.trainer import GenDataset
from ray.train.trainer import GenDataset
from ray.air.checkpoint import Checkpoint
from ray.air.train.data_parallel_ingest import _DataParallelIngestSpec
from ray.train._internal.dataset_spec import DataParallelIngestSpec
from ray.train import BackendConfig, TrainingIterator
from ray.train._internal.backend_executor import BackendExecutor
from ray.train._internal.checkpoint import TuneCheckpointManager
@ -59,7 +58,7 @@ class _DataParallelCheckpointManager(TuneCheckpointManager):
@DeveloperAPI
class DataParallelTrainer(Trainer):
class DataParallelTrainer(BaseTrainer):
"""A Trainer for data parallel training.
You should subclass this Trainer if your Trainer follows SPMD (single program,
@ -162,7 +161,7 @@ class DataParallelTrainer(Trainer):
.. code-block:: python
from ray.air.train.data_parallel_trainer import DataParallelTrainer
from ray.train.data_parallel_trainer import DataParallelTrainer
class MyDataParallelTrainer(DataParallelTrainer):
def __init__(self, *args, **kwargs):
@ -227,7 +226,7 @@ class DataParallelTrainer(Trainer):
TuneCheckpointManager
] = _DataParallelCheckpointManager
_scaling_config_allowed_keys = Trainer._scaling_config_allowed_keys + [
_scaling_config_allowed_keys = BaseTrainer._scaling_config_allowed_keys + [
"num_workers",
"num_cpus_per_worker",
"num_gpus_per_worker",
@ -267,7 +266,7 @@ class DataParallelTrainer(Trainer):
self._dataset_config = DatasetConfig.validated(
DatasetConfig.merge(self._dataset_config, dataset_config), datasets
)
self._ingest_spec = _DataParallelIngestSpec(
self._ingest_spec = DataParallelIngestSpec(
dataset_config=self._dataset_config,
)


@ -1,15 +1,15 @@
from typing import TYPE_CHECKING, Dict, Tuple, Type, Any, Optional
import warnings
from ray.air.trainer import GenDataset
from ray.train.trainer import GenDataset
from ray.air.config import ScalingConfig, RunConfig, ScalingConfigDataClass
from ray.air._internal.checkpointing import save_preprocessor_to_dir
from ray.tune.utils.trainable import TrainableUtil
from ray.util.annotations import DeveloperAPI
from ray.air.trainer import Trainer
from ray.train.trainer import BaseTrainer
from ray.air.checkpoint import Checkpoint
from ray.tune import Trainable
from ray.air.constants import MODEL_KEY, TRAIN_DATASET_KEY
from ray.train.constants import MODEL_KEY, TRAIN_DATASET_KEY
if TYPE_CHECKING:
import xgboost_ray
@ -40,7 +40,7 @@ def _convert_scaling_config_to_ray_params(
@DeveloperAPI
class GBDTTrainer(Trainer):
class GBDTTrainer(BaseTrainer):
"""Common logic for gradient-boosting decision tree (GBDT) frameworks
like XGBoost-Ray and LightGBM-Ray.
@ -65,7 +65,7 @@ class GBDTTrainer(Trainer):
**train_kwargs: Additional kwargs passed to framework ``train()`` function.
"""
_scaling_config_allowed_keys = Trainer._scaling_config_allowed_keys + [
_scaling_config_allowed_keys = BaseTrainer._scaling_config_allowed_keys + [
"num_workers",
"num_cpus_per_worker",
"num_gpus_per_worker",


@ -7,6 +7,7 @@ except ModuleNotFoundError:
"run 'pip install 'horovod[tensorflow]''."
)
from ray.train.horovod.horovod_trainer import HorovodTrainer
from ray.train.horovod.config import HorovodConfig
__all__ = ["HorovodConfig"]
__all__ = ["HorovodConfig", "HorovodTrainer"]


@ -1,12 +1,12 @@
from typing import Dict, Callable, Optional, Union, TYPE_CHECKING
from ray.air.config import ScalingConfig, RunConfig, DatasetConfig
from ray.air.trainer import GenDataset
from ray.train.trainer import GenDataset
from ray.air.checkpoint import Checkpoint
from ray.air.train.data_parallel_trainer import DataParallelTrainer
from ray.train.horovod import HorovodConfig
from ray.train.data_parallel_trainer import DataParallelTrainer
from ray.train.horovod.config import HorovodConfig
if TYPE_CHECKING:
from ray.air.preprocessor import Preprocessor
@ -83,7 +83,7 @@ class HorovodTrainer(DataParallelTrainer):
import horovod.torch as hvd
import torch
import torch.nn as nn
from ray.air.train.integrations.horovod import HorovodTrainer
from ray.train.horovod import HorovodTrainer
input_size = 1
layer_size = 15


@ -1,4 +1,4 @@
from ray.air.train.integrations.huggingface.huggingface_trainer import (
from ray.train.huggingface.huggingface_trainer import (
HuggingFaceTrainer,
load_checkpoint,
)

View file

@ -20,15 +20,15 @@ from ray import train
from ray.util import PublicAPI, get_node_ip_address
from ray.air.checkpoint import Checkpoint
from ray.air.config import RunConfig, ScalingConfig, DatasetConfig
from ray.air.constants import (
from ray.train.constants import (
EVALUATION_DATASET_KEY,
TRAIN_DATASET_KEY,
PREPROCESSOR_KEY,
)
from ray.air.train.integrations.torch import TorchTrainer
from ray.air.trainer import GenDataset
from ray.air.train.data_parallel_trainer import _DataParallelCheckpointManager
from ray.air.train.integrations.huggingface.huggingface_utils import (
from ray.train.torch import TorchTrainer
from ray.train.trainer import GenDataset
from ray.train.data_parallel_trainer import _DataParallelCheckpointManager
from ray.train.huggingface.huggingface_utils import (
CHECKPOINT_PATH_ON_NODE_KEY,
NODE_IP_KEY,
process_datasets,
@ -157,7 +157,7 @@ class HuggingFaceTrainer(TorchTrainer):
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
import ray
from ray.air.train.integrations.huggingface import HuggingFaceTrainer
from ray.train.huggingface import HuggingFaceTrainer
model_checkpoint = "gpt2"
tokenizer_checkpoint = "sgugger/gpt2-like-tokenizer"


@ -1,4 +1,4 @@
from ray.air.train.integrations.lightgbm.lightgbm_trainer import (
from ray.train.lightgbm.lightgbm_trainer import (
LightGBMTrainer,
load_checkpoint,
)

View file

@ -2,10 +2,10 @@ from typing import Dict, Any, Optional, Tuple, TYPE_CHECKING
import os
from ray.air.checkpoint import Checkpoint
from ray.air.train.gbdt_trainer import GBDTTrainer
from ray.train.gbdt_trainer import GBDTTrainer
from ray.air._internal.checkpointing import load_preprocessor_from_dir
from ray.util.annotations import PublicAPI
from ray.air.constants import MODEL_KEY
from ray.train.constants import MODEL_KEY
import lightgbm
import lightgbm_ray
@ -31,7 +31,7 @@ class LightGBMTrainer(GBDTTrainer):
import ray
from ray.air.train.integrations.lightgbm import LightGBMTrainer
from ray.train.lightgbm import LightGBMTrainer
train_dataset = ray.data.from_items(
[{"x": x, "y": x + 1} for x in range(32)])


@ -0,0 +1,3 @@
from ray.train.rl.rl_trainer import RLTrainer, load_checkpoint
__all__ = ["RLTrainer", "load_checkpoint"]


@ -5,7 +5,7 @@ from typing import Optional, Dict, Tuple, Type, Union, Callable, Any, TYPE_CHECK
import ray.cloudpickle as cpickle
from ray.air.checkpoint import Checkpoint
from ray.air.config import ScalingConfig, RunConfig
from ray.air.trainer import Trainer, GenDataset
from ray.train.trainer import BaseTrainer, GenDataset
from ray.air._internal.checkpointing import (
load_preprocessor_from_dir,
save_preprocessor_to_dir,
@ -28,7 +28,7 @@ RL_CONFIG_FILE = "config.pkl"
@PublicAPI(stability="alpha")
class RLTrainer(Trainer):
class RLTrainer(BaseTrainer):
"""Reinforcement learning trainer.
This trainer provides an interface to RLlib trainables.
@ -58,7 +58,7 @@ class RLTrainer(Trainer):
.. code-block:: python
from ray.air.config import RunConfig
from ray.air.train.integrations.rl import RLTrainer
from ray.train.rl import RLTrainer
trainer = RLTrainer(
run_config=RunConfig(stop={"training_iteration": 5}),
@ -85,7 +85,7 @@ class RLTrainer(Trainer):
import ray
from ray.air.config import RunConfig
from ray.air.train.integrations.rl import RLTrainer
from ray.train.rl import RLTrainer
from ray.rllib.agents.marwil.bc import BCTrainer
dataset = ray.data.read_json(


@ -1,4 +1,4 @@
from ray.air.train.integrations.sklearn.sklearn_trainer import (
from ray.train.sklearn.sklearn_trainer import (
SklearnTrainer,
load_checkpoint,
)

View file

@ -14,8 +14,8 @@ from ray import tune
import ray.cloudpickle as cpickle
from ray.air.checkpoint import Checkpoint
from ray.air.config import RunConfig, ScalingConfig
from ray.air.constants import MODEL_KEY, TRAIN_DATASET_KEY
from ray.air.trainer import GenDataset, Trainer
from ray.train.constants import MODEL_KEY, TRAIN_DATASET_KEY
from ray.train.trainer import GenDataset, BaseTrainer
from ray.air._internal.checkpointing import (
load_preprocessor_from_dir,
save_preprocessor_to_dir,
@ -47,7 +47,7 @@ CVType = Union[int, Iterable, BaseCrossValidator]
@PublicAPI(stability="alpha")
class SklearnTrainer(Trainer):
class SklearnTrainer(BaseTrainer):
"""A Trainer for scikit-learn estimator training.
This Trainer runs the ``fit`` method of the given estimator in a
@ -69,7 +69,7 @@ class SklearnTrainer(Trainer):
import ray
from ray.air.train.integrations.sklearn import SklearnTrainer
from ray.train.sklearn import SklearnTrainer
from sklearn.ensemble import RandomForestRegressor
train_dataset = ray.data.from_items(


@ -8,5 +8,14 @@ except ModuleNotFoundError:
from ray.train.tensorflow.config import TensorflowConfig
from ray.train.tensorflow.train_loop_utils import prepare_dataset_shard
from ray.train.tensorflow.tensorflow_trainer import (
TensorflowTrainer,
load_checkpoint,
)
__all__ = ["TensorflowConfig", "prepare_dataset_shard"]
__all__ = [
"TensorflowConfig",
"prepare_dataset_shard",
"TensorflowTrainer",
"load_checkpoint",
]


@ -1,9 +1,9 @@
from typing import Callable, Optional, Dict, Tuple, Type, Union, TYPE_CHECKING
import tensorflow as tf
from ray.train.tensorflow import TensorflowConfig
from ray.air.trainer import GenDataset
from ray.air.train.data_parallel_trainer import DataParallelTrainer, _load_checkpoint
from ray.train.tensorflow.config import TensorflowConfig
from ray.train.trainer import GenDataset
from ray.train.data_parallel_trainer import DataParallelTrainer, _load_checkpoint
from ray.air.config import ScalingConfig, RunConfig, DatasetConfig
from ray.air.checkpoint import Checkpoint
from ray.util import PublicAPI
@ -95,7 +95,7 @@ class TensorflowTrainer(DataParallelTrainer):
from ray import train
from ray.train.tensorflow import prepare_dataset_shard
from ray.air.train.integrations.tensorflow import TensorflowTrainer
from ray.train.tensorflow import TensorflowTrainer
input_size = 1

File diff suppressed because one or more lines are too long


@ -4,7 +4,7 @@ import ray
from ray import tune
from ray.air.preprocessor import Preprocessor
from ray.air.trainer import Trainer
from ray.train.trainer import BaseTrainer
from ray.util.placement_group import get_current_placement_group
@ -27,7 +27,7 @@ class DummyPreprocessor(Preprocessor):
return ds.map(lambda x: x + 1)
class DummyTrainer(Trainer):
class DummyTrainer(BaseTrainer):
_scaling_config_allowed_keys = [
"num_workers",
"num_cpus_per_worker",


@ -3,9 +3,9 @@ import pytest
import ray
from ray import train, tune
from ray.air.checkpoint import Checkpoint
from ray.air.constants import PREPROCESSOR_KEY
from ray.train.constants import PREPROCESSOR_KEY
from ray.air.train.data_parallel_trainer import DataParallelTrainer
from ray.train.data_parallel_trainer import DataParallelTrainer
from ray.air.preprocessor import Preprocessor
from ray.tune.tune_config import TuneConfig
from ray.tune.tuner import Tuner


@ -11,7 +11,7 @@ from ray.air.examples.horovod.horovod_pytorch_example import (
Net,
)
from ray.air.predictors.integrations.torch import TorchPredictor
from ray.air.train.integrations.horovod import HorovodTrainer
from ray.train.horovod import HorovodTrainer
@pytest.fixture


@ -1,7 +1,7 @@
import pandas as pd
import pytest
from unittest.mock import patch
from ray.air.train.integrations.huggingface.huggingface_utils import TrainReportCallback
from ray.train.huggingface.huggingface_utils import TrainReportCallback
from transformers import (
AutoConfig,
@ -13,11 +13,11 @@ from transformers import (
from transformers.trainer_callback import TrainerState
import ray.data
from ray.air.train.integrations.huggingface import HuggingFaceTrainer
from ray.train.huggingface import HuggingFaceTrainer
from ray.air.predictors.integrations.huggingface import HuggingFacePredictor
from ray.air.batch_predictor import BatchPredictor
from ray.air.tests._huggingface_data import train_data, validation_data
from ray.train.tests._huggingface_data import train_data, validation_data
# 16 first rows of tokenized wikitext-2-raw-v1 training & validation
train_df = pd.read_json(train_data)


@ -7,9 +7,9 @@ import lightgbm as lgbm
import ray
from ray import tune
from ray.air.checkpoint import Checkpoint
from ray.air.constants import TRAIN_DATASET_KEY
from ray.train.constants import TRAIN_DATASET_KEY
from ray.air.train.integrations.lightgbm import LightGBMTrainer, load_checkpoint
from ray.train.lightgbm import LightGBMTrainer, load_checkpoint
from ray.air.preprocessor import Preprocessor
from sklearn.datasets import load_breast_cancer


@ -87,9 +87,6 @@ def test_failure():
with pytest.raises(ModuleNotFoundError):
import horovod # noqa: F401
with pytest.raises(ModuleNotFoundError):
from ray import tune # noqa: F401
if __name__ == "__main__":
import pytest


@ -4,9 +4,9 @@ import pandas as pd
import ray
from ray import tune
from ray.air.checkpoint import Checkpoint
from ray.air.constants import TRAIN_DATASET_KEY
from ray.train.constants import TRAIN_DATASET_KEY
from ray.air.train.integrations.sklearn import SklearnTrainer, load_checkpoint
from ray.train.sklearn import SklearnTrainer, load_checkpoint
from ray.air.preprocessor import Preprocessor
from sklearn.datasets import load_breast_cancer


@ -3,13 +3,13 @@ import numpy as np
import ray
from ray import train
from ray.air.train.integrations.tensorflow import TensorflowTrainer
from ray.train.tensorflow import TensorflowTrainer
from ray.air.examples.tf.tensorflow_linear_dataset_example import (
train_func as tensorflow_linear_train_func,
get_dataset,
)
from ray.air.predictors.integrations.tensorflow import TensorflowPredictor
from ray.air.constants import MODEL_KEY, TRAIN_DATASET_KEY
from ray.train.constants import MODEL_KEY, TRAIN_DATASET_KEY
@pytest.fixture


@ -3,7 +3,7 @@ import torch
import ray
from ray.air.predictors.integrations.torch import TorchPredictor
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
from ray import train
from ray.air.examples.pytorch.torch_linear_example import (
train_func as linear_train_func,


@ -7,9 +7,9 @@ import xgboost as xgb
import ray
from ray import tune
from ray.air.checkpoint import Checkpoint
from ray.air.constants import TRAIN_DATASET_KEY
from ray.train.constants import TRAIN_DATASET_KEY
from ray.air.train.integrations.xgboost import XGBoostTrainer, load_checkpoint
from ray.train.xgboost import XGBoostTrainer, load_checkpoint
from ray.air.preprocessor import Preprocessor
from sklearn.datasets import load_breast_cancer


@ -17,7 +17,12 @@ from ray.train.torch.train_loop_utils import (
TorchWorkerProfiler,
)
from ray.train.torch.torch_trainer import TorchTrainer, load_checkpoint
__all__ = [
"TorchTrainer",
"load_checkpoint",
"TorchConfig",
"accelerate",
"get_device",


@ -1,9 +1,9 @@
from typing import Callable, Optional, Dict, Tuple, Union, TYPE_CHECKING
import torch
from ray.train.torch import TorchConfig
from ray.air.trainer import GenDataset
from ray.air.train.data_parallel_trainer import DataParallelTrainer, _load_checkpoint
from ray.train.torch.config import TorchConfig
from ray.train.trainer import GenDataset
from ray.train.data_parallel_trainer import DataParallelTrainer, _load_checkpoint
from ray.air.config import ScalingConfig, RunConfig, DatasetConfig
from ray.air.checkpoint import Checkpoint
from ray.air._internal.torch_utils import load_torch_model
@ -101,7 +101,7 @@ class TorchTrainer(DataParallelTrainer):
import ray
from ray import train
from ray.air.train.integrations.torch import TorchTrainer
from ray.train.torch import TorchTrainer
input_size = 1
layer_size = 15


@ -44,6 +44,12 @@ from ray.train._internal.worker_group import WorkerGroup
from ray.util.annotations import DeveloperAPI, Deprecated
from ray.util.ml_utils.checkpoint_manager import CheckpointStrategy
from ray.train.base_trainer import ( # noqa: F401
BaseTrainer,
GenDataset,
TrainingFailedError,
)
if TUNE_INSTALLED:
from ray import tune
from ray.tune import Trainable


@ -1,4 +1,4 @@
from ray.air.train.integrations.xgboost.xgboost_trainer import (
from ray.train.xgboost.xgboost_trainer import (
XGBoostTrainer,
load_checkpoint,
)


@ -2,10 +2,10 @@ import os
from typing import Optional, Tuple, TYPE_CHECKING
from ray.air.checkpoint import Checkpoint
from ray.air.train.gbdt_trainer import GBDTTrainer
from ray.train.gbdt_trainer import GBDTTrainer
from ray.air._internal.checkpointing import load_preprocessor_from_dir
from ray.util.annotations import PublicAPI
from ray.air.constants import MODEL_KEY
from ray.train.constants import MODEL_KEY
import xgboost
import xgboost_ray
@ -27,7 +27,7 @@ class XGBoostTrainer(GBDTTrainer):
import ray
from ray.air.train.integrations.xgboost import XGBoostTrainer
from ray.train.xgboost import XGBoostTrainer
train_dataset = ray.data.from_items(
[{"x": x, "y": x + 1} for x in range(32)])


@ -4,7 +4,7 @@ from typing import Any, Callable, Dict, Optional, Type, Union
import ray.cloudpickle as pickle
from ray.air.config import RunConfig
from ray.air.trainer import Trainer
from ray.train.trainer import BaseTrainer
from ray.tune import Experiment, TuneError, ExperimentAnalysis
from ray.tune.impl.utils import execute_dataset
from ray.tune.result_grid import ResultGrid
@ -55,7 +55,7 @@ class TunerInternal:
str,
Callable,
Type[Trainable],
Trainer,
BaseTrainer,
]
] = None,
param_space: Optional[Dict[str, Any]] = None,
@ -85,7 +85,7 @@ class TunerInternal:
# If no run config was passed to Tuner directly, use the one from the Trainer,
# if available
if not run_config and isinstance(trainable, Trainer):
if not run_config and isinstance(trainable, BaseTrainer):
run_config = trainable.run_config
self._is_restored = False
@ -144,7 +144,7 @@ class TunerInternal:
@staticmethod
def _convert_trainable(trainable: Any) -> Type[Trainable]:
if isinstance(trainable, Trainer):
if isinstance(trainable, BaseTrainer):
trainable = trainable.as_trainable()
else:
trainable = trainable
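
Because the isinstance checks now target BaseTrainer, any trainer from this commit can be handed to Tuner directly; a brief sketch reusing the XGBoost trainer built earlier (the param_space contents are illustrative):

from ray import tune
from ray.tune.tune_config import TuneConfig
from ray.tune.tuner import Tuner

tuner = Tuner(
    trainable=trainer,  # converted internally via trainer.as_trainable()
    param_space={"params": {"max_depth": tune.randint(1, 9)}},
    tune_config=TuneConfig(num_samples=4),
)
results = tuner.fit()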


@ -13,9 +13,9 @@ from ray.air.config import RunConfig
from ray.air.examples.pytorch.torch_linear_example import (
train_func as linear_train_func,
)
from ray.air.train.integrations.torch import TorchTrainer
from ray.air.train.integrations.xgboost import XGBoostTrainer
from ray.air.train import Trainer
from ray.train.torch import TorchTrainer
from ray.train.xgboost import XGBoostTrainer
from ray.train import BaseTrainer
from ray.tune import Callback, TuneError
from ray.tune.cloud import TrialCheckpoint
from ray.tune.result import DEFAULT_RESULTS_DIR
@ -23,7 +23,7 @@ from ray.tune.tune_config import TuneConfig
from ray.tune.tuner import Tuner
class DummyTrainer(Trainer):
class DummyTrainer(BaseTrainer):
_scaling_config_allowed_keys = [
"num_workers",
"num_cpus_per_worker",


@ -3,7 +3,7 @@ from typing import Any, Callable, Dict, Optional, Type, Union
import ray
from ray.air.config import RunConfig
from ray.air.trainer import Trainer
from ray.train.trainer import BaseTrainer
from ray.tune import TuneError
from ray.tune.result_grid import ResultGrid
from ray.tune.trainable import Trainable
@ -47,7 +47,7 @@ class Tuner:
from ray import tune
from ray.data import from_pandas
from ray.air.config import RunConfig
from ray.air.train.integrations.xgboost import XGBoostTrainer
from ray.train.xgboost import XGBoostTrainer
from ray.tune.tuner import Tuner
def get_dataset():
@ -111,7 +111,7 @@ class Tuner:
str,
Callable,
Type[Trainable],
Trainer,
BaseTrainer,
]
] = None,
param_space: Optional[Dict[str, Any]] = None,


@ -256,11 +256,14 @@ if setup_spec.type == SetupType.RAY:
"scipy",
]
setup_spec.extras["train"] = setup_spec.extras["tune"]
# Ray AI Runtime should encompass Data, Tune, and Serve.
setup_spec.extras["air"] = list(
set(
setup_spec.extras["tune"]
+ setup_spec.extras["data"]
+ setup_spec.extras["train"]
+ setup_spec.extras["serve"]
)
)


@ -3,7 +3,7 @@ import torch.nn as nn
import numpy as np
import torchvision
from ray.air import RunConfig
from ray.air.train.integrations.horovod import HorovodTrainer
from ray.train.horovod import HorovodTrainer
from ray.tune.tune_config import TuneConfig
from ray.tune.tuner import Tuner
from torch.utils.data import DataLoader