[AIR/Train] Move ray.air.train to ray.train (#25570)

parent 836b08597f
commit 1316a2d05e

100 changed files with 426 additions and 387 deletions
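In user code, this move shows up purely as import-path changes. A minimal before/after sketch, using TorchTrainer as one representative of the moved integrations (the other trainers follow the same pattern):

# Before this commit (old layout):
# from ray.air.train.integrations.torch import TorchTrainer
# from ray.air.trainer import Trainer

# After this commit (new layout):
from ray.train.torch import TorchTrainer
from ray.train import BaseTrainer  # the abstract base class, renamed from Trainer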
@@ -7,7 +7,7 @@
     - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/ci.sh build
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only python/ray/train/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only,-ray_air python/ray/train/...

 - label: ":tv: :database: :steam_locomotive: Datasets Train Integration GPU Tests and Examples (Python 3.7)"
   conditions: ["RAY_CI_TRAIN_AFFECTED"]
@@ -4,6 +4,7 @@
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - DATA_PROCESSING_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,-needs_credentials python/ray/air/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air,-gpu_only,-gpu,-needs_credentials python/ray/train/...

 - label: ":brain: RLlib: Learning discr. actions TF2-static-graph"
   conditions: ["RAY_CI_RLLIB_AFFECTED"]
@@ -300,14 +301,14 @@
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TRAIN_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu_only,-minimal,-tune python/ray/train/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu_only,-minimal,-tune,-ray_air python/ray/train/...

 - label: ":steam_locomotive: :octopus: Train + Tune tests and examples"
   conditions: ["RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=tune,-gpu_only python/ray/train/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=tune,-gpu_only,-ray_air python/ray/train/...

 - label: ":octopus: Tune/Modin/Dask tests and examples. Python 3.7"
   conditions: ["RAY_CI_TUNE_AFFECTED"]
@@ -443,11 +443,12 @@
     - ./ci/env/install-dependencies.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=soft_imports python/ray/tune/...

 # Test to see if Train can be used without torch, tf, etc. installed
 - label: ":steam_locomotive: Train minimal install"
   conditions: ["RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/install-minimal.sh
+    - TRAIN_MINIMAL_INSTALL=1 ./ci/env/install-minimal.sh
+    - ./ci/env/env_info.sh
     - python ./ci/env/check_minimal_install.py
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=minimal python/ray/train/...
ci/env/check_minimal_install.py (vendored, 3 changed lines)
@@ -18,6 +18,9 @@ DEFAULT_BLACKLIST = [
     "opencensus",
     "prometheus_client",
     "smart_open",
+    "torch",
+    "tensorflow",
+    "jax",
 ]
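The blacklist above is what ci/env/check_minimal_install.py asserts is absent from a minimal environment. A hedged sketch of that kind of check; the helper name and failure message here are illustrative, not the script's actual code:

import importlib.util

DEFAULT_BLACKLIST = ["torch", "tensorflow", "jax"]  # abridged for illustration

def assert_packages_not_installed(blacklist):
    for module_name in blacklist:
        # find_spec returns None when the module cannot be imported.
        assert importlib.util.find_spec(module_name) is None, (
            f"Minimal install check failed: {module_name} is installed."
        )

assert_packages_not_installed(DEFAULT_BLACKLIST)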
ci/env/install-minimal.sh (vendored, 6 changed lines)
@@ -35,3 +35,9 @@ eval "${WORKSPACE_DIR}/ci/ci.sh build"
 python -m pip install -U \
     pytest==5.4.3 \
     numpy
+
+# Train requirements.
+# TODO: make this dynamic
+if [ "${TRAIN_MINIMAL_INSTALL-}" = 1 ]; then
+    python -m pip install -U "ray[tune]"
+fi
@@ -90,7 +90,7 @@ trainer.fit()

 # __config_1__
 import ray
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer
 from ray.air.config import DatasetConfig

 train_ds = ray.data.range_tensor(1000)
@@ -118,7 +118,7 @@ print(my_trainer.get_dataset_config())

 # __config_2__
 import ray
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer
 from ray.air.config import DatasetConfig

 train_ds = ray.data.range_tensor(1000)
@@ -144,7 +144,7 @@ print(my_trainer.get_dataset_config())
 import ray
 from ray import train
 from ray.data import Dataset
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer
 from ray.air.config import DatasetConfig

@@ -175,7 +175,7 @@ my_trainer.fit()
 import ray
 from ray import train
 from ray.data import DatasetPipeline
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer
 from ray.air.config import DatasetConfig

@@ -21,7 +21,7 @@ preprocessor = StandardScaler(columns=columns_to_scale)
 # __air_preprocessors_end__

 # __air_trainer_start__
-from ray.air.train.integrations.xgboost import XGBoostTrainer
+from ray.train.xgboost import XGBoostTrainer

 num_workers = 2
 use_gpu = False
@@ -47,7 +47,7 @@ print(batch_transformed)
 # __trainer_start__
 import ray

-from ray.air.train.integrations.xgboost import XGBoostTrainer
+from ray.train.xgboost import XGBoostTrainer
 from ray.air.preprocessors import MinMaxScaler

 train_dataset = ray.data.from_items([{"x": x, "y": 2 * x} for x in range(0, 32, 3)])
@@ -28,7 +28,7 @@ import torch
 from torch import nn
 from torch.utils.data import DataLoader
 import ray.train as train
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer

 # Define model
 class NeuralNetwork(nn.Module):
@@ -18,7 +18,7 @@ from tensorflow.keras.callbacks import Callback

 import ray.train as train
 from ray.train.tensorflow import prepare_dataset_shard
-from ray.air.train.integrations.tensorflow import TensorflowTrainer
+from ray.train.tensorflow import TensorflowTrainer


 def build_model() -> tf.keras.Model:
@@ -25,7 +25,7 @@ preprocessor = StandardScaler(columns=columns_to_scale)


 # __air_xgb_train_start__
-from ray.air.train.integrations.xgboost import XGBoostTrainer
+from ray.train.xgboost import XGBoostTrainer

 # XGBoost specific params
 params = {
@@ -72,7 +72,7 @@
 "import ray\n",
 "from ray import tune\n",
 "from ray.air import RunConfig\n",
-"from ray.air.train.integrations.xgboost import XGBoostTrainer\n",
+"from ray.train.xgboost import XGBoostTrainer\n",
 "from ray.tune.tune_config import TuneConfig\n",
 "from ray.tune.tuner import Tuner"
 ]
@@ -797,7 +797,7 @@
 },
 "outputs": [],
 "source": [
-"from ray.air.train.integrations.huggingface import HuggingFaceTrainer\n",
+"from ray.train.huggingface import HuggingFaceTrainer\n",
 "from ray.air import RunConfig\n",
 "from ray.tune.integration.mlflow import MLflowLoggerCallback\n",
 "\n",
@@ -1458,7 +1458,7 @@
 },
 "outputs": [],
 "source": [
-"from ray.air.train.integrations.huggingface import load_checkpoint\n",
+"from ray.train.huggingface import load_checkpoint\n",
 "\n",
 "hf_trainer = load_checkpoint(\n",
 "    checkpoint=result.checkpoint,\n",
|
|
@ -53,7 +53,7 @@
|
|||
"from ray.air.predictors.integrations.lightgbm import LightGBMPredictor\n",
|
||||
"from ray.air.preprocessors.chain import Chain\n",
|
||||
"from ray.air.preprocessors.encoder import Categorizer\n",
|
||||
"from ray.air.train.integrations.lightgbm import LightGBMTrainer\n",
|
||||
"from ray.train.lightgbm import LightGBMTrainer\n",
|
||||
"from ray.data.dataset import Dataset\n",
|
||||
"from ray.air.result import Result\n",
|
||||
"from ray.air.preprocessors import StandardScaler\n",
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
"from ray.air import Checkpoint\n",
|
||||
"from ray.air.config import RunConfig\n",
|
||||
"from ray.air.predictors.integrations.rl.rl_predictor import RLPredictor\n",
|
||||
"from ray.air.train.integrations.rl.rl_trainer import RLTrainer\n",
|
||||
"from ray.train.rl.rl_trainer import RLTrainer\n",
|
||||
"from ray.air.result import Result\n",
|
||||
"from ray.rllib.agents.marwil import BCTrainer\n",
|
||||
"from ray.tune.tuner import Tuner"
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
"from ray.air import Checkpoint\n",
|
||||
"from ray.air.config import RunConfig\n",
|
||||
"from ray.air.predictors.integrations.rl.rl_predictor import RLPredictor\n",
|
||||
"from ray.air.train.integrations.rl.rl_trainer import RLTrainer\n",
|
||||
"from ray.train.rl.rl_trainer import RLTrainer\n",
|
||||
"from ray.air.result import Result\n",
|
||||
"from ray.rllib.agents.marwil import BCTrainer\n",
|
||||
"from ray.tune.tuner import Tuner"
|
||||
|
|
|
@ -52,7 +52,7 @@
|
|||
"\n",
|
||||
"from ray.air.checkpoint import Checkpoint\n",
|
||||
"from ray.air.config import RunConfig\n",
|
||||
"from ray.air.train.integrations.rl.rl_trainer import RLTrainer\n",
|
||||
"from ray.train.rl.rl_trainer import RLTrainer\n",
|
||||
"from ray.air.predictors.integrations.rl.rl_predictor import RLPredictor\n",
|
||||
"from ray.air.result import Result\n",
|
||||
"from ray.serve.model_wrappers import ModelWrapperDeployment\n",
|
||||
|
|
|
@ -58,7 +58,7 @@
|
|||
"from ray.air.predictors.integrations.sklearn import SklearnPredictor\n",
|
||||
"from ray.air.preprocessors import Chain, OrdinalEncoder, StandardScaler\n",
|
||||
"from ray.air.result import Result\n",
|
||||
"from ray.air.train.integrations.sklearn import SklearnTrainer\n",
|
||||
"from ray.train.sklearn import SklearnTrainer\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from sklearn.datasets import load_breast_cancer\n",
|
||||
|
|
|
@ -748,7 +748,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"from ray.air.train.integrations.tensorflow import TensorflowTrainer\n",
|
||||
"from ray.train.tensorflow import TensorflowTrainer\n",
|
||||
"\n",
|
||||
"trainer = TensorflowTrainer(\n",
|
||||
" train_loop_per_worker=train_loop_per_worker,\n",
|
||||
|
|
|
@ -410,7 +410,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"from ray.air.train.integrations.torch import TorchTrainer\n",
|
||||
"from ray.train.torch import TorchTrainer\n",
|
||||
"\n",
|
||||
"trainer = TorchTrainer(\n",
|
||||
" train_loop_per_worker=train_loop_per_worker,\n",
|
||||
|
|
|
@ -1235,7 +1235,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"from ray.air.train.integrations.torch import TorchTrainer\n",
|
||||
"from ray.train.torch import TorchTrainer\n",
|
||||
"from ray.air.predictors.integrations.torch import TorchPredictor\n",
|
||||
"from ray.air import Checkpoint\n",
|
||||
"from ray import serve\n",
|
||||
|
|
|
@ -48,7 +48,7 @@
|
|||
"\n",
|
||||
"from ray.air import RunConfig\n",
|
||||
"from ray.air.result import Result\n",
|
||||
"from ray.air.train.integrations.xgboost import XGBoostTrainer\n",
|
||||
"from ray.train.xgboost import XGBoostTrainer\n",
|
||||
"from ray.tune.integration.comet import CometLoggerCallback\n",
|
||||
"from sklearn.datasets import load_breast_cancer"
|
||||
]
|
||||
|
|
|
@ -48,7 +48,7 @@
|
|||
"\n",
|
||||
"from ray.air import RunConfig\n",
|
||||
"from ray.air.result import Result\n",
|
||||
"from ray.air.train.integrations.xgboost import XGBoostTrainer\n",
|
||||
"from ray.train.xgboost import XGBoostTrainer\n",
|
||||
"from ray.tune.integration.wandb import WandbLoggerCallback\n",
|
||||
"from sklearn.datasets import load_breast_cancer"
|
||||
]
|
||||
|
|
|
@ -66,7 +66,7 @@
|
|||
"import ray\n",
|
||||
"from ray.air.batch_predictor import BatchPredictor\n",
|
||||
"from ray.air.predictors.integrations.xgboost import XGBoostPredictor\n",
|
||||
"from ray.air.train.integrations.xgboost import XGBoostTrainer\n",
|
||||
"from ray.train.xgboost import XGBoostTrainer\n",
|
||||
"from ray.data.dataset import Dataset\n",
|
||||
"from ray.air.result import Result\n",
|
||||
"from ray.air.preprocessors import StandardScaler\n",
|
||||
|
|
|
@ -29,42 +29,42 @@ Preprocessors
|
|||
Trainer
|
||||
~~~~~~~
|
||||
|
||||
.. autoclass:: ray.air.trainer.Trainer
|
||||
.. autoclass:: ray.train.trainer.BaseTrainer
|
||||
:members:
|
||||
|
||||
.. automodule:: ray.air.train.integrations.xgboost
|
||||
.. automodule:: ray.train.xgboost
|
||||
:members:
|
||||
:show-inheritance:
|
||||
|
||||
.. automodule:: ray.air.train.integrations.lightgbm
|
||||
.. automodule:: ray.train.lightgbm
|
||||
:members:
|
||||
:show-inheritance:
|
||||
|
||||
.. automodule:: ray.air.train.integrations.tensorflow
|
||||
.. automodule:: ray.train.tensorflow
|
||||
:members:
|
||||
:show-inheritance:
|
||||
|
||||
.. automodule:: ray.air.train.integrations.torch
|
||||
.. automodule:: ray.train.torch
|
||||
:members:
|
||||
:show-inheritance:
|
||||
|
||||
.. automodule:: ray.air.train.integrations.horovod
|
||||
.. automodule:: ray.train.horovod
|
||||
:members:
|
||||
:show-inheritance:
|
||||
|
||||
.. automodule:: ray.air.train.integrations.huggingface
|
||||
.. automodule:: ray.train.huggingface
|
||||
:members:
|
||||
:show-inheritance:
|
||||
|
||||
.. automodule:: ray.air.train.integrations.sklearn
|
||||
.. automodule:: ray.train.sklearn
|
||||
:members:
|
||||
:show-inheritance:
|
||||
|
||||
.. autoclass:: ray.air.train.data_parallel_trainer.DataParallelTrainer
|
||||
.. autoclass:: ray.train.data_parallel_trainer.DataParallelTrainer
|
||||
:members:
|
||||
:show-inheritance:
|
||||
|
||||
.. autoclass:: ray.air.train.gbdt_trainer.GBDTTrainer
|
||||
.. autoclass:: ray.train.gbdt_trainer.GBDTTrainer
|
||||
:members:
|
||||
:show-inheritance:
|
||||
|
||||
|
|
|
@@ -63,7 +63,7 @@ The same logic is applicable to other integrations as well.
 Trainer
 ~~~~~~~

-The journey of the ``Preprocessor`` starts with the :class:`Trainer <ray.air.trainer.Trainer>`.
+The journey of the ``Preprocessor`` starts with the :class:`Trainer <ray.train.trainer.BaseTrainer>`.
 If the ``Trainer`` is instantiated with a ``Preprocessor``, then the following logic will be executed when ``Trainer.fit()`` is called:

 #. If a ``"train"`` ``Dataset`` is passed in, then the ``Preprocessor`` will call ``fit()`` on it.
@@ -31,6 +31,7 @@ TorchConfig
 ~~~~~~~~~~~

 .. autoclass:: ray.train.torch.TorchConfig
+    :noindex:

 .. _train-api-tensorflow-config:
@@ -38,6 +39,7 @@ TensorflowConfig
 ~~~~~~~~~~~~~~~~

 .. autoclass:: ray.train.tensorflow.TensorflowConfig
+    :noindex:

 .. _train-api-horovod-config:
@@ -45,6 +47,7 @@ HorovodConfig
 ~~~~~~~~~~~~~

 .. autoclass:: ray.train.horovod.HorovodConfig
+    :noindex:

 .. _train-api-backend-interfaces:
@@ -170,6 +173,7 @@ train.torch.prepare_model
 ~~~~~~~~~~~~~~~~~~~~~~~~~

 .. autofunction:: ray.train.torch.prepare_model
+    :noindex:

 .. _train-api-torch-prepare-data-loader:
@@ -177,17 +181,20 @@ train.torch.prepare_data_loader
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 .. autofunction:: ray.train.torch.prepare_data_loader
+    :noindex:

 train.torch.prepare_optimizer
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 .. autofunction:: ray.train.torch.prepare_optimizer
+    :noindex:


 train.torch.backward
 ~~~~~~~~~~~~~~~~~~~~

 .. autofunction:: ray.train.torch.backward
+    :noindex:

 .. _train-api-torch-get-device:
@@ -195,11 +202,13 @@ train.torch.get_device
 ~~~~~~~~~~~~~~~~~~~~~~

 .. autofunction:: ray.train.torch.get_device
+    :noindex:

 train.torch.enable_reproducibility
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 .. autofunction:: ray.train.torch.enable_reproducibility
+    :noindex:

 .. _train-api-torch-worker-profiler:
@@ -207,12 +216,14 @@ train.torch.accelerate
 ~~~~~~~~~~~~~~~~~~~~~~

 .. autofunction:: ray.train.torch.accelerate
+    :noindex:

 train.torch.TorchWorkerProfiler
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 .. autoclass:: ray.train.torch.TorchWorkerProfiler
     :members:
+    :noindex:

 .. _train-api-tensorflow-utils:
@@ -222,4 +233,5 @@ TensorFlow Training Function Utilities
 train.tensorflow.prepare_dataset_shard
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 .. autofunction:: ray.train.tensorflow.prepare_dataset_shard
+    :noindex:
@@ -42,7 +42,7 @@
 "import ray\n",
 "from ray import train\n",
 "from ray import tune\n",
-"from ray.air.train.integrations.horovod import HorovodTrainer\n",
+"from ray.train.horovod import HorovodTrainer\n",
 "from ray.tune.tune_config import TuneConfig\n",
 "from ray.tune.tuner import Tuner\n",
 "\n",
@@ -177,14 +177,6 @@ py_test(
     deps = [":ml_lib"]
 )
-
-py_test(
-    name = "test_data_parallel_trainer",
-    size = "medium",
-    srcs = ["tests/test_data_parallel_trainer.py"],
-    tags = ["team:ml", "exclusive"],
-    deps = [":ml_lib"]
-)

 py_test(
     name = "test_dataset_config",
     size = "medium",
@@ -201,14 +193,6 @@ py_test(
     deps = [":ml_lib"]
 )
-
-py_test(
-    name = "test_horovod_trainer",
-    size = "large",
-    srcs = ["tests/test_horovod_trainer.py"],
-    tags = ["team:ml", "exclusive"],
-    deps = [":ml_lib"]
-)

 py_test(
     name = "test_huggingface_predictor",
     size = "medium",
@@ -217,14 +201,6 @@ py_test(
     deps = [":ml_lib"]
 )
-
-py_test(
-    name = "test_huggingface_trainer",
-    size = "medium",
-    srcs = ["tests/test_huggingface_trainer.py"],
-    tags = ["team:ml", "exclusive"],
-    deps = [":ml_lib"]
-)

 py_test(
     name = "test_lightgbm_predictor",
     size = "small",
@@ -233,14 +209,6 @@ py_test(
     deps = [":ml_lib"]
 )
-
-py_test(
-    name = "test_lightgbm_trainer",
-    size = "medium",
-    srcs = ["tests/test_lightgbm_trainer.py"],
-    tags = ["team:ml", "exclusive"],
-    deps = [":ml_lib"]
-)

 py_test(
     name = "test_predictor",
     size = "small",
@@ -273,14 +241,6 @@ py_test(
     deps = [":ml_lib"]
 )
-
-py_test(
-    name = "test_sklearn_trainer",
-    size = "medium",
-    srcs = ["tests/test_sklearn_trainer.py"],
-    tags = ["team:ml", "exclusive"],
-    deps = [":ml_lib"]
-)

 py_test(
     name = "test_tensorflow_predictor",
     size = "small",
@@ -289,14 +249,6 @@ py_test(
     deps = [":ml_lib"]
 )
-
-py_test(
-    name = "test_tensorflow_trainer",
-    size = "medium",
-    srcs = ["tests/test_tensorflow_trainer.py"],
-    tags = ["team:ml", "exclusive"],
-    deps = [":ml_lib"]
-)

 py_test(
     name = "test_tensorflow_utils",
     size = "small",
@@ -313,14 +265,6 @@ py_test(
     deps = [":ml_lib"]
 )
-
-py_test(
-    name = "test_torch_trainer",
-    size = "medium",
-    srcs = ["tests/test_torch_trainer.py"],
-    tags = ["team:ml", "exclusive"],
-    deps = [":ml_lib"]
-)

 py_test(
     name = "test_torch_utils",
     size = "small",
@@ -329,14 +273,6 @@ py_test(
     deps = [":ml_lib"]
 )
-
-py_test(
-    name = "test_trainer",
-    size = "medium",
-    srcs = ["tests/test_trainer.py"],
-    tags = ["team:ml", "exclusive"],
-    deps = [":ml_lib"]
-)

 py_test(
     name = "test_xgboost_predictor",
     size = "small",
@@ -345,14 +281,6 @@ py_test(
     deps = [":ml_lib"]
 )
-
-py_test(
-    name = "test_xgboost_trainer",
-    size = "medium",
-    srcs = ["tests/test_xgboost_trainer.py"],
-    tags = ["team:ml", "exclusive"],
-    deps = [":ml_lib"]
-)

 # This is a dummy test dependency that causes the above tests to be
 # re-run if any of these files changes.
 py_library(
@@ -4,14 +4,14 @@ PREPROCESSOR_KEY = "_preprocessor"
 # Key to denote the model in the checkpoint dict.
 MODEL_KEY = "model"

-# Key to denote which dataset is the evaluation dataset.
-# Only used in trainers which do not support multiple
-# evaluation datasets.
-EVALUATION_DATASET_KEY = "evaluation"
-
 # Key to denote which dataset is the training dataset.
 # This is the dataset that the preprocessor is fit on.
 TRAIN_DATASET_KEY = "train"

 # Key to denote all user-specified auxiliary datasets in DatasetConfig.
 WILDCARD_KEY = "*"
+
+# Key to denote which dataset is the evaluation dataset.
+# Only used in trainers which do not support multiple
+# evaluation datasets.
+EVALUATION_DATASET_KEY = "evaluation"
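These constants are the keys trainers use to look up entries in their datasets dict. A hedged sketch of how they line up in practice, modeled on the XGBoostTrainer examples elsewhere in this diff (the params and data here are illustrative):

import ray
from ray.train.xgboost import XGBoostTrainer

# "train" is TRAIN_DATASET_KEY: the preprocessor, if any, is fit on it.
# "evaluation" is EVALUATION_DATASET_KEY for single-eval-dataset trainers.
trainer = XGBoostTrainer(
    label_column="y",
    params={"objective": "reg:squarederror"},
    scaling_config={"num_workers": 2},
    datasets={
        "train": ray.data.from_items([{"x": x, "y": 2 * x} for x in range(32)]),
        "evaluation": ray.data.from_items(
            [{"x": x, "y": 2 * x} for x in range(32, 40)]
        ),
    },
)
result = trainer.fit()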
@@ -5,11 +5,11 @@
 # __custom_trainer_begin__
 import torch

-from ray.air.trainer import Trainer
+from ray.train.trainer import BaseTrainer
 from ray import tune


-class MyPytorchTrainer(Trainer):
+class MyPytorchTrainer(BaseTrainer):
     def setup(self):
         self.model = torch.nn.Linear(1, 1)
         self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1)
@@ -10,7 +10,7 @@ from torchvision import datasets, transforms

 import ray
 from ray import train
-from ray.air.train.integrations.horovod import HorovodTrainer
+from ray.train.horovod import HorovodTrainer


 def metric_average(val, name):
@@ -5,7 +5,7 @@ import torch
 import ray
 from ray import train
 from ray import tune
-from ray.air.train.integrations.horovod import HorovodTrainer
+from ray.train.horovod import HorovodTrainer
 from ray.tune.tune_config import TuneConfig
 from ray.tune.tuner import Tuner

@@ -17,7 +17,7 @@ import torch

 import ray
 import ray.data
-from ray.air.train.integrations.huggingface import HuggingFaceTrainer
+from ray.train.huggingface import HuggingFaceTrainer
 from ray.air.predictors.integrations.huggingface import HuggingFacePredictor
 from ray.air.batch_predictor import BatchPredictor

@@ -8,7 +8,7 @@ from torchvision import datasets
 from torchvision.transforms import ToTensor

 import ray.train as train
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer

 # Download training data from open datasets.
 training_data = datasets.FashionMNIST(
@@ -12,7 +12,7 @@ from ray.air import train_test_split
 from ray.air.batch_predictor import BatchPredictor
 from ray.air.predictors.integrations.torch import TorchPredictor
 from ray.air.result import Result
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer


 def get_datasets(a=5, b=10, size=1000, split=0.8) -> Tuple[Dataset]:
@@ -4,7 +4,7 @@ import numpy as np
 import torch
 import torch.nn as nn
 import ray.train as train
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer


 class LinearDataset(torch.utils.data.Dataset):
@@ -2,7 +2,7 @@ import argparse

 import ray
 from ray import tune
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer
 from ray.tune.tune_config import TuneConfig
 from ray.tune.tuner import Tuner

@@ -13,7 +13,7 @@ from torch_geometric.loader import NeighborSampler
 from torch_geometric.nn import SAGEConv

 from ray import train
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer
 from torch_geometric.transforms import RandomNodeSplit

@@ -10,7 +10,7 @@ import ray
 import ray.train as train
 from ray.data import Dataset
 from ray.train.tensorflow import prepare_dataset_shard
-from ray.air.train.integrations.tensorflow import TensorflowTrainer
+from ray.train.tensorflow import TensorflowTrainer
 from ray.air.predictors.integrations.tensorflow import TensorflowPredictor
 from ray.air.result import Result

@@ -11,7 +11,7 @@ import tensorflow as tf
 from tensorflow.keras.callbacks import Callback

 import ray.train as train
-from ray.air.train.integrations.tensorflow import TensorflowTrainer
+from ray.train.tensorflow import TensorflowTrainer


 class TrainCheckpointReportCallback(Callback):
@@ -2,7 +2,7 @@ import argparse

 import ray
 from ray import tune
-from ray.air.train.integrations.tensorflow import TensorflowTrainer
+from ray.train.tensorflow import TensorflowTrainer

 from ray.air.examples.tf.tensorflow_mnist_example import train_func
 from ray.tune.tune_config import TuneConfig
@@ -6,7 +6,7 @@ import lightgbm

 from ray.air.checkpoint import Checkpoint
 from ray.air.predictor import Predictor, DataBatchType
-from ray.air.train.integrations.lightgbm import load_checkpoint
+from ray.train.lightgbm import load_checkpoint

 if TYPE_CHECKING:
     from ray.air.preprocessor import Preprocessor
@@ -4,7 +4,7 @@ import numpy as np
 import pandas as pd
 from ray.air.checkpoint import Checkpoint
 from ray.air.predictor import Predictor, DataBatchType
-from ray.air.train.integrations.rl import load_checkpoint
+from ray.train.rl import load_checkpoint
 from ray.rllib.policy.policy import Policy
 from ray.rllib.utils.typing import EnvType

@@ -6,7 +6,7 @@ from joblib import parallel_backend

 from ray.air.checkpoint import Checkpoint
 from ray.air.predictor import Predictor, DataBatchType
-from ray.air.train.integrations.sklearn import load_checkpoint
+from ray.train.sklearn import load_checkpoint
 from ray.air._internal.sklearn_utils import set_cpu_params
 from ray.util.joblib import register_ray

@@ -5,7 +5,7 @@ import tensorflow as tf

 from ray.air.predictor import Predictor, DataBatchType
 from ray.air.checkpoint import Checkpoint
-from ray.air.train.data_parallel_trainer import _load_checkpoint
+from ray.train.data_parallel_trainer import _load_checkpoint
 from ray.air._internal.tensorflow_utils import convert_pandas_to_tf_tensor

 if TYPE_CHECKING:
@@ -6,7 +6,7 @@ import torch

 from ray.air.predictor import Predictor, DataBatchType
 from ray.air.checkpoint import Checkpoint
-from ray.air.train.integrations.torch import load_checkpoint
+from ray.train.torch import load_checkpoint
 from ray.air._internal.torch_utils import convert_pandas_to_torch_tensor

 if TYPE_CHECKING:
@@ -6,7 +6,7 @@ import xgboost

 from ray.air.checkpoint import Checkpoint
 from ray.air.predictor import Predictor, DataBatchType
-from ray.air.train.integrations.xgboost import load_checkpoint
+from ray.train.xgboost import load_checkpoint

 if TYPE_CHECKING:
     from ray.air.preprocessor import Preprocessor
@@ -3,7 +3,7 @@ import pytest
 import ray
 from ray.air import Checkpoint
 from ray.air.config import ScalingConfigDataClass
-from ray.air.trainer import Trainer
+from ray.train import BaseTrainer
 from ray.air.preprocessor import Preprocessor
 from ray.air._internal.config import (
     ensure_only_allowed_dataclass_keys_updated,
@@ -11,7 +11,7 @@ from ray.air._internal.config import (
 )


-class DummyTrainer(Trainer):
+class DummyTrainer(BaseTrainer):
     def training_loop(self) -> None:
         pass

@@ -7,7 +7,7 @@ from ray.data import Dataset, DatasetPipeline
 from ray.air.config import DatasetConfig
 from ray import train

-from ray.air.train.data_parallel_trainer import DataParallelTrainer
+from ray.train.data_parallel_trainer import DataParallelTrainer
 from ray.air.preprocessors import BatchMapper

@@ -9,7 +9,7 @@ import tempfile
 from ray.air.predictors.integrations.rl.rl_predictor import RLPredictor
 from ray.air.preprocessor import Preprocessor
 from ray.air.checkpoint import Checkpoint
-from ray.air.train.integrations.rl import RLTrainer
+from ray.train.rl import RLTrainer

 from ray.rllib.agents import Trainer
 from ray.rllib.policy import Policy
@@ -1,4 +0,0 @@
-from ray.air.constants import TRAIN_DATASET_KEY
-from ray.air.trainer import Trainer
-
-__all__ = ["TRAIN_DATASET_KEY", "Trainer"]
@@ -1,122 +0,0 @@
-from typing import Optional, Dict, List, Union, TYPE_CHECKING
-
-from ray.actor import ActorHandle
-from ray.air.config import DatasetConfig
-
-if TYPE_CHECKING:
-    from ray.data import Dataset, DatasetPipeline
-    from ray.air.preprocessor import Preprocessor
-
-
-class _DataParallelIngestSpec:
-    """Implements the execution of DatasetConfig preprocessing and ingest."""
-
-    def __init__(self, dataset_config: Dict[str, DatasetConfig]):
-        """Construct an ingest spec.
-
-        Args:
-            dataset_config: The merged default + user config dict for the trainer
-                with all defaults filled in.
-        """
-        self.dataset_config = dataset_config
-        self.preprocessed_datasets: Optional[Dict[str, "Dataset"]] = None
-        self.preprocessor: Optional["Preprocessor"] = None
-
-    def preprocess_datasets(
-        self, prep: "Preprocessor", datasets: Dict[str, "Dataset"]
-    ) -> Dict[str, "Dataset"]:
-        """Preprocess the given datasets.
-
-        This will be called prior to `get_dataset_shards()`.
-
-        Args:
-            prep: The preprocessor to fit, if needed.
-            dataset: The datasets to fit and transform.
-
-        Returns:
-            Dict of transformed datasets.
-        """
-        if prep:
-            ds_to_fit = None
-            for k, conf in self.dataset_config.items():
-                if k not in datasets:
-                    assert not conf.required, "Missing dataset post-validation"
-                    continue
-                if conf.fit:
-                    ds_to_fit = datasets[k]
-            if ds_to_fit:
-                prep.fit(ds_to_fit)
-            new_datasets = {}
-
-            for key, dataset in datasets.items():
-                conf = self._config(key)
-                if conf.transform:
-                    if conf.use_stream_api and conf.stream_window_size > 0:
-                        # In windowed mode, preprocessor is applied in streaming way.
-                        new_datasets[key] = dataset
-                    else:
-                        # Window size of infinity is treated same as bulk mode.
-                        new_datasets[key] = prep.transform(dataset)
-                else:
-                    new_datasets[key] = dataset
-        else:
-            new_datasets = datasets
-        self.preprocessed_datasets = new_datasets
-        self.preprocessor = prep
-        return new_datasets
-
-    def get_dataset_shards(
-        self, training_worker_handles: List[ActorHandle]
-    ) -> List[Dict[str, Union["Dataset", "DatasetPipeline"]]]:
-        """Get the shards to pass to training workers.
-
-        Note: this has to match the signature of DatasetSpec in legacy train.
-
-        Args:
-            training_worker_handles: Actor handles of the workers, which can be used
-                for locality-aware splitting.
-
-        Returns:
-            List of dataset shard dicts, one for each training worker.
-        """
-        dataset_dict_splits = [{} for _ in range(len(training_worker_handles))]
-
-        for key, dataset in self.preprocessed_datasets.items():
-            config = self._config(key)
-
-            if config.use_stream_api:
-                if config.stream_window_size > 0:
-                    dataset = dataset.window(
-                        bytes_per_window=config.stream_window_size
-                    ).repeat()
-                    # In windowed mode, we re-apply the preprocessor on each iteration.
-                    if self.preprocessor:
-                        prep = self.preprocessor.transform_batch
-                        dataset = dataset.map_batches(prep, batch_format="pandas")
-                else:
-                    # If the window size is infinity, the preprocessor is cached and
-                    # we don't need to re-apply it each time.
-                    dataset = dataset.repeat()
-
-                if config.global_shuffle:
-                    dataset = dataset.random_shuffle_each_window()
-
-            if config.split:
-                dataset_splits = dataset.split(
-                    len(training_worker_handles),
-                    equal=True,
-                    locality_hints=training_worker_handles,
-                )
-            else:
-                dataset_splits = [dataset] * len(training_worker_handles)
-
-            for i in range(len(dataset_splits)):
-                dataset_dict_splits[i][key] = dataset_splits[i]
-
-        return dataset_dict_splits
-
-    def _config(self, key: str) -> "DatasetConfig":
-        """Get the dataset config for the given dataset name."""
-        if key in self.dataset_config:
-            return self.dataset_config[key]
-        return self.dataset_config["*"]
@@ -1,3 +0,0 @@
-from ray.air.train.integrations.horovod.horovod_trainer import HorovodTrainer
-
-__all__ = ["HorovodTrainer"]
@@ -1,3 +0,0 @@
-from ray.air.train.integrations.rl.rl_trainer import RLTrainer, load_checkpoint
-
-__all__ = ["RLTrainer", "load_checkpoint"]
@@ -1,6 +0,0 @@
-from ray.air.train.integrations.tensorflow.tensorflow_trainer import (
-    TensorflowTrainer,
-    load_checkpoint,
-)
-
-__all__ = ["TensorflowTrainer", "load_checkpoint"]
@@ -1,3 +0,0 @@
-from ray.air.train.integrations.torch.torch_trainer import TorchTrainer, load_checkpoint
-
-__all__ = ["TorchTrainer", "load_checkpoint"]
@@ -9,7 +9,7 @@ import ray
 from ray import train
 from ray.air.preprocessors import Chain, BatchMapper
 from ray.air.config import DatasetConfig
-from ray.air.train.data_parallel_trainer import DataParallelTrainer
+from ray.train.data_parallel_trainer import DataParallelTrainer
 from ray.util.annotations import DeveloperAPI

@@ -112,6 +112,14 @@ py_test(
     deps = [":train_lib"]
 )

+py_test(
+    name = "test_base_trainer",
+    size = "medium",
+    srcs = ["tests/test_base_trainer.py"],
+    tags = ["team:ml", "exclusive", "ray_air"],
+    deps = [":train_lib"]
+)
+
 py_test(
     name = "test_callbacks",
     size = "medium",
@@ -120,6 +128,14 @@ py_test(
     deps = [":train_lib"]
 )

+py_test(
+    name = "test_data_parallel_trainer",
+    size = "medium",
+    srcs = ["tests/test_data_parallel_trainer.py"],
+    tags = ["team:ml", "exclusive", "ray_air"],
+    deps = [":train_lib"]
+)
+
 py_test(
     name = "test_examples",
     size = "large",
@@ -137,18 +153,34 @@ py_test(
 )

 py_test(
-    name = "test_minimal",
-    size = "small",
-    srcs = ["tests/test_minimal.py"],
-    tags = ["team:ml", "exclusive", "minimal"],
+    name = "test_horovod_trainer",
+    size = "large",
+    srcs = ["tests/test_horovod_trainer.py"],
+    tags = ["team:ml", "exclusive", "ray_air"],
     deps = [":train_lib"]
 )

 py_test(
-    name = "test_session",
+    name = "test_huggingface_trainer",
+    size = "medium",
+    srcs = ["tests/test_huggingface_trainer.py"],
+    tags = ["team:ml", "exclusive", "ray_air"],
+    deps = [":train_lib"]
+)
+
+py_test(
+    name = "test_lightgbm_trainer",
+    size = "medium",
+    srcs = ["tests/test_lightgbm_trainer.py"],
+    tags = ["team:ml", "exclusive", "ray_air"],
+    deps = [":train_lib"]
+)
+
+py_test(
+    name = "test_minimal",
     size = "small",
-    srcs = ["tests/test_session.py"],
-    tags = ["team:ml", "exclusive"],
+    srcs = ["tests/test_minimal.py"],
+    tags = ["team:ml", "exclusive", "minimal"],
     deps = [":train_lib"]
 )

@@ -160,6 +192,22 @@ py_test(
     deps = [":train_lib"]
 )

+py_test(
+    name = "test_session",
+    size = "small",
+    srcs = ["tests/test_session.py"],
+    tags = ["team:ml", "exclusive"],
+    deps = [":train_lib"]
+)
+
+py_test(
+    name = "test_sklearn_trainer",
+    size = "medium",
+    srcs = ["tests/test_sklearn_trainer.py"],
+    tags = ["team:ml", "exclusive", "ray_air"],
+    deps = [":train_lib"]
+)
+
 py_test(
     name = "test_trainer",
     size = "large",
@@ -168,6 +216,22 @@ py_test(
     deps = [":train_lib"]
 )

+py_test(
+    name = "test_tensorflow_trainer",
+    size = "medium",
+    srcs = ["tests/test_tensorflow_trainer.py"],
+    tags = ["team:ml", "exclusive", "ray_air"],
+    deps = [":train_lib"]
+)
+
+py_test(
+    name = "test_torch_trainer",
+    size = "medium",
+    srcs = ["tests/test_torch_trainer.py"],
+    tags = ["team:ml", "exclusive", "ray_air"],
+    deps = [":train_lib"]
+)
+
 py_test(
     name = "test_tune",
     size = "medium",
@@ -184,7 +248,6 @@ py_test(
     deps = [":train_lib"]
 )

-
 py_test(
     name = "test_worker_group",
     size = "medium",
@@ -193,7 +256,13 @@ py_test(
     deps = [":train_lib"]
 )

+py_test(
+    name = "test_xgboost_trainer",
+    size = "medium",
+    srcs = ["tests/test_xgboost_trainer.py"],
+    tags = ["team:ml", "exclusive", "ray_air"],
+    deps = [":train_lib"]
+)

 # This is a dummy test dependency that causes the above tests to be
 # re-run if any of these files changes.
@@ -12,6 +12,9 @@ from ray.train.train_loop_utils import (
 from ray.train.trainer import Trainer, TrainingIterator
 from ray.util.ml_utils.checkpoint_manager import CheckpointStrategy

+from ray.train.base_trainer import BaseTrainer
+from ray.train.constants import TRAIN_DATASET_KEY
+
 from ray._private.usage import usage_lib

 usage_lib.record_library_usage("train")
@@ -29,4 +32,6 @@ __all__ = [
     "Trainer",
     "world_rank",
     "world_size",
+    "BaseTrainer",
+    "TRAIN_DATASET_KEY",
 ]
@@ -3,8 +3,11 @@ from typing import Optional, Union, Dict, Callable, List, TYPE_CHECKING

 from ray.actor import ActorHandle

+from ray.air.config import DatasetConfig
+
 if TYPE_CHECKING:
     from ray.data import Dataset, DatasetPipeline
+    from ray.air.preprocessor import Preprocessor

 RayDataset = Union["Dataset", "DatasetPipeline"]
@@ -91,3 +94,117 @@ class RayDatasetSpec:
                 f"the number of training workers: {len(training_worker_handles)}"
             )
         return splits
+
+
+class DataParallelIngestSpec:
+    """Implements the execution of DatasetConfig preprocessing and ingest."""
+
+    def __init__(self, dataset_config: Dict[str, DatasetConfig]):
+        """Construct an ingest spec.
+
+        Args:
+            dataset_config: The merged default + user config dict for the trainer
+                with all defaults filled in.
+        """
+        self.dataset_config = dataset_config
+        self.preprocessed_datasets: Optional[Dict[str, "Dataset"]] = None
+        self.preprocessor: Optional["Preprocessor"] = None
+
+    def preprocess_datasets(
+        self, prep: "Preprocessor", datasets: Dict[str, "Dataset"]
+    ) -> Dict[str, "Dataset"]:
+        """Preprocess the given datasets.
+
+        This will be called prior to `get_dataset_shards()`.
+
+        Args:
+            prep: The preprocessor to fit, if needed.
+            dataset: The datasets to fit and transform.
+
+        Returns:
+            Dict of transformed datasets.
+        """
+        if prep:
+            ds_to_fit = None
+            for k, conf in self.dataset_config.items():
+                if k not in datasets:
+                    assert not conf.required, "Missing dataset post-validation"
+                    continue
+                if conf.fit:
+                    ds_to_fit = datasets[k]
+            if ds_to_fit:
+                prep.fit(ds_to_fit)
+            new_datasets = {}
+
+            for key, dataset in datasets.items():
+                conf = self._config(key)
+                if conf.transform:
+                    if conf.use_stream_api and conf.stream_window_size > 0:
+                        # In windowed mode, preprocessor is applied in streaming way.
+                        new_datasets[key] = dataset
+                    else:
+                        # Window size of infinity is treated same as bulk mode.
+                        new_datasets[key] = prep.transform(dataset)
+                else:
+                    new_datasets[key] = dataset
+        else:
+            new_datasets = datasets
+        self.preprocessed_datasets = new_datasets
+        self.preprocessor = prep
+        return new_datasets
+
+    def get_dataset_shards(
+        self, training_worker_handles: List[ActorHandle]
+    ) -> List[Dict[str, Union["Dataset", "DatasetPipeline"]]]:
+        """Get the shards to pass to training workers.
+
+        Note: this has to match the signature of DatasetSpec in legacy train.
+
+        Args:
+            training_worker_handles: Actor handles of the workers, which can be used
+                for locality-aware splitting.
+
+        Returns:
+            List of dataset shard dicts, one for each training worker.
+        """
+        dataset_dict_splits = [{} for _ in range(len(training_worker_handles))]
+
+        for key, dataset in self.preprocessed_datasets.items():
+            config = self._config(key)
+
+            if config.use_stream_api:
+                if config.stream_window_size > 0:
+                    dataset = dataset.window(
+                        bytes_per_window=config.stream_window_size
+                    ).repeat()
+                    # In windowed mode, we re-apply the preprocessor on each iteration.
+                    if self.preprocessor:
+                        prep = self.preprocessor.transform_batch
+                        dataset = dataset.map_batches(prep, batch_format="pandas")
+                else:
+                    # If the window size is infinity, the preprocessor is cached and
+                    # we don't need to re-apply it each time.
+                    dataset = dataset.repeat()
+
+                if config.global_shuffle:
+                    dataset = dataset.random_shuffle_each_window()
+
+            if config.split:
+                dataset_splits = dataset.split(
+                    len(training_worker_handles),
+                    equal=True,
+                    locality_hints=training_worker_handles,
+                )
+            else:
+                dataset_splits = [dataset] * len(training_worker_handles)
+
+            for i in range(len(dataset_splits)):
+                dataset_dict_splits[i][key] = dataset_splits[i]
+
+        return dataset_dict_splits
+
+    def _config(self, key: str) -> "DatasetConfig":
+        """Get the dataset config for the given dataset name."""
+        if key in self.dataset_config:
+            return self.dataset_config[key]
+        return self.dataset_config["*"]
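For orientation, a hedged sketch of how a trainer drives this spec. The DatasetConfig keyword arguments and the None stand-ins for worker actor handles are assumptions for illustration; in the real trainer the handles come from the backend executor, and only their count matters on the non-split path:

import ray
from ray.air.config import DatasetConfig
from ray.air.preprocessors import BatchMapper
from ray.train._internal.dataset_spec import DataParallelIngestSpec

spec = DataParallelIngestSpec(
    dataset_config={
        "train": DatasetConfig(fit=False, transform=True, split=False),
        "*": DatasetConfig(),
    }
)

# 1. Transform datasets up front (called once, before training starts).
datasets = spec.preprocess_datasets(
    BatchMapper(lambda df: df * 2), {"train": ray.data.range(8)}
)

# 2. Build one shard dict per training worker. On the non-split path only
#    len() of the handle list is used, so None stand-ins suffice here.
shards = spec.get_dataset_shards([None, None])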
@@ -6,13 +6,12 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type, Uni
 import ray
 from ray.util import PublicAPI
 from ray.air.checkpoint import Checkpoint
-from ray.air.constants import TRAIN_DATASET_KEY
+from ray.train.constants import TRAIN_DATASET_KEY
 from ray.air.config import (
     RunConfig,
     ScalingConfig,
     ScalingConfigDataClass,
 )
-from ray.air.preprocessor import Preprocessor
 from ray.air.result import Result
 from ray.air._internal.config import (
     ensure_only_allowed_dataclass_keys_updated,
@@ -26,6 +25,7 @@ from ray.util.ml_utils.dict import merge_dicts

 if TYPE_CHECKING:
     from ray.data import Dataset
+    from ray.air.preprocessor import Preprocessor

 # A type representing either a ray.data.Dataset or a function that returns a
 # ray.data.Dataset and accepts no arguments.
@@ -43,10 +43,10 @@ class TrainingFailedError(RuntimeError):


 @DeveloperAPI
-class Trainer(abc.ABC):
+class BaseTrainer(abc.ABC):
     """Defines interface for distributed training on Ray.

-    Note: The base ``Trainer`` class cannot be instantiated directly. Only
+    Note: The base ``BaseTrainer`` class cannot be instantiated directly. Only
     one of its subclasses can be used.

     How does a trainer work?
@@ -68,18 +68,18 @@ class Trainer(abc.ABC):

     **How do I create a new Trainer?**

-    Subclass ``ray.train.Trainer``, and override the ``training_loop``
+    Subclass ``ray.train.BaseTrainer``, and override the ``training_loop``
     method, and optionally ``setup``.

     .. code-block:: python

         import torch

-        from ray.air.train import Trainer
+        from ray.train import BaseTrainer
         from ray import tune


-        class MyPytorchTrainer(Trainer):
+        class MyPytorchTrainer(BaseTrainer):
             def setup(self):
                 self.model = torch.nn.Linear(1, 1)
                 self.optimizer = torch.optim.SGD(
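The docstring example above is cut off at the optimizer. A hedged sketch of how such a subclass might be completed; the training_loop body here is illustrative rather than the docstring's exact continuation:

import torch
from ray import tune
from ray.train import BaseTrainer


class MyPytorchTrainer(BaseTrainer):
    def setup(self):
        self.model = torch.nn.Linear(1, 1)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1)

    def training_loop(self):
        # Illustrative loop: fit y = x on a toy batch and report the loss
        # back to Tune once per epoch.
        loss_fn = torch.nn.MSELoss()
        x = torch.arange(4, dtype=torch.float32).reshape(-1, 1)
        y = x.clone()
        for epoch in range(5):
            self.optimizer.zero_grad()
            loss = loss_fn(self.model(x), y)
            loss.backward()
            self.optimizer.step()
            tune.report(loss=loss.item(), epoch=epoch)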
@@ -148,7 +148,7 @@ class Trainer(abc.ABC):
         scaling_config: Optional[ScalingConfig] = None,
         run_config: Optional[RunConfig] = None,
         datasets: Optional[Dict[str, GenDataset]] = None,
-        preprocessor: Optional[Preprocessor] = None,
+        preprocessor: Optional["Preprocessor"] = None,
         resume_from_checkpoint: Optional[Checkpoint] = None,
     ):

@@ -162,7 +162,7 @@ class Trainer(abc.ABC):

     def __new__(cls, *args, **kwargs):
         """Store the init args as attributes so this can be merged with Tune hparams."""
-        trainer = super(Trainer, cls).__new__(cls)
+        trainer = super(BaseTrainer, cls).__new__(cls)
         parameters = inspect.signature(cls.__init__).parameters
         parameters = list(parameters.keys())
         # Remove self.
@@ -301,9 +301,9 @@ class Trainer(abc.ABC):
         Example:
             .. code-block: python

-                from ray.air.trainer import Trainer
+                from ray.train.trainer import BaseTrainer

-                class MyTrainer(Trainer):
+                class MyTrainer(BaseTrainer):
                     def training_loop(self):
                         for epoch_idx in range(5):
                             ...
@@ -6,6 +6,14 @@ try:
 except ImportError:
     TUNE_INSTALLED = False

+from ray.air.constants import (  # noqa: F401
+    EVALUATION_DATASET_KEY,
+    MODEL_KEY,
+    PREPROCESSOR_KEY,
+    TRAIN_DATASET_KEY,
+    WILDCARD_KEY,
+)
+
 # Autofilled train.report() metrics. Keys should be consistent with Tune.
 TIMESTAMP = "_timestamp"
 TIME_THIS_ITER_S = "_time_this_iter_s"
@@ -14,17 +14,16 @@ from typing import (

 import ray
 from ray import tune
-from ray.air.constants import (
-    MODEL_KEY,
-    PREPROCESSOR_KEY,
+from ray.air.constants import MODEL_KEY, PREPROCESSOR_KEY
+from ray.train.constants import (
     TRAIN_DATASET_KEY,
     WILDCARD_KEY,
 )
-from ray.air.trainer import Trainer
+from ray.train.trainer import BaseTrainer
 from ray.air.config import ScalingConfig, RunConfig, DatasetConfig
-from ray.air.trainer import GenDataset
+from ray.train.trainer import GenDataset
 from ray.air.checkpoint import Checkpoint
-from ray.air.train.data_parallel_ingest import _DataParallelIngestSpec
+from ray.train._internal.dataset_spec import DataParallelIngestSpec
 from ray.train import BackendConfig, TrainingIterator
 from ray.train._internal.backend_executor import BackendExecutor
 from ray.train._internal.checkpoint import TuneCheckpointManager
@@ -59,7 +58,7 @@ class _DataParallelCheckpointManager(TuneCheckpointManager):


 @DeveloperAPI
-class DataParallelTrainer(Trainer):
+class DataParallelTrainer(BaseTrainer):
     """A Trainer for data parallel training.

     You should subclass this Trainer if your Trainer follows SPMD (single program,
@@ -162,7 +161,7 @@ class DataParallelTrainer(Trainer):

         .. code-block:: python

-            from ray.air.train.data_parallel_trainer import DataParallelTrainer
+            from ray.train.data_parallel_trainer import DataParallelTrainer

             class MyDataParallelTrainer(DataParallelTrainer):
                 def __init__(self, *args, **kwargs):
@@ -227,7 +226,7 @@ class DataParallelTrainer(Trainer):
         TuneCheckpointManager
     ] = _DataParallelCheckpointManager

-    _scaling_config_allowed_keys = Trainer._scaling_config_allowed_keys + [
+    _scaling_config_allowed_keys = BaseTrainer._scaling_config_allowed_keys + [
         "num_workers",
         "num_cpus_per_worker",
         "num_gpus_per_worker",
@@ -267,7 +266,7 @@ class DataParallelTrainer(Trainer):
         self._dataset_config = DatasetConfig.validated(
             DatasetConfig.merge(self._dataset_config, dataset_config), datasets
         )
-        self._ingest_spec = _DataParallelIngestSpec(
+        self._ingest_spec = DataParallelIngestSpec(
             dataset_config=self._dataset_config,
         )
@@ -1,15 +1,15 @@
 from typing import TYPE_CHECKING, Dict, Tuple, Type, Any, Optional
 import warnings

-from ray.air.trainer import GenDataset
+from ray.train.trainer import GenDataset
 from ray.air.config import ScalingConfig, RunConfig, ScalingConfigDataClass
 from ray.air._internal.checkpointing import save_preprocessor_to_dir
 from ray.tune.utils.trainable import TrainableUtil
 from ray.util.annotations import DeveloperAPI
-from ray.air.trainer import Trainer
+from ray.train.trainer import BaseTrainer
 from ray.air.checkpoint import Checkpoint
 from ray.tune import Trainable
-from ray.air.constants import MODEL_KEY, TRAIN_DATASET_KEY
+from ray.train.constants import MODEL_KEY, TRAIN_DATASET_KEY

 if TYPE_CHECKING:
     import xgboost_ray
@@ -40,7 +40,7 @@ def _convert_scaling_config_to_ray_params(


 @DeveloperAPI
-class GBDTTrainer(Trainer):
+class GBDTTrainer(BaseTrainer):
     """Common logic for gradient-boosting decision tree (GBDT) frameworks
     like XGBoost-Ray and LightGBM-Ray.

@@ -65,7 +65,7 @@ class GBDTTrainer(Trainer):
         **train_kwargs: Additional kwargs passed to framework ``train()`` function.
     """

-    _scaling_config_allowed_keys = Trainer._scaling_config_allowed_keys + [
+    _scaling_config_allowed_keys = BaseTrainer._scaling_config_allowed_keys + [
         "num_workers",
         "num_cpus_per_worker",
         "num_gpus_per_worker",
@@ -7,6 +7,7 @@ except ModuleNotFoundError:
         "run 'pip install 'horovod[tensorflow]''."
     )

+from ray.train.horovod.horovod_trainer import HorovodTrainer
 from ray.train.horovod.config import HorovodConfig

-__all__ = ["HorovodConfig"]
+__all__ = ["HorovodConfig", "HorovodTrainer"]
@@ -1,12 +1,12 @@
 from typing import Dict, Callable, Optional, Union, TYPE_CHECKING

 from ray.air.config import ScalingConfig, RunConfig, DatasetConfig
-from ray.air.trainer import GenDataset
+from ray.train.trainer import GenDataset
 from ray.air.checkpoint import Checkpoint


-from ray.air.train.data_parallel_trainer import DataParallelTrainer
-from ray.train.horovod import HorovodConfig
+from ray.train.data_parallel_trainer import DataParallelTrainer
+from ray.train.horovod.config import HorovodConfig

 if TYPE_CHECKING:
     from ray.air.preprocessor import Preprocessor
@@ -83,7 +83,7 @@ class HorovodTrainer(DataParallelTrainer):
             import horovod.torch as hvd
             import torch
             import torch.nn as nn
-            from ray.air.train.integrations.horovod import HorovodTrainer
+            from ray.train.horovod import HorovodTrainer

             input_size = 1
             layer_size = 15
@@ -1,4 +1,4 @@
-from ray.air.train.integrations.huggingface.huggingface_trainer import (
+from ray.train.huggingface.huggingface_trainer import (
     HuggingFaceTrainer,
     load_checkpoint,
 )
@@ -20,15 +20,15 @@ from ray import train
 from ray.util import PublicAPI, get_node_ip_address
 from ray.air.checkpoint import Checkpoint
 from ray.air.config import RunConfig, ScalingConfig, DatasetConfig
-from ray.air.constants import (
+from ray.train.constants import (
     EVALUATION_DATASET_KEY,
     TRAIN_DATASET_KEY,
     PREPROCESSOR_KEY,
 )
-from ray.air.train.integrations.torch import TorchTrainer
-from ray.air.trainer import GenDataset
-from ray.air.train.data_parallel_trainer import _DataParallelCheckpointManager
-from ray.air.train.integrations.huggingface.huggingface_utils import (
+from ray.train.torch import TorchTrainer
+from ray.train.trainer import GenDataset
+from ray.train.data_parallel_trainer import _DataParallelCheckpointManager
+from ray.train.huggingface.huggingface_utils import (
     CHECKPOINT_PATH_ON_NODE_KEY,
     NODE_IP_KEY,
     process_datasets,
@@ -157,7 +157,7 @@ class HuggingFaceTrainer(TorchTrainer):
             from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

             import ray
-            from ray.air.train.integrations.huggingface import HuggingFaceTrainer
+            from ray.train.huggingface import HuggingFaceTrainer

             model_checkpoint = "gpt2"
             tokenizer_checkpoint = "sgugger/gpt2-like-tokenizer"
@@ -1,4 +1,4 @@
-from ray.air.train.integrations.lightgbm.lightgbm_trainer import (
+from ray.train.lightgbm.lightgbm_trainer import (
     LightGBMTrainer,
     load_checkpoint,
 )
@@ -2,10 +2,10 @@ from typing import Dict, Any, Optional, Tuple, TYPE_CHECKING
 import os

 from ray.air.checkpoint import Checkpoint
-from ray.air.train.gbdt_trainer import GBDTTrainer
+from ray.train.gbdt_trainer import GBDTTrainer
 from ray.air._internal.checkpointing import load_preprocessor_from_dir
 from ray.util.annotations import PublicAPI
-from ray.air.constants import MODEL_KEY
+from ray.train.constants import MODEL_KEY

 import lightgbm
 import lightgbm_ray
@@ -31,7 +31,7 @@ class LightGBMTrainer(GBDTTrainer):

             import ray

-            from ray.air.train.integrations.lightgbm import LightGBMTrainer
+            from ray.train.lightgbm import LightGBMTrainer

             train_dataset = ray.data.from_items(
                 [{"x": x, "y": x + 1} for x in range(32)])
python/ray/train/rl/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
+from ray.train.rl.rl_trainer import RLTrainer, load_checkpoint
+
+__all__ = ["RLTrainer", "load_checkpoint"]
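Because the new __init__.py re-exports the trainer, the short and the fully qualified imports resolve to the same object; a quick sketch:

    from ray.train.rl import RLTrainer
    from ray.train.rl.rl_trainer import RLTrainer as RLTrainerDirect

    assert RLTrainer is RLTrainerDirect  # same class either way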
@@ -5,7 +5,7 @@ from typing import Optional, Dict, Tuple, Type, Union, Callable, Any, TYPE_CHECKING
 import ray.cloudpickle as cpickle
 from ray.air.checkpoint import Checkpoint
 from ray.air.config import ScalingConfig, RunConfig
-from ray.air.trainer import Trainer, GenDataset
+from ray.train.trainer import BaseTrainer, GenDataset
 from ray.air._internal.checkpointing import (
     load_preprocessor_from_dir,
     save_preprocessor_to_dir,
@@ -28,7 +28,7 @@ RL_CONFIG_FILE = "config.pkl"


 @PublicAPI(stability="alpha")
-class RLTrainer(Trainer):
+class RLTrainer(BaseTrainer):
     """Reinforcement learning trainer.

     This trainer provides an interface to RLlib trainables.
@@ -58,7 +58,7 @@ class RLTrainer(Trainer):
        .. code-block:: python

            from ray.air.config import RunConfig
-           from ray.air.train.integrations.rl import RLTrainer
+           from ray.train.rl import RLTrainer

            trainer = RLTrainer(
                run_config=RunConfig(stop={"training_iteration": 5}),
@@ -85,7 +85,7 @@ class RLTrainer(Trainer):

            import ray
            from ray.air.config import RunConfig
-           from ray.air.train.integrations.rl import RLTrainer
+           from ray.train.rl import RLTrainer
            from ray.rllib.agents.marwil.bc import BCTrainer

            dataset = ray.data.read_json(
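A hedged sketch of an online run via the new path, stitched from the docstring fragments above; the algorithm name and env config are illustrative assumptions, not values from this diff:

    from ray.air.config import RunConfig
    from ray.train.rl import RLTrainer

    trainer = RLTrainer(
        run_config=RunConfig(stop={"training_iteration": 5}),
        scaling_config={"num_workers": 2},
        algorithm="PPO",                # assumed: an RLlib algorithm name
        config={"env": "CartPole-v0"},  # assumed: a standard RLlib config
    )
    result = trainer.fit()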
@@ -1,4 +1,4 @@
-from ray.air.train.integrations.sklearn.sklearn_trainer import (
+from ray.train.sklearn.sklearn_trainer import (
     SklearnTrainer,
     load_checkpoint,
 )
@@ -14,8 +14,8 @@ from ray import tune
 import ray.cloudpickle as cpickle
 from ray.air.checkpoint import Checkpoint
 from ray.air.config import RunConfig, ScalingConfig
-from ray.air.constants import MODEL_KEY, TRAIN_DATASET_KEY
-from ray.air.trainer import GenDataset, Trainer
+from ray.train.constants import MODEL_KEY, TRAIN_DATASET_KEY
+from ray.train.trainer import GenDataset, BaseTrainer
 from ray.air._internal.checkpointing import (
     load_preprocessor_from_dir,
     save_preprocessor_to_dir,
@@ -47,7 +47,7 @@ CVType = Union[int, Iterable, BaseCrossValidator]


 @PublicAPI(stability="alpha")
-class SklearnTrainer(Trainer):
+class SklearnTrainer(BaseTrainer):
     """A Trainer for scikit-learn estimator training.

     This Trainer runs the ``fit`` method of the given estimator in a
@@ -69,7 +69,7 @@ class SklearnTrainer(Trainer):

        import ray

-       from ray.air.train.integrations.sklearn import SklearnTrainer
+       from ray.train.sklearn import SklearnTrainer
        from sklearn.ensemble import RandomForestRegressor

        train_dataset = ray.data.from_items(
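The same docstring example completed into a hedged sketch; the trainer_resources value is an assumption about the dict-style scaling_config of this era:

    import ray
    from ray.train.sklearn import SklearnTrainer
    from sklearn.ensemble import RandomForestRegressor

    train_dataset = ray.data.from_items(
        [{"x": x, "y": x + 1} for x in range(32)])
    trainer = SklearnTrainer(
        estimator=RandomForestRegressor(),
        label_column="y",
        scaling_config={"trainer_resources": {"CPU": 2}},  # assumed
        datasets={"train": train_dataset},
    )
    result = trainer.fit()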
@@ -8,5 +8,14 @@ except ModuleNotFoundError:

 from ray.train.tensorflow.config import TensorflowConfig
 from ray.train.tensorflow.train_loop_utils import prepare_dataset_shard
+from ray.train.tensorflow.tensorflow_trainer import (
+    TensorflowTrainer,
+    load_checkpoint,
+)

-__all__ = ["TensorflowConfig", "prepare_dataset_shard"]
+__all__ = [
+    "TensorflowConfig",
+    "prepare_dataset_shard",
+    "TensorflowTrainer",
+    "load_checkpoint",
+]
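With the expanded __all__, every public TensorFlow integration name now imports from one place; a quick sanity sketch (assumes tensorflow is installed, per the try/except guard above):

    from ray.train.tensorflow import (
        TensorflowConfig,
        TensorflowTrainer,
        load_checkpoint,
        prepare_dataset_shard,
    )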
@@ -1,9 +1,9 @@
 from typing import Callable, Optional, Dict, Tuple, Type, Union, TYPE_CHECKING
 import tensorflow as tf

-from ray.train.tensorflow import TensorflowConfig
-from ray.air.trainer import GenDataset
-from ray.air.train.data_parallel_trainer import DataParallelTrainer, _load_checkpoint
+from ray.train.tensorflow.config import TensorflowConfig
+from ray.train.trainer import GenDataset
+from ray.train.data_parallel_trainer import DataParallelTrainer, _load_checkpoint
 from ray.air.config import ScalingConfig, RunConfig, DatasetConfig
 from ray.air.checkpoint import Checkpoint
 from ray.util import PublicAPI
@@ -95,7 +95,7 @@ class TensorflowTrainer(DataParallelTrainer):
        from ray import train
        from ray.train.tensorflow import prepare_dataset_shard

-       from ray.air.train.integrations.tensorflow import TensorflowTrainer
+       from ray.train.tensorflow import TensorflowTrainer

        input_size = 1
python/ray/train/tests/_huggingface_data.py (new file, +7)
File diff suppressed because one or more lines are too long
@@ -4,7 +4,7 @@ import ray
 from ray import tune

 from ray.air.preprocessor import Preprocessor
-from ray.air.trainer import Trainer
+from ray.train.trainer import BaseTrainer
 from ray.util.placement_group import get_current_placement_group
@@ -27,7 +27,7 @@ class DummyPreprocessor(Preprocessor):
         return ds.map(lambda x: x + 1)


-class DummyTrainer(Trainer):
+class DummyTrainer(BaseTrainer):
     _scaling_config_allowed_keys = [
         "num_workers",
         "num_cpus_per_worker",
@@ -3,9 +3,9 @@ import pytest
 import ray
 from ray import train, tune
 from ray.air.checkpoint import Checkpoint
-from ray.air.constants import PREPROCESSOR_KEY
+from ray.train.constants import PREPROCESSOR_KEY

-from ray.air.train.data_parallel_trainer import DataParallelTrainer
+from ray.train.data_parallel_trainer import DataParallelTrainer
 from ray.air.preprocessor import Preprocessor
 from ray.tune.tune_config import TuneConfig
 from ray.tune.tuner import Tuner
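For orientation, a minimal hedged sketch of what the moved DataParallelTrainer looks like from user code; the function-style train.report call is an assumption about the reporting API of this era:

    from ray import train
    from ray.train.data_parallel_trainer import DataParallelTrainer

    def train_loop_per_worker():
        train.report(loss=1.0)  # each worker reports one metric (assumed API)

    trainer = DataParallelTrainer(
        train_loop_per_worker,
        scaling_config={"num_workers": 2},
    )
    result = trainer.fit()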
@@ -11,7 +11,7 @@ from ray.air.examples.horovod.horovod_pytorch_example import (
     Net,
 )
 from ray.air.predictors.integrations.torch import TorchPredictor
-from ray.air.train.integrations.horovod import HorovodTrainer
+from ray.train.horovod import HorovodTrainer


 @pytest.fixture
@@ -1,7 +1,7 @@
 import pandas as pd
 import pytest
 from unittest.mock import patch
-from ray.air.train.integrations.huggingface.huggingface_utils import TrainReportCallback
+from ray.train.huggingface.huggingface_utils import TrainReportCallback

 from transformers import (
     AutoConfig,
@@ -13,11 +13,11 @@ from transformers import (
 from transformers.trainer_callback import TrainerState

 import ray.data
-from ray.air.train.integrations.huggingface import HuggingFaceTrainer
+from ray.train.huggingface import HuggingFaceTrainer
 from ray.air.predictors.integrations.huggingface import HuggingFacePredictor
 from ray.air.batch_predictor import BatchPredictor

-from ray.air.tests._huggingface_data import train_data, validation_data
+from ray.train.tests._huggingface_data import train_data, validation_data

 # 16 first rows of tokenized wikitext-2-raw-v1 training & validation
 train_df = pd.read_json(train_data)
@@ -7,9 +7,9 @@ import lightgbm as lgbm
 import ray
 from ray import tune
 from ray.air.checkpoint import Checkpoint
-from ray.air.constants import TRAIN_DATASET_KEY
+from ray.train.constants import TRAIN_DATASET_KEY

-from ray.air.train.integrations.lightgbm import LightGBMTrainer, load_checkpoint
+from ray.train.lightgbm import LightGBMTrainer, load_checkpoint
 from ray.air.preprocessor import Preprocessor

 from sklearn.datasets import load_breast_cancer
@@ -87,9 +87,6 @@ def test_failure():
     with pytest.raises(ModuleNotFoundError):
         import horovod  # noqa: F401

-    with pytest.raises(ModuleNotFoundError):
-        from ray import tune  # noqa: F401
-

 if __name__ == "__main__":
     import pytest
@@ -4,9 +4,9 @@ import pandas as pd
 import ray
 from ray import tune
 from ray.air.checkpoint import Checkpoint
-from ray.air.constants import TRAIN_DATASET_KEY
+from ray.train.constants import TRAIN_DATASET_KEY

-from ray.air.train.integrations.sklearn import SklearnTrainer, load_checkpoint
+from ray.train.sklearn import SklearnTrainer, load_checkpoint
 from ray.air.preprocessor import Preprocessor

 from sklearn.datasets import load_breast_cancer
@@ -3,13 +3,13 @@ import numpy as np

 import ray
 from ray import train
-from ray.air.train.integrations.tensorflow import TensorflowTrainer
+from ray.train.tensorflow import TensorflowTrainer
 from ray.air.examples.tf.tensorflow_linear_dataset_example import (
     train_func as tensorflow_linear_train_func,
     get_dataset,
 )
 from ray.air.predictors.integrations.tensorflow import TensorflowPredictor
-from ray.air.constants import MODEL_KEY, TRAIN_DATASET_KEY
+from ray.train.constants import MODEL_KEY, TRAIN_DATASET_KEY


 @pytest.fixture
@@ -3,7 +3,7 @@ import torch

 import ray
 from ray.air.predictors.integrations.torch import TorchPredictor
-from ray.air.train.integrations.torch import TorchTrainer
+from ray.train.torch import TorchTrainer
 from ray import train
 from ray.air.examples.pytorch.torch_linear_example import (
     train_func as linear_train_func,
@@ -7,9 +7,9 @@ import xgboost as xgb
 import ray
 from ray import tune
 from ray.air.checkpoint import Checkpoint
-from ray.air.constants import TRAIN_DATASET_KEY
+from ray.train.constants import TRAIN_DATASET_KEY

-from ray.air.train.integrations.xgboost import XGBoostTrainer, load_checkpoint
+from ray.train.xgboost import XGBoostTrainer, load_checkpoint
 from ray.air.preprocessor import Preprocessor

 from sklearn.datasets import load_breast_cancer
@@ -17,7 +17,12 @@ from ray.train.torch.train_loop_utils import (
     TorchWorkerProfiler,
 )

+from ray.train.torch.torch_trainer import TorchTrainer, load_checkpoint
+
+
 __all__ = [
+    "TorchTrainer",
+    "load_checkpoint",
     "TorchConfig",
     "accelerate",
     "get_device",
@@ -1,9 +1,9 @@
 from typing import Callable, Optional, Dict, Tuple, Union, TYPE_CHECKING
 import torch

-from ray.train.torch import TorchConfig
-from ray.air.trainer import GenDataset
-from ray.air.train.data_parallel_trainer import DataParallelTrainer, _load_checkpoint
+from ray.train.torch.config import TorchConfig
+from ray.train.trainer import GenDataset
+from ray.train.data_parallel_trainer import DataParallelTrainer, _load_checkpoint
 from ray.air.config import ScalingConfig, RunConfig, DatasetConfig
 from ray.air.checkpoint import Checkpoint
 from ray.air._internal.torch_utils import load_torch_model
@@ -101,7 +101,7 @@ class TorchTrainer(DataParallelTrainer):

        import ray
        from ray import train
-       from ray.air.train.integrations.torch import TorchTrainer
+       from ray.train.torch import TorchTrainer

        input_size = 1
        layer_size = 15
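The docstring constants above, folded into a hedged end-to-end sketch of the relocated TorchTrainer; prepare_model wrapping the module for distributed training is an assumption about the train.torch utilities of this era, and the loop body is elided:

    import torch.nn as nn

    from ray import train
    from ray.train.torch import TorchTrainer

    input_size = 1
    layer_size = 15

    def train_loop_per_worker():
        model = nn.Linear(input_size, layer_size)
        model = train.torch.prepare_model(model)  # assumed: DDP wrapping
        # ... forward/backward passes elided ...

    trainer = TorchTrainer(
        train_loop_per_worker=train_loop_per_worker,
        scaling_config={"num_workers": 2},
    )
    result = trainer.fit()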
@@ -44,6 +44,12 @@ from ray.train._internal.worker_group import WorkerGroup
 from ray.util.annotations import DeveloperAPI, Deprecated
 from ray.util.ml_utils.checkpoint_manager import CheckpointStrategy

+from ray.train.base_trainer import (  # noqa: F401
+    BaseTrainer,
+    GenDataset,
+    TrainingFailedError,
+)
+
 if TUNE_INSTALLED:
     from ray import tune
     from ray.tune import Trainable
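The noqa-marked re-export exists for compatibility: code can reach BaseTrainer either through the package root (as the tune tests below do) or through ray.train.trainer. A sketch of the equivalence:

    from ray.train import BaseTrainer
    from ray.train.trainer import BaseTrainer as BaseTrainerViaModule

    assert BaseTrainer is BaseTrainerViaModule  # one class, two import paths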
@@ -1,4 +1,4 @@
-from ray.air.train.integrations.xgboost.xgboost_trainer import (
+from ray.train.xgboost.xgboost_trainer import (
     XGBoostTrainer,
     load_checkpoint,
 )
@@ -2,10 +2,10 @@ import os
 from typing import Optional, Tuple, TYPE_CHECKING

 from ray.air.checkpoint import Checkpoint
-from ray.air.train.gbdt_trainer import GBDTTrainer
+from ray.train.gbdt_trainer import GBDTTrainer
 from ray.air._internal.checkpointing import load_preprocessor_from_dir
 from ray.util.annotations import PublicAPI
-from ray.air.constants import MODEL_KEY
+from ray.train.constants import MODEL_KEY

 import xgboost
 import xgboost_ray
@@ -27,7 +27,7 @@ class XGBoostTrainer(GBDTTrainer):

        import ray

-       from ray.air.train.integrations.xgboost import XGBoostTrainer
+       from ray.train.xgboost import XGBoostTrainer

        train_dataset = ray.data.from_items(
            [{"x": x, "y": x + 1} for x in range(32)])
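The same docstring example, runnable against the new path; the params dict is an assumed minimal XGBoost config:

    import ray
    from ray.train.xgboost import XGBoostTrainer

    train_dataset = ray.data.from_items(
        [{"x": x, "y": x + 1} for x in range(32)])
    trainer = XGBoostTrainer(
        label_column="y",
        params={"objective": "reg:squarederror"},  # assumed minimal config
        scaling_config={"num_workers": 2},
        datasets={"train": train_dataset},
    )
    result = trainer.fit()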
@@ -4,7 +4,7 @@ from typing import Any, Callable, Dict, Optional, Type, Union

 import ray.cloudpickle as pickle
 from ray.air.config import RunConfig
-from ray.air.trainer import Trainer
+from ray.train.trainer import BaseTrainer
 from ray.tune import Experiment, TuneError, ExperimentAnalysis
 from ray.tune.impl.utils import execute_dataset
 from ray.tune.result_grid import ResultGrid
@@ -55,7 +55,7 @@ class TunerInternal:
                str,
                Callable,
                Type[Trainable],
-               Trainer,
+               BaseTrainer,
            ]
        ] = None,
        param_space: Optional[Dict[str, Any]] = None,
@@ -85,7 +85,7 @@ class TunerInternal:

        # If no run config was passed to Tuner directly, use the one from the Trainer,
        # if available
-       if not run_config and isinstance(trainable, Trainer):
+       if not run_config and isinstance(trainable, BaseTrainer):
            run_config = trainable.run_config

        self._is_restored = False
@@ -144,7 +144,7 @@ class TunerInternal:

    @staticmethod
    def _convert_trainable(trainable: Any) -> Type[Trainable]:
-       if isinstance(trainable, Trainer):
+       if isinstance(trainable, BaseTrainer):
            trainable = trainable.as_trainable()
        else:
            trainable = trainable
@@ -13,9 +13,9 @@ from ray.air.config import RunConfig
 from ray.air.examples.pytorch.torch_linear_example import (
     train_func as linear_train_func,
 )
-from ray.air.train.integrations.torch import TorchTrainer
-from ray.air.train.integrations.xgboost import XGBoostTrainer
-from ray.air.train import Trainer
+from ray.train.torch import TorchTrainer
+from ray.train.xgboost import XGBoostTrainer
+from ray.train import BaseTrainer
 from ray.tune import Callback, TuneError
 from ray.tune.cloud import TrialCheckpoint
 from ray.tune.result import DEFAULT_RESULTS_DIR
@@ -23,7 +23,7 @@ from ray.tune.tune_config import TuneConfig
 from ray.tune.tuner import Tuner


-class DummyTrainer(Trainer):
+class DummyTrainer(BaseTrainer):
     _scaling_config_allowed_keys = [
         "num_workers",
         "num_cpus_per_worker",
@@ -3,7 +3,7 @@ from typing import Any, Callable, Dict, Optional, Type, Union
 import ray

 from ray.air.config import RunConfig
-from ray.air.trainer import Trainer
+from ray.train.trainer import BaseTrainer
 from ray.tune import TuneError
 from ray.tune.result_grid import ResultGrid
 from ray.tune.trainable import Trainable
@@ -47,7 +47,7 @@ class Tuner:
            from ray import tune
            from ray.data import from_pandas
            from ray.air.config import RunConfig
-           from ray.air.train.integrations.xgboost import XGBoostTrainer
+           from ray.train.xgboost import XGBoostTrainer
            from ray.tune.tuner import Tuner

            def get_dataset():
@@ -111,7 +111,7 @@ class Tuner:
                str,
                Callable,
                Type[Trainable],
-               Trainer,
+               BaseTrainer,
            ]
        ] = None,
        param_space: Optional[Dict[str, Any]] = None,
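Taken together, the TunerInternal and Tuner hunks mean any BaseTrainer subclass can be handed to Tuner, which converts it via as_trainable() internally. A hedged sketch reusing the XGBoost example from earlier in this diff:

    import ray
    from ray.train.xgboost import XGBoostTrainer
    from ray.tune.tune_config import TuneConfig
    from ray.tune.tuner import Tuner

    trainer = XGBoostTrainer(
        label_column="y",
        params={"objective": "reg:squarederror"},  # assumed minimal config
        scaling_config={"num_workers": 2},
        datasets={"train": ray.data.from_items(
            [{"x": x, "y": x + 1} for x in range(32)])},
    )
    # Tuner accepts the trainer directly and converts it internally.
    tuner = Tuner(trainer, tune_config=TuneConfig(num_samples=2))
    results = tuner.fit()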
@@ -256,11 +256,14 @@ if setup_spec.type == SetupType.RAY:
        "scipy",
    ]

+   setup_spec.extras["train"] = setup_spec.extras["tune"]
+
    # Ray AI Runtime should encompass Data, Tune, and Serve.
    setup_spec.extras["air"] = list(
        set(
            setup_spec.extras["tune"]
            + setup_spec.extras["data"]
+           + setup_spec.extras["train"]
            + setup_spec.extras["serve"]
        )
    )
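Illustrative Python showing how the extras compose after this hunk; the requirement lists are placeholders, not the real contents of setup.py:

    # Placeholder requirement lists, for illustration only.
    extras = {
        "data": ["pandas"],
        "tune": ["tensorboardX"],
        "serve": ["uvicorn"],
    }
    extras["train"] = extras["tune"]  # "train" now aliases tune's requirements
    extras["air"] = list(set(
        extras["tune"] + extras["data"] + extras["train"] + extras["serve"]
    ))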
@@ -3,7 +3,7 @@ import torch.nn as nn
 import numpy as np
 import torchvision
 from ray.air import RunConfig
-from ray.air.train.integrations.horovod import HorovodTrainer
+from ray.train.horovod import HorovodTrainer
 from ray.tune.tune_config import TuneConfig
 from ray.tune.tuner import Tuner
 from torch.utils.data import DataLoader