# ray/doc/source/ray-air/doc_code/tuner.py

# flake8: noqa
# isort: skip_file
# __basic_start__
import ray
from ray import tune
from ray.tune import Tuner
from ray.train.xgboost import XGBoostTrainer

# Breast-cancer CSV from the anonymous-access AIR example-data S3 bucket.
dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")

# Binary-classification XGBoost trainer; "target" is the label column of the CSV.
trainer = XGBoostTrainer(
    label_column="target",
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 4,
    },
    datasets={"train": dataset},
)

# Create Tuner
tuner = Tuner(
    trainer,
    # Add some parameters to tune.  This `params` dict is merged with the
    # `params` given to the XGBoostTrainer above, so `max_depth` is overridden.
    param_space={"params": {"max_depth": tune.choice([4, 5, 6])}},
    # Specify tuning behavior: minimize train logloss over 2 sampled trials.
    tune_config=tune.TuneConfig(metric="train-logloss", mode="min", num_samples=2),
)
# Run tuning job
tuner.fit()
# __basic_end__
# __xgboost_start__
import ray
from ray import tune
from ray.tune import Tuner
from ray.train.xgboost import XGBoostTrainer
from ray.air.config import ScalingConfig, RunConfig

dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")

# Create an XGBoost trainer
trainer = XGBoostTrainer(
    label_column="target",
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 4,
    },
    num_boost_round=10,
    datasets={"train": dataset},
)

param_space = {
    # Tune parameters directly passed into the XGBoostTrainer
    "num_boost_round": tune.randint(5, 20),
    # `params` will be merged with the `params` defined in the above XGBoostTrainer
    "params": {
        "min_child_weight": tune.uniform(0.8, 1.0),
        # Below will overwrite the XGBoostTrainer setting
        "max_depth": tune.randint(1, 5),
    },
    # Tune the number of distributed workers
    "scaling_config": ScalingConfig(num_workers=tune.grid_search([1, 2])),
}

tuner = Tuner(
    trainable=trainer,
    # Name the experiment; results land under <local_dir>/test_tuner.
    run_config=RunConfig(name="test_tuner"),
    param_space=param_space,
    # Minimize train logloss; 2 samples per grid-search value, at most 2
    # trials running concurrently.
    tune_config=tune.TuneConfig(
        mode="min", metric="train-logloss", num_samples=2, max_concurrent_trials=2
    ),
)
result_grid = tuner.fit()
# __xgboost_end__
# __torch_start__
from ray import tune
from ray.tune import Tuner
# ScalingConfig/RunConfig are used below; import them here so this snippet is
# self-contained when rendered standalone in the docs (matching the xgboost
# snippet in this file).
from ray.air.config import ScalingConfig, RunConfig
from ray.air.examples.pytorch.torch_linear_example import (
    train_func as linear_train_func,
)
from ray.train.torch import TorchTrainer

# Single-worker CPU Torch trainer running the example linear-regression loop.
trainer = TorchTrainer(
    train_loop_per_worker=linear_train_func,
    train_loop_config={"lr": 1e-2, "batch_size": 4, "epochs": 10},
    scaling_config=ScalingConfig(num_workers=1, use_gpu=False),
)

param_space = {
    # The params will be merged with the ones defined in the TorchTrainer
    "train_loop_config": {
        # This is a parameter that hasn't been set in the TorchTrainer
        "hidden_size": tune.randint(1, 4),
        # This will overwrite whatever was set when TorchTrainer was instantiated
        "batch_size": tune.choice([4, 8]),
    },
    # Tune the number of distributed workers
    "scaling_config": ScalingConfig(num_workers=tune.grid_search([1, 2])),
}

tuner = Tuner(
    trainable=trainer,
    run_config=RunConfig(name="test_tuner", local_dir="~/ray_results"),
    param_space=param_space,
    # Minimize the reported "loss" metric; 2 samples per grid-search value,
    # at most 2 trials running concurrently.
    tune_config=tune.TuneConfig(
        mode="min", metric="loss", num_samples=2, max_concurrent_trials=2
    ),
)
result_grid = tuner.fit()
# __torch_end__
# __tune_preprocess_start__
# `tune` is used below (tune.grid_search); import it so this snippet works
# when copied standalone from the rendered docs.
from ray import tune
from ray.tune import Tuner
from ray.data.preprocessors import StandardScaler

# Two alternative preprocessors that standardize different feature columns.
prep_v1 = StandardScaler(["worst radius", "worst area"])
prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])

# Grid-search over the preprocessor itself: one trial per preprocessor.
# `trainer` is the XGBoostTrainer defined earlier in this file.
tuner = Tuner(
    trainer,
    param_space={
        "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        # Your other parameters go here
    },
)
# __tune_preprocess_end__
# __tune_dataset_start__
# `ray` and `tune` are used below; import them so this snippet works when
# copied standalone from the rendered docs.
import ray
from ray import tune


def get_dataset():
    """Return the breast-cancer example dataset read from S3."""
    return ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")


def get_another_dataset():
    # imagine this is a different dataset
    return ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")


dataset_1 = get_dataset()
dataset_2 = get_another_dataset()

# Grid-search over the training dataset: one trial per dataset.
# `trainer` is the XGBoostTrainer defined earlier in this file.
tuner = tune.Tuner(
    trainer,
    param_space={
        "datasets": {
            "train": tune.grid_search([dataset_1, dataset_2]),
        }
        # Your other parameters go here
    },
)
# __tune_dataset_end__
# __tune_parallelism_start__
from ray.tune import TuneConfig

# Sample 100 trials but cap parallelism: at most 10 trials run at once.
config = TuneConfig(
    # ...
    num_samples=100,
    max_concurrent_trials=10,
)
# __tune_parallelism_end__
# __tune_optimization_start__
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.schedulers import HyperBandScheduler
from ray.tune import TuneConfig

# Combine a search algorithm (picks the next hyperparameter configuration)
# with a trial scheduler (stops unpromising trials early).
config = TuneConfig(
    # ...
    search_alg=BayesOptSearch(),
    scheduler=HyperBandScheduler(),
)
# __tune_optimization_end__
# __result_grid_inspection_start__
from ray.air.config import RunConfig
from ray.tune import Tuner, TuneConfig

# `trainer` and `param_space` are defined in the earlier snippets of this file.
tuner = Tuner(
    trainable=trainer,
    param_space=param_space,
    tune_config=TuneConfig(mode="min", metric="loss", num_samples=5),
)
result_grid = tuner.fit()

# One result per trial.
num_results = len(result_grid)

# Check if there have been errors
if result_grid.errors:
    print("At least one trial failed.")

# Get the best result
best_result = result_grid.get_best_result()

# And the best checkpoint
best_checkpoint = best_result.checkpoint

# And the best metrics
best_metric = best_result.metrics

# Inspect all results
for result in result_grid:
    if result.error:
        print("The trial had an error:", result.error)
        continue
    print("The trial finished successfully with the metrics:", result.metrics["loss"])
# __result_grid_inspection_end__
# __run_config_start__
from ray import air, tune
from ray.air.config import RunConfig

# Run-level settings shared by all trials of the experiment.
run_config = RunConfig(
    # Experiment name; results go under <local_dir>/MyExperiment.
    name="MyExperiment",
    local_dir="./your_log_directory/",
    verbose=2,
    # Sync trial results/checkpoints to cloud storage (fill in a real URI).
    sync_config=tune.SyncConfig(upload_dir="s3://..."),
    # Checkpoint every 2 iterations.
    checkpoint_config=air.CheckpointConfig(checkpoint_frequency=2),
)
# __run_config_end__
# __tune_config_start__
from ray.tune import TuneConfig
from ray.tune.search.bayesopt import BayesOptSearch

# Tuning-level settings: what to optimize, how many trials, and how.
tune_config = TuneConfig(
    metric="loss",
    mode="min",
    max_concurrent_trials=10,
    num_samples=100,
    search_alg=BayesOptSearch(),
)
# __tune_config_end__
# __tune_restore_start__
# Restore the experiment saved by the earlier RunConfig(name="test_tuner")
# run and re-run trials that previously errored.
tuner = Tuner.restore("~/ray_results/test_tuner", restart_errored=True)
tuner.fit()
# __tune_restore_end__