# ray/doc/source/ray-air/doc_code/tuner.py

# flake8: noqa
# isort: skip_file
# __basic_start__
import ray
from ray import tune
from ray.tune import Tuner
from ray.train.xgboost import XGBoostTrainer

# Breast-cancer CSV from the anonymous-access AIR example-data S3 bucket.
dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")

# Binary-classification XGBoost trainer; "target" is the label column of the CSV.
trainer = XGBoostTrainer(
    label_column="target",
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 4,
    },
    datasets={"train": dataset},
)

# Create Tuner
tuner = Tuner(
    trainer,
    # Add some parameters to tune.  This `params` dict is merged with the
    # `params` given to the XGBoostTrainer above, so `max_depth` is overridden.
    param_space={"params": {"max_depth": tune.choice([4, 5, 6])}},
    # Specify tuning behavior: minimize train logloss over 2 sampled trials.
    tune_config=tune.TuneConfig(metric="train-logloss", mode="min", num_samples=2),
)
# Run tuning job
tuner.fit()
# __basic_end__
# __xgboost_start__
import ray
from ray import tune
from ray.tune import Tuner
from ray.train.xgboost import XGBoostTrainer
from ray.air.config import ScalingConfig, RunConfig

dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")

# Create an XGBoost trainer
trainer = XGBoostTrainer(
    label_column="target",
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 4,
    },
    num_boost_round=10,
    datasets={"train": dataset},
)

param_space = {
    # Tune parameters directly passed into the XGBoostTrainer
    "num_boost_round": tune.randint(5, 20),
    # `params` will be merged with the `params` defined in the above XGBoostTrainer
    "params": {
        "min_child_weight": tune.uniform(0.8, 1.0),
        # Below will overwrite the XGBoostTrainer setting
        "max_depth": tune.randint(1, 5),
    },
    # Tune the number of distributed workers
    "scaling_config": ScalingConfig(num_workers=tune.grid_search([1, 2])),
}

tuner = Tuner(
    trainable=trainer,
    # Name the experiment; results land under <local_dir>/test_tuner.
    run_config=RunConfig(name="test_tuner"),
    param_space=param_space,
    # Minimize train logloss; 2 samples per grid-search value, at most 2
    # trials running concurrently.
    tune_config=tune.TuneConfig(
        mode="min", metric="train-logloss", num_samples=2, max_concurrent_trials=2
    ),
)
result_grid = tuner.fit()
# __xgboost_end__
# __torch_start__
from ray import tune
from ray.tune import Tuner
# ScalingConfig/RunConfig are used below; import them here so this snippet is
# self-contained when rendered standalone in the docs (matching the xgboost
# snippet in this file).
from ray.air.config import ScalingConfig, RunConfig
from ray.air.examples.pytorch.torch_linear_example import (
    train_func as linear_train_func,
)
from ray.train.torch import TorchTrainer

# Single-worker CPU Torch trainer running the example linear-regression loop.
trainer = TorchTrainer(
    train_loop_per_worker=linear_train_func,
    train_loop_config={"lr": 1e-2, "batch_size": 4, "epochs": 10},
    scaling_config=ScalingConfig(num_workers=1, use_gpu=False),
)

param_space = {
    # The params will be merged with the ones defined in the TorchTrainer
    "train_loop_config": {
        # This is a parameter that hasn't been set in the TorchTrainer
        "hidden_size": tune.randint(1, 4),
        # This will overwrite whatever was set when TorchTrainer was instantiated
        "batch_size": tune.choice([4, 8]),
    },
    # Tune the number of distributed workers
    "scaling_config": ScalingConfig(num_workers=tune.grid_search([1, 2])),
}

tuner = Tuner(
    trainable=trainer,
    run_config=RunConfig(name="test_tuner", local_dir="~/ray_results"),
    param_space=param_space,
    # Minimize the reported "loss" metric; 2 samples per grid-search value,
    # at most 2 trials running concurrently.
    tune_config=tune.TuneConfig(
        mode="min", metric="loss", num_samples=2, max_concurrent_trials=2
    ),
)
result_grid = tuner.fit()
# __torch_end__
# __tune_preprocess_start__
# `tune` is used below (tune.grid_search); import it so this snippet works
# when copied standalone from the rendered docs.
from ray import tune
from ray.tune import Tuner
from ray.data.preprocessors import StandardScaler

# Two alternative preprocessors that standardize different feature columns.
prep_v1 = StandardScaler(["worst radius", "worst area"])
prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])

# Grid-search over the preprocessor itself: one trial per preprocessor.
# `trainer` is the XGBoostTrainer defined earlier in this file.
tuner = Tuner(
    trainer,
    param_space={
        "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        # Your other parameters go here
    },
)
# __tune_preprocess_end__
# __tune_dataset_start__
# `ray` and `tune` are used below; import them so this snippet works when
# copied standalone from the rendered docs.
import ray
from ray import tune


def get_dataset():
    """Return the breast-cancer example dataset read from S3."""
    return ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")


def get_another_dataset():
    # imagine this is a different dataset
    return ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")


dataset_1 = get_dataset()
dataset_2 = get_another_dataset()

# Grid-search over the training dataset: one trial per dataset.
# `trainer` is the XGBoostTrainer defined earlier in this file.
tuner = tune.Tuner(
    trainer,
    param_space={
        "datasets": {
            "train": tune.grid_search([dataset_1, dataset_2]),
        }
        # Your other parameters go here
    },
)
# __tune_dataset_end__
# __tune_parallelism_start__
from ray.tune import TuneConfig

# Sample 100 trials but cap parallelism: at most 10 trials run at once.
config = TuneConfig(
    # ...
    num_samples=100,
    max_concurrent_trials=10,
)
# __tune_parallelism_end__
# __tune_optimization_start__
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.schedulers import HyperBandScheduler
from ray.tune import TuneConfig

# Combine a search algorithm (picks the next hyperparameter configuration)
# with a trial scheduler (stops unpromising trials early).
config = TuneConfig(
    # ...
    search_alg=BayesOptSearch(),
    scheduler=HyperBandScheduler(),
)
# __tune_optimization_end__
# __result_grid_inspection_start__
from ray.air.config import RunConfig
from ray.tune import Tuner, TuneConfig

# `trainer` and `param_space` are defined in the earlier snippets of this file.
tuner = Tuner(
    trainable=trainer,
    param_space=param_space,
    tune_config=TuneConfig(mode="min", metric="loss", num_samples=5),
)
result_grid = tuner.fit()

# One result per trial.
num_results = len(result_grid)

# Check if there have been errors
if result_grid.errors:
    print("At least one trial failed.")

# Get the best result
best_result = result_grid.get_best_result()

# And the best checkpoint
best_checkpoint = best_result.checkpoint

# And the best metrics
best_metric = best_result.metrics

# Inspect all results
for result in result_grid:
    if result.error:
        print("The trial had an error:", result.error)
        continue
    print("The trial finished successfully with the metrics:", result.metrics["loss"])
# __result_grid_inspection_end__
# __run_config_start__
from ray import air, tune
from ray.air.config import RunConfig

# Run-level settings shared by all trials of the experiment.
run_config = RunConfig(
    # Experiment name; results go under <local_dir>/MyExperiment.
    name="MyExperiment",
    local_dir="./your_log_directory/",
    verbose=2,
    # Sync trial results/checkpoints to cloud storage (fill in a real URI).
    sync_config=tune.SyncConfig(upload_dir="s3://..."),
    # Checkpoint every 2 iterations.
    checkpoint_config=air.CheckpointConfig(checkpoint_frequency=2),
)
# __run_config_end__
# __tune_config_start__
from ray.tune import TuneConfig
from ray.tune.search.bayesopt import BayesOptSearch

# Tuning-level settings: what to optimize, how many trials, and how.
tune_config = TuneConfig(
    metric="loss",
    mode="min",
    max_concurrent_trials=10,
    num_samples=100,
    search_alg=BayesOptSearch(),
)
# __tune_config_end__
# __tune_restore_start__
# Restore the experiment saved by the earlier RunConfig(name="test_tuner")
# run and re-run trials that previously errored.
tuner = Tuner.restore("~/ray_results/test_tuner", restart_errored=True)
tuner.fit()
# __tune_restore_end__