# flake8: noqa
# isort: skip_file

# __basic_start__
import ray
from ray import tune
from ray.tune import Tuner
from ray.train.xgboost import XGBoostTrainer

dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")

trainer = XGBoostTrainer(
    label_column="target",
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 4,
    },
    datasets={"train": dataset},
)

# Create Tuner
tuner = Tuner(
    trainer,
    # Add some parameters to tune
    param_space={"params": {"max_depth": tune.choice([4, 5, 6])}},
    # Specify tuning behavior
    tune_config=tune.TuneConfig(metric="train-logloss", mode="min", num_samples=2),
)
# Run tuning job
tuner.fit()
# __basic_end__

# __xgboost_start__
import ray
from ray import tune
from ray.tune import Tuner
from ray.train.xgboost import XGBoostTrainer
from ray.air.config import ScalingConfig, RunConfig

dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")

# Create an XGBoost trainer
trainer = XGBoostTrainer(
    label_column="target",
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 4,
    },
    num_boost_round=10,
    datasets={"train": dataset},
)

param_space = {
    # Tune parameters directly passed into the XGBoostTrainer
    "num_boost_round": tune.randint(5, 20),
    # `params` will be merged with the `params` defined in the XGBoostTrainer above
    "params": {
        "min_child_weight": tune.uniform(0.8, 1.0),
        # The setting below will overwrite the XGBoostTrainer setting
        "max_depth": tune.randint(1, 5),
    },
    # Tune the number of distributed workers
    "scaling_config": ScalingConfig(num_workers=tune.grid_search([1, 2])),
}

tuner = Tuner(
    trainable=trainer,
    run_config=RunConfig(name="test_tuner"),
    param_space=param_space,
    tune_config=tune.TuneConfig(
        mode="min", metric="train-logloss", num_samples=2, max_concurrent_trials=2
    ),
)
result_grid = tuner.fit()
# __xgboost_end__

# __torch_start__
from ray import tune
from ray.tune import Tuner
from ray.air.config import ScalingConfig, RunConfig
from ray.air.examples.pytorch.torch_linear_example import (
    train_func as linear_train_func,
)
from ray.train.torch import TorchTrainer

trainer = TorchTrainer(
    train_loop_per_worker=linear_train_func,
    train_loop_config={"lr": 1e-2, "batch_size": 4, "epochs": 10},
    scaling_config=ScalingConfig(num_workers=1, use_gpu=False),
)

param_space = {
    # The params will be merged with the ones defined in the TorchTrainer
    "train_loop_config": {
        # This is a parameter that hasn't been set in the TorchTrainer
        "hidden_size": tune.randint(1, 4),
        # This will overwrite whatever was set when the TorchTrainer was instantiated
        "batch_size": tune.choice([4, 8]),
    },
    # Tune the number of distributed workers
    "scaling_config": ScalingConfig(num_workers=tune.grid_search([1, 2])),
}

tuner = Tuner(
    trainable=trainer,
    run_config=RunConfig(name="test_tuner", local_dir="~/ray_results"),
    param_space=param_space,
    tune_config=tune.TuneConfig(
        mode="min", metric="loss", num_samples=2, max_concurrent_trials=2
    ),
)
result_grid = tuner.fit()
# __torch_end__

# __tune_preprocess_start__
from ray import tune
from ray.data.preprocessors import StandardScaler
from ray.tune import Tuner

prep_v1 = StandardScaler(["worst radius", "worst area"])
prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
tuner = Tuner(
    trainer,
    param_space={
        "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        # Your other parameters go here
    },
)
# __tune_preprocess_end__

# __tune_dataset_start__
def get_dataset():
    return ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")


def get_another_dataset():
    # Imagine this is a different dataset
    return ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")


dataset_1 = get_dataset()
dataset_2 = get_another_dataset()

tuner = tune.Tuner(
    trainer,
    param_space={
        "datasets": {
            "train": tune.grid_search([dataset_1, dataset_2]),
        },
        # Your other parameters go here
    },
)
# __tune_dataset_end__

# __tune_parallelism_start__
from ray.tune import TuneConfig

config = TuneConfig(
    # ...
    num_samples=100,
    max_concurrent_trials=10,
)
# __tune_parallelism_end__

# __tune_optimization_start__
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.schedulers import HyperBandScheduler
from ray.tune import TuneConfig

config = TuneConfig(
    # ...
    search_alg=BayesOptSearch(),
    scheduler=HyperBandScheduler(),
)
# __tune_optimization_end__

# __result_grid_inspection_start__
from ray.air.config import RunConfig
from ray.tune import Tuner, TuneConfig

tuner = Tuner(
    trainable=trainer,
    param_space=param_space,
    tune_config=TuneConfig(mode="min", metric="loss", num_samples=5),
)
result_grid = tuner.fit()

num_results = len(result_grid)

# Check if there have been errors
if result_grid.errors:
    print("At least one trial failed.")

# Get the best result
best_result = result_grid.get_best_result()

# And the best checkpoint
best_checkpoint = best_result.checkpoint

# And the best metrics
best_metric = best_result.metrics

# Inspect all results
for result in result_grid:
    if result.error:
        print("The trial had an error:", result.error)
        continue

    print("The trial finished successfully with the metrics:", result.metrics["loss"])
# __result_grid_inspection_end__

# __run_config_start__
from ray import air, tune
from ray.air.config import RunConfig

run_config = RunConfig(
    name="MyExperiment",
    local_dir="./your_log_directory/",
    verbose=2,
    sync_config=tune.SyncConfig(upload_dir="s3://..."),
    checkpoint_config=air.CheckpointConfig(checkpoint_frequency=2),
)
# __run_config_end__

# __tune_config_start__
from ray.tune import TuneConfig
from ray.tune.search.bayesopt import BayesOptSearch

tune_config = TuneConfig(
    metric="loss",
    mode="min",
    max_concurrent_trials=10,
    num_samples=100,
    search_alg=BayesOptSearch(),
)
# __tune_config_end__

# __tune_restore_start__
tuner = Tuner.restore("~/ray_results/test_tuner", restart_errored=True)
tuner.fit()
# __tune_restore_end__