# flake8: noqa
# isort: skip_file

# __basic_start__
import ray
from ray import tune
from ray.tune import Tuner
from ray.train.xgboost import XGBoostTrainer

dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")

trainer = XGBoostTrainer(
    label_column="target",
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 4,
    },
    datasets={"train": dataset},
)

# Create the Tuner
tuner = Tuner(
    trainer,
    # Add some parameters to tune
    param_space={"params": {"max_depth": tune.choice([4, 5, 6])}},
    # Specify the tuning behavior
    tune_config=tune.TuneConfig(metric="train-logloss", mode="min", num_samples=2),
)
# Run the tuning job
tuner.fit()
# __basic_end__

# __xgboost_start__
import ray
from ray import tune
from ray.tune import Tuner
from ray.train.xgboost import XGBoostTrainer
from ray.air.config import ScalingConfig, RunConfig

dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")

# Create an XGBoost trainer
trainer = XGBoostTrainer(
    label_column="target",
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 4,
    },
    num_boost_round=10,
    datasets={"train": dataset},
)

param_space = {
    # Tune parameters that are passed directly into the XGBoostTrainer
    "num_boost_round": tune.randint(5, 20),
    # `params` will be merged with the `params` defined in the XGBoostTrainer above
    "params": {
        "min_child_weight": tune.uniform(0.8, 1.0),
        # The following will overwrite the XGBoostTrainer setting
        "max_depth": tune.randint(1, 5),
    },
    # Tune the number of distributed workers
    "scaling_config": ScalingConfig(num_workers=tune.grid_search([1, 2])),
}

tuner = Tuner(
    trainable=trainer,
    run_config=RunConfig(name="test_tuner"),
    param_space=param_space,
    tune_config=tune.TuneConfig(
        mode="min", metric="train-logloss", num_samples=2, max_concurrent_trials=2
    ),
)
result_grid = tuner.fit()
# __xgboost_end__

# __torch_start__
from ray import tune
from ray.tune import Tuner
from ray.air.config import ScalingConfig, RunConfig
from ray.air.examples.pytorch.torch_linear_example import (
    train_func as linear_train_func,
)
from ray.train.torch import TorchTrainer

trainer = TorchTrainer(
    train_loop_per_worker=linear_train_func,
    train_loop_config={"lr": 1e-2, "batch_size": 4, "epochs": 10},
    scaling_config=ScalingConfig(num_workers=1, use_gpu=False),
)

param_space = {
    # The params will be merged with the ones defined in the TorchTrainer
    "train_loop_config": {
        # This is a parameter that hasn't been set in the TorchTrainer
        "hidden_size": tune.randint(1, 4),
        # This will overwrite whatever was set when the TorchTrainer was instantiated
        "batch_size": tune.choice([4, 8]),
    },
    # Tune the number of distributed workers
    "scaling_config": ScalingConfig(num_workers=tune.grid_search([1, 2])),
}

tuner = Tuner(
    trainable=trainer,
    run_config=RunConfig(name="test_tuner", local_dir="~/ray_results"),
    param_space=param_space,
    tune_config=tune.TuneConfig(
        mode="min", metric="loss", num_samples=2, max_concurrent_trials=2
    ),
)
result_grid = tuner.fit()
# __torch_end__

# __tune_preprocess_start__
from ray.data.preprocessors import StandardScaler
from ray.tune import Tuner

prep_v1 = StandardScaler(["worst radius", "worst area"])
prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])

tuner = Tuner(
    trainer,
    param_space={
        "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        # Your other parameters go here
    },
)
# __tune_preprocess_end__

# __tune_dataset_start__
def get_dataset():
    return ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")


def get_another_dataset():
    # Imagine this is a different dataset
    return ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")


dataset_1 = get_dataset()
dataset_2 = get_another_dataset()

tuner = tune.Tuner(
    trainer,
    param_space={
        "datasets": {
            "train": tune.grid_search([dataset_1, dataset_2]),
        }
        # Your other parameters go here
    },
)
# __tune_dataset_end__

# __tune_parallelism_start__
from ray.tune import TuneConfig

config = TuneConfig(
    # ...
    num_samples=100,
    max_concurrent_trials=10,
)
# __tune_parallelism_end__
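
# The sketch below is an editorial addition, not part of the original example.
# Assuming the Ray 2.x Tune API, trial concurrency can also be capped at the
# searcher level by wrapping a search algorithm in a ConcurrencyLimiter. This
# complements `max_concurrent_trials` above and is useful when a sequential
# optimizer such as BayesOptSearch should only propose a few points at a time.
# The name `limited_config` is illustrative.
from ray.tune.search import ConcurrencyLimiter
from ray.tune.search.bayesopt import BayesOptSearch

limited_config = TuneConfig(
    metric="loss",
    mode="min",
    num_samples=100,
    # Let the searcher have at most 4 in-flight suggestions at any time
    search_alg=ConcurrencyLimiter(BayesOptSearch(), max_concurrent=4),
)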

# __tune_optimization_start__
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.schedulers import HyperBandScheduler
from ray.tune import TuneConfig

config = TuneConfig(
    # ...
    search_alg=BayesOptSearch(),
    scheduler=HyperBandScheduler(),
)
# __tune_optimization_end__

# __result_grid_inspection_start__
from ray.air.config import RunConfig
from ray.tune import Tuner, TuneConfig

tuner = Tuner(
    trainable=trainer,
    param_space=param_space,
    tune_config=TuneConfig(mode="min", metric="loss", num_samples=5),
)
result_grid = tuner.fit()
num_results = len(result_grid)

# Check if there have been errors
if result_grid.errors:
    print("At least one trial failed.")

# Get the best result
best_result = result_grid.get_best_result()

# And the best checkpoint
best_checkpoint = best_result.checkpoint

# And the best metrics
best_metric = best_result.metrics

# Inspect all results
for result in result_grid:
    if result.error:
        print("The trial had an error:", result.error)
        continue
    print("The trial finished successfully with the metrics:", result.metrics["loss"])
# __result_grid_inspection_end__

# __run_config_start__
from ray import air, tune
from ray.air.config import RunConfig

run_config = RunConfig(
    name="MyExperiment",
    local_dir="./your_log_directory/",
    verbose=2,
    sync_config=tune.SyncConfig(upload_dir="s3://..."),
    checkpoint_config=air.CheckpointConfig(checkpoint_frequency=2),
)
# __run_config_end__

# __tune_config_start__
from ray.tune import TuneConfig
from ray.tune.search.bayesopt import BayesOptSearch

tune_config = TuneConfig(
    metric="loss",
    mode="min",
    max_concurrent_trials=10,
    num_samples=100,
    search_alg=BayesOptSearch(),
)
# __tune_config_end__

# __tune_restore_start__
tuner = Tuner.restore("~/ray_results/test_tuner", restart_errored=True)
tuner.fit()
# __tune_restore_end__
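
# Editorial addendum, not part of the original example. Assuming the Ray 2.x
# API, a ResultGrid can also be converted into a pandas DataFrame, which is
# convenient for ad-hoc sorting and plotting of trial metrics. `result_grid`
# refers to the grid produced in the result-inspection section above.
df = result_grid.get_dataframe()
print(df.head())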