# flake8: noqa

# __reproducible_start__
import numpy as np
from ray import tune


def train(config):
    # Set the seed for the trainable's random result.
    # If you remove this line, you will get different results
    # each time you run the trial, even if the configuration
    # is the same.
    np.random.seed(config["seed"])
    random_result = np.random.uniform(0, 100, size=1).item()
    tune.report(result=random_result)


# Set the seed for Ray Tune's random search.
# If you remove this line, you will get different configurations
# each time you run the script.
np.random.seed(1234)
tune.run(
    train,
    config={"seed": tune.randint(0, 1000)},
    search_alg=tune.search.BasicVariantGenerator(),
    num_samples=10,
)
# __reproducible_end__

# __basic_config_start__
config = {"a": {"x": tune.uniform(0, 10)}, "b": tune.choice([1, 2, 3])}
# __basic_config_end__

# __conditional_spaces_start__
config = {
    "a": tune.randint(5, 10),
    "b": tune.sample_from(lambda spec: np.random.randint(0, spec.config.a)),
}
# __conditional_spaces_end__


# __iter_start__
def _iter():
    for a in range(5, 10):
        for b in range(a):
            yield a, b


config = {
    "ab": tune.grid_search(list(_iter())),
}
# __iter_end__


def train(config):
    random_result = np.random.uniform(0, 100, size=1).item()
    tune.report(result=random_result)


train_fn = train
MOCK = True

# Note: we put this check here to make sure at least the syntax of
# the code is correct. Some of these snippets simply can't be run as-is.
if not MOCK:
    # __resources_start__
    tune.run(
        train_fn,
        resources_per_trial={"cpu": 2, "gpu": 0.5, "custom_resources": {"hdd": 80}},
    )
    # __resources_end__

    # __resources_pgf_start__
    tune.run(
        train_fn,
        resources_per_trial=tune.PlacementGroupFactory(
            [
                {"CPU": 2, "GPU": 0.5, "hdd": 80},
                {"CPU": 1},
                {"CPU": 1},
            ],
            strategy="PACK",
        ),
    )
    # __resources_pgf_end__

    metric = None

    # __modin_start__
    def train_fn(config, checkpoint_dir=None):
        # some Modin operations here, e.g.:
        # import modin.pandas as pd
        tune.report(metric=metric)

    tune.run(
        train_fn,
        resources_per_trial=tune.PlacementGroupFactory(
            [
                {"CPU": 1},  # this bundle will be used by the trainable itself
                {"CPU": 1},  # this bundle will be used by Modin
            ],
            strategy="PACK",
        ),
    )
    # __modin_end__

# __huge_data_start__
from ray import tune
import numpy as np


def train(config, checkpoint_dir=None, num_epochs=5, data=None):
    for i in range(num_epochs):
        for sample in data:
            # ... train on sample
            pass


# Some huge dataset
data = np.random.random(size=100000000)

tune.run(tune.with_parameters(train, num_epochs=5, data=data))
# __huge_data_end__

# __seeded_1_start__
import random

random.seed(1234)
output = [random.randint(0, 100) for _ in range(10)]
# The output will always be the same.
assert output == [99, 56, 14, 0, 11, 74, 4, 85, 88, 10]
# __seeded_1_end__

# __seeded_2_start__
# This should suffice to initialize the RNGs for most Python-based libraries
import random
import numpy as np

random.seed(1234)
np.random.seed(5678)
# __seeded_2_end__

# __torch_tf_seeds_start__
import torch

torch.manual_seed(0)

import tensorflow as tf

tf.random.set_seed(0)
# __torch_tf_seeds_end__
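# The following is a minimal sketch, not one of the published snippets above.
# It demonstrates the property the seeding examples rely on: re-seeding a
# global RNG right before sampling makes the draw deterministic. The helper
# name `_sample_with_seed` is ours, not a Ray Tune or NumPy API.
def _sample_with_seed(seed: int) -> float:
    np.random.seed(seed)
    return float(np.random.uniform(0, 100))


# Identical seeds yield identical samples; different seeds (almost surely) don't.
assert _sample_with_seed(42) == _sample_with_seed(42)
assert _sample_with_seed(42) != _sample_with_seed(43)
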
# __torch_seed_example_start__
import random

import numpy as np
from ray import tune


def trainable(config):
    # config["seed"] is set deterministically, but differs between training runs
    random.seed(config["seed"])
    np.random.seed(config["seed"])
    # torch.manual_seed(config["seed"])
    # ... training code


config = {
    "seed": tune.randint(0, 10000),
    # ...
}

if __name__ == "__main__":
    # Set the seed for the search algorithms/schedulers
    random.seed(1234)
    np.random.seed(1234)
    # Don't forget to check if the search alg has a `seed` parameter
    tune.run(trainable, config=config)
# __torch_seed_example_end__

# __large_data_start__
from ray import tune
import numpy as np


def f(config, data=None):
    pass  # use data


data = np.random.random(size=100000000)

tune.run(tune.with_parameters(f, data=data))
# __large_data_end__

MyTrainableClass = None

if not MOCK:
    # __log_1_start__
    tune.run(
        MyTrainableClass,
        local_dir="~/ray_results",
        sync_config=tune.SyncConfig(upload_dir="s3://my-log-dir"),
    )
    # __log_1_end__

    # __log_2_start__
    from ray.tune.syncer import Syncer

    class CustomSyncer(Syncer):
        def sync_up(
            self, local_dir: str, remote_dir: str, exclude: list = None
        ) -> bool:
            pass  # sync up

        def sync_down(
            self, remote_dir: str, local_dir: str, exclude: list = None
        ) -> bool:
            pass  # sync down

        def delete(self, remote_dir: str) -> bool:
            pass  # delete

    tune.run(
        MyTrainableClass,
        sync_config=tune.SyncConfig(
            upload_dir="s3://my-log-dir", syncer=CustomSyncer()
        ),
    )
    # __log_2_end__

if not MOCK:
    # __s3_start__
    from ray import tune

    tune.run(
        tune.durable(train_fn),
        # ...,
        sync_config=tune.SyncConfig(upload_dir="s3://your-s3-bucket/durable-trial/"),
    )
    # __s3_end__

    # __sync_config_start__
    from ray import tune

    tune.run(
        train_fn,
        # ...,
        local_dir="/path/to/shared/storage",
        sync_config=tune.SyncConfig(
            # Do not sync because we are on shared storage
            syncer=None
        ),
    )
    # __sync_config_end__

import ray

ray.shutdown()

# __grid_search_start__
parameters = {
    "qux": tune.sample_from(lambda spec: 2 + 2),
    "bar": tune.grid_search([True, False]),
    "foo": tune.grid_search([1, 2, 3]),
    "baz": "asd",  # a constant value
}

tune.run(train_fn, config=parameters)
# __grid_search_end__

# __grid_search_2_start__
# num_samples=10 repeats the 3x3 grid search 10 times, for a total of 90 trials
tune.run(
    train_fn,
    name="my_trainable",
    config={
        "alpha": tune.uniform(100, 200),
        "beta": tune.sample_from(lambda spec: spec.config.alpha * np.random.normal()),
        "nn_layers": [
            tune.grid_search([16, 64, 256]),
            tune.grid_search([16, 64, 256]),
        ],
    },
    num_samples=10,
)
# __grid_search_2_end__
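
# A rough sketch (ours, not a Tune API) that makes the trial-count arithmetic
# above explicit. It assumes grid search specs are represented as plain
# {"grid_search": [...]} dicts, which is what `tune.grid_search` returns: the
# total number of trials is the product of all grid sizes times `num_samples`.
def _count_grid_trials(spec, num_samples=1):
    total = 1
    if isinstance(spec, dict):
        if isinstance(spec.get("grid_search"), list):
            return len(spec["grid_search"]) * num_samples
        for value in spec.values():
            total *= _count_grid_trials(value)
    elif isinstance(spec, list):
        for value in spec:
            total *= _count_grid_trials(value)
    # Anything else (samplers like tune.uniform, constants) contributes a factor of 1.
    return total * num_samples


_demo_config = {
    "alpha": tune.uniform(100, 200),  # sampled per trial, factor of 1
    "nn_layers": [
        tune.grid_search([16, 64, 256]),
        tune.grid_search([16, 64, 256]),
    ],
}
# 3 x 3 grid combinations, repeated num_samples=10 times -> 90 trials
assert _count_grid_trials(_demo_config, num_samples=10) == 90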