Mirror of https://github.com/vale981/ray, synced 2025-03-08 19:41:38 -05:00

Update documentation to use `session.report`.

Next steps:
1. Update our internal callers to use `session.report`. Most importantly, `CheckpointManager` and `DataParallelTrainer`.
2. Update `get_trial_resources` to use PGF notions so that it incorporates the requirements of `ResourceChangingScheduler`. @Yard1
3. After 2 is done, change all `tune.get_trial_resources` calls to `session.get_trial_resources`.
4. [internal implementation] Remove the special checkpoint handling logic from the HuggingFace trainer and optimize the flow for checkpoint conversion with `session.report`.

Co-authored-by: Antoni Baum <antoni.baum@protonmail.com>
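For reference, a minimal sketch of how the `session` API touched by steps 2-4 is used from inside a trainable. The function name, metric key, checkpoint contents, and resource lookup below are illustrative assumptions, not part of this change:

```python
from ray.air import session
from ray.air.checkpoint import Checkpoint


def train_fn(config):  # hypothetical trainable, for illustration only
    for step in range(10):
        loss = config["alpha"] * step  # placeholder computation
        # Unified reporting: metrics and an optional checkpoint both go
        # through session.report instead of tune.report / tune.checkpoint_dir.
        session.report(
            {"mean_loss": loss},
            checkpoint=Checkpoint.from_dict({"step": step}),
        )

    # With ResourceChangingScheduler, a trial's resources can change between
    # restarts, so the trainable queries its current PlacementGroupFactory
    # rather than hard-coding resource amounts.
    trial_resources = session.get_trial_resources()
    num_cpus = trial_resources.required_resources.get("CPU", 1)
```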
30 lines
835 B
Python
from ray import tune
from ray.air import session


def objective(step, alpha, beta):
    return (0.1 + alpha * step / 100) ** (-1) + beta * 0.1


def training_function(config):
    # Hyperparameters
    alpha, beta = config["alpha"], config["beta"]
    for step in range(10):
        # Iterative training function - can be any arbitrary training procedure.
        intermediate_score = objective(step, alpha, beta)
        # Feed the score back to Tune.
        session.report({"mean_loss": intermediate_score})


analysis = tune.run(
    training_function,
    config={
        "alpha": tune.grid_search([0.001, 0.01, 0.1]),
        "beta": tune.choice([1, 2, 3]),
    },
)

print("Best config: ", analysis.get_best_config(metric="mean_loss", mode="min"))

# Get a dataframe for analyzing trial results.
df = analysis.results_df
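As a usage note (not part of the original example), `analysis.results_df` is a regular pandas DataFrame, so the trials can be ranked directly; the column name here assumes the `mean_loss` metric reported above:

```python
# Rank trials by the reported metric; lower mean_loss is better here.
print(df.sort_values("mean_loss").head())
```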