mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00
[docs/air] Move upload example to docs (#25022)
This commit is contained in:
parent
5a70b732e8
commit
d57ba750f5
10 changed files with 831 additions and 117 deletions
|
@ -6,9 +6,7 @@
|
|||
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,-needs_credentials python/ray/ml/...
|
||||
# Only setup credentials in branch builds
|
||||
- if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
|
||||
- python ./ci/env/setup_credentials.py wandb comet_ml
|
||||
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,needs_credentials python/ray/ml/...
|
||||
- python ./ci/env/cleanup_test_state.py wandb comet_ml
|
||||
|
||||
- label: ":brain: RLlib: Learning discr. actions TF2-static-graph"
|
||||
conditions: ["RAY_CI_RLLIB_AFFECTED"]
|
||||
|
@ -385,4 +383,6 @@
|
|||
commands:
|
||||
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
|
||||
- DOC_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
|
||||
- python ./ci/env/setup_credentials.py wandb comet_ml
|
||||
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air,-gpu,-py37,-post_wheel_build doc/...
|
||||
- python ./ci/env/cleanup_test_state.py wandb comet_ml
|
||||
|
|
|
@ -184,6 +184,8 @@ parts:
|
|||
- file: ray-air/examples/analyze_tuning_results
|
||||
title: "Analyze hyperparameter tuning results"
|
||||
- file: ray-air/examples/rl_serving_example
|
||||
- file: ray-air/examples/upload_to_comet_ml
|
||||
- file: ray-air/examples/upload_to_wandb
|
||||
- file: ray-air/examples/lightgbm_example
|
||||
- file: ray-air/examples/rl_online_example
|
||||
- file: ray-air/examples/rl_offline_example
|
||||
|
|
|
@ -11,6 +11,8 @@ Guides
|
|||
|
||||
- :doc:`/ray-air/examples/analyze_tuning_results`: How to analyze trial results (e.g. find the best trial) of a hyperparameter tuning run.
|
||||
- :doc:`/ray-air/examples/rl_serving_example`
|
||||
- :doc:`/ray-air/examples/upload_to_comet_ml`
|
||||
- :doc:`/ray-air/examples/upload_to_wandb`
|
||||
|
||||
|
||||
Trainers
|
||||
|
|
409
doc/source/ray-air/examples/upload_to_comet_ml.ipynb
Normal file
409
doc/source/ray-air/examples/upload_to_comet_ml.ipynb
Normal file
|
@ -0,0 +1,409 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98d7c620",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Logging results and uploading models to Comet ML\n",
|
||||
"In this example, we train a simple XGBoost model and log the training\n",
|
||||
"results to Comet ML. We also save the resulting model checkpoints\n",
|
||||
"as artifacts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6e66577",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's start with installing our dependencies:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "6d6297ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install -qU \"ray[tune]\" sklearn xgboost_ray comet_ml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c2e21446",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we need some imports:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "dffff484",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import ray\n",
|
||||
"\n",
|
||||
"from ray.ml import RunConfig\n",
|
||||
"from ray.ml.result import Result\n",
|
||||
"from ray.ml.train.integrations.xgboost import XGBoostTrainer\n",
|
||||
"from ray.tune.integration.comet import CometLoggerCallback\n",
|
||||
"from sklearn.datasets import load_breast_cancer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "29fcd93b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We define a simple function that returns our training dataset as a Ray Dataset:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "cf830706",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_train_dataset() -> ray.data.Dataset:\n",
|
||||
" \"\"\"Return the \"Breast cancer\" dataset as a Ray dataset.\"\"\"\n",
|
||||
" data_raw = load_breast_cancer(as_frame=True)\n",
|
||||
" df = data_raw[\"data\"]\n",
|
||||
" df[\"target\"] = data_raw[\"target\"]\n",
|
||||
" return ray.data.from_pandas(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f48f948",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we define a simple training function. All the magic happens within the `CometLoggerCallback`:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"CometLoggerCallback(\n",
|
||||
" project_name=comet_project,\n",
|
||||
" save_checkpoints=True,\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"It will automatically log all results to Comet ML and upload the checkpoints as artifacts. It assumes you're logged into Comet via an API key or your `~/.comet.config`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "230f23a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def train_model(train_dataset: ray.data.Dataset, comet_project: str) -> Result:\n",
|
||||
" \"\"\"Train a simple XGBoost model and return the result.\"\"\"\n",
|
||||
" trainer = XGBoostTrainer(\n",
|
||||
" scaling_config={\"num_workers\": 2},\n",
|
||||
" params={\"tree_method\": \"auto\"},\n",
|
||||
" label_column=\"target\",\n",
|
||||
" datasets={\"train\": train_dataset},\n",
|
||||
" num_boost_round=10,\n",
|
||||
" run_config=RunConfig(\n",
|
||||
" callbacks=[\n",
|
||||
" # This is the part needed to enable logging to Comet ML.\n",
|
||||
" # It assumes Comet ML can find a valid API key (e.g. by setting\n",
|
||||
" # the ``COMET_API_KEY`` environment variable).\n",
|
||||
" CometLoggerCallback(\n",
|
||||
" project_name=comet_project,\n",
|
||||
" save_checkpoints=True,\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
" ),\n",
|
||||
" )\n",
|
||||
" result = trainer.fit()\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "711b1d7d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's kick off a run:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9bfd9a8d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2022-05-19 15:19:17,237\tINFO services.py:1483 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265\u001b[39m\u001b[22m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"== Status ==<br>Current time: 2022-05-19 15:19:35 (running for 00:00:14.95)<br>Memory usage on this node: 10.2/16.0 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/5.12 GiB heap, 0.0/2.0 GiB objects<br>Result logdir: /Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-19-19<br>Number of trials: 1/1 (1 TERMINATED)<br><table>\n",
|
||||
"<thead>\n",
|
||||
"<tr><th>Trial name </th><th>status </th><th>loc </th><th style=\"text-align: right;\"> iter</th><th style=\"text-align: right;\"> total time (s)</th><th style=\"text-align: right;\"> train-rmse</th></tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr><td>XGBoostTrainer_ac544_00000</td><td>TERMINATED</td><td>127.0.0.1:19852</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 9.7203</td><td style=\"text-align: right;\"> 0.030717</td></tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table><br><br>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"COMET WARNING: As you are running in a Jupyter environment, you will need to call `experiment.end()` when finished to ensure all metrics and code are logged before exiting.\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:21,584\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=16 --runtime-env-hash=-2010331134\n",
|
||||
"COMET INFO: Experiment is live on comet.ml https://www.comet.ml/krfricke/ray-air-example/ecd3726ca127497ba7386003a249fad6\n",
|
||||
"\n",
|
||||
"COMET WARNING: Failed to add tag(s) None to the experiment\n",
|
||||
"\n",
|
||||
"COMET WARNING: Empty mapping given to log_params({}); ignoring\n",
|
||||
"\u001b[2m\u001b[36m(GBDTTrainable pid=19852)\u001b[0m UserWarning: Dataset 'train' has 1 blocks, which is less than the `num_workers` 2. This dataset will be automatically repartitioned to 2 blocks.\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:24,628\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=17 --runtime-env-hash=-2010331069\n",
|
||||
"\u001b[2m\u001b[36m(GBDTTrainable pid=19852)\u001b[0m 2022-05-19 15:19:25,961\tINFO main.py:980 -- [RayXGBoost] Created 2 new actors (2 total actors). Waiting until actors are ready for training.\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:26,830\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=18 --runtime-env-hash=-2010331069\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:26,918\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=20 --runtime-env-hash=-2010331134\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:26,922\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=21 --runtime-env-hash=-2010331134\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:26,922\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=22 --runtime-env-hash=-2010331134\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:26,923\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=19 --runtime-env-hash=-2010331134\n",
|
||||
"\u001b[2m\u001b[36m(GBDTTrainable pid=19852)\u001b[0m 2022-05-19 15:19:29,272\tINFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.\n",
|
||||
"\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=19876)\u001b[0m [15:19:29] task [xgboost.ray]:4505889744 got new rank 1\n",
|
||||
"\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=19875)\u001b[0m [15:19:29] task [xgboost.ray]:6941849424 got new rank 0\n",
|
||||
"COMET WARNING: The given value of the metric episodes_total was None; ignoring\n",
|
||||
"COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 1.0.0 created\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Result for XGBoostTrainer_ac544_00000:\n",
|
||||
" date: 2022-05-19_15-19-30\n",
|
||||
" done: false\n",
|
||||
" experiment_id: d3007bd6a2734b328fd90385485c5a8d\n",
|
||||
" hostname: Kais-MacBook-Pro.local\n",
|
||||
" iterations_since_restore: 1\n",
|
||||
" node_ip: 127.0.0.1\n",
|
||||
" pid: 19852\n",
|
||||
" should_checkpoint: true\n",
|
||||
" time_since_restore: 6.529659032821655\n",
|
||||
" time_this_iter_s: 6.529659032821655\n",
|
||||
" time_total_s: 6.529659032821655\n",
|
||||
" timestamp: 1652969970\n",
|
||||
" timesteps_since_restore: 0\n",
|
||||
" train-rmse: 0.357284\n",
|
||||
" training_iteration: 1\n",
|
||||
" trial_id: ac544_00000\n",
|
||||
" warmup_time: 0.003961086273193359\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 2.48 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:1.0.0' has started uploading asynchronously\n",
|
||||
"COMET WARNING: The given value of the metric episodes_total was None; ignoring\n",
|
||||
"COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 2.0.0 created (previous was: 1.0.0)\n",
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 3.86 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:2.0.0' has started uploading asynchronously\n",
|
||||
"COMET WARNING: The given value of the metric episodes_total was None; ignoring\n",
|
||||
"COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 3.0.0 created (previous was: 2.0.0)\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:1.0.0' has been fully uploaded successfully\n",
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 5.31 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:3.0.0' has started uploading asynchronously\n",
|
||||
"COMET WARNING: The given value of the metric episodes_total was None; ignoring\n",
|
||||
"COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 4.0.0 created (previous was: 3.0.0)\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:2.0.0' has been fully uploaded successfully\n",
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 6.76 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:4.0.0' has started uploading asynchronously\n",
|
||||
"COMET WARNING: The given value of the metric episodes_total was None; ignoring\n",
|
||||
"COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 5.0.0 created (previous was: 4.0.0)\n",
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 8.21 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:3.0.0' has been fully uploaded successfully\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:5.0.0' has started uploading asynchronously\n",
|
||||
"COMET WARNING: The given value of the metric episodes_total was None; ignoring\n",
|
||||
"COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:4.0.0' has been fully uploaded successfully\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 6.0.0 created (previous was: 5.0.0)\n",
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 9.87 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:6.0.0' has started uploading asynchronously\n",
|
||||
"COMET WARNING: The given value of the metric episodes_total was None; ignoring\n",
|
||||
"COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:5.0.0' has been fully uploaded successfully\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 7.0.0 created (previous was: 6.0.0)\n",
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 11.46 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:7.0.0' has started uploading asynchronously\n",
|
||||
"COMET WARNING: The given value of the metric episodes_total was None; ignoring\n",
|
||||
"COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:6.0.0' has been fully uploaded successfully\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 8.0.0 created (previous was: 7.0.0)\n",
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 12.84 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:8.0.0' has started uploading asynchronously\n",
|
||||
"COMET WARNING: The given value of the metric episodes_total was None; ignoring\n",
|
||||
"COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:7.0.0' has been fully uploaded successfully\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 9.0.0 created (previous was: 8.0.0)\n",
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 14.36 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:9.0.0' has started uploading asynchronously\n",
|
||||
"COMET WARNING: The given value of the metric episodes_total was None; ignoring\n",
|
||||
"COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:8.0.0' has been fully uploaded successfully\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 10.0.0 created (previous was: 9.0.0)\n",
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 16.37 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:10.0.0' has started uploading asynchronously\n",
|
||||
"\u001b[2m\u001b[36m(GBDTTrainable pid=19852)\u001b[0m 2022-05-19 15:19:33,890\tINFO main.py:1519 -- [RayXGBoost] Finished XGBoost training on training data with total N=569 in 7.96 seconds (4.61 pure XGBoost training time).\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:9.0.0' has been fully uploaded successfully\n",
|
||||
"COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 11.0.0 created (previous was: 10.0.0)\n",
|
||||
"COMET INFO: Scheduling the upload of 3 assets for a size of 16.39 KB, this can take some time\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:11.0.0' has started uploading asynchronously\n",
|
||||
"COMET INFO: ---------------------------\n",
|
||||
"COMET INFO: Comet.ml Experiment Summary\n",
|
||||
"COMET INFO: ---------------------------\n",
|
||||
"COMET INFO: Data:\n",
|
||||
"COMET INFO: display_summary_level : 1\n",
|
||||
"COMET INFO: url : https://www.comet.ml/krfricke/ray-air-example/ecd3726ca127497ba7386003a249fad6\n",
|
||||
"COMET INFO: Metrics [count] (min, max):\n",
|
||||
"COMET INFO: iterations_since_restore [10] : (1, 10)\n",
|
||||
"COMET INFO: time_since_restore [10] : (6.529659032821655, 9.720295906066895)\n",
|
||||
"COMET INFO: time_this_iter_s [10] : (0.3124058246612549, 6.529659032821655)\n",
|
||||
"COMET INFO: time_total_s [10] : (6.529659032821655, 9.720295906066895)\n",
|
||||
"COMET INFO: timestamp [10] : (1652969970, 1652969973)\n",
|
||||
"COMET INFO: timesteps_since_restore : 0\n",
|
||||
"COMET INFO: train-rmse [10] : (0.030717, 0.357284)\n",
|
||||
"COMET INFO: training_iteration [10] : (1, 10)\n",
|
||||
"COMET INFO: warmup_time : 0.003961086273193359\n",
|
||||
"COMET INFO: Others:\n",
|
||||
"COMET INFO: Created from : Ray\n",
|
||||
"COMET INFO: Name : XGBoostTrainer_ac544_00000\n",
|
||||
"COMET INFO: experiment_id : d3007bd6a2734b328fd90385485c5a8d\n",
|
||||
"COMET INFO: trial_id : ac544_00000\n",
|
||||
"COMET INFO: System Information:\n",
|
||||
"COMET INFO: date : 2022-05-19_15-19-33\n",
|
||||
"COMET INFO: hostname : Kais-MacBook-Pro.local\n",
|
||||
"COMET INFO: node_ip : 127.0.0.1\n",
|
||||
"COMET INFO: pid : 19852\n",
|
||||
"COMET INFO: Uploads:\n",
|
||||
"COMET INFO: artifact assets : 33 (107.92 KB)\n",
|
||||
"COMET INFO: artifacts : 11\n",
|
||||
"COMET INFO: environment details : 1\n",
|
||||
"COMET INFO: filename : 1\n",
|
||||
"COMET INFO: installed packages : 1\n",
|
||||
"COMET INFO: notebook : 1\n",
|
||||
"COMET INFO: source_code : 1\n",
|
||||
"COMET INFO: ---------------------------\n",
|
||||
"COMET INFO: Uploading metrics, params, and assets to Comet before program termination (may take several seconds)\n",
|
||||
"COMET INFO: The Python SDK has 3600 seconds to finish before aborting...\n",
|
||||
"COMET INFO: Waiting for completion of the file uploads (may take several seconds)\n",
|
||||
"COMET INFO: The Python SDK has 10800 seconds to finish before aborting...\n",
|
||||
"COMET INFO: Still uploading 6 file(s), remaining 21.05 KB/116.69 KB\n",
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:10.0.0' has been fully uploaded successfully\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:11.0.0' has been fully uploaded successfully\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Result for XGBoostTrainer_ac544_00000:\n",
|
||||
" date: 2022-05-19_15-19-33\n",
|
||||
" done: true\n",
|
||||
" experiment_id: d3007bd6a2734b328fd90385485c5a8d\n",
|
||||
" experiment_tag: '0'\n",
|
||||
" hostname: Kais-MacBook-Pro.local\n",
|
||||
" iterations_since_restore: 10\n",
|
||||
" node_ip: 127.0.0.1\n",
|
||||
" pid: 19852\n",
|
||||
" should_checkpoint: true\n",
|
||||
" time_since_restore: 9.720295906066895\n",
|
||||
" time_this_iter_s: 0.39761900901794434\n",
|
||||
" time_total_s: 9.720295906066895\n",
|
||||
" timestamp: 1652969973\n",
|
||||
" timesteps_since_restore: 0\n",
|
||||
" train-rmse: 0.030717\n",
|
||||
" training_iteration: 10\n",
|
||||
" trial_id: ac544_00000\n",
|
||||
" warmup_time: 0.003961086273193359\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2022-05-19 15:19:35,621\tINFO tune.py:753 -- Total run time: 15.75 seconds (14.94 seconds for the tuning loop).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"comet_project = \"ray_air_example\"\n",
|
||||
"\n",
|
||||
"train_dataset = get_train_dataset()\n",
|
||||
"result = train_model(train_dataset=train_dataset, comet_project=comet_project)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "be28bdd3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out your [Comet ML](https://www.comet.ml/) project to see the results!"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"jupytext": {
|
||||
"cell_metadata_filter": "-all",
|
||||
"main_language": "python",
|
||||
"notebook_metadata_filter": "-all"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
414
doc/source/ray-air/examples/upload_to_wandb.ipynb
Normal file
414
doc/source/ray-air/examples/upload_to_wandb.ipynb
Normal file
|
@ -0,0 +1,414 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f37e8a9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Logging results and uploading models to Weights & Biases\n",
|
||||
"In this example, we train a simple XGBoost model and log the training\n",
|
||||
"results to Weights & Biases. We also save the resulting model checkpoints\n",
|
||||
"as artifacts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "27d04c97",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's start with installing our dependencies:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "4e697e5d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install -qU \"ray[tune]\" sklearn xgboost_ray wandb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3096e7c9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we need some imports:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "9c286701",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import ray\n",
|
||||
"\n",
|
||||
"from ray.ml import RunConfig\n",
|
||||
"from ray.ml.result import Result\n",
|
||||
"from ray.ml.train.integrations.xgboost import XGBoostTrainer\n",
|
||||
"from ray.tune.integration.wandb import WandbLoggerCallback\n",
|
||||
"from sklearn.datasets import load_breast_cancer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2efa1564",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We define a simple function that returns our training dataset as a Ray Dataset:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a63ebd10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_train_dataset() -> ray.data.Dataset:\n",
|
||||
" \"\"\"Return the \"Breast cancer\" dataset as a Ray dataset.\"\"\"\n",
|
||||
" data_raw = load_breast_cancer(as_frame=True)\n",
|
||||
" df = data_raw[\"data\"]\n",
|
||||
" df[\"target\"] = data_raw[\"target\"]\n",
|
||||
" return ray.data.from_pandas(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d07cf41f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we define a simple training function. All the magic happens within the `WandbLoggerCallback`:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"WandbLoggerCallback(\n",
|
||||
" project=wandb_project,\n",
|
||||
" save_checkpoints=True,\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"It will automatically log all results to Weights & Biases and upload the checkpoints as artifacts. It assumes you're logged into Wandb via an API key or `wandb login`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "52edfde0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def train_model(train_dataset: ray.data.Dataset, wandb_project: str) -> Result:\n",
|
||||
" \"\"\"Train a simple XGBoost model and return the result.\"\"\"\n",
|
||||
" trainer = XGBoostTrainer(\n",
|
||||
" scaling_config={\"num_workers\": 2},\n",
|
||||
" params={\"tree_method\": \"auto\"},\n",
|
||||
" label_column=\"target\",\n",
|
||||
" datasets={\"train\": train_dataset},\n",
|
||||
" num_boost_round=10,\n",
|
||||
" run_config=RunConfig(\n",
|
||||
" callbacks=[\n",
|
||||
" # This is the part needed to enable logging to Weights & Biases.\n",
|
||||
" # It assumes you've logged in before, e.g. with `wandb login`.\n",
|
||||
" WandbLoggerCallback(\n",
|
||||
" project=wandb_project,\n",
|
||||
" save_checkpoints=True,\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
" ),\n",
|
||||
" )\n",
|
||||
" result = trainer.fit()\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1959ce19",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's kick off a run:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "64f80d6c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2022-05-19 15:22:11,956\tINFO services.py:1483 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8266\u001b[39m\u001b[22m\n",
|
||||
"2022-05-19 15:22:15,995\tINFO wandb.py:172 -- Already logged into W&B.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"== Status ==<br>Current time: 2022-05-19 15:22:42 (running for 00:00:26.61)<br>Memory usage on this node: 10.2/16.0 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/4.6 GiB heap, 0.0/2.0 GiB objects<br>Result logdir: /Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14<br>Number of trials: 1/1 (1 TERMINATED)<br><table>\n",
|
||||
"<thead>\n",
|
||||
"<tr><th>Trial name </th><th>status </th><th>loc </th><th style=\"text-align: right;\"> iter</th><th style=\"text-align: right;\"> total time (s)</th><th style=\"text-align: right;\"> train-rmse</th></tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr><td>XGBoostTrainer_14a73_00000</td><td>TERMINATED</td><td>127.0.0.1:20065</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 10.2724</td><td style=\"text-align: right;\"> 0.030717</td></tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table><br><br>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:17,422\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=16 --runtime-env-hash=-2010331134\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mkaifricke\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
|
||||
"\u001b[2m\u001b[36m(GBDTTrainable pid=20065)\u001b[0m UserWarning: Dataset 'train' has 1 blocks, which is less than the `num_workers` 2. This dataset will be automatically repartitioned to 2 blocks.\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:23,215\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=17 --runtime-env-hash=-2010331069\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Tracking run with wandb version 0.12.16"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Run data is saved locally in <code>/Users/kai/coding/ray/doc/source/ray-air/examples/wandb/run-20220519_152218-14a73_00000</code>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Syncing run <strong><a href=\"https://wandb.ai/kaifricke/ray_air_example/runs/14a73_00000\" target=\"_blank\">XGBoostTrainer_14a73_00000</a></strong> to <a href=\"https://wandb.ai/kaifricke/ray_air_example\" target=\"_blank\">Weights & Biases</a> (<a href=\"https://wandb.me/run\" target=\"_blank\">docs</a>)<br/>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[2m\u001b[36m(GBDTTrainable pid=20065)\u001b[0m 2022-05-19 15:22:24,711\tINFO main.py:980 -- [RayXGBoost] Created 2 new actors (2 total actors). Waiting until actors are ready for training.\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:26,090\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=18 --runtime-env-hash=-2010331069\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:26,234\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=19 --runtime-env-hash=-2010331134\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:26,236\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=20 --runtime-env-hash=-2010331134\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:26,239\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=21 --runtime-env-hash=-2010331134\n",
|
||||
"\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:26,263\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=22 --runtime-env-hash=-2010331134\n",
|
||||
"\u001b[2m\u001b[36m(GBDTTrainable pid=20065)\u001b[0m 2022-05-19 15:22:29,260\tINFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.\n",
|
||||
"\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=20130)\u001b[0m [15:22:29] task [xgboost.ray]:6859875216 got new rank 0\n",
|
||||
"\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=20131)\u001b[0m [15:22:29] task [xgboost.ray]:4625795280 got new rank 1\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000000)... Done. 0.1s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Result for XGBoostTrainer_14a73_00000:\n",
|
||||
" date: 2022-05-19_15-22-31\n",
|
||||
" done: false\n",
|
||||
" experiment_id: 2d50bfe80d2a441e80f4ca05f7c3b607\n",
|
||||
" hostname: Kais-MacBook-Pro.local\n",
|
||||
" iterations_since_restore: 1\n",
|
||||
" node_ip: 127.0.0.1\n",
|
||||
" pid: 20065\n",
|
||||
" should_checkpoint: true\n",
|
||||
" time_since_restore: 10.080440044403076\n",
|
||||
" time_this_iter_s: 10.080440044403076\n",
|
||||
" time_total_s: 10.080440044403076\n",
|
||||
" timestamp: 1652970151\n",
|
||||
" timesteps_since_restore: 0\n",
|
||||
" train-rmse: 0.357284\n",
|
||||
" training_iteration: 1\n",
|
||||
" trial_id: 14a73_00000\n",
|
||||
" warmup_time: 0.006903171539306641\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000001)... Done. 0.1s\n",
|
||||
"\u001b[2m\u001b[36m(GBDTTrainable pid=20065)\u001b[0m 2022-05-19 15:22:32,051\tINFO main.py:1519 -- [RayXGBoost] Finished XGBoost training on training data with total N=569 in 7.37 seconds (2.79 pure XGBoost training time).\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000002)... Done. 0.1s\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000003)... Done. 0.1s\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000004)... Done. 0.1s\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000005)... Done. 0.1s\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000006)... Done. 0.1s\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000007)... Done. 0.1s\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000008)... Done. 0.1s\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000009)... Done. 0.1s\n",
|
||||
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000009)... Done. 0.1s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Result for XGBoostTrainer_14a73_00000:\n",
|
||||
" date: 2022-05-19_15-22-32\n",
|
||||
" done: true\n",
|
||||
" experiment_id: 2d50bfe80d2a441e80f4ca05f7c3b607\n",
|
||||
" experiment_tag: '0'\n",
|
||||
" hostname: Kais-MacBook-Pro.local\n",
|
||||
" iterations_since_restore: 10\n",
|
||||
" node_ip: 127.0.0.1\n",
|
||||
" pid: 20065\n",
|
||||
" should_checkpoint: true\n",
|
||||
" time_since_restore: 10.272444248199463\n",
|
||||
" time_this_iter_s: 0.023891210556030273\n",
|
||||
" time_total_s: 10.272444248199463\n",
|
||||
" timestamp: 1652970152\n",
|
||||
" timesteps_since_restore: 0\n",
|
||||
" train-rmse: 0.030717\n",
|
||||
" training_iteration: 10\n",
|
||||
" trial_id: 14a73_00000\n",
|
||||
" warmup_time: 0.006903171539306641\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2022-05-19 15:22:42,727\tINFO tune.py:753 -- Total run time: 27.83 seconds (26.61 seconds for the tuning loop).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"VBox(children=(Label(value='0.090 MB of 0.090 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<style>\n",
|
||||
" table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
|
||||
" .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
|
||||
" .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
|
||||
" </style>\n",
|
||||
"<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>iterations_since_restore</td><td>▁▂▃▃▄▅▆▆▇█</td></tr><tr><td>time_since_restore</td><td>▁▂▃▃▄▅▅▆▇█</td></tr><tr><td>time_this_iter_s</td><td>█▁▁▁▁▁▁▁▁▁</td></tr><tr><td>time_total_s</td><td>▁▂▃▃▄▅▅▆▇█</td></tr><tr><td>timestamp</td><td>▁▁▁▁▁▁▁▁██</td></tr><tr><td>timesteps_since_restore</td><td>▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train-rmse</td><td>█▆▄▃▂▂▂▁▁▁</td></tr><tr><td>training_iteration</td><td>▁▂▃▃▄▅▆▆▇█</td></tr><tr><td>warmup_time</td><td>▁▁▁▁▁▁▁▁▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>iterations_since_restore</td><td>10</td></tr><tr><td>time_since_restore</td><td>10.27244</td></tr><tr><td>time_this_iter_s</td><td>0.02389</td></tr><tr><td>time_total_s</td><td>10.27244</td></tr><tr><td>timestamp</td><td>1652970152</td></tr><tr><td>timesteps_since_restore</td><td>0</td></tr><tr><td>train-rmse</td><td>0.03072</td></tr><tr><td>training_iteration</td><td>10</td></tr><tr><td>warmup_time</td><td>0.0069</td></tr></table><br/></div></div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Synced <strong style=\"color:#cdcd00\">XGBoostTrainer_14a73_00000</strong>: <a href=\"https://wandb.ai/kaifricke/ray_air_example/runs/14a73_00000\" target=\"_blank\">https://wandb.ai/kaifricke/ray_air_example/runs/14a73_00000</a><br/>Synced 5 W&B file(s), 0 media file(s), 21 artifact file(s) and 0 other file(s)"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Find logs at: <code>./wandb/run-20220519_152218-14a73_00000/logs</code>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"wandb_project = \"ray_air_example\"\n",
|
||||
"\n",
|
||||
"train_dataset = get_train_dataset()\n",
|
||||
"result = train_model(train_dataset=train_dataset, wandb_project=wandb_project)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "78701c42",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out your [WandB](https://wandb.ai/) project to see the results!"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"jupytext": {
|
||||
"cell_metadata_filter": "-all",
|
||||
"main_language": "python",
|
||||
"notebook_metadata_filter": "-all"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -147,22 +147,6 @@ py_test(
|
|||
args = ["--smoke-test"]
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "upload_to_comet_ml",
|
||||
size = "medium",
|
||||
srcs = ["examples/upload_to_comet_ml.py"],
|
||||
tags = ["team:ml", "exclusive", "needs_credentials"],
|
||||
deps = [":ml_lib"]
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "upload_to_wandb",
|
||||
size = "medium",
|
||||
srcs = ["examples/upload_to_wandb.py"],
|
||||
tags = ["team:ml", "exclusive", "needs_credentials"],
|
||||
deps = [":ml_lib"]
|
||||
)
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# Tests from the python/ray/ml/tests directory.
|
||||
# Covers all tests starting with `test_`.
|
||||
|
|
1
python/ray/ml/examples/upload_to_comet_ml.ipynb
Symbolic link
1
python/ray/ml/examples/upload_to_comet_ml.ipynb
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../doc/source/ray-air/examples/upload_to_comet_ml.ipynb
|
|
@ -1,50 +0,0 @@
|
|||
"""
|
||||
In this example, we train a simple XGBoost model and log the training
|
||||
results to Comet ML. We also save the resulting model checkpoints
|
||||
as artifacts.
|
||||
"""
|
||||
import ray
|
||||
|
||||
from ray.ml import RunConfig
|
||||
from ray.ml.result import Result
|
||||
from ray.ml.train.integrations.xgboost import XGBoostTrainer
|
||||
from ray.tune.integration.comet import CometLoggerCallback
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
|
||||
def get_train_dataset() -> ray.data.Dataset:
    """Load scikit-learn's "Breast cancer" data and wrap it in a Ray dataset.

    The labels are stored in a ``target`` column next to the feature columns.
    """
    bunch = load_breast_cancer(as_frame=True)
    frame = bunch["data"]
    # Attach the labels to the feature frame so a single table is handed to Ray.
    frame["target"] = bunch["target"]
    return ray.data.from_pandas(frame)
|
||||
|
||||
|
||||
def train_model(train_dataset: ray.data.Dataset, comet_project: str) -> Result:
    """Fit a small XGBoost model and report the run to Comet ML.

    Args:
        train_dataset: Dataset given to the trainer under the ``"train"`` key;
            the label is read from its ``target`` column.
        comet_project: Project name passed to the Comet ML logger callback.

    Returns:
        The result of ``XGBoostTrainer.fit()``.
    """
    # This callback is the part needed to enable logging to Comet ML.
    # It assumes Comet ML can find a valid API key (e.g. by setting
    # the ``COMET_API_KEY`` environment variable).
    comet_callback = CometLoggerCallback(
        project_name=comet_project,
        save_checkpoints=True,
    )
    xgb_trainer = XGBoostTrainer(
        scaling_config={"num_workers": 2},
        params={"tree_method": "auto"},
        label_column="target",
        datasets={"train": train_dataset},
        num_boost_round=10,
        run_config=RunConfig(callbacks=[comet_callback]),
    )
    return xgb_trainer.fit()
|
||||
|
||||
|
||||
# Comet ML project that receives the logged results.
comet_project = "ray_air_example"

# Build the dataset first, then train with Comet ML logging enabled.
train_dataset = get_train_dataset()
result = train_model(
    train_dataset=train_dataset,
    comet_project=comet_project,
)
|
1
python/ray/ml/examples/upload_to_wandb.ipynb
Symbolic link
1
python/ray/ml/examples/upload_to_wandb.ipynb
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../doc/source/ray-air/examples/upload_to_wandb.ipynb
|
|
@ -1,49 +0,0 @@
|
|||
"""
|
||||
In this example, we train a simple XGBoost model and log the training
|
||||
results to Weights & Biases. We also save the resulting model checkpoints
|
||||
as artifacts.
|
||||
"""
|
||||
import ray
|
||||
|
||||
from ray.ml import RunConfig
|
||||
from ray.ml.result import Result
|
||||
from ray.ml.train.integrations.xgboost import XGBoostTrainer
|
||||
from ray.tune.integration.wandb import WandbLoggerCallback
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
|
||||
def get_train_dataset() -> ray.data.Dataset:
    """Provide the scikit-learn "Breast cancer" data as a Ray dataset.

    The returned table holds the feature columns plus a ``target`` column
    with the labels.
    """
    loaded = load_breast_cancer(as_frame=True)
    table = loaded["data"]
    # Add the labels as an extra column of the feature frame.
    table["target"] = loaded["target"]
    return ray.data.from_pandas(table)
|
||||
|
||||
|
||||
def train_model(train_dataset: ray.data.Dataset, wandb_project: str) -> Result:
    """Fit a small XGBoost model and report the run to Weights & Biases.

    Args:
        train_dataset: Dataset given to the trainer under the ``"train"`` key;
            the label is read from its ``target`` column.
        wandb_project: Project name passed to the W&B logger callback.

    Returns:
        The result of ``XGBoostTrainer.fit()``.
    """
    # This callback is the part needed to enable logging to Weights & Biases.
    # It assumes you've logged in before, e.g. with `wandb login`.
    wandb_callback = WandbLoggerCallback(
        project=wandb_project,
        save_checkpoints=True,
    )
    xgb_trainer = XGBoostTrainer(
        scaling_config={"num_workers": 2},
        params={"tree_method": "auto"},
        label_column="target",
        datasets={"train": train_dataset},
        num_boost_round=10,
        run_config=RunConfig(callbacks=[wandb_callback]),
    )
    return xgb_trainer.fit()
|
||||
|
||||
|
||||
# Weights & Biases project that receives the logged results.
wandb_project = "ray_air_example"

# Build the dataset first, then train with W&B logging enabled.
train_dataset = get_train_dataset()
result = train_model(
    train_dataset=train_dataset,
    wandb_project=wandb_project,
)
|
Loading…
Add table
Reference in a new issue