diff --git a/.buildkite/pipeline.ml.yml b/.buildkite/pipeline.ml.yml index 7a169fbd4..1228cfbc1 100644 --- a/.buildkite/pipeline.ml.yml +++ b/.buildkite/pipeline.ml.yml @@ -6,9 +6,7 @@ - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,-needs_credentials python/ray/ml/... # Only setup credentials in branch builds - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi - - python ./ci/env/setup_credentials.py wandb comet_ml - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,needs_credentials python/ray/ml/... - - python ./ci/env/cleanup_test_state.py wandb comet_ml - label: ":brain: RLlib: Learning discr. actions TF2-static-graph" conditions: ["RAY_CI_RLLIB_AFFECTED"] @@ -385,4 +383,6 @@ commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - DOC_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh + - python ./ci/env/setup_credentials.py wandb comet_ml - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air,-gpu,-py37,-post_wheel_build doc/... + - python ./ci/env/cleanup_test_state.py wandb comet_ml diff --git a/doc/source/_toc.yml b/doc/source/_toc.yml index 225e33d3a..887c8ad35 100644 --- a/doc/source/_toc.yml +++ b/doc/source/_toc.yml @@ -184,6 +184,8 @@ parts: - file: ray-air/examples/analyze_tuning_results title: "Analyze hyperparameter tuning results" - file: ray-air/examples/rl_serving_example + - file: ray-air/examples/upload_to_comet_ml + - file: ray-air/examples/upload_to_wandb - file: ray-air/examples/lightgbm_example - file: ray-air/examples/rl_online_example - file: ray-air/examples/rl_offline_example diff --git a/doc/source/ray-air/examples/index.rst b/doc/source/ray-air/examples/index.rst index 2e1296e0b..4f34095cc 100644 --- a/doc/source/ray-air/examples/index.rst +++ b/doc/source/ray-air/examples/index.rst @@ -11,6 +11,8 @@ Guides - :doc:`/ray-air/examples/analyze_tuning_results`: How to analyze trial results (e.g. find the best trial) of a hyperparameter tuning run. - :doc:`/ray-air/examples/rl_serving_example` +- :doc:`/ray-air/examples/upload_to_comet_ml` +- :doc:`/ray-air/examples/upload_to_wandb` Trainers diff --git a/doc/source/ray-air/examples/upload_to_comet_ml.ipynb b/doc/source/ray-air/examples/upload_to_comet_ml.ipynb new file mode 100644 index 000000000..1b1fd4e9e --- /dev/null +++ b/doc/source/ray-air/examples/upload_to_comet_ml.ipynb @@ -0,0 +1,409 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "98d7c620", + "metadata": {}, + "source": [ + "# Logging results and uploading models to Comet ML\n", + "In this example, we train a simple XGBoost model and log the training\n", + "results to Comet ML. We also save the resulting model checkpoints\n", + "as artifacts." 
+ ] + }, + { + "cell_type": "markdown", + "id": "c6e66577", + "metadata": {}, + "source": [ + "Let's start with installing our dependencies:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6d6297ef", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU \"ray[tune]\" sklearn xgboost_ray comet_ml" + ] + }, + { + "cell_type": "markdown", + "id": "c2e21446", + "metadata": {}, + "source": [ + "Then we need some imports:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dffff484", + "metadata": {}, + "outputs": [], + "source": [ + "import ray\n", + "\n", + "from ray.ml import RunConfig\n", + "from ray.ml.result import Result\n", + "from ray.ml.train.integrations.xgboost import XGBoostTrainer\n", + "from ray.tune.integration.comet import CometLoggerCallback\n", + "from sklearn.datasets import load_breast_cancer" + ] + }, + { + "cell_type": "markdown", + "id": "29fcd93b", + "metadata": {}, + "source": [ + "We define a simple function that returns our training dataset as a Ray Dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cf830706", + "metadata": {}, + "outputs": [], + "source": [ + "def get_train_dataset() -> ray.data.Dataset:\n", + " \"\"\"Return the \"Breast cancer\" dataset as a Ray dataset.\"\"\"\n", + " data_raw = load_breast_cancer(as_frame=True)\n", + " df = data_raw[\"data\"]\n", + " df[\"target\"] = data_raw[\"target\"]\n", + " return ray.data.from_pandas(df)" + ] + }, + { + "cell_type": "markdown", + "id": "0f48f948", + "metadata": {}, + "source": [ + "Now we define a simple training function. All the magic happens within the `CometLoggerCallback`:\n", + "\n", + "```python\n", + "CometLoggerCallback(\n", + " project_name=comet_project,\n", + " save_checkpoints=True,\n", + ")\n", + "```\n", + "\n", + "It will automatically log all results to Comet ML and upload the checkpoints as artifacts. It assumes you're logged in into Comet via an API key or your `~./.comet.config`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "230f23a3", + "metadata": {}, + "outputs": [], + "source": [ + "def train_model(train_dataset: ray.data.Dataset, comet_project: str) -> Result:\n", + " \"\"\"Train a simple XGBoost model and return the result.\"\"\"\n", + " trainer = XGBoostTrainer(\n", + " scaling_config={\"num_workers\": 2},\n", + " params={\"tree_method\": \"auto\"},\n", + " label_column=\"target\",\n", + " datasets={\"train\": train_dataset},\n", + " num_boost_round=10,\n", + " run_config=RunConfig(\n", + " callbacks=[\n", + " # This is the part needed to enable logging to Comet ML.\n", + " # It assumes Comet ML can find a valid API (e.g. by setting\n", + " # the ``COMET_API_KEY`` environment variable).\n", + " CometLoggerCallback(\n", + " project_name=comet_project,\n", + " save_checkpoints=True,\n", + " )\n", + " ]\n", + " ),\n", + " )\n", + " result = trainer.fit()\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "id": "711b1d7d", + "metadata": {}, + "source": [ + "Let's kick off a run:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9bfd9a8d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-05-19 15:19:17,237\tINFO services.py:1483 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265\u001b[39m\u001b[22m\n" + ] + }, + { + "data": { + "text/html": [ + "== Status ==
Current time: 2022-05-19 15:19:35 (running for 00:00:14.95)
Memory usage on this node: 10.2/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/5.12 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-19-19
Number of trials: 1/1 (1 TERMINATED)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc iter total time (s) train-rmse
XGBoostTrainer_ac544_00000TERMINATED127.0.0.1:19852 10 9.7203 0.030717


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "COMET WARNING: As you are running in a Jupyter environment, you will need to call `experiment.end()` when finished to ensure all metrics and code are logged before exiting.\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:21,584\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=16 --runtime-env-hash=-2010331134\n", + "COMET INFO: Experiment is live on comet.ml https://www.comet.ml/krfricke/ray-air-example/ecd3726ca127497ba7386003a249fad6\n", + "\n", + "COMET WARNING: Failed to add tag(s) None to the experiment\n", + "\n", + "COMET WARNING: Empty mapping given to log_params({}); ignoring\n", + "\u001b[2m\u001b[36m(GBDTTrainable pid=19852)\u001b[0m UserWarning: Dataset 'train' has 1 blocks, which is less than the `num_workers` 2. This dataset will be automatically repartitioned to 2 blocks.\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:24,628\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=17 --runtime-env-hash=-2010331069\n", + "\u001b[2m\u001b[36m(GBDTTrainable pid=19852)\u001b[0m 2022-05-19 15:19:25,961\tINFO main.py:980 -- [RayXGBoost] Created 2 new actors (2 total actors). 
Waiting until actors are ready for training.\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:26,830\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=18 --runtime-env-hash=-2010331069\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:26,918\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=20 --runtime-env-hash=-2010331134\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:26,922\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=21 --runtime-env-hash=-2010331134\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:26,922\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 --redis-password=5241590000000000 --startup-token=22 --runtime-env-hash=-2010331134\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:19:26,923\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61222 --object-store-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-19-14_632568_19778/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=62873 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:61938 
--redis-password=5241590000000000 --startup-token=19 --runtime-env-hash=-2010331134\n", + "\u001b[2m\u001b[36m(GBDTTrainable pid=19852)\u001b[0m 2022-05-19 15:19:29,272\tINFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.\n", + "\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=19876)\u001b[0m [15:19:29] task [xgboost.ray]:4505889744 got new rank 1\n", + "\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=19875)\u001b[0m [15:19:29] task [xgboost.ray]:6941849424 got new rank 0\n", + "COMET WARNING: The given value of the metric episodes_total was None; ignoring\n", + "COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 1.0.0 created\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for XGBoostTrainer_ac544_00000:\n", + " date: 2022-05-19_15-19-30\n", + " done: false\n", + " experiment_id: d3007bd6a2734b328fd90385485c5a8d\n", + " hostname: Kais-MacBook-Pro.local\n", + " iterations_since_restore: 1\n", + " node_ip: 127.0.0.1\n", + " pid: 19852\n", + " should_checkpoint: true\n", + " time_since_restore: 6.529659032821655\n", + " time_this_iter_s: 6.529659032821655\n", + " time_total_s: 6.529659032821655\n", + " timestamp: 1652969970\n", + " timesteps_since_restore: 0\n", + " train-rmse: 0.357284\n", + " training_iteration: 1\n", + " trial_id: ac544_00000\n", + " warmup_time: 0.003961086273193359\n", + " \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "COMET INFO: Scheduling the upload of 3 assets for a size of 2.48 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:1.0.0' has started uploading asynchronously\n", + "COMET WARNING: The given value of the metric episodes_total was None; ignoring\n", + "COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 2.0.0 created (previous was: 1.0.0)\n", + "COMET INFO: Scheduling the upload of 3 assets for a size of 3.86 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:2.0.0' has started uploading asynchronously\n", + "COMET WARNING: The given value of the metric episodes_total was None; ignoring\n", + "COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 3.0.0 created (previous was: 2.0.0)\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:1.0.0' has been fully uploaded successfully\n", + "COMET INFO: Scheduling the upload of 3 assets for a size of 5.31 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:3.0.0' has started uploading asynchronously\n", + "COMET WARNING: The given value of the metric episodes_total was None; ignoring\n", + "COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 4.0.0 created (previous was: 3.0.0)\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:2.0.0' has been fully uploaded successfully\n", + "COMET INFO: Scheduling the upload of 3 assets for a size of 6.76 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:4.0.0' has started uploading asynchronously\n", + "COMET WARNING: The given value of the 
metric episodes_total was None; ignoring\n", + "COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 5.0.0 created (previous was: 4.0.0)\n", + "COMET INFO: Scheduling the upload of 3 assets for a size of 8.21 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:3.0.0' has been fully uploaded successfully\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:5.0.0' has started uploading asynchronously\n", + "COMET WARNING: The given value of the metric episodes_total was None; ignoring\n", + "COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:4.0.0' has been fully uploaded successfully\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 6.0.0 created (previous was: 5.0.0)\n", + "COMET INFO: Scheduling the upload of 3 assets for a size of 9.87 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:6.0.0' has started uploading asynchronously\n", + "COMET WARNING: The given value of the metric episodes_total was None; ignoring\n", + "COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:5.0.0' has been fully uploaded successfully\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 7.0.0 created (previous was: 6.0.0)\n", + "COMET INFO: Scheduling the upload of 3 assets for a size of 11.46 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:7.0.0' has started uploading asynchronously\n", + "COMET WARNING: The given value of the metric episodes_total was None; ignoring\n", + "COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:6.0.0' has been fully uploaded successfully\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 8.0.0 created (previous was: 7.0.0)\n", + "COMET INFO: Scheduling the upload of 3 assets for a size of 12.84 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:8.0.0' has started uploading asynchronously\n", + "COMET WARNING: The given value of the metric episodes_total was None; ignoring\n", + "COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:7.0.0' has been fully uploaded successfully\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 9.0.0 created (previous was: 8.0.0)\n", + "COMET INFO: Scheduling the upload of 3 assets for a size of 14.36 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:9.0.0' has started uploading asynchronously\n", + "COMET WARNING: The given value of the metric episodes_total was None; ignoring\n", + "COMET WARNING: The given value of the metric timesteps_total was None; ignoring\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:8.0.0' has been fully uploaded successfully\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 10.0.0 created (previous was: 9.0.0)\n", + "COMET INFO: Scheduling the upload of 3 assets for a size 
of 16.37 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:10.0.0' has started uploading asynchronously\n", + "\u001b[2m\u001b[36m(GBDTTrainable pid=19852)\u001b[0m 2022-05-19 15:19:33,890\tINFO main.py:1519 -- [RayXGBoost] Finished XGBoost training on training data with total N=569 in 7.96 seconds (4.61 pure XGBoost training time).\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:9.0.0' has been fully uploaded successfully\n", + "COMET INFO: Artifact 'checkpoint_XGBoostTrainer_ac544_00000' version 11.0.0 created (previous was: 10.0.0)\n", + "COMET INFO: Scheduling the upload of 3 assets for a size of 16.39 KB, this can take some time\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:11.0.0' has started uploading asynchronously\n", + "COMET INFO: ---------------------------\n", + "COMET INFO: Comet.ml Experiment Summary\n", + "COMET INFO: ---------------------------\n", + "COMET INFO: Data:\n", + "COMET INFO: display_summary_level : 1\n", + "COMET INFO: url : https://www.comet.ml/krfricke/ray-air-example/ecd3726ca127497ba7386003a249fad6\n", + "COMET INFO: Metrics [count] (min, max):\n", + "COMET INFO: iterations_since_restore [10] : (1, 10)\n", + "COMET INFO: time_since_restore [10] : (6.529659032821655, 9.720295906066895)\n", + "COMET INFO: time_this_iter_s [10] : (0.3124058246612549, 6.529659032821655)\n", + "COMET INFO: time_total_s [10] : (6.529659032821655, 9.720295906066895)\n", + "COMET INFO: timestamp [10] : (1652969970, 1652969973)\n", + "COMET INFO: timesteps_since_restore : 0\n", + "COMET INFO: train-rmse [10] : (0.030717, 0.357284)\n", + "COMET INFO: training_iteration [10] : (1, 10)\n", + "COMET INFO: warmup_time : 0.003961086273193359\n", + "COMET INFO: Others:\n", + "COMET INFO: Created from : Ray\n", + "COMET INFO: Name : XGBoostTrainer_ac544_00000\n", + "COMET INFO: experiment_id : d3007bd6a2734b328fd90385485c5a8d\n", + "COMET INFO: trial_id : ac544_00000\n", + "COMET INFO: System Information:\n", + "COMET INFO: date : 2022-05-19_15-19-33\n", + "COMET INFO: hostname : Kais-MacBook-Pro.local\n", + "COMET INFO: node_ip : 127.0.0.1\n", + "COMET INFO: pid : 19852\n", + "COMET INFO: Uploads:\n", + "COMET INFO: artifact assets : 33 (107.92 KB)\n", + "COMET INFO: artifacts : 11\n", + "COMET INFO: environment details : 1\n", + "COMET INFO: filename : 1\n", + "COMET INFO: installed packages : 1\n", + "COMET INFO: notebook : 1\n", + "COMET INFO: source_code : 1\n", + "COMET INFO: ---------------------------\n", + "COMET INFO: Uploading metrics, params, and assets to Comet before program termination (may take several seconds)\n", + "COMET INFO: The Python SDK has 3600 seconds to finish before aborting...\n", + "COMET INFO: Waiting for completion of the file uploads (may take several seconds)\n", + "COMET INFO: The Python SDK has 10800 seconds to finish before aborting...\n", + "COMET INFO: Still uploading 6 file(s), remaining 21.05 KB/116.69 KB\n", + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:10.0.0' has been fully uploaded successfully\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "COMET INFO: Artifact 'krfricke/checkpoint_XGBoostTrainer_ac544_00000:11.0.0' has been fully uploaded successfully\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for XGBoostTrainer_ac544_00000:\n", + " date: 2022-05-19_15-19-33\n", + " done: true\n", + " experiment_id: d3007bd6a2734b328fd90385485c5a8d\n", + 
" experiment_tag: '0'\n", + " hostname: Kais-MacBook-Pro.local\n", + " iterations_since_restore: 10\n", + " node_ip: 127.0.0.1\n", + " pid: 19852\n", + " should_checkpoint: true\n", + " time_since_restore: 9.720295906066895\n", + " time_this_iter_s: 0.39761900901794434\n", + " time_total_s: 9.720295906066895\n", + " timestamp: 1652969973\n", + " timesteps_since_restore: 0\n", + " train-rmse: 0.030717\n", + " training_iteration: 10\n", + " trial_id: ac544_00000\n", + " warmup_time: 0.003961086273193359\n", + " \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-05-19 15:19:35,621\tINFO tune.py:753 -- Total run time: 15.75 seconds (14.94 seconds for the tuning loop).\n" + ] + } + ], + "source": [ + "comet_project = \"ray_air_example\"\n", + "\n", + "train_dataset = get_train_dataset()\n", + "result = train_model(train_dataset=train_dataset, comet_project=comet_project)" + ] + }, + { + "cell_type": "markdown", + "id": "be28bdd3", + "metadata": {}, + "source": [ + "Check out your [Comet ML](https://www.comet.ml/) project to see the results!" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/source/ray-air/examples/upload_to_wandb.ipynb b/doc/source/ray-air/examples/upload_to_wandb.ipynb new file mode 100644 index 000000000..48e8769d1 --- /dev/null +++ b/doc/source/ray-air/examples/upload_to_wandb.ipynb @@ -0,0 +1,414 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f37e8a9f", + "metadata": {}, + "source": [ + "# Logging results and uploading models to Weights & Biases\n", + "In this example, we train a simple XGBoost model and log the training\n", + "results to Weights & Biases. We also save the resulting model checkpoints\n", + "as artifacts." 
+ ] + }, + { + "cell_type": "markdown", + "id": "27d04c97", + "metadata": {}, + "source": [ + "Let's start with installing our dependencies:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4e697e5d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU \"ray[tune]\" sklearn xgboost_ray wandb" + ] + }, + { + "cell_type": "markdown", + "id": "3096e7c9", + "metadata": {}, + "source": [ + "Then we need some imports:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9c286701", + "metadata": {}, + "outputs": [], + "source": [ + "import ray\n", + "\n", + "from ray.ml import RunConfig\n", + "from ray.ml.result import Result\n", + "from ray.ml.train.integrations.xgboost import XGBoostTrainer\n", + "from ray.tune.integration.wandb import WandbLoggerCallback\n", + "from sklearn.datasets import load_breast_cancer" + ] + }, + { + "cell_type": "markdown", + "id": "2efa1564", + "metadata": {}, + "source": [ + "We define a simple function that returns our training dataset as a Ray Dataset:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a63ebd10", + "metadata": {}, + "outputs": [], + "source": [ + "def get_train_dataset() -> ray.data.Dataset:\n", + " \"\"\"Return the \"Breast cancer\" dataset as a Ray dataset.\"\"\"\n", + " data_raw = load_breast_cancer(as_frame=True)\n", + " df = data_raw[\"data\"]\n", + " df[\"target\"] = data_raw[\"target\"]\n", + " return ray.data.from_pandas(df)" + ] + }, + { + "cell_type": "markdown", + "id": "d07cf41f", + "metadata": {}, + "source": [ + "Now we define a simple training function. All the magic happens within the `WandbLoggerCallback`:\n", + "\n", + "```python\n", + "WandbLoggerCallback(\n", + " project=wandb_project,\n", + " save_checkpoints=True,\n", + ")\n", + "```\n", + "\n", + "It will automatically log all results to Weights & Biases and upload the checkpoints as artifacts. It assumes you're logged in into Wandb via an API key or `wandb login`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "52edfde0", + "metadata": {}, + "outputs": [], + "source": [ + "def train_model(train_dataset: ray.data.Dataset, wandb_project: str) -> Result:\n", + " \"\"\"Train a simple XGBoost model and return the result.\"\"\"\n", + " trainer = XGBoostTrainer(\n", + " scaling_config={\"num_workers\": 2},\n", + " params={\"tree_method\": \"auto\"},\n", + " label_column=\"target\",\n", + " datasets={\"train\": train_dataset},\n", + " num_boost_round=10,\n", + " run_config=RunConfig(\n", + " callbacks=[\n", + " # This is the part needed to enable logging to Weights & Biases.\n", + " # It assumes you've logged in before, e.g. with `wandb login`.\n", + " WandbLoggerCallback(\n", + " project=wandb_project,\n", + " save_checkpoints=True,\n", + " )\n", + " ]\n", + " ),\n", + " )\n", + " result = trainer.fit()\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "id": "1959ce19", + "metadata": {}, + "source": [ + "Let's kick off a run:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "64f80d6c", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-05-19 15:22:11,956\tINFO services.py:1483 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8266\u001b[39m\u001b[22m\n", + "2022-05-19 15:22:15,995\tINFO wandb.py:172 -- Already logged into W&B.\n" + ] + }, + { + "data": { + "text/html": [ + "== Status ==
Current time: 2022-05-19 15:22:42 (running for 00:00:26.61)
Memory usage on this node: 10.2/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/4.6 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14
Number of trials: 1/1 (1 TERMINATED)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc iter total time (s) train-rmse
XGBoostTrainer_14a73_00000TERMINATED127.0.0.1:20065 10 10.2724 0.030717


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:17,422\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=16 --runtime-env-hash=-2010331134\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mkaifricke\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[2m\u001b[36m(GBDTTrainable pid=20065)\u001b[0m UserWarning: Dataset 'train' has 1 blocks, which is less than the `num_workers` 2. This dataset will be automatically repartitioned to 2 blocks.\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:23,215\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=17 --runtime-env-hash=-2010331069\n" + ] + }, + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.12.16" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /Users/kai/coding/ray/doc/source/ray-air/examples/wandb/run-20220519_152218-14a73_00000" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run XGBoostTrainer_14a73_00000 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(GBDTTrainable pid=20065)\u001b[0m 2022-05-19 15:22:24,711\tINFO main.py:980 -- [RayXGBoost] Created 2 new actors (2 total actors). Waiting until actors are ready for training.\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:26,090\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=18 --runtime-env-hash=-2010331069\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:26,234\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=19 --runtime-env-hash=-2010331134\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:26,236\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=20 --runtime-env-hash=-2010331134\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:26,239\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 --object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=21 --runtime-env-hash=-2010331134\n", + "\u001b[2m\u001b[33m(raylet)\u001b[0m 2022-05-19 15:22:26,263\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=61838 
--object-store-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_15-22-09_017478_19912/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=63609 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:62933 --redis-password=5241590000000000 --startup-token=22 --runtime-env-hash=-2010331134\n", + "\u001b[2m\u001b[36m(GBDTTrainable pid=20065)\u001b[0m 2022-05-19 15:22:29,260\tINFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.\n", + "\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=20130)\u001b[0m [15:22:29] task [xgboost.ray]:6859875216 got new rank 0\n", + "\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=20131)\u001b[0m [15:22:29] task [xgboost.ray]:4625795280 got new rank 1\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000000)... Done. 0.1s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for XGBoostTrainer_14a73_00000:\n", + " date: 2022-05-19_15-22-31\n", + " done: false\n", + " experiment_id: 2d50bfe80d2a441e80f4ca05f7c3b607\n", + " hostname: Kais-MacBook-Pro.local\n", + " iterations_since_restore: 1\n", + " node_ip: 127.0.0.1\n", + " pid: 20065\n", + " should_checkpoint: true\n", + " time_since_restore: 10.080440044403076\n", + " time_this_iter_s: 10.080440044403076\n", + " time_total_s: 10.080440044403076\n", + " timestamp: 1652970151\n", + " timesteps_since_restore: 0\n", + " train-rmse: 0.357284\n", + " training_iteration: 1\n", + " trial_id: 14a73_00000\n", + " warmup_time: 0.006903171539306641\n", + " \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000001)... Done. 0.1s\n", + "\u001b[2m\u001b[36m(GBDTTrainable pid=20065)\u001b[0m 2022-05-19 15:22:32,051\tINFO main.py:1519 -- [RayXGBoost] Finished XGBoost training on training data with total N=569 in 7.37 seconds (2.79 pure XGBoost training time).\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000002)... Done. 0.1s\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000003)... Done. 0.1s\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000004)... Done. 0.1s\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000005)... Done. 0.1s\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000006)... Done. 0.1s\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000007)... Done. 
0.1s\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000008)... Done. 0.1s\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000009)... Done. 0.1s\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (/Users/kai/ray_results/XGBoostTrainer_2022-05-19_15-22-14/XGBoostTrainer_14a73_00000_0_2022-05-19_15-22-16/checkpoint_000009)... Done. 0.1s\n" + ] + }, + { + "data": { + "text/html": [ + "Waiting for W&B process to finish... (success)." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for XGBoostTrainer_14a73_00000:\n", + " date: 2022-05-19_15-22-32\n", + " done: true\n", + " experiment_id: 2d50bfe80d2a441e80f4ca05f7c3b607\n", + " experiment_tag: '0'\n", + " hostname: Kais-MacBook-Pro.local\n", + " iterations_since_restore: 10\n", + " node_ip: 127.0.0.1\n", + " pid: 20065\n", + " should_checkpoint: true\n", + " time_since_restore: 10.272444248199463\n", + " time_this_iter_s: 0.023891210556030273\n", + " time_total_s: 10.272444248199463\n", + " timestamp: 1652970152\n", + " timesteps_since_restore: 0\n", + " train-rmse: 0.030717\n", + " training_iteration: 10\n", + " trial_id: 14a73_00000\n", + " warmup_time: 0.006903171539306641\n", + " \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-05-19 15:22:42,727\tINFO tune.py:753 -- Total run time: 27.83 seconds (26.61 seconds for the tuning loop).\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Label(value='0.090 MB of 0.090 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "

Run history:

iterations_since_restore   ▁▂▃▃▄▅▆▆▇█
time_since_restore         ▁▂▃▃▄▅▅▆▇█
time_this_iter_s           █▁▁▁▁▁▁▁▁▁
time_total_s               ▁▂▃▃▄▅▅▆▇█
timestamp                  ▁▁▁▁▁▁▁▁██
timesteps_since_restore    ▁▁▁▁▁▁▁▁▁▁
train-rmse                 █▆▄▃▂▂▂▁▁▁
training_iteration         ▁▂▃▃▄▅▆▆▇█
warmup_time                ▁▁▁▁▁▁▁▁▁▁

Run summary:

iterations_since_restore   10
time_since_restore         10.27244
time_this_iter_s           0.02389
time_total_s               10.27244
timestamp                  1652970152
timesteps_since_restore    0
train-rmse                 0.03072
training_iteration         10
warmup_time                0.0069

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Synced XGBoostTrainer_14a73_00000: https://wandb.ai/kaifricke/ray_air_example/runs/14a73_00000
Synced 5 W&B file(s), 0 media file(s), 21 artifact file(s) and 0 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20220519_152218-14a73_00000/logs" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "wandb_project = \"ray_air_example\"\n", + "\n", + "train_dataset = get_train_dataset()\n", + "result = train_model(train_dataset=train_dataset, wandb_project=wandb_project)" + ] + }, + { + "cell_type": "markdown", + "id": "78701c42", + "metadata": {}, + "source": [ + "Check out your [WandB](https://wandb.ai/) project to see the results!" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python/ray/ml/BUILD b/python/ray/ml/BUILD index a46d1f9cd..2b2460568 100644 --- a/python/ray/ml/BUILD +++ b/python/ray/ml/BUILD @@ -147,22 +147,6 @@ py_test( args = ["--smoke-test"] ) -py_test( - name = "upload_to_comet_ml", - size = "medium", - srcs = ["examples/upload_to_comet_ml.py"], - tags = ["team:ml", "exclusive", "needs_credentials"], - deps = [":ml_lib"] -) - -py_test( - name = "upload_to_wandb", - size = "medium", - srcs = ["examples/upload_to_wandb.py"], - tags = ["team:ml", "exclusive", "needs_credentials"], - deps = [":ml_lib"] -) - # -------------------------------------------------------------------- # Tests from the python/ray/ml/tests directory. # Covers all tests starting with `test_`. diff --git a/python/ray/ml/examples/upload_to_comet_ml.ipynb b/python/ray/ml/examples/upload_to_comet_ml.ipynb new file mode 120000 index 000000000..9c6606ee1 --- /dev/null +++ b/python/ray/ml/examples/upload_to_comet_ml.ipynb @@ -0,0 +1 @@ +../../../../doc/source/ray-air/examples/upload_to_comet_ml.ipynb \ No newline at end of file diff --git a/python/ray/ml/examples/upload_to_comet_ml.py b/python/ray/ml/examples/upload_to_comet_ml.py deleted file mode 100644 index e78b89d6e..000000000 --- a/python/ray/ml/examples/upload_to_comet_ml.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -In this example, we train a simple XGBoost model and log the training -results to Comet ML. We also save the resulting model checkpoints -as artifacts. 
-""" -import ray - -from ray.ml import RunConfig -from ray.ml.result import Result -from ray.ml.train.integrations.xgboost import XGBoostTrainer -from ray.tune.integration.comet import CometLoggerCallback -from sklearn.datasets import load_breast_cancer - - -def get_train_dataset() -> ray.data.Dataset: - """Return the "Breast cancer" dataset as a Ray dataset.""" - data_raw = load_breast_cancer(as_frame=True) - df = data_raw["data"] - df["target"] = data_raw["target"] - return ray.data.from_pandas(df) - - -def train_model(train_dataset: ray.data.Dataset, comet_project: str) -> Result: - """Train a simple XGBoost model and return the result.""" - trainer = XGBoostTrainer( - scaling_config={"num_workers": 2}, - params={"tree_method": "auto"}, - label_column="target", - datasets={"train": train_dataset}, - num_boost_round=10, - run_config=RunConfig( - callbacks=[ - # This is the part needed to enable logging to Comet ML. - # It assumes Comet ML can find a valid API (e.g. by setting - # the ``COMET_API_KEY`` environment variable). - CometLoggerCallback( - project_name=comet_project, - save_checkpoints=True, - ) - ] - ), - ) - result = trainer.fit() - return result - - -comet_project = "ray_air_example" - -train_dataset = get_train_dataset() -result = train_model(train_dataset=train_dataset, comet_project=comet_project) diff --git a/python/ray/ml/examples/upload_to_wandb.ipynb b/python/ray/ml/examples/upload_to_wandb.ipynb new file mode 120000 index 000000000..e241f6fcb --- /dev/null +++ b/python/ray/ml/examples/upload_to_wandb.ipynb @@ -0,0 +1 @@ +../../../../doc/source/ray-air/examples/upload_to_wandb.ipynb \ No newline at end of file diff --git a/python/ray/ml/examples/upload_to_wandb.py b/python/ray/ml/examples/upload_to_wandb.py deleted file mode 100644 index 32e306a36..000000000 --- a/python/ray/ml/examples/upload_to_wandb.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -In this example, we train a simple XGBoost model and log the training -results to Weights & Biases. We also save the resulting model checkpoints -as artifacts. -""" -import ray - -from ray.ml import RunConfig -from ray.ml.result import Result -from ray.ml.train.integrations.xgboost import XGBoostTrainer -from ray.tune.integration.wandb import WandbLoggerCallback -from sklearn.datasets import load_breast_cancer - - -def get_train_dataset() -> ray.data.Dataset: - """Return the "Breast cancer" dataset as a Ray dataset.""" - data_raw = load_breast_cancer(as_frame=True) - df = data_raw["data"] - df["target"] = data_raw["target"] - return ray.data.from_pandas(df) - - -def train_model(train_dataset: ray.data.Dataset, wandb_project: str) -> Result: - """Train a simple XGBoost model and return the result.""" - trainer = XGBoostTrainer( - scaling_config={"num_workers": 2}, - params={"tree_method": "auto"}, - label_column="target", - datasets={"train": train_dataset}, - num_boost_round=10, - run_config=RunConfig( - callbacks=[ - # This is the part needed to enable logging to Weights & Biases. - # It assumes you've logged in before, e.g. with `wandb login`. - WandbLoggerCallback( - project=wandb_project, - save_checkpoints=True, - ) - ] - ), - ) - result = trainer.fit() - return result - - -wandb_project = "ray_air_example" - -train_dataset = get_train_dataset() -result = train_model(train_dataset=train_dataset, wandb_project=wandb_project)