[air/tune] Documentation testing fixes (#26409)

Richard Liaw 2022-07-09 19:47:21 -07:00 committed by GitHub
parent a68c02a15d
commit 5892a76a44
22 changed files with 247 additions and 135 deletions

View file

@@ -30,8 +30,8 @@ def py_test_run_all_subdirectory(include, exclude, extra_srcs, **kwargs):
     )

 # Runs all included notebooks as py_test targets, by first converting them to .py files with "test_myst_doc.py".
-def py_test_run_all_notebooks(include, exclude, **kwargs):
-    for file in native.glob(include = include, exclude = exclude, allow_empty=False):
+def py_test_run_all_notebooks(include, exclude, allow_empty=False, **kwargs):
+    for file in native.glob(include = include, exclude = exclude, allow_empty=allow_empty):
         print(file)
         basename = paths.split_extension(file)[0]
         if basename == file:
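
Note on the change above: Bazel's native.glob fails the build when allow_empty = False and the patterns match nothing. Threading allow_empty through the macro lets the BUILD files below exclude every notebook they would otherwise match (the # REGRESSION entries) without breaking the target definition. A minimal sketch of such a call site, with illustrative values:

    py_test_run_all_notebooks(
        size = "medium",
        include = ["*.ipynb"],
        # Everything is excluded while regressions are fixed, so the glob
        # matches nothing; allow_empty = True keeps Bazel from erroring out.
        exclude = ["*.ipynb"],
        allow_empty = True,
        tags = ["exclusive", "team:ml"],
    )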

View file

@@ -16,7 +16,12 @@ filegroup(
 py_test_run_all_notebooks(
     size = "medium",
     include = ["*.ipynb"],
-    exclude = ["big_data_ingestion.ipynb"],
+    exclude = [
+        "big_data_ingestion.ipynb",
+        "nyc_taxi_basic_processing.ipynb", # REGRESSION
+        "ocr_example.ipynb", # REGRESSION
+    ],
     data = ["//doc/source/data/examples:data_examples"],
     tags = ["exclusive", "team:ml"],
+    allow_empty = True # REGRESSION
 )

View file

@@ -13,7 +13,16 @@ filegroup(
 py_test_run_all_notebooks(
     size = "large",
     include = ["*.ipynb"],
-    exclude = ["huggingface_text_classification.ipynb", "torch_incremental_learning.ipynb"],
+    exclude = [
+        "huggingface_text_classification.ipynb",
+        "torch_incremental_learning.ipynb",
+        "feast_example.ipynb", # REGRESSION
+        "rl_offline_example.ipynb", # REGRESSION
+        "rl_online_example.ipynb", # REGRESSION
+        "convert_existing_pytorch_code_to_ray_air.ipynb", # REGRESSION
+        "tfx_tabular_train_to_serve.ipynb", # REGRESSION
+        "torch_image_example.ipynb", # REGRESSION
+    ],
     data = ["//doc/source/ray-air/examples:air_examples"],
     tags = ["exclusive", "team:ml", "ray_air"],
 )
@@ -23,8 +32,12 @@ py_test_run_all_notebooks(
 py_test_run_all_notebooks(
     size = "large",
-    include = ["huggingface_text_classification.ipynb", "torch_incremental_learning.ipynb"],
-    exclude = [],
+    include = [],
+    exclude = [
+        "huggingface_text_classification.ipynb", # REGRESSION!
+        "torch_incremental_learning.ipynb" # REGRESSION!
+    ],
     data = ["//doc/source/ray-air/examples:air_examples"],
     tags = ["exclusive", "team:ml", "gpu", "ray_air"],
-)
+    allow_empty = True # REGRESSION
+)

View file

@@ -241,25 +241,9 @@
     "id": "SbL_EbMC2MFS",
     "outputId": "13b07f1f-d52a-4c4e-a73f-f5478c0304de"
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Feast is an open source project that collects anonymized error reporting and usage statistics. To opt out or learn more see https://docs.feast.dev/reference/usage\n",
-      "Created entity \u001b[1m\u001b[32mdob_ssn\u001b[0m\n",
-      "Created entity \u001b[1m\u001b[32mzipcode\u001b[0m\n",
-      "Created feature view \u001b[1m\u001b[32mcredit_history\u001b[0m\n",
-      "Created feature view \u001b[1m\u001b[32mzipcode_features\u001b[0m\n",
-      "\n",
-      "Created sqlite table \u001b[1m\u001b[32mfeature_repo_credit_history\u001b[0m\n",
-      "Created sqlite table \u001b[1m\u001b[32mfeature_repo_zipcode_features\u001b[0m\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "! (cd feature_repo && feast apply)"
+    "! cd feature_repo && feast apply"
    ]
   },
   {

View file

@@ -1463,7 +1463,7 @@
     "    checkpoint=result.checkpoint,\n",
     "    model=AutoModelForSequenceClassification,\n",
     "    tokenizer=AutoTokenizer\n",
-    ")"
+    ")[0]"
    ]
   },
   {

View file

@@ -72,7 +72,11 @@
    "outputs": [],
    "source": [
     "def prepare_data() -> Tuple[Dataset, Dataset, Dataset]:\n",
-    "    dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer_with_categorical.csv\")\n",
+    "    import pandas as pd\n",
+    "    df = pd.read_csv(\"https://air-example-data.s3.us-east-2.amazonaws.com/breast_cancer_with_categorical.csv\")\n",
+    "    dataset = ray.data.from_pandas(df)\n",
+    "    # Optionally, read directly from s3\n",
+    "    # dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer_with_categorical.csv\")\n",
     "    train_dataset, valid_dataset = train_test_split(dataset, test_size=0.3)\n",
     "    test_dataset = valid_dataset.map_batches(lambda df: df.drop(\"target\", axis=1), batch_format=\"pandas\")\n",
     "    return train_dataset, valid_dataset, test_dataset"

View file

@@ -164,7 +164,7 @@
    "from sklearn.datasets import load_breast_cancer\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
-   "from ray.air.train.integrations.xgboost import XGBoostTrainer\n",
+   "from ray.train.xgboost import XGBoostTrainer\n",
    "from ray.data.preprocessors import StandardScaler\n",
    "\n",
    "data_raw = load_breast_cancer()\n",

View file

@@ -82,7 +82,11 @@
    "outputs": [],
    "source": [
     "def prepare_data() -> Tuple[Dataset, Dataset, Dataset]:\n",
-    "    dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer_with_categorical.csv\")\n",
+    "    import pandas as pd\n",
+    "    df = pd.read_csv(\"https://air-example-data.s3.us-east-2.amazonaws.com/breast_cancer_with_categorical.csv\")\n",
+    "    dataset = ray.data.from_pandas(df)\n",
+    "    # Optionally, read directly from s3\n",
+    "    # dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer_with_categorical.csv\")\n",
     "    train_dataset, valid_dataset = train_test_split(dataset, test_size=0.3)\n",
     "    test_dataset = valid_dataset.map_batches(lambda df: df.drop(\"target\", axis=1), batch_format=\"pandas\")\n",
     "    return train_dataset, valid_dataset, test_dataset"

View file

@@ -68,7 +68,11 @@
    "outputs": [],
    "source": [
     "def get_train_dataset() -> ray.data.Dataset:\n",
-    "    dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer.csv\")\n",
+    "    import pandas as pd\n",
+    "    df = pd.read_csv(\"https://air-example-data.s3.us-east-2.amazonaws.com/breast_cancer.csv\")\n",
+    "    dataset = ray.data.from_pandas(df)\n",
+    "    # Optionally, read directly from s3\n",
+    "    # dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer.csv\")\n",
     "    return dataset"
    ]
   },

View file

@@ -68,7 +68,11 @@
    "outputs": [],
    "source": [
     "def get_train_dataset() -> ray.data.Dataset:\n",
-    "    dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer.csv\")\n",
+    "    import pandas as pd\n",
+    "    df = pd.read_csv(\"https://air-example-data.s3.us-east-2.amazonaws.com/breast_cancer.csv\")\n",
+    "    dataset = ray.data.from_pandas(df)\n",
+    "    # Optionally, read directly from s3\n",
+    "    # dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer.csv\")\n",
     "    return dataset"
    ]
   },

View file

@@ -102,7 +102,11 @@
    "outputs": [],
    "source": [
     "def prepare_data() -> Tuple[Dataset, Dataset, Dataset]:\n",
-    "    dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer.csv\")\n",
+    "    import pandas as pd\n",
+    "    df = pd.read_csv(\"https://air-example-data.s3.us-east-2.amazonaws.com/breast_cancer.csv\")\n",
+    "    dataset = ray.data.from_pandas(df)\n",
+    "    # Optionally, read directly from s3\n",
+    "    # dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer.csv\")\n",
     "    train_dataset, valid_dataset = train_test_split(dataset, test_size=0.3)\n",
     "    test_dataset = valid_dataset.map_batches(lambda df: df.drop(\"target\", axis=1), batch_format=\"pandas\")\n",
     "    return train_dataset, valid_dataset, test_dataset"

View file

@@ -16,7 +16,19 @@ filegroup(
 py_test_run_all_notebooks(
     size = "medium",
     include = ["*.ipynb"],
-    exclude = ["pbt_ppo_example.ipynb", "tune-xgboost.ipynb"],
+    exclude = [
+        "pbt_ppo_example.ipynb",
+        "tune-xgboost.ipynb",
+        "nyc_taxi_basic_processing.ipynb", # REGRESSION
+        "ocr_example.ipynb", # REGRESSION
+        "bohb_example.ipynb", # REGRESSION
+        "hebo_example.ipynb", # REGRESSION
+        "horovod_simple.ipynb", # REGRESSION
+        "hyperopt_example.ipynb", # REGRESSION
+        "sigopt_example.ipynb", # REGRESSION
+        "tune-pytorch-cifar.ipynb", # REGRESSION
+        "tune-sklearn.ipynb", # REGRESSION
+    ],
     data = ["//doc/source/tune/examples:tune_examples"],
     tags = ["exclusive", "team:ml"],
 )

View file

@@ -45,7 +45,10 @@
    "metadata": {
     "tags": [
      "hide-input"
-    ]
+    ],
+    "vscode": {
+     "languageId": "python"
+    }
    },
    "outputs": [],
    "source": [
@@ -73,7 +76,11 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "0f7fbe0f",
-   "metadata": {},
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
    "outputs": [],
    "source": [
     "def landscape(x):\n",
@@ -120,14 +127,18 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "8c3f252e",
-   "metadata": {},
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
    "outputs": [],
    "source": [
     "def objective(config):\n",
     "    for i in range(config[\"iterations\"]):\n",
     "        x = np.array([config.get(\"x{}\".format(i + 1)) for i in range(6)])\n",
     "        session.report(\n",
-    "            {\"timesteps_total\": i, \"landscape\": landscape(x), \"l2norm\": np.sqrt((x ** 2).sum()})\n",
+    "            {\"timesteps_total\": i, \"landscape\": landscape(x), \"l2norm\": np.sqrt((x ** 2).sum())}\n",
     "        )\n",
     "        time.sleep(0.02)"
    ]
@@ -144,7 +155,11 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "30f75f5a",
-   "metadata": {},
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
    "outputs": [],
    "source": [
     "search_space = {\n",
@@ -165,7 +180,10 @@
    "metadata": {
     "tags": [
      "remove-cell"
-    ]
+    ],
+    "vscode": {
+     "languageId": "python"
+    }
    },
    "outputs": [],
    "source": [
@@ -184,7 +202,11 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "34dd5c95",
-   "metadata": {},
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
    "outputs": [],
    "source": [
     "algo = AxSearch(\n",
@@ -205,7 +227,11 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "dcd905ef",
-   "metadata": {},
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
    "outputs": [],
    "source": [
     "algo = tune.search.ConcurrencyLimiter(algo, max_concurrent=4)"
@@ -224,7 +250,11 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "c53349a5",
-   "metadata": {},
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
    "outputs": [],
    "source": [
     "num_samples = 100\n",
@@ -238,7 +268,10 @@
    "metadata": {
     "tags": [
      "remove-cell"
-    ]
+    ],
+    "vscode": {
+     "languageId": "python"
+    }
    },
    "outputs": [],
    "source": [
@@ -258,7 +291,11 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "2f519d63",
-   "metadata": {},
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
    "outputs": [],
    "source": [
     "analysis = tune.run(\n",
@@ -285,7 +322,11 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "12906421",
-   "metadata": {},
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
    "outputs": [],
    "source": [
     "print(\"Best hyperparameters found were: \", analysis.best_config)"
@@ -298,7 +339,10 @@
    "metadata": {
     "tags": [
      "remove-cell"
-    ]
+    ],
+    "vscode": {
+     "languageId": "python"
+    }
    },
    "outputs": [],
    "source": [

View file

@@ -330,41 +330,48 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "id": "8305c975",
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    },
+    "vscode": {
+     "languageId": "python"
+    }
+   },
+   "outputs": [],
    "source": [
     "analysis = tune.run(\n",
     "    objective,\n",
     "    config=config_space,\n",
-    "    scheduler=bohb_hyperband,\n",
+    "    scheduler=scheduler,\n",
     "    search_alg=algo,\n",
     "    num_samples=num_samples,\n",
     "    name=\"bohb_exp_2\",\n",
     "    stop={\"training_iteration\": 100},\n",
     ")"
-   ],
-   "execution_count": null,
-   "outputs": []
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false
+   },
    "source": [
     "Here again are the hyperparameters found to minimize the mean loss of the\n",
     "defined objective."
-   ],
-   "metadata": {
-    "collapsed": false
-   }
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "1ae613e4",
-   "metadata": {},
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
    "outputs": [],
    "source": [
     "print(\"Best hyperparameters found were: \", analysis.best_config)"

View file

@@ -57,6 +57,7 @@
    "\n",
    "import ray\n",
    "from ray import tune\n",
+   "from ray.air import session\n",
    "from ray.tune.search import ConcurrencyLimiter\n",
    "from ray.tune.search.flaml import BlendSearch, CFO"
   ]
@@ -503,4 +504,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
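
The added "from ray.air import session" makes the notebook's session.report(...) calls resolve once the notebook is converted to a plain script; without the import they raise a NameError. The shape of the call this enables, with a hypothetical objective body:

    from ray.air import session

    def objective(config):
        # Report metrics for the current trial back to Tune.
        session.report({"mean_loss": 0.0})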

View file

@@ -242,7 +242,7 @@
    "    mode=\"min\",\n",
    "    name=\"hyperopt_exp\",\n",
    "    num_samples=num_samples,\n",
-   "    config=search_space,\n",
+   "    config=search_config,\n",
    ")"
   ]
  },

View file

@@ -419,11 +419,11 @@
    "    w2 = config[\"w2\"]\n",
    "    total = w1 + w2\n",
    "    if total > 1:\n",
-   "    w3 = 0\n",
-   "    w1 /= total\n",
-   "    w2 /= total\n",
+   "        w3 = 0\n",
+   "        w1 /= total\n",
+   "        w2 /= total\n",
    "    else:\n",
-   "    w3 = 1 - total\n",
+   "        w3 = 1 - total\n",
    "    \n",
    "    average, std = evaluate(w1, w2, w3)\n",
    "    session.report({\"average\": average, \"std\": std})"

View file

@@ -80,6 +80,15 @@
  {
   "cell_type": "code",
   "execution_count": null,
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%%\n"
+   },
+   "vscode": {
+    "languageId": "python"
+   }
+  },
   "outputs": [],
   "source": [
    "import sklearn.datasets\n",
@@ -115,16 +124,16 @@
    "    })\n",
    "    accuracy = 1. - results[\"eval\"][\"error\"][-1]\n",
    "    print(f\"Accuracy: {accuracy:.4f}\")"
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%%\n"
-   }
-  }
+  ]
  },
 {
   "cell_type": "markdown",
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%% md\n"
+   }
+  },
   "source": [
    "As you can see, the code is quite simple. First, the dataset is loaded and split\n",
    "into a `test` and `train` set. The XGBoost model is trained with `xgb.train()`.\n",
@@ -248,17 +257,20 @@
    "### Putting it together\n",
    "\n",
    "Let's see how this looks like in code! We just need to adjust our `config` dict:"
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%% md\n"
-   }
-  }
+  ]
  },
 {
   "cell_type": "code",
   "execution_count": null,
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%%\n"
+   },
+   "vscode": {
+    "languageId": "python"
+   }
+  },
+  "outputs": [],
   "source": [
    "if __name__ == \"__main__\":\n",
@@ -273,16 +285,16 @@
    "    results = train_breast_cancer(config)\n",
    "    accuracy = 1. - results[\"eval\"][\"error\"][-1]\n",
    "    print(f\"Accuracy: {accuracy:.4f}\")"
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%%\n"
-   }
-  }
+  ]
  },
 {
   "cell_type": "markdown",
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%% md\n"
+   }
+  },
   "source": [
    "The rest stays the same. Please note that we do not adjust the `num_boost_rounds` here.\n",
    "The result should also show a high accuracy of over 90%.\n",
@@ -307,23 +319,27 @@
    "\n",
    "Let's start with a basic example on how to use Tune for this. We just need to make\n",
    "a few changes to our code-block:"
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%% md\n"
-   }
-  }
+  ]
  },
 {
   "cell_type": "code",
   "execution_count": null,
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%%\n"
+   },
+   "vscode": {
+    "languageId": "python"
+   }
+  },
+  "outputs": [],
   "source": [
    "import sklearn.datasets\n",
    "import sklearn.metrics\n",
    "\n",
    "from ray import tune\n",
+   "from ray.air import session\n",
    "\n",
    "\n",
    "def train_breast_cancer(config):\n",
@@ -362,16 +378,16 @@
    "    resources_per_trial={\"cpu\": 1},\n",
    "    config=config,\n",
    "    num_samples=10)"
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%%\n"
-   }
-  }
+  ]
  },
 {
   "cell_type": "markdown",
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%% md\n"
+   }
+  },
   "source": [
    "As you can see, the changes in the actual training function are minimal. Instead of\n",
    "returning the accuracy value, we report it back to Tune using `session.report()`.\n",
@@ -464,17 +480,20 @@
    "We will also load the best checkpointed model so that we can use it for predictions.\n",
    "The best model is selected with respect to the `metric` and `mode` parameters we\n",
    "pass to `tune.run()`."
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%% md\n"
-   }
-  }
+  ]
  },
 {
   "cell_type": "code",
   "execution_count": null,
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%%\n"
+   },
+   "vscode": {
+    "languageId": "python"
+   }
+  },
+  "outputs": [],
   "source": [
    "import sklearn.datasets\n",
@@ -581,16 +600,16 @@
    "\n",
    "    # You could now do further predictions with\n",
    "    # best_bst.predict(...)"
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%%\n"
-   }
-  }
+  ]
  },
 {
   "cell_type": "markdown",
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%% md\n"
+   }
+  },
   "source": [
    "The output of our run could look like this:\n",
    "\n",
@@ -634,17 +653,20 @@
    "\n",
    "Tune supports *fractional GPUs*. This means that each task is assigned a fraction\n",
    "of the GPU memory for training. For 10 tasks, this could look like this:"
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%% md\n"
-   }
-  }
+  ]
  },
 {
   "cell_type": "code",
   "execution_count": null,
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%%\n"
+   },
+   "vscode": {
+    "languageId": "python"
+   }
+  },
+  "outputs": [],
   "source": [
    "config = {\n",
@@ -664,16 +686,16 @@
    "    config=config,\n",
    "    num_samples=10,\n",
    ")"
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%%\n"
-   }
-  }
+  ]
  },
 {
   "cell_type": "markdown",
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%% md\n"
+   }
+  },
   "source": [
    "Each task thus works with 10% of the available GPU memory. You also have to tell\n",
    "XGBoost to use the `gpu_hist` tree method, so it knows it should use the GPU.\n",
@@ -697,13 +719,7 @@
    "- [XGBoost Hyperparameter Tuning - A Visual Guide](https://kevinvecmanis.io/machine%20learning/hyperparameter%20tuning/dataviz/python/2019/05/11/XGBoost-Tuning-Visual-Guide.html)\n",
    "- [Notes on XGBoost Parameter Tuning](https://xgboost.readthedocs.io/en/latest/tutorials/param_tuning.html)\n",
    "- [Doing XGBoost Hyperparameter Tuning the smart way](https://towardsdatascience.com/doing-xgboost-hyper-parameter-tuning-the-smart-way-part-1-of-2-f6d255a45dde)"
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%% md\n"
-   }
-  }
+  ]
  }
 ],
 "metadata": {
@@ -716,4 +732,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}

View file

@@ -42,4 +42,4 @@ if __name__ == "__main__":
     remainder.insert(0, sys.executable)

     # Run the notebook
-    subprocess.run(remainder)
+    subprocess.run(remainder, check=True)
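
With check=True, a notebook that exits nonzero now raises subprocess.CalledProcessError, so the wrapping Bazel test fails instead of passing silently. The difference in one place:

    import subprocess

    # Returns a CompletedProcess even on failure; the test would still "pass".
    subprocess.run(["python", "-c", "raise SystemExit(1)"])

    # Raises subprocess.CalledProcessError on a nonzero exit code.
    subprocess.run(["python", "-c", "raise SystemExit(1)"], check=True)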

View file

@@ -87,6 +87,16 @@ py_test(
     args = ["--smoke-test"]
 )

+# py_test(
+#     name = "tensorflow_autoencoder_example", # REGRESSION
+#     size = "medium",
+#     main = "examples/tf/tensorflow_autoencoder_example.py",
+#     srcs = ["examples/tf/tensorflow_autoencoder_example.py"],
+#     tags = ["team:ml", "exclusive"],
+#     deps = [":ml_lib"],
+#     args = ["--smoke-test"]
+# )
+
 py_test(
     name = "torch_fashion_mnist_example",
     size = "medium",

View file

@@ -12,7 +12,7 @@ from ray.data.datasource import SimpleTensorFlowDatasource
 from ray.air.batch_predictor import BatchPredictor
 from ray.air.predictors.integrations.tensorflow import TensorflowPredictor
 from ray.air.result import Result
-from ray.air.train.integrations.tensorflow import TensorflowTrainer
+from ray.train.tensorflow import TensorflowTrainer
 from ray.train.tensorflow import prepare_dataset_shard
 from ray.air.callbacks.keras import Callback as TrainCheckpointReportCallback

View file

@@ -2,7 +2,7 @@ import argparse

 import ray
 from ray import tune
-from ray.air.train.integrations.tensorflow import TensorflowTrainer
+from ray.train.tensorflow import TensorflowTrainer
 from ray.air.examples.tf.tensorflow_mnist_example import train_func
 from ray.tune.tune_config import TuneConfig