[air] Add initial benchmark section (#26608)

This commit is contained in:
Richard Liaw 2022-07-15 15:33:48 -07:00 committed by GitHub
parent 964bc90e09
commit 5ad4e75831
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 122 additions and 14 deletions

View file

@@ -23,6 +23,7 @@ parts:
- file: ray-air/examples/serving_guide
- file: ray-air/deployment
- file: ray-air/use-pretrained-model
- file: ray-air/benchmarks
- file: ray-air/examples/index
sections:
- file: ray-air/examples/torch_image_example

View file

@@ -0,0 +1,81 @@
AIR Benchmarks
==============
Below we document key performance benchmarks for common AIR tasks and workflows.
XGBoost Batch Prediction
------------------------
This task uses the BatchPredictor module to process different amounts of data
using an XGBoost model.
We test out the performance across different cluster sizes and data sizes.
- `XGBoost Prediction Script`_
- `XGBoost Cluster configuration`_
.. TODO: Add script for generating data and running the benchmark.
.. list-table::
* - **Cluster Setup**
- **# workers**
- **Data Size**
- **# of rows**
- **Time taken**
- **Throughput**
- **Command**
* - 1 m5.4xlarge
- 1 actor
- 10 GB
- 26M rows
- 275 s
- 94.5k rows/sec
- `python xgboost_benchmark.py --size 10GB`
* - 10 m5.4xlarge nodes
- 10 actors (12 CPUs each)
- 100 GB
- 260M rows
- 331 s
- 786k rows/sec
- `python xgboost_benchmark.py --size 100GB`
XGBoost training
----------------
This task uses the XGBoostTrainer module to train on different sizes of data
with different amounts of parallelism.
XGBoost parameters were kept as defaults for xgboost==1.6.1 for this task.
- `XGBoost Training Script`_
- `XGBoost Cluster configuration`_
.. list-table::
* - **Cluster Setup**
- **# workers**
- **Data Size**
- **# of rows**
- **Time taken**
- **Command**
* - 1 m5.4xlarge
- 1 actor
- 10 GB
- 26M rows
- 692 s
- `python xgboost_benchmark.py --size 10GB`
* - 10 m5.4xlarge nodes
- 10 actors (12 CPUs each)
- 100 GB
- 260M rows
- 693 s
- `python xgboost_benchmark.py --size 100GB`
.. _`XGBoost Training Script`: https://github.com/ray-project/ray/blob/a241e6a0f5a630d6ed5b84cce30c51963834d15b/release/air_tests/air_benchmarks/workloads/xgboost_benchmark.py#L40-L58
.. _`XGBoost Prediction Script`: https://github.com/ray-project/ray/blob/a241e6a0f5a630d6ed5b84cce30c51963834d15b/release/air_tests/air_benchmarks/workloads/xgboost_benchmark.py#L63-L71
.. _`XGBoost Cluster configuration`: https://github.com/ray-project/ray/blob/a241e6a0f5a630d6ed5b84cce30c51963834d15b/release/air_tests/air_benchmarks/xgboost_compute_tpl.yaml#L6-L24

View file

@@ -75,4 +75,13 @@ AIR Feature Guides
.. link-button:: use-pretrained-model-ref
:type: ref
:text: How to use a pretrained model for inference?
:classes: btn-link btn-block stretched-link
:classes: btn-link btn-block stretched-link
---
:img-top: /ray-overview/images/ray_svg_logo.svg
+++
.. link-button:: benchmarks
:type: ref
:text: Ray AIR benchmarks
:classes: btn-link btn-block stretched-link

View file

@@ -19,6 +19,17 @@ _XGB_MODEL_PATH = "model.json"
_TRAINING_TIME_THRESHOLD = 1000
_PREDICTION_TIME_THRESHOLD = 450
_EXPERIMENT_PARAMS = {
"10G": {
"data": "s3://air-example-data-2/10G-xgboost-data.parquet/",
"num_workers": 1,
},
"100G": {
"data": "s3://air-example-data-2/100G-xgboost-data.parquet/",
"num_workers": 10,
},
}
def run_and_time_it(f):
"""Runs f in a separate process and time it."""
@@ -37,17 +48,18 @@ def run_and_time_it(f):
@run_and_time_it
def run_xgboost_training():
ds = data.read_parquet(
"s3://air-example-data-2/100G-xgboost-data.parquet/"
) # silver tier
def run_xgboost_training(data_path: str, num_workers: int):
ds = data.read_parquet(data_path)
params = {
"objective": "binary:logistic",
"eval_metric": ["logloss", "error"],
}
trainer = XGBoostTrainer(
scaling_config={"num_workers": 10, "resources_per_worker": {"CPU": 12}},
scaling_config={
"num_workers": num_workers,
"resources_per_worker": {"CPU": 12},
},
label_column="labels",
params=params,
datasets={"train": ds},
@@ -59,23 +71,23 @@ def run_xgboost_training():
@run_and_time_it
def run_xgboost_prediction(model_path: str):
def run_xgboost_prediction(model_path: str, data_path: str):
model = xgb.Booster()
model.load_model(model_path)
ds = data.read_parquet(
"s3://air-example-data-2/100G-xgboost-data.parquet/"
) # silver tier
ds = data.read_parquet(data_path)
ckpt = to_air_checkpoint(".", model)
batch_predictor = BatchPredictor.from_checkpoint(ckpt, XGBoostPredictor)
result = batch_predictor.predict(ds.drop_columns(["labels"]))
return result
def main():
def main(args):
experiment_params = _EXPERIMENT_PARAMS[args.size]
data_path, num_workers = experiment_params["data"], experiment_params["num_workers"]
print("Running xgboost training benchmark...")
training_time = run_xgboost_training()
training_time = run_xgboost_training(data_path, num_workers)
print("Running xgboost prediction benchmark...")
prediction_time = run_xgboost_prediction(_XGB_MODEL_PATH)
prediction_time = run_xgboost_prediction(_XGB_MODEL_PATH, data_path)
result = {
"training_time": training_time,
"prediction_time": prediction_time,
@@ -99,4 +111,9 @@ def main():
if __name__ == "__main__":
main()
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--size", type=str, choices=["10G", "100G"], default="100G")
args = parser.parse_args()
main(args)