2021-05-31 15:28:02 -07:00
|
|
|
#
|
|
|
|
# Single node shuffle
|
|
|
|
#
|
|
|
|
|
2021-05-24 15:33:31 -07:00
|
|
|
# Test basic single node 10GB shuffle with a small number of partitions.
|
|
|
|
# This doesn't require object spilling.
|
2021-05-08 14:21:55 -07:00
|
|
|
- name: shuffle_10gb
|
|
|
|
owner:
|
|
|
|
mail: "ekl@anyscale.com"
|
|
|
|
slack: "@proj-data-processing"
|
|
|
|
|
|
|
|
cluster:
|
|
|
|
app_config: shuffle/shuffle_app_config.yaml
|
2021-05-24 15:33:31 -07:00
|
|
|
compute_template: shuffle/shuffle_compute_single.yaml
|
2021-05-08 14:21:55 -07:00
|
|
|
|
|
|
|
run:
|
2021-05-24 15:33:31 -07:00
|
|
|
timeout: 3000
|
|
|
|
script: python shuffle/shuffle_test.py --num-partitions=50 --partition-size=200e6
|
2021-05-08 14:21:55 -07:00
|
|
|
|
|
|
|
smoke_test:
|
|
|
|
cluster:
|
|
|
|
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
2021-05-24 15:33:31 -07:00
|
|
|
|
|
|
|
# Test single node 50GB shuffle with a small number of partitions.
|
|
|
|
- name: shuffle_50gb
|
|
|
|
owner:
|
|
|
|
mail: "sangcho@anyscale.com"
|
|
|
|
slack: "@proj-data-processing"
|
|
|
|
|
|
|
|
cluster:
|
|
|
|
app_config: shuffle/shuffle_app_config.yaml
|
|
|
|
compute_template: shuffle/shuffle_compute_single.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 3000
|
|
|
|
script: python shuffle/shuffle_test.py --num-partitions=50 --partition-size=1e9
|
|
|
|
|
|
|
|
smoke_test:
|
|
|
|
cluster:
|
|
|
|
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
|
|
|
|
|
|
|
# Test single node 50GB shuffle with a large number of partitions.
|
|
|
|
- name: shuffle_50gb_large_partition
|
|
|
|
owner:
|
|
|
|
mail: "sangcho@anyscale.com"
|
|
|
|
slack: "@proj-data-processing"
|
|
|
|
|
|
|
|
cluster:
|
|
|
|
app_config: shuffle/shuffle_app_config.yaml
|
|
|
|
compute_template: shuffle/shuffle_compute_single.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 3000
|
|
|
|
script: python shuffle/shuffle_test.py --num-partitions=500 --partition-size=100e6
|
|
|
|
|
|
|
|
smoke_test:
|
|
|
|
cluster:
|
|
|
|
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
|
|
|
|
2021-05-31 15:28:02 -07:00
|
|
|
# Test non-streaming shuffle on a single node with a small number of partitions.
|
|
|
|
- name: non_streaming_shuffle_50gb
|
|
|
|
owner:
|
|
|
|
mail: "sangcho@anyscale.com"
|
|
|
|
slack: "@proj-data-processing"
|
|
|
|
|
|
|
|
cluster:
|
|
|
|
app_config: shuffle/shuffle_app_config.yaml
|
|
|
|
compute_template: shuffle/shuffle_compute_single.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 3000
|
|
|
|
script: python shuffle/shuffle_test.py --num-partitions=50 --partition-size=1e9 --no-streaming
|
|
|
|
|
|
|
|
smoke_test:
|
|
|
|
cluster:
|
|
|
|
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
|
|
|
|
|
|
|
# Test non-streaming shuffle on a single node with a large number of partitions.
|
|
|
|
- name: non_streaming_shuffle_50gb_large_partition
|
|
|
|
owner:
|
|
|
|
mail: "sangcho@anyscale.com"
|
|
|
|
slack: "@proj-data-processing"
|
|
|
|
|
|
|
|
cluster:
|
|
|
|
app_config: shuffle/shuffle_app_config.yaml
|
|
|
|
compute_template: shuffle/shuffle_compute_single.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 3000
|
|
|
|
script: python shuffle/shuffle_test.py --num-partitions=500 --partition-size=100e6 --no-streaming
|
|
|
|
|
|
|
|
smoke_test:
|
|
|
|
cluster:
|
|
|
|
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
|
|
|
|
|
|
|
#
|
|
|
|
# Multi node shuffle
|
|
|
|
#
|
|
|
|
|
2021-05-24 15:33:31 -07:00
|
|
|
# Test multi-node 100GB shuffle with a small number of partitions.
|
|
|
|
- name: shuffle_100gb
|
|
|
|
owner:
|
|
|
|
mail: "sangcho@anyscale.com"
|
|
|
|
slack: "@proj-data-processing"
|
|
|
|
|
|
|
|
cluster:
|
|
|
|
app_config: shuffle/shuffle_app_config.yaml
|
|
|
|
compute_template: shuffle/shuffle_compute_multi.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 3000
|
|
|
|
prepare: python wait_cluster.py 4 600
|
|
|
|
script: python shuffle/shuffle_test.py --num-partitions=200 --partition-size=500e6
|
|
|
|
|
|
|
|
smoke_test:
|
|
|
|
cluster:
|
|
|
|
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
|
|
|
|
2021-05-31 15:28:02 -07:00
|
|
|
# Test non-streaming multi-node 100GB shuffle with a small number of partitions.
|
|
|
|
- name: non_streaming_shuffle_100gb
|
|
|
|
owner:
|
|
|
|
mail: "sangcho@anyscale.com"
|
|
|
|
slack: "@proj-data-processing"
|
|
|
|
|
|
|
|
cluster:
|
|
|
|
app_config: shuffle/shuffle_app_config.yaml
|
|
|
|
compute_template: shuffle/shuffle_compute_multi.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 3000
|
|
|
|
prepare: python wait_cluster.py 4 600
|
|
|
|
script: python shuffle/shuffle_test.py --num-partitions=200 --partition-size=500e6 --no-streaming
|
|
|
|
|
|
|
|
smoke_test:
|
|
|
|
cluster:
|
|
|
|
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
|
|
|
|
2021-05-24 15:33:31 -07:00
|
|
|
# Test multi-node 100GB shuffle with a large number of partitions.
|
|
|
|
# TODO(sang): Disabled because of a bug; re-enable once https://github.com/ray-project/ray/issues/16025 is fixed.
|
|
|
|
# - name: shuffle_100gb_large_partition
|
|
|
|
# owner:
|
|
|
|
# mail: "sangcho@anyscale.com"
|
|
|
|
# slack: "@proj-data-processing"
|
|
|
|
|
|
|
|
# cluster:
|
|
|
|
# app_config: shuffle/shuffle_app_config.yaml
|
|
|
|
# compute_template: shuffle/shuffle_compute_multi.yaml
|
|
|
|
|
|
|
|
# run:
|
|
|
|
# timeout: 3000
|
|
|
|
# prepare: python wait_cluster.py 4 600
|
|
|
|
# script: python shuffle/shuffle_test.py --num-partitions=1000 --partition-size=100e6
|
|
|
|
|
|
|
|
# smoke_test:
|
|
|
|
# cluster:
|
|
|
|
# compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|