mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
[Nightly test] Test non streaming shuffle (#16150)
This commit is contained in:
parent
45d2331d5a
commit
9fa3b9f6f3
4 changed files with 77 additions and 8 deletions
|
@ -1,3 +1,7 @@
|
|||
#
|
||||
# Single node shuffle
|
||||
#
|
||||
|
||||
# Test basic single node 10GB shuffle with a small number of partitions.
|
||||
# This doesn't require object spilling.
|
||||
- name: shuffle_10gb
|
||||
|
@ -53,6 +57,46 @@
|
|||
cluster:
|
||||
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
||||
|
||||
# Test non streaming shuffle in a single node with a small number of partition.
|
||||
- name: non_streaming_shuffle_50gb
|
||||
owner:
|
||||
mail: "sangcho@anyscale.com"
|
||||
slack: "@proj-data-processing"
|
||||
|
||||
cluster:
|
||||
app_config: shuffle/shuffle_app_config.yaml
|
||||
compute_template: shuffle/shuffle_compute_single.yaml
|
||||
|
||||
run:
|
||||
timeout: 3000
|
||||
script: python shuffle/shuffle_test.py --num-partitions=50 --partition-size=1e9 --no-streaming
|
||||
|
||||
smoke_test:
|
||||
cluster:
|
||||
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
||||
|
||||
# Test non streaming shuffle in a single node with a large number of partition.
|
||||
- name: non_streaming_shuffle_50gb_large_partition
|
||||
owner:
|
||||
mail: "sangcho@anyscale.com"
|
||||
slack: "@proj-data-processing"
|
||||
|
||||
cluster:
|
||||
app_config: shuffle/shuffle_app_config.yaml
|
||||
compute_template: shuffle/shuffle_compute_single.yaml
|
||||
|
||||
run:
|
||||
timeout: 3000
|
||||
script: python shuffle/shuffle_test.py --num-partitions=500 --partition-size=100e6 --no-streaming
|
||||
|
||||
smoke_test:
|
||||
cluster:
|
||||
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
||||
|
||||
#
|
||||
# Multi node shuffle
|
||||
#
|
||||
|
||||
# Test multi nodes 100GB shuffle with a small number of partitions.
|
||||
- name: shuffle_100gb
|
||||
owner:
|
||||
|
@ -72,6 +116,25 @@
|
|||
cluster:
|
||||
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
||||
|
||||
# Test non streaming multi nodes 100GB shuffle with a small number of partitions.
|
||||
- name: non_streaming_shuffle_100gb
|
||||
owner:
|
||||
mail: "sangcho@anyscale.com"
|
||||
slack: "@proj-data-processing"
|
||||
|
||||
cluster:
|
||||
app_config: shuffle/shuffle_app_config.yaml
|
||||
compute_template: shuffle/shuffle_compute_multi.yaml
|
||||
|
||||
run:
|
||||
timeout: 3000
|
||||
prepare: python wait_cluster.py 4 600
|
||||
script: python shuffle/shuffle_test.py --num-partitions=200 --partition-size=500e6 --no-streaming
|
||||
|
||||
smoke_test:
|
||||
cluster:
|
||||
compute_template: shuffle/shuffle_compute_smoke.yaml # Does not exist yet
|
||||
|
||||
# Test multi nodes 100GB shuffle with a large number of partitions.
|
||||
# TODO(sang): Not working due to a bug https://github.com/ray-project/ray/issues/16025.
|
||||
# - name: shuffle_100gb_large_partition
|
||||
|
|
|
@ -10,7 +10,7 @@ head_node_type:
|
|||
|
||||
worker_node_types:
|
||||
- name: worker_node
|
||||
instance_type: i3.8xlarge
|
||||
instance_type: i3.4xlarge
|
||||
min_workers: 3
|
||||
max_workers: 3
|
||||
use_spot: false
|
||||
|
|
|
@ -9,7 +9,7 @@ head_node_type:
|
|||
instance_type: i3.2xlarge
|
||||
|
||||
worker_node_types:
|
||||
- name: worker_node
|
||||
- name: worker_node4
|
||||
instance_type: i3.2xlarge
|
||||
min_workers: 0
|
||||
max_workers: 0
|
||||
|
|
|
@ -13,17 +13,23 @@ if __name__ == "__main__":
|
|||
help="number of reducer actors used",
|
||||
default="200e6",
|
||||
type=str)
|
||||
parser.add_argument(
|
||||
"--no-streaming", help="Non streaming shuffle", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
start = time.time()
|
||||
success = 1
|
||||
commands = [
|
||||
"python", "-m", "ray.experimental.shuffle", "--ray-address={}".format(
|
||||
os.environ["RAY_ADDRESS"]),
|
||||
f"--num-partitions={args.num_partitions}",
|
||||
f"--partition-size={args.partition_size}"
|
||||
]
|
||||
if args.no_streaming:
|
||||
commands.append("--no-streaming")
|
||||
|
||||
try:
|
||||
subprocess.check_call([
|
||||
"python", "-m", "ray.experimental.shuffle",
|
||||
"--ray-address={}".format(os.environ["RAY_ADDRESS"]),
|
||||
f"--num-partitions={args.num_partitions}",
|
||||
f"--partition-size={args.partition_size}"
|
||||
])
|
||||
subprocess.check_call(commands)
|
||||
except Exception as e:
|
||||
print(f"The test failed with {e}")
|
||||
success = 0
|
||||
|
|
Loading…
Add table
Reference in a new issue