mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00

This PR adds concurrency groups to Buildkite release test runs with the new release test package. Five concurrency groups are defined (large-gpu, small-gpu, large, medium, small). If not specified manually, concurrency groups are inferred from the cluster resources used. Example pipeline: https://buildkite.com/ray-project/release-tests-branch/builds/55#09109eac-d22e-43bc-889e-078cfb037373 (click on Artifacts --> pipeline.json)
45 lines
1.1 KiB
Python
45 lines
1.1 KiB
Python
import csv
import sys
from typing import Dict, List, Tuple, Union

import boto3
|
|
|
|
|
|
def get_aws_instance_information() -> List[Dict[str, Union[str, int]]]:
    """Collect CPU and GPU counts for the EC2 instance types reported by AWS.

    Calls ``describe_instance_types`` on a boto3 EC2 client repeatedly,
    passing along the ``NextToken`` of each response until a response no
    longer contains one.

    Returns:
        One dict per instance type with the keys ``"instance"`` (the
        ``InstanceType`` value), ``"cpus"`` (``VCpuInfo.DefaultVCpus``) and
        ``"gpus"`` (the sum of ``Count`` over ``GpuInfo.Gpus``, or 0 when
        the entry has no ``GpuInfo``).
    """
    rows = []
    client = boto3.client("ec2")

    # Keyword arguments for the next request. Empty for the first page;
    # afterwards it only carries the pagination token.
    args = {}
    while True:
        result = client.describe_instance_types(**args)

        for instance in result["InstanceTypes"]:
            num_cpus = instance["VCpuInfo"]["DefaultVCpus"]
            # Entries without a "GpuInfo" key fall back to an empty GPU
            # list, so they are recorded with zero GPUs.
            num_gpus = sum(
                gpu["Count"] for gpu in instance.get("GpuInfo", {"Gpus": []})["Gpus"]
            )
            rows.append(
                {
                    "instance": instance["InstanceType"],
                    "cpus": num_cpus,
                    "gpus": num_gpus,
                }
            )

        # A response without a token is the last page.
        if "NextToken" not in result:
            break

        args["NextToken"] = result["NextToken"]

    return rows
|
|
|
|
|
|
if __name__ == "__main__":
    # Fetch all instance rows, then print them to stdout as CSV: a header
    # line first, followed by the rows ordered by instance type name.
    fetched = list(get_aws_instance_information())

    out = csv.DictWriter(sys.stdout, fieldnames=["instance", "cpus", "gpus"])
    out.writeheader()
    out.writerows(sorted(fetched, key=lambda entry: entry["instance"]))
|