2022-02-16 17:35:02 +00:00
|
|
|
import abc
|
|
|
|
import time
|
|
|
|
from typing import Dict, Any, Optional
|
|
|
|
|
|
|
|
from anyscale.sdk.anyscale_client.sdk import AnyscaleSDK
|
|
|
|
|
|
|
|
from ray_release.anyscale_util import get_project_name
|
|
|
|
from ray_release.util import dict_hash, get_anyscale_sdk, anyscale_cluster_url
|
2022-03-11 08:03:50 +09:00
|
|
|
from ray_release.config import DEFAULT_AUTOSUSPEND_MINS
|
2022-02-16 17:35:02 +00:00
|
|
|
|
|
|
|
|
|
|
|
class ClusterManager(abc.ABC):
    """Base class holding the cluster configuration for a release test.

    The constructor resolves the project name and derives a cluster name
    from the test name and the current time. ``set_cluster_env`` and
    ``set_cluster_compute`` record the respective configuration and derive
    a name for it. The lifecycle methods (``build_configs``,
    ``delete_configs``, ``start_cluster``, ``terminate_cluster``,
    ``get_cluster_address``) raise ``NotImplementedError`` here and are
    meant to be provided by subclasses.
    """

    # Environment variables written into every cluster environment by
    # ``set_cluster_env``. Existing values for these keys are overwritten.
    _FORCED_ENV_VARS = {
        # Flags for redisless Ray
        "MATCH_AUTOSCALER_AND_RAY_IMAGES": "1",
        "RAY_gcs_storage": "memory",
        "RAY_bootstrap_with_gcs": "1",
        # Usage-stats settings; the source is tagged as "nightly-tests"
        "RAY_USAGE_STATS_ENABLED": "1",
        "RAY_USAGE_STATS_SOURCE": "nightly-tests",
    }

    def __init__(
        self, test_name: str, project_id: str, sdk: Optional["AnyscaleSDK"] = None
    ):
        """Initialize the manager for one test in one project.

        Args:
            test_name: Name of the test; used as a prefix of the cluster
                name and embedded in the derived config names.
            project_id: ID of the project the cluster belongs to.
            sdk: SDK client to use. When omitted (or falsy), one is
                obtained from ``get_anyscale_sdk()``.
        """
        self.sdk = sdk or get_anyscale_sdk()

        self.test_name = test_name
        self.project_id = project_id
        self.project_name = get_project_name(self.project_id, self.sdk)

        # Second-resolution timestamp suffix distinguishes successive runs
        # of the same test.
        self.cluster_name = f"{test_name}_{int(time.time())}"
        self.cluster_id = None

        # Populated by set_cluster_env(); the id fields stay None here.
        self.cluster_env: Optional[Dict[str, Any]] = None
        self.cluster_env_name: Optional[str] = None
        self.cluster_env_id = None
        self.cluster_env_build_id = None

        # Populated by set_cluster_compute(); the id field stays None here.
        self.cluster_compute: Optional[Dict[str, Any]] = None
        self.cluster_compute_name: Optional[str] = None
        self.cluster_compute_id = None

        self.autosuspend_minutes = DEFAULT_AUTOSUSPEND_MINS

    @staticmethod
    def _apply_forced_env_vars(cluster_env: Dict[str, Any]) -> None:
        """Write ``_FORCED_ENV_VARS`` into ``cluster_env["env_vars"]`` in place.

        A missing ``env_vars`` entry, or one that is explicitly ``None``
        (for example an empty key in a config file), is replaced by a new
        dict first. An existing mapping is updated in place, so unrelated
        entries are preserved.
        """
        env_vars = cluster_env.get("env_vars")
        if env_vars is None:
            env_vars = {}
            cluster_env["env_vars"] = env_vars
        env_vars.update(ClusterManager._FORCED_ENV_VARS)

    def set_cluster_env(self, cluster_env: Dict[str, Any]) -> None:
        """Record the cluster environment and derive its name.

        The passed dict is stored as-is and modified in place: the forced
        environment variables are added to its ``env_vars`` entry. The
        name is computed afterwards, so its hash component covers the
        injected variables as well.

        Args:
            cluster_env: Cluster environment configuration.
        """
        self.cluster_env = cluster_env
        self._apply_forced_env_vars(self.cluster_env)

        # project_id[4:8] contributes a short slice of the project ID.
        self.cluster_env_name = (
            f"{self.project_name}_{self.project_id[4:8]}"
            f"__env__{self.test_name}__"
            f"{dict_hash(self.cluster_env)}"
        )

    def set_cluster_compute(self, cluster_compute: Dict[str, Any]) -> None:
        """Record the cluster compute configuration and derive its name.

        Args:
            cluster_compute: Cluster compute configuration. It is stored
                without modification.
        """
        self.cluster_compute = cluster_compute
        self.cluster_compute_name = (
            f"{self.project_name}_{self.project_id[4:8]}"
            f"__compute__{self.test_name}__"
            f"{dict_hash(cluster_compute)}"
        )

    def build_configs(self, timeout: float = 30.0):
        """Not implemented in the base class.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def delete_configs(self):
        """Not implemented in the base class.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def start_cluster(self, timeout: float = 600.0):
        """Not implemented in the base class.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def terminate_cluster(self):
        """Not implemented in the base class.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def get_cluster_address(self) -> str:
        """Not implemented in the base class.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def get_cluster_url(self) -> Optional[str]:
        """Return the URL of the cluster, if it can be determined.

        Returns:
            ``None`` when either the project ID or the cluster ID is unset
            (falsy); otherwise the result of ``anyscale_cluster_url`` for
            the two IDs.
        """
        if not self.project_id or not self.cluster_id:
            return None
        return anyscale_cluster_url(self.project_id, self.cluster_id)
|