mirror of
https://github.com/vale981/ray
synced 2025-03-04 17:41:43 -05:00

Adds a unit-tested and restructured ray_release package for running release tests. Relevant changes in behavior: By default, Buildkite will wait for the wheels of the current commit to be available. Alternatively, users can specify a) a different commit hash, b) a wheels URL (which we will also wait for to be available), or c) a branch (or user/branch combination), in which case the latest available wheels will be used (e.g. if master is passed, behavior matches the old default behavior). The main subpackages are: Cluster manager: creates cluster envs/computes, starts the cluster, terminates the cluster; Command runner: runs commands, e.g. as a client command or SDK command; File manager: uploads/downloads files to/from the session; Reporter: reports results (e.g. to a database). Much of the code base is unit tested, but there are probably some pieces missing. Example build (waited for wheels to be built): https://buildkite.com/ray-project/kf-dev/builds/51#_ Wheel build: https://buildkite.com/ray-project/ray-builders-branch/builds/6023
137 lines
2.8 KiB
Python
137 lines
2.8 KiB
Python
from ray_release.result import ExitCode
|
|
|
|
|
|
class ReleaseTestError(RuntimeError):
    """Root of the release-test exception hierarchy.

    Every subclass exposes a class-level ``exit_code``; this base class
    uses ``ExitCode.UNSPECIFIED`` as the fallback value that subclasses
    inherit unless they override it.
    """

    exit_code = ExitCode.UNSPECIFIED
|
|
|
|
|
|
class ReleaseTestPackageError(ReleaseTestError):
    """Intermediate base class for the config, CLI and setup errors.

    Declares no ``exit_code`` of its own, so it inherits the one defined
    on ``ReleaseTestError``.
    """
|
|
|
|
|
|
class ReleaseTestConfigError(ReleaseTestPackageError):
    """Package error that maps to ``ExitCode.CONFIG_ERROR``."""

    exit_code = ExitCode.CONFIG_ERROR
|
|
|
|
|
|
class ReleaseTestCLIError(ReleaseTestPackageError):
    """Package error that maps to ``ExitCode.CLI_ERROR``."""

    exit_code = ExitCode.CLI_ERROR
|
|
|
|
|
|
class ReleaseTestSetupError(ReleaseTestPackageError):
    """Package error that maps to ``ExitCode.SETUP_ERROR``."""

    exit_code = ExitCode.SETUP_ERROR
|
|
|
|
|
|
class RayWheelsError(ReleaseTestError):
    """Base class for the ``RayWheels*`` errors.

    Maps to ``ExitCode.CLI_ERROR``.
    """

    exit_code = ExitCode.CLI_ERROR
|
|
|
|
|
|
class RayWheelsUnspecifiedError(RayWheelsError):
    """Ray wheels error that maps to ``ExitCode.CLI_ERROR``.

    The exit code is declared explicitly rather than inherited.
    """

    exit_code = ExitCode.CLI_ERROR
|
|
|
|
|
|
class RayWheelsNotFoundError(RayWheelsError):
    """Ray wheels error that maps to ``ExitCode.CLI_ERROR``.

    The exit code is declared explicitly rather than inherited.
    """

    exit_code = ExitCode.CLI_ERROR
|
|
|
|
|
|
class RayWheelsTimeoutError(RayWheelsError):
    """Ray wheels error that maps to ``ExitCode.RAY_WHEELS_TIMEOUT``."""

    exit_code = ExitCode.RAY_WHEELS_TIMEOUT
|
|
|
|
|
|
class ClusterManagerError(ReleaseTestError):
    """Base class for the cluster env/compute/creation/startup errors.

    Maps to ``ExitCode.CLUSTER_RESOURCE_ERROR``.
    """

    exit_code = ExitCode.CLUSTER_RESOURCE_ERROR
|
|
|
|
|
|
class ClusterEnvCreateError(ClusterManagerError):
    """Cluster manager error that maps to ``ExitCode.CLUSTER_RESOURCE_ERROR``.

    The exit code is declared explicitly rather than inherited.
    """

    exit_code = ExitCode.CLUSTER_RESOURCE_ERROR
|
|
|
|
|
|
class ClusterEnvBuildError(ClusterManagerError):
    """Cluster manager error that maps to ``ExitCode.CLUSTER_ENV_BUILD_ERROR``."""

    exit_code = ExitCode.CLUSTER_ENV_BUILD_ERROR
|
|
|
|
|
|
class ClusterEnvBuildTimeout(ClusterManagerError):
    """Cluster manager error that maps to ``ExitCode.CLUSTER_ENV_BUILD_TIMEOUT``."""

    exit_code = ExitCode.CLUSTER_ENV_BUILD_TIMEOUT
|
|
|
|
|
|
class ClusterComputeCreateError(ClusterManagerError):
    """Cluster manager error that maps to ``ExitCode.CLUSTER_RESOURCE_ERROR``.

    The exit code is declared explicitly rather than inherited.
    """

    exit_code = ExitCode.CLUSTER_RESOURCE_ERROR
|
|
|
|
|
|
class ClusterCreationError(ClusterManagerError):
    """Cluster manager error that maps to ``ExitCode.CLUSTER_RESOURCE_ERROR``.

    The exit code is declared explicitly rather than inherited.
    """

    exit_code = ExitCode.CLUSTER_RESOURCE_ERROR
|
|
|
|
|
|
class ClusterStartupError(ClusterManagerError):
    """Cluster manager error that maps to ``ExitCode.CLUSTER_STARTUP_ERROR``."""

    exit_code = ExitCode.CLUSTER_STARTUP_ERROR
|
|
|
|
|
|
class ClusterStartupTimeout(ClusterManagerError):
    """Cluster manager error that maps to ``ExitCode.CLUSTER_STARTUP_TIMEOUT``."""

    exit_code = ExitCode.CLUSTER_STARTUP_TIMEOUT
|
|
|
|
|
|
class ClusterStartupFailed(ClusterManagerError):
    """Cluster manager error that maps to ``ExitCode.CLUSTER_STARTUP_ERROR``.

    Uses the same exit code as ``ClusterStartupError`` while remaining a
    separate exception type (both derive directly from
    ``ClusterManagerError``).
    """

    exit_code = ExitCode.CLUSTER_STARTUP_ERROR
|
|
|
|
|
|
class EnvironmentSetupError(ReleaseTestError):
    """Base class for the local/remote environment setup errors.

    Maps to ``ExitCode.CLUSTER_STARTUP_ERROR``.
    """

    # NOTE(review): reuses the cluster-startup exit code rather than a
    # dedicated one; presumably intentional -- confirm before changing.
    exit_code = ExitCode.CLUSTER_STARTUP_ERROR
|
|
|
|
|
|
class LocalEnvSetupError(EnvironmentSetupError):
    """Environment setup error that maps to ``ExitCode.LOCAL_ENV_SETUP_ERROR``."""

    exit_code = ExitCode.LOCAL_ENV_SETUP_ERROR
|
|
|
|
|
|
class RemoteEnvSetupError(EnvironmentSetupError):
    """Environment setup error that maps to ``ExitCode.REMOTE_ENV_SETUP_ERROR``."""

    exit_code = ExitCode.REMOTE_ENV_SETUP_ERROR
|
|
|
|
|
|
class FileManagerError(ReleaseTestError):
    """Base class for the file upload/download errors.

    Declares no ``exit_code`` of its own, so it inherits the one defined
    on ``ReleaseTestError``.
    """
|
|
|
|
|
|
class FileUploadError(FileManagerError):
    """File manager error for uploads (judging by the name).

    Declares no ``exit_code`` of its own; the value is inherited through
    ``FileManagerError``.
    """
|
|
|
|
|
|
class FileDownloadError(FileManagerError):
    """File manager error for downloads (judging by the name).

    Declares no ``exit_code`` of its own; the value is inherited through
    ``FileManagerError``.
    """
|
|
|
|
|
|
class ClusterNodesWaitTimeout(ReleaseTestError):
    """Release test error that maps to ``ExitCode.CLUSTER_WAIT_TIMEOUT``."""

    exit_code = ExitCode.CLUSTER_WAIT_TIMEOUT
|
|
|
|
|
|
class CommandTimeout(ReleaseTestError):
    """Base class for the prepare/test command timeouts.

    Maps to ``ExitCode.COMMAND_TIMEOUT``.
    """

    exit_code = ExitCode.COMMAND_TIMEOUT
|
|
|
|
|
|
class PrepareCommandTimeout(CommandTimeout):
    """Command timeout that maps to ``ExitCode.CLUSTER_WAIT_TIMEOUT``."""

    # NOTE(review): overrides the parent's COMMAND_TIMEOUT with the
    # cluster-wait code; presumably because the prepare step waits for
    # cluster nodes -- confirm before changing.
    exit_code = ExitCode.CLUSTER_WAIT_TIMEOUT
|
|
|
|
|
|
class TestCommandTimeout(CommandTimeout):
    """Command timeout that maps to ``ExitCode.COMMAND_TIMEOUT``.

    The exit code is declared explicitly rather than inherited.
    """

    exit_code = ExitCode.COMMAND_TIMEOUT
|
|
|
|
|
|
class CommandError(ReleaseTestError):
    """Base class for command, results and logs errors.

    Maps to ``ExitCode.COMMAND_ERROR``.
    """

    exit_code = ExitCode.COMMAND_ERROR
|
|
|
|
|
|
class PrepareCommandError(CommandError):
    """Command error that maps to ``ExitCode.PREPARE_ERROR``."""

    exit_code = ExitCode.PREPARE_ERROR
|
|
|
|
|
|
class TestCommandError(CommandError):
    """Command error that maps to ``ExitCode.COMMAND_ERROR``.

    The exit code is declared explicitly rather than inherited.
    """

    exit_code = ExitCode.COMMAND_ERROR
|
|
|
|
|
|
class ResultsError(CommandError):
    """Command error related to results (judging by the name).

    Declares no ``exit_code`` of its own, so it inherits the one defined
    on ``CommandError``.
    """
|
|
|
|
|
|
class LogsError(CommandError):
    """Command error related to logs (judging by the name).

    Declares no ``exit_code`` of its own, so it inherits the one defined
    on ``CommandError``.
    """
|
|
|
|
|
|
class ResultsAlert(CommandError):
    """Command error that maps to ``ExitCode.COMMAND_ALERT``."""

    exit_code = ExitCode.COMMAND_ALERT
|