ray/release/ray_release/exception.py
Kai Fricke 331b71ea8d
[ci/release] Refactor release test e2e into package (#22351)
Adds a unit-tested and restructured ray_release package for running release tests.

Relevant changes in behavior:

By default, Buildkite will wait for the wheels of the current commit to be available. Alternatively, users can specify a) a different commit hash, b) a wheels URL (which we will also wait for to be available), or c) a branch (or user/branch combination), in which case the latest available wheels will be used (e.g. if master is passed, behavior matches the old default behavior).

The main subpackages are:

    Cluster manager: Creates cluster envs/computes, starts cluster, terminates cluster
    Command runner: Runs commands, e.g. as client command or sdk command
    File manager: Uploads/downloads files to/from session
    Reporter: Reports results (e.g. to database)

Much of the code base is unit tested, but there are probably some pieces missing.

Example build (waited for wheels to be built): https://buildkite.com/ray-project/kf-dev/builds/51#_
Wheel build: https://buildkite.com/ray-project/ray-builders-branch/builds/6023
2022-02-16 17:35:02 +00:00

137 lines
2.8 KiB
Python

from ray_release.result import ExitCode
class ReleaseTestError(RuntimeError):
    """Base class for the release-test exceptions declared in this module.

    Each exception class exposes a class-level ``exit_code``; this base sets
    it to ``ExitCode.UNSPECIFIED`` and subclasses may override it.
    """

    exit_code = ExitCode.UNSPECIFIED
class ReleaseTestPackageError(ReleaseTestError):
    """Common parent of the config, CLI and setup error classes.

    Declares no ``exit_code`` of its own, so the value inherited from
    ``ReleaseTestError`` applies.
    """
class ReleaseTestConfigError(ReleaseTestPackageError):
    """Package error whose ``exit_code`` is ``ExitCode.CONFIG_ERROR``."""

    exit_code = ExitCode.CONFIG_ERROR
class ReleaseTestCLIError(ReleaseTestPackageError):
    """Package error whose ``exit_code`` is ``ExitCode.CLI_ERROR``."""

    exit_code = ExitCode.CLI_ERROR
class ReleaseTestSetupError(ReleaseTestPackageError):
    """Package error whose ``exit_code`` is ``ExitCode.SETUP_ERROR``."""

    exit_code = ExitCode.SETUP_ERROR
class RayWheelsError(ReleaseTestError):
    """Base class for the ``RayWheels*`` exceptions in this module.

    Sets ``exit_code`` to ``ExitCode.CLI_ERROR``; subclasses may override it.
    """

    exit_code = ExitCode.CLI_ERROR
class RayWheelsUnspecifiedError(RayWheelsError):
    """Ray wheels error with ``exit_code`` pinned to ``ExitCode.CLI_ERROR``.

    The code is set explicitly on this class rather than left to inheritance.
    """

    exit_code = ExitCode.CLI_ERROR
class RayWheelsNotFoundError(RayWheelsError):
    """Ray wheels error with ``exit_code`` pinned to ``ExitCode.CLI_ERROR``.

    The code is set explicitly on this class rather than left to inheritance.
    """

    exit_code = ExitCode.CLI_ERROR
class RayWheelsTimeoutError(RayWheelsError):
    """Ray wheels error whose ``exit_code`` is ``ExitCode.RAY_WHEELS_TIMEOUT``."""

    exit_code = ExitCode.RAY_WHEELS_TIMEOUT
class ClusterManagerError(ReleaseTestError):
    """Base class for the cluster env/compute/creation/startup exceptions.

    Sets ``exit_code`` to ``ExitCode.CLUSTER_RESOURCE_ERROR``; subclasses may
    override it with a more specific code.
    """

    exit_code = ExitCode.CLUSTER_RESOURCE_ERROR
class ClusterEnvCreateError(ClusterManagerError):
    """Cluster manager error pinned to ``ExitCode.CLUSTER_RESOURCE_ERROR``.

    The code is set explicitly on this class rather than left to inheritance.
    """

    exit_code = ExitCode.CLUSTER_RESOURCE_ERROR
class ClusterEnvBuildError(ClusterManagerError):
    """Cluster manager error with ``ExitCode.CLUSTER_ENV_BUILD_ERROR``."""

    exit_code = ExitCode.CLUSTER_ENV_BUILD_ERROR
class ClusterEnvBuildTimeout(ClusterManagerError):
    """Cluster manager error with ``ExitCode.CLUSTER_ENV_BUILD_TIMEOUT``."""

    exit_code = ExitCode.CLUSTER_ENV_BUILD_TIMEOUT
class ClusterComputeCreateError(ClusterManagerError):
    """Cluster manager error pinned to ``ExitCode.CLUSTER_RESOURCE_ERROR``.

    The code is set explicitly on this class rather than left to inheritance.
    """

    exit_code = ExitCode.CLUSTER_RESOURCE_ERROR
class ClusterCreationError(ClusterManagerError):
    """Cluster manager error pinned to ``ExitCode.CLUSTER_RESOURCE_ERROR``.

    The code is set explicitly on this class rather than left to inheritance.
    """

    exit_code = ExitCode.CLUSTER_RESOURCE_ERROR
class ClusterStartupError(ClusterManagerError):
    """Cluster manager error with ``ExitCode.CLUSTER_STARTUP_ERROR``."""

    exit_code = ExitCode.CLUSTER_STARTUP_ERROR
class ClusterStartupTimeout(ClusterManagerError):
    """Cluster manager error with ``ExitCode.CLUSTER_STARTUP_TIMEOUT``."""

    exit_code = ExitCode.CLUSTER_STARTUP_TIMEOUT
class ClusterStartupFailed(ClusterManagerError):
    """Cluster manager error with ``ExitCode.CLUSTER_STARTUP_ERROR``.

    Uses the same exit code as ``ClusterStartupError`` but is a separate
    class, so callers can tell the two apart by type.
    """

    exit_code = ExitCode.CLUSTER_STARTUP_ERROR
class EnvironmentSetupError(ReleaseTestError):
    """Base class for the local and remote environment setup errors.

    Derives directly from ``ReleaseTestError`` and sets ``exit_code`` to
    ``ExitCode.CLUSTER_STARTUP_ERROR``.
    """

    # NOTE(review): this reuses the cluster-startup code even though the class
    # is not a ClusterManagerError -- presumably intentional; confirm.
    exit_code = ExitCode.CLUSTER_STARTUP_ERROR
class LocalEnvSetupError(EnvironmentSetupError):
    """Environment setup error with ``ExitCode.LOCAL_ENV_SETUP_ERROR``."""

    exit_code = ExitCode.LOCAL_ENV_SETUP_ERROR
class RemoteEnvSetupError(EnvironmentSetupError):
    """Environment setup error with ``ExitCode.REMOTE_ENV_SETUP_ERROR``."""

    exit_code = ExitCode.REMOTE_ENV_SETUP_ERROR
class FileManagerError(ReleaseTestError):
    """Base class for the file upload and download errors.

    Declares no ``exit_code`` of its own, so the value inherited from
    ``ReleaseTestError`` applies.
    """
class FileUploadError(FileManagerError):
    """File manager error type for uploads.

    Adds nothing beyond its own type; ``exit_code`` is inherited through
    ``FileManagerError``.
    """
class FileDownloadError(FileManagerError):
    """File manager error type for downloads.

    Adds nothing beyond its own type; ``exit_code`` is inherited through
    ``FileManagerError``.
    """
class ClusterNodesWaitTimeout(ReleaseTestError):
    """Release test error with ``ExitCode.CLUSTER_WAIT_TIMEOUT``.

    Derives directly from ``ReleaseTestError``; it is not part of the
    ``ClusterManagerError`` family.
    """

    exit_code = ExitCode.CLUSTER_WAIT_TIMEOUT
class CommandTimeout(ReleaseTestError):
    """Base class for the command timeout exceptions.

    Sets ``exit_code`` to ``ExitCode.COMMAND_TIMEOUT``; subclasses may
    override it.
    """

    exit_code = ExitCode.COMMAND_TIMEOUT
class PrepareCommandTimeout(CommandTimeout):
    """Command timeout whose ``exit_code`` is ``ExitCode.CLUSTER_WAIT_TIMEOUT``.

    This overrides the ``ExitCode.COMMAND_TIMEOUT`` value set on
    ``CommandTimeout``.
    """

    # NOTE(review): same code as ClusterNodesWaitTimeout -- presumably the
    # prepare step is treated as a cluster wait; confirm this is intended.
    exit_code = ExitCode.CLUSTER_WAIT_TIMEOUT
class TestCommandTimeout(CommandTimeout):
    """Command timeout with ``exit_code`` pinned to ``ExitCode.COMMAND_TIMEOUT``.

    The code is set explicitly on this class rather than left to inheritance.
    """

    exit_code = ExitCode.COMMAND_TIMEOUT
class CommandError(ReleaseTestError):
    """Base class for the command, results and logs error classes.

    Sets ``exit_code`` to ``ExitCode.COMMAND_ERROR``; subclasses may
    override it.
    """

    exit_code = ExitCode.COMMAND_ERROR
class PrepareCommandError(CommandError):
    """Command error whose ``exit_code`` is ``ExitCode.PREPARE_ERROR``."""

    exit_code = ExitCode.PREPARE_ERROR
class TestCommandError(CommandError):
    """Command error with ``exit_code`` pinned to ``ExitCode.COMMAND_ERROR``.

    The code is set explicitly on this class rather than left to inheritance.
    """

    exit_code = ExitCode.COMMAND_ERROR
class ResultsError(CommandError):
    """Command error type for results.

    Adds nothing beyond its own type; ``exit_code`` is inherited from
    ``CommandError``.
    """
class LogsError(CommandError):
    """Command error type for logs.

    Adds nothing beyond its own type; ``exit_code`` is inherited from
    ``CommandError``.
    """
class ResultsAlert(CommandError):
    """Command error whose ``exit_code`` is ``ExitCode.COMMAND_ALERT``."""

    exit_code = ExitCode.COMMAND_ALERT