mirror of
https://github.com/vale981/ray
synced 2025-03-12 06:06:39 -04:00

In the snapshot, all timestamps are given in ms except for Jobs: ``` wget -q -O - http://127.0.0.1:8265/api/snapshot { "result":true, "msg":"hello", "data":{ "snapshot":{ "jobs":{ "01000000":{ "status":null, "statusMessage":null, "isDead":false, "startTime":1650315791249, "endTime":0, "config":{ "namespace":"_ray_internal_dashboard", "metadata":{ }, "runtimeEnv":{ } } } }, "jobSubmission":{ "raysubmit9Bsej1Rtxqqetxup":{ "status":"SUCCEEDED", "message":"Job finished successfully.", "errorType":null, "startTime":1650315925, "endTime":1650315926, "metadata":{ "creatorId":"usr_f6tgCaaFBJC6tZz1ZVzzAVf4" }, "runtimeEnv":{ "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip" }, "entrypoint":"ls" }, "raysubmitEibragqkyg16Hpcj":{ "status":"SUCCEEDED", "message":"Job finished successfully.", "errorType":null, "startTime":1650316039, "endTime":1650316041, "metadata":{ "creatorId":"usr_f6tgCaaFBJC6tZz1ZVzzAVf4" }, "runtimeEnv":{ "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip" }, "entrypoint":"echo hi" }, "raysubmitSh1U7Grdsbqrf6Je":{ "status":"SUCCEEDED", "message":"Job finished successfully.", "errorType":null, "startTime":1650316354, "endTime":1650316355, "metadata":{ "creatorId":"usr_f6tgCaaFBJC6tZz1ZVzzAVf4" }, "runtimeEnv":{ "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip" }, "entrypoint":"echo hi" } }, "actors":{ "8c8e28e642ba2cfd0457d45e01000000":{ "jobId":"01000000", "state":"DEAD", "name":"_ray_internal_job_actor_raysubmit_9BSeJ1rTXQqEtXuP", "namespace":"_ray_internal_dashboard", "runtimeEnv":{ "uris":{ "workingDirUri":"gcs://_ray_pkg_6068c19fb3b8530f.zip" }, "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip" }, "startTime":1650315926620, "endTime":1650315927499, "isDetached":true, "resources":{ "node:172.31.73.39":0.001 }, "actorClass":"JobSupervisor", "currentWorkerId":"9628b5eb54e98353601413845fbca0a8c4e5379d1469ce95f3dfbace", "currentRayletId":"61ab3958258c82266b222f4691a53e71b6315e312408a21cb3350bc7", "ipAddress":"172.31.73.39", "port":10003, "metadata":{ } }, 
"a7fd8354567129910c44298401000000":{ "jobId":"01000000", "state":"DEAD", "name":"_ray_internal_job_actor_raysubmit_sh1u7grDsBQRf6je", "namespace":"_ray_internal_dashboard", "runtimeEnv":{ "uris":{ "workingDirUri":"gcs://_ray_pkg_6068c19fb3b8530f.zip" }, "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip" }, "startTime":1650316355718, "endTime":1650316356620, "isDetached":true, "resources":{ "node:172.31.73.39":0.001 }, "actorClass":"JobSupervisor", "currentWorkerId":"f07fd7a393898bf7d9027a5de0b0f566bb64ae80c0fcbcc107185505", "currentRayletId":"61ab3958258c82266b222f4691a53e71b6315e312408a21cb3350bc7", "ipAddress":"172.31.73.39", "port":10005, "metadata":{ } }, "19ca9ad190f47bae963592d601000000":{ "jobId":"01000000", "state":"DEAD", "name":"_ray_internal_job_actor_raysubmit_eibRAGqKyG16HpCj", "namespace":"_ray_internal_dashboard", "runtimeEnv":{ "uris":{ "workingDirUri":"gcs://_ray_pkg_6068c19fb3b8530f.zip" }, "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip" }, "startTime":1650316041089, "endTime":1650316041978, "isDetached":true, "resources":{ "node:172.31.73.39":0.001 }, "actorClass":"JobSupervisor", "currentWorkerId":"50b8e7e9a6981fe0270afd7f6387bc93788356822c9a664c2988f5ba", "currentRayletId":"61ab3958258c82266b222f4691a53e71b6315e312408a21cb3350bc7", "ipAddress":"172.31.73.39", "port":10004, "metadata":{ } } }, "deployments":{ }, "sessionName":"session_2022-04-18_13-49-44_814862_139", "rayVersion":"1.12.0", "rayCommit":"f18fc31c7562990955556899090f8e8656b48d2d" } } } ``` This PR fixes the inconsistency by changing Jobs start/end timestamps to ms.
162 lines
5.7 KiB
Python
162 lines
5.7 KiB
Python
import logging
|
|
import os
|
|
import sys
|
|
import time
|
|
import json
|
|
import jsonschema
|
|
|
|
import pprint
|
|
import pytest
|
|
import requests
|
|
|
|
from ray._private.test_utils import (
|
|
format_web_url,
|
|
wait_for_condition,
|
|
wait_until_server_available,
|
|
)
|
|
from ray.dashboard import dashboard
|
|
from ray.dashboard.tests.conftest import * # noqa
|
|
from ray.job_submission import JobSubmissionClient
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _get_snapshot(address: str):
    """Fetch the dashboard snapshot and validate it against its JSON schema.

    Args:
        address: Base URL of the dashboard; ``/api/snapshot`` is appended to it.

    Returns:
        The decoded JSON body of the snapshot response.

    Raises:
        requests.HTTPError: If the endpoint responds with an error status.
        jsonschema.ValidationError: If the payload does not match the schema.
    """
    response = requests.get(f"{address}/api/snapshot")
    response.raise_for_status()
    data = response.json()

    # The schema file lives next to the dashboard package.
    schema_path = os.path.join(
        os.path.dirname(dashboard.__file__), "modules/snapshot/snapshot_schema.json"
    )

    # Print the payload before validating so it is visible in the test output
    # even when validation fails.
    pprint.pprint(data)

    # Use a context manager so the schema file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(schema_path, encoding="utf-8") as schema_file:
        schema = json.load(schema_file)

    jsonschema.validate(instance=data, schema=schema)
    return data
|
|
|
|
|
|
def test_successful_job_status(
    ray_start_with_dashboard, disable_aiohttp_cache, enable_test_module
):
    """Submit a job that sleeps and exits normally, then poll the snapshot.

    The poll succeeds once both the legacy ``jobs`` section and the
    ``jobSubmission`` section of the snapshot report ``SUCCEEDED``.
    """
    webui_url = ray_start_with_dashboard.address_info["webui_url"]
    assert wait_until_server_available(webui_url)
    address = format_web_url(webui_url)

    job_sleep_time_s = 5
    entrypoint = (
        'python -c"'
        "import ray;"
        "ray.init();"
        "import time;"
        f"time.sleep({job_sleep_time_s});"
        '"'
    )

    client = JobSubmissionClient(address)
    # Wall-clock reference (seconds) taken right before submission.
    start_time_s = int(time.time())
    job_id = client.submit_job(
        entrypoint=entrypoint,
        metadata={"ray_test_456": "456"},
        runtime_env={"env_vars": {"RAY_TEST_123": "123"}},
    )

    allowed_statuses = {"PENDING", "RUNNING", "SUCCEEDED"}

    def wait_for_job_to_succeed():
        snapshot = _get_snapshot(address)["data"]["snapshot"]
        legacy_job_succeeded = False
        job_succeeded = False

        # Test legacy job snapshot (one driver per job).
        for legacy_entry in snapshot["jobs"].values():
            legacy_status = legacy_entry["status"]
            if legacy_status is None:
                continue
            assert legacy_entry["config"]["metadata"]["jobSubmissionId"] == job_id
            assert legacy_status in allowed_statuses
            assert legacy_entry["statusMessage"] is not None
            legacy_job_succeeded = legacy_status == "SUCCEEDED"

        # Test new jobs snapshot (0 to N drivers per job).
        for entry in snapshot["jobSubmission"].values():
            status = entry["status"]
            if status is None:
                continue
            assert entry["entrypoint"] == entrypoint
            assert status in allowed_statuses
            assert entry["message"] is not None
            # TODO(architkulkarni): Disable automatic camelcase.
            assert entry["runtimeEnv"] == {"envVars": {"RAYTest123": "123"}}
            assert entry["metadata"] == {"rayTest456": "456"}
            assert entry["errorType"] is None
            # startTime is compared in milliseconds, with a 2000 ms tolerance
            # around the submission time recorded above.
            assert abs(entry["startTime"] - start_time_s * 1000) <= 2000
            if status == "SUCCEEDED":
                job_succeeded = True
                # The job slept for job_sleep_time_s, so it cannot have ended
                # sooner than that many milliseconds after it started.
                earliest_end_ms = entry["startTime"] + job_sleep_time_s * 1000
                assert entry["endTime"] >= earliest_end_ms

        return legacy_job_succeeded and job_succeeded

    wait_for_condition(wait_for_job_to_succeed, timeout=30)
|
|
|
|
|
|
def test_failed_job_status(
    ray_start_with_dashboard, disable_aiohttp_cache, enable_test_module
):
    """Submit a job that sleeps and then exits with code 1, then poll the snapshot.

    The poll succeeds once both the legacy ``jobs`` section and the
    ``jobSubmission`` section of the snapshot report ``FAILED``.
    """
    webui_url = ray_start_with_dashboard.address_info["webui_url"]
    assert wait_until_server_available(webui_url)
    address = format_web_url(webui_url)

    job_sleep_time_s = 5
    entrypoint = (
        'python -c"'
        "import ray;"
        "ray.init();"
        "import time;"
        f"time.sleep({job_sleep_time_s});"
        "import sys;"
        "sys.exit(1);"
        '"'
    )

    # Wall-clock reference (seconds) taken before the client is created.
    start_time_s = int(time.time())
    client = JobSubmissionClient(address)
    job_id = client.submit_job(
        entrypoint=entrypoint,
        metadata={"ray_test_789": "789"},
        runtime_env={"env_vars": {"RAY_TEST_456": "456"}},
    )

    allowed_statuses = {"PENDING", "RUNNING", "FAILED"}

    def wait_for_job_to_fail():
        snapshot = _get_snapshot(address)["data"]["snapshot"]

        legacy_job_failed = False
        job_failed = False

        # Test legacy job snapshot (one driver per job).
        for legacy_entry in snapshot["jobs"].values():
            legacy_status = legacy_entry["status"]
            if legacy_status is None:
                continue
            assert legacy_entry["config"]["metadata"]["jobSubmissionId"] == job_id
            assert legacy_status in allowed_statuses
            assert legacy_entry["statusMessage"] is not None
            legacy_job_failed = legacy_status == "FAILED"

        # Test new jobs snapshot (0 to N drivers per job).
        for entry in snapshot["jobSubmission"].values():
            status = entry["status"]
            if status is None:
                continue
            assert entry["entrypoint"] == entrypoint
            assert status in allowed_statuses
            assert entry["message"] is not None
            # TODO(architkulkarni): Disable automatic camelcase.
            assert entry["runtimeEnv"] == {"envVars": {"RAYTest456": "456"}}
            assert entry["metadata"] == {"rayTest789": "789"}
            assert entry["errorType"] is None
            # startTime is compared in milliseconds, with a 2000 ms tolerance
            # around the submission time recorded above.
            assert abs(entry["startTime"] - start_time_s * 1000) <= 2000
            if status == "FAILED":
                job_failed = True
                # The job slept for job_sleep_time_s before exiting, so it
                # cannot have ended sooner than that many ms after it started.
                earliest_end_ms = entry["startTime"] + job_sleep_time_s * 1000
                assert entry["endTime"] >= earliest_end_ms

        return legacy_job_failed and job_failed

    wait_for_condition(wait_for_job_to_fail, timeout=25)
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this file's tests verbosely under pytest when executed as a script,
    # and pass pytest's result on as the process exit status.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
|