mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
[dashboard] Add last_activity_at field to /api/component_activities (#27463)
Signed-off-by: Nikita Vemuri <nikitavemuri@gmail.com>
This commit is contained in:
parent
b7540d46cd
commit
f55f605cfd
3 changed files with 50 additions and 4 deletions
|
@ -15,6 +15,9 @@
|
|||
},
|
||||
"timestamp": {
|
||||
"type": ["number"]
|
||||
},
|
||||
"last_activity_at": {
|
||||
"type": ["number", "null"]
|
||||
}
|
||||
},
|
||||
"required": ["is_active"]
|
||||
|
|
|
@ -62,6 +62,14 @@ class RayActivityResponse(BaseModel, extra=Extra.allow):
|
|||
"This is in the format of seconds since unix epoch."
|
||||
),
|
||||
)
|
||||
last_activity_at: Optional[float] = Field(
|
||||
None,
|
||||
description=(
|
||||
"Timestamp when last actvity of this Ray component finished in format of "
|
||||
"seconds since unix epoch. This field does not need to be populated "
|
||||
"for Ray components where it is not meaningful."
|
||||
),
|
||||
)
|
||||
|
||||
@validator("reason", always=True)
|
||||
def reason_required(cls, v, values, **kwargs):
|
||||
|
@ -212,14 +220,31 @@ class APIHead(dashboard_utils.DashboardHeadModule):
|
|||
)
|
||||
|
||||
num_active_drivers = 0
|
||||
latest_job_end_time = 0
|
||||
for job_table_entry in reply.job_info_list:
|
||||
is_dead = bool(job_table_entry.is_dead)
|
||||
in_internal_namespace = job_table_entry.config.ray_namespace.startswith(
|
||||
"_ray_internal_"
|
||||
)
|
||||
latest_job_end_time = (
|
||||
max(latest_job_end_time, job_table_entry.end_time)
|
||||
if job_table_entry.end_time
|
||||
else latest_job_end_time
|
||||
)
|
||||
if not is_dead and not in_internal_namespace:
|
||||
num_active_drivers += 1
|
||||
|
||||
current_timestamp = datetime.now().timestamp()
|
||||
# Latest job end time must be before or equal to the current timestamp.
|
||||
# Job end times may be provided in epoch milliseconds. Check if this
|
||||
# is true, and convert to seconds
|
||||
if latest_job_end_time > current_timestamp:
|
||||
latest_job_end_time = latest_job_end_time / 1000
|
||||
assert current_timestamp >= latest_job_end_time, (
|
||||
f"Most recent job end time {latest_job_end_time} must be "
|
||||
f"before or equal to the current timestamp {current_timestamp}"
|
||||
)
|
||||
|
||||
is_active = (
|
||||
RayActivityStatus.ACTIVE
|
||||
if num_active_drivers > 0
|
||||
|
@ -230,7 +255,10 @@ class APIHead(dashboard_utils.DashboardHeadModule):
|
|||
reason=f"Number of active drivers: {num_active_drivers}"
|
||||
if num_active_drivers
|
||||
else None,
|
||||
timestamp=datetime.now().timestamp(),
|
||||
timestamp=current_timestamp,
|
||||
# If latest_job_end_time == 0, no jobs have finished yet so don't
|
||||
# populate last_activity_at
|
||||
last_activity_at=latest_job_end_time if latest_job_end_time else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("Failed to get activity status of Ray drivers.")
|
||||
|
|
|
@ -126,6 +126,9 @@ import ray
|
|||
ray.init(address="auto", namespace="{namespace}")
|
||||
"""
|
||||
run_string_as_driver_nonblocking(driver_template.format(namespace="my_namespace"))
|
||||
# Wait for above driver to start and finish
|
||||
time.sleep(2)
|
||||
|
||||
run_string_as_driver_nonblocking(driver_template.format(namespace="my_namespace"))
|
||||
run_string_as_driver_nonblocking(
|
||||
driver_template.format(namespace="_ray_internal_job_info_id1")
|
||||
|
@ -135,7 +138,7 @@ ray.init(address="auto", namespace="{namespace}")
|
|||
driver_template.format(namespace="_ray_internal_dashboard")
|
||||
)
|
||||
|
||||
# Wait 1 sec for drivers to start
|
||||
# Wait 1.5 sec for drivers to start (but not finish)
|
||||
time.sleep(1.5)
|
||||
|
||||
# Verify drivers are considered active after running script
|
||||
|
@ -158,10 +161,22 @@ ray.init(address="auto", namespace="{namespace}")
|
|||
|
||||
assert driver_ray_activity_response.is_active == "ACTIVE"
|
||||
# Drivers with namespace starting with "_ray_internal" are not
|
||||
# considered active drivers. Three active drivers are the two
|
||||
# considered active drivers. Two active drivers are the second one
|
||||
# run with namespace "my_namespace" and the one started
|
||||
# from ray_start_with_dashboard
|
||||
assert driver_ray_activity_response.reason == "Number of active drivers: 3"
|
||||
assert driver_ray_activity_response.reason == "Number of active drivers: 2"
|
||||
|
||||
# Get expected_last_activity at from snapshot endpoint which returns details
|
||||
# about all jobs
|
||||
jobs_snapshot_data = requests.get(f"{webui_url}/api/snapshot").json()["data"][
|
||||
"snapshot"
|
||||
]["jobs"]
|
||||
# Divide endTime by 1000 to convert from milliseconds to seconds
|
||||
expected_last_activity_at = max(
|
||||
[job.get("endTime", 0) / 1000 for (job_id, job) in jobs_snapshot_data.items()]
|
||||
)
|
||||
|
||||
assert driver_ray_activity_response.last_activity_at == expected_last_activity_at
|
||||
|
||||
|
||||
def test_snapshot(ray_start_with_dashboard):
|
||||
|
|
Loading…
Add table
Reference in a new issue