ray/dashboard/modules/snapshot/tests/test_job_submission.py
Archit Kulkarni 27e7c284ee
[Jobs] Change jobs start_time end_time from seconds to ms for consistency (#24123)
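In the snapshot, all timestamps are given in milliseconds except those of submitted jobs (the `jobSubmission` entries), whose `startTime` and `endTime` are given in seconds: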

```
wget -q -O - http://127.0.0.1:8265/api/snapshot

{
   "result":true,
   "msg":"hello",
   "data":{
      "snapshot":{
         "jobs":{
            "01000000":{
               "status":null,
               "statusMessage":null,
               "isDead":false,
               "startTime":1650315791249,
               "endTime":0,
               "config":{
                  "namespace":"_ray_internal_dashboard",
                  "metadata":{
                     
                  },
                  "runtimeEnv":{
                     
                  }
               }
            }
         },
         "jobSubmission":{
            "raysubmit9Bsej1Rtxqqetxup":{
               "status":"SUCCEEDED",
               "message":"Job finished successfully.",
               "errorType":null,
               "startTime":1650315925,
               "endTime":1650315926,
               "metadata":{
                  "creatorId":"usr_f6tgCaaFBJC6tZz1ZVzzAVf4"
               },
               "runtimeEnv":{
                  "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip"
               },
               "entrypoint":"ls"
            },
            "raysubmitEibragqkyg16Hpcj":{
               "status":"SUCCEEDED",
               "message":"Job finished successfully.",
               "errorType":null,
               "startTime":1650316039,
               "endTime":1650316041,
               "metadata":{
                  "creatorId":"usr_f6tgCaaFBJC6tZz1ZVzzAVf4"
               },
               "runtimeEnv":{
                  "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip"
               },
               "entrypoint":"echo hi"
            },
            "raysubmitSh1U7Grdsbqrf6Je":{
               "status":"SUCCEEDED",
               "message":"Job finished successfully.",
               "errorType":null,
               "startTime":1650316354,
               "endTime":1650316355,
               "metadata":{
                  "creatorId":"usr_f6tgCaaFBJC6tZz1ZVzzAVf4"
               },
               "runtimeEnv":{
                  "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip"
               },
               "entrypoint":"echo hi"
            }
         },
         "actors":{
            "8c8e28e642ba2cfd0457d45e01000000":{
               "jobId":"01000000",
               "state":"DEAD",
               "name":"_ray_internal_job_actor_raysubmit_9BSeJ1rTXQqEtXuP",
               "namespace":"_ray_internal_dashboard",
               "runtimeEnv":{
                  "uris":{
                     "workingDirUri":"gcs://_ray_pkg_6068c19fb3b8530f.zip"
                  },
                  "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip"
               },
               "startTime":1650315926620,
               "endTime":1650315927499,
               "isDetached":true,
               "resources":{
                  "node:172.31.73.39":0.001
               },
               "actorClass":"JobSupervisor",
               "currentWorkerId":"9628b5eb54e98353601413845fbca0a8c4e5379d1469ce95f3dfbace",
               "currentRayletId":"61ab3958258c82266b222f4691a53e71b6315e312408a21cb3350bc7",
               "ipAddress":"172.31.73.39",
               "port":10003,
               "metadata":{
                  
               }
            },
            "a7fd8354567129910c44298401000000":{
               "jobId":"01000000",
               "state":"DEAD",
               "name":"_ray_internal_job_actor_raysubmit_sh1u7grDsBQRf6je",
               "namespace":"_ray_internal_dashboard",
               "runtimeEnv":{
                  "uris":{
                     "workingDirUri":"gcs://_ray_pkg_6068c19fb3b8530f.zip"
                  },
                  "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip"
               },
               "startTime":1650316355718,
               "endTime":1650316356620,
               "isDetached":true,
               "resources":{
                  "node:172.31.73.39":0.001
               },
               "actorClass":"JobSupervisor",
               "currentWorkerId":"f07fd7a393898bf7d9027a5de0b0f566bb64ae80c0fcbcc107185505",
               "currentRayletId":"61ab3958258c82266b222f4691a53e71b6315e312408a21cb3350bc7",
               "ipAddress":"172.31.73.39",
               "port":10005,
               "metadata":{
                  
               }
            },
            "19ca9ad190f47bae963592d601000000":{
               "jobId":"01000000",
               "state":"DEAD",
               "name":"_ray_internal_job_actor_raysubmit_eibRAGqKyG16HpCj",
               "namespace":"_ray_internal_dashboard",
               "runtimeEnv":{
                  "uris":{
                     "workingDirUri":"gcs://_ray_pkg_6068c19fb3b8530f.zip"
                  },
                  "workingDir":"gcs://_ray_pkg_6068c19fb3b8530f.zip"
               },
               "startTime":1650316041089,
               "endTime":1650316041978,
               "isDetached":true,
               "resources":{
                  "node:172.31.73.39":0.001
               },
               "actorClass":"JobSupervisor",
               "currentWorkerId":"50b8e7e9a6981fe0270afd7f6387bc93788356822c9a664c2988f5ba",
               "currentRayletId":"61ab3958258c82266b222f4691a53e71b6315e312408a21cb3350bc7",
               "ipAddress":"172.31.73.39",
               "port":10004,
               "metadata":{
                  
               }
            }
         },
         "deployments":{
            
         },
         "sessionName":"session_2022-04-18_13-49-44_814862_139",
         "rayVersion":"1.12.0",
         "rayCommit":"f18fc31c7562990955556899090f8e8656b48d2d"
      }
   }
}
```

This PR fixes the inconsistency by changing the `jobSubmission` start/end timestamps from seconds to milliseconds.
2022-04-26 08:37:41 -07:00

162 lines
5.7 KiB
Python

import logging
import os
import sys
import time
import json
import jsonschema
import pprint
import pytest
import requests
from ray._private.test_utils import (
format_web_url,
wait_for_condition,
wait_until_server_available,
)
from ray.dashboard import dashboard
from ray.dashboard.tests.conftest import * # noqa
from ray.job_submission import JobSubmissionClient
logger = logging.getLogger(__name__)
def _get_snapshot(address: str):
    """Fetch the dashboard snapshot and validate it against the JSON schema.

    The payload is pretty-printed to stdout before validation so that it is
    visible in the test output.

    Args:
        address: Base URL of the dashboard; ``/api/snapshot`` is appended.

    Returns:
        The decoded JSON body of the snapshot response.

    Raises:
        requests.HTTPError: If the endpoint responds with an error status.
        jsonschema.ValidationError: If the payload does not match the schema.
    """
    response = requests.get(f"{address}/api/snapshot")
    response.raise_for_status()
    data = response.json()
    schema_path = os.path.join(
        os.path.dirname(dashboard.__file__), "modules/snapshot/snapshot_schema.json"
    )
    pprint.pprint(data)
    # Load the schema inside a context manager so the file handle is closed
    # deterministically; this helper is called repeatedly while polling.
    with open(schema_path) as schema_file:
        schema = json.load(schema_file)
    jsonschema.validate(instance=data, schema=schema)
    return data
def test_successful_job_status(
    ray_start_with_dashboard, disable_aiohttp_cache, enable_test_module
):
    """Submit a job that exits cleanly and poll the snapshot until it succeeds.

    Each poll checks both the legacy ``jobs`` section and the ``jobSubmission``
    section of the snapshot. Timestamps in ``jobSubmission`` are compared
    against values scaled to milliseconds.
    """
    address = ray_start_with_dashboard.address_info["webui_url"]
    assert wait_until_server_available(address)
    address = format_web_url(address)

    job_sleep_time_s = 5
    entrypoint = (
        'python -c"'
        "import ray;"
        "ray.init();"
        "import time;"
        f"time.sleep({job_sleep_time_s});"
        '"'
    )

    client = JobSubmissionClient(address)
    # Captured right before submission; used as the reference start time.
    start_time_s = int(time.time())
    job_id = client.submit_job(
        entrypoint=entrypoint,
        metadata={"ray_test_456": "456"},
        runtime_env={"env_vars": {"RAY_TEST_123": "123"}},
    )

    allowed_statuses = {"PENDING", "RUNNING", "SUCCEEDED"}

    def wait_for_job_to_succeed():
        snapshot = _get_snapshot(address)["data"]["snapshot"]
        legacy_done = False
        submission_done = False

        # Test legacy job snapshot (one driver per job).
        for legacy in snapshot["jobs"].values():
            if legacy["status"] is None:
                continue
            assert legacy["config"]["metadata"]["jobSubmissionId"] == job_id
            assert legacy["status"] in allowed_statuses
            assert legacy["statusMessage"] is not None
            legacy_done = legacy["status"] == "SUCCEEDED"

        # Test new jobs snapshot (0 to N drivers per job).
        for submission in snapshot["jobSubmission"].values():
            if submission["status"] is None:
                continue
            assert submission["entrypoint"] == entrypoint
            assert submission["status"] in allowed_statuses
            assert submission["message"] is not None
            # TODO(architkulkarni): Disable automatic camelcase.
            assert submission["runtimeEnv"] == {"envVars": {"RAYTest123": "123"}}
            assert submission["metadata"] == {"rayTest456": "456"}
            assert submission["errorType"] is None
            # Snapshot timestamps are in ms; allow two seconds of skew.
            assert abs(submission["startTime"] - start_time_s * 1000) <= 2000
            if submission["status"] == "SUCCEEDED":
                submission_done = True
                # The job sleeps, so it cannot finish sooner than that.
                min_end_time = submission["startTime"] + job_sleep_time_s * 1000
                assert submission["endTime"] >= min_end_time

        return legacy_done and submission_done

    wait_for_condition(wait_for_job_to_succeed, timeout=30)
def test_failed_job_status(
    ray_start_with_dashboard, disable_aiohttp_cache, enable_test_module
):
    """Submit a job that exits with status 1 and poll until it is FAILED.

    Each poll checks both the legacy ``jobs`` section and the ``jobSubmission``
    section of the snapshot. Timestamps in ``jobSubmission`` are compared
    against values scaled to milliseconds.
    """
    address = ray_start_with_dashboard.address_info["webui_url"]
    assert wait_until_server_available(address)
    address = format_web_url(address)

    job_sleep_time_s = 5
    entrypoint = (
        'python -c"'
        "import ray;"
        "ray.init();"
        "import time;"
        f"time.sleep({job_sleep_time_s});"
        "import sys;"
        "sys.exit(1);"
        '"'
    )

    # Captured before the client is created; used as the reference start time.
    start_time_s = int(time.time())
    client = JobSubmissionClient(address)
    job_id = client.submit_job(
        entrypoint=entrypoint,
        metadata={"ray_test_789": "789"},
        runtime_env={"env_vars": {"RAY_TEST_456": "456"}},
    )

    allowed_statuses = {"PENDING", "RUNNING", "FAILED"}

    def wait_for_job_to_fail():
        snapshot = _get_snapshot(address)["data"]["snapshot"]
        legacy_done = False
        submission_done = False

        # Test legacy job snapshot (one driver per job).
        for legacy in snapshot["jobs"].values():
            if legacy["status"] is None:
                continue
            assert legacy["config"]["metadata"]["jobSubmissionId"] == job_id
            assert legacy["status"] in allowed_statuses
            assert legacy["statusMessage"] is not None
            legacy_done = legacy["status"] == "FAILED"

        # Test new jobs snapshot (0 to N drivers per job).
        for submission in snapshot["jobSubmission"].values():
            if submission["status"] is None:
                continue
            assert submission["entrypoint"] == entrypoint
            assert submission["status"] in allowed_statuses
            assert submission["message"] is not None
            # TODO(architkulkarni): Disable automatic camelcase.
            assert submission["runtimeEnv"] == {"envVars": {"RAYTest456": "456"}}
            assert submission["metadata"] == {"rayTest789": "789"}
            assert submission["errorType"] is None
            # Snapshot timestamps are in ms; allow two seconds of skew.
            assert abs(submission["startTime"] - start_time_s * 1000) <= 2000
            if submission["status"] == "FAILED":
                submission_done = True
                # The job sleeps before exiting, so it cannot end sooner.
                min_end_time = submission["startTime"] + job_sleep_time_s * 1000
                assert submission["endTime"] >= min_end_time

        return legacy_done and submission_done

    wait_for_condition(wait_for_job_to_fail, timeout=25)
if __name__ == "__main__":
    # Run this file's tests verbosely and exit with pytest's return code.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)