mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
Raise an exception in Jenkins tests after a timeout. (#1477)
This commit is contained in:
parent
8c96c34bb3
commit
4c6dae5517
1 changed file with 22 additions and 1 deletion
|
@@ -6,6 +6,7 @@ import argparse
|
|||
import numpy as np
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
@@ -235,7 +236,8 @@ class DockerRunner(object):
|
|||
|
||||
return success
|
||||
|
||||
def run_test(self, test_script, num_drivers, driver_locations=None):
|
||||
def run_test(self, test_script, num_drivers, driver_locations=None,
|
||||
timeout_seconds=600):
|
||||
"""Run a test script.
|
||||
|
||||
Run a test using the Ray cluster.
|
||||
|
@@ -246,9 +248,15 @@ class DockerRunner(object):
|
|||
driver_locations: A list of the indices of the containers that the
|
||||
different copies of the test script should be run on. If this
|
||||
is None, then the containers will be chosen randomly.
|
||||
timeout_seconds: The amount of time in seconds to wait before
|
||||
considering the test to have failed. When the timeout expires,
|
||||
this will cause this function to raise an exception.
|
||||
|
||||
Returns:
|
||||
A dictionary with information about the test script run.
|
||||
|
||||
Raises:
|
||||
Exception: An exception is raised if the timeout expires.
|
||||
"""
|
||||
all_container_ids = ([self.head_container_id] +
|
||||
self.worker_container_ids)
|
||||
|
@@ -256,6 +264,15 @@ class DockerRunner(object):
|
|||
driver_locations = [np.random.randint(0, len(all_container_ids))
|
||||
for _ in range(num_drivers)]
|
||||
|
||||
# Define a signal handler and set an alarm to go off in
|
||||
# timeout_seconds.
|
||||
def handler(signum, frame):
|
||||
raise RuntimeError("This test timed out after {} seconds."
|
||||
.format(timeout_seconds))
|
||||
|
||||
signal.signal(signal.SIGALRM, handler)
|
||||
signal.alarm(timeout_seconds)
|
||||
|
||||
# Start the different drivers.
|
||||
driver_processes = []
|
||||
for i in range(len(driver_locations)):
|
||||
|
@@ -280,6 +297,10 @@ class DockerRunner(object):
|
|||
print(stderr_data)
|
||||
results.append({"success": p.returncode == 0,
|
||||
"return_code": p.returncode})
|
||||
|
||||
# Disable the alarm.
|
||||
signal.alarm(0)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue