[CI] Add a retry helper to e2e.py (#19045)

This commit is contained in:
Jiajun Yao 2021-10-02 09:54:41 -07:00 committed by GitHub
parent 28d905dcb0
commit b8ef4f0a34
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -267,6 +267,23 @@ REPORT_S = 30
RETRY_MULTIPLIER = 2
def exponential_backoff_retry(f, retry_exceptions, initial_retry_delay_s,
max_retries):
retry_cnt = 0
retry_delay_s = initial_retry_delay_s
while True:
try:
return f()
except retry_exceptions as e:
retry_cnt += 1
if retry_cnt > max_retries:
raise
logger.info(f"Retry function call failed due to {e} "
f"in {retry_delay_s} seconds...")
time.sleep(retry_delay_s)
retry_delay_s *= RETRY_MULTIPLIER
def maybe_fetch_api_token():
if GLOBAL_CONFIG["ANYSCALE_CLI_TOKEN"] is None:
print("Missing ANYSCALE_CLI_TOKEN, retrieving from AWS secrets store")
@ -524,38 +541,21 @@ def report_result(test_suite: str, test_name: str, status: str, logs: str,
}
}]
retry_cnt = 0
# Default boto3 call timeout is 45 seconds.
retry_delay_s = 64
MAX_RDS_RETRY = 3
db_updated = False
while retry_cnt < MAX_RDS_RETRY and not db_updated:
try:
rds_data_client.execute_statement(
database=GLOBAL_CONFIG["RELEASE_AWS_DB_NAME"],
parameters=parameters,
secretArn=GLOBAL_CONFIG["RELEASE_AWS_DB_SECRET_ARN"],
resourceArn=GLOBAL_CONFIG["RELEASE_AWS_DB_RESOURCE_ARN"],
schema=schema,
sql=sql,
)
logger.info("Result has been persisted to the databse")
db_updated = True
except rds_data_client.exceptions.StatementTimeoutException as e:
logger.info(
f"Database operation failis due to time out. Error: {e}")
logger.info(f"Retry count: {retry_cnt} / {MAX_RDS_RETRY}")
logger.info(f"Retrying in {retry_delay_s} seconds...")
time.sleep(retry_delay_s)
retry_delay_s *= RETRY_MULTIPLIER
finally:
retry_cnt += 1
if not db_updated:
raise CommandTimeoutError(
"RDS command failed after retrying "
f"{MAX_RDS_RETRY} times. Check logs for more details")
exponential_backoff_retry(
lambda: rds_data_client.execute_statement(
database=GLOBAL_CONFIG["RELEASE_AWS_DB_NAME"],
parameters=parameters,
secretArn=GLOBAL_CONFIG["RELEASE_AWS_DB_SECRET_ARN"],
resourceArn=GLOBAL_CONFIG["RELEASE_AWS_DB_RESOURCE_ARN"],
schema=schema,
sql=sql),
retry_exceptions=rds_data_client.exceptions.StatementTimeoutException,
initial_retry_delay_s=retry_delay_s,
max_retries=MAX_RDS_RETRY)
logger.info("Result has been persisted to the databse")
def log_results_and_artifacts(result: Dict):
@ -921,7 +921,11 @@ def wait_for_session_command_to_complete(create_session_command_result,
# Sleep 1 sec before next check.
time.sleep(1)
result = sdk.get_session_command(session_command_id=scd_id)
result = exponential_backoff_retry(
lambda: sdk.get_session_command(session_command_id=scd_id),
retry_exceptions=Exception,
initial_retry_delay_s=10,
max_retries=3)
completed = result.result.finished_at
if state_str == "CMD_RUN":
@ -952,10 +956,14 @@ def wait_for_session_command_to_complete(create_session_command_result,
def get_command_logs(session_controller: SessionController,
scd_id: str,
lines: int = 50):
result = session_controller.api_client.get_execution_logs_api_v2_session_commands_session_command_id_execution_logs_get( # noqa: E501
session_command_id=scd_id,
start_line=-1 * lines,
end_line=0)
result = exponential_backoff_retry(
lambda: session_controller.api_client.get_execution_logs_api_v2_session_commands_session_command_id_execution_logs_get( # noqa: E501
session_command_id=scd_id,
start_line=-1 * lines,
end_line=0),
retry_exceptions=Exception,
initial_retry_delay_s=10,
max_retries=3)
return result.result.lines