From 640d92c385d6abceae74d1d439e0d218f7d8db25 Mon Sep 17 00:00:00 2001
From: SangBin Cho
Date: Sat, 12 Feb 2022 11:58:58 +0900
Subject: [PATCH] It seems like the S3 read sometimes fails; #22214.

I found out the file actually does exist in S3, so it is highly likely a
transient error. This PR adds a retry mechanism to avoid the issue.
---
 .../nightly_tests/dask_on_ray/dask_on_ray_sort.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/release/nightly_tests/dask_on_ray/dask_on_ray_sort.py b/release/nightly_tests/dask_on_ray/dask_on_ray_sort.py
index a512c4055..92999e61a 100644
--- a/release/nightly_tests/dask_on_ray/dask_on_ray_sort.py
+++ b/release/nightly_tests/dask_on_ray/dask_on_ray_sort.py
@@ -53,7 +53,19 @@ def load_dataset(client, data_dir, s3_bucket, nbytes, npartitions):
         f"s3://{s3_bucket}/df-{num_bytes_per_partition}-{i}.parquet.gzip"
         for i in range(npartitions)
     ]
-    df = dd.read_parquet(filenames)
+
+    df = None
+    max_retry = 3
+    retry = 0
+    # Compare against None: `not df` on a Dask DataFrame raises an error.
+    while df is None and retry < max_retry:
+        try:
+            df = dd.read_parquet(filenames)
+        except FileNotFoundError as e:
+            print(f"Failed to load a file. {e}")
+            # Wait a little bit before retrying.
+            time.sleep(30)
+            retry += 1
+    return df
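
One caveat of the loop above: if all three attempts fail, it returns None,
which only surfaces later as a confusing downstream error. Below is a
minimal, generalized sketch of the same retry pattern that re-raises the
last error instead. The helper name `read_with_retry`, its parameters, and
the exponential backoff are illustrative assumptions, not part of this PR:

import time
from typing import Callable, Optional, TypeVar

T = TypeVar("T")

def read_with_retry(read_fn: Callable[[], T], max_retry: int = 3,
                    backoff_s: float = 30.0) -> T:
    # Call read_fn, retrying on transient FileNotFoundError failures.
    # Requires max_retry >= 1; re-raises the last error once exhausted
    # instead of silently returning None.
    last_err: Optional[FileNotFoundError] = None
    for attempt in range(max_retry):
        try:
            return read_fn()
        except FileNotFoundError as e:
            last_err = e
            print(f"Attempt {attempt + 1}/{max_retry} failed: {e}")
            if attempt + 1 < max_retry:
                # Back off 30s, 60s, 120s, ... before the next attempt.
                time.sleep(backoff_s * (2 ** attempt))
    assert last_err is not None  # max_retry >= 1 guarantees one attempt
    raise last_err

The call site in load_dataset would then be a one-liner (hypothetical):

df = read_with_retry(lambda: dd.read_parquet(filenames))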