It seems the S3 read sometimes fails (#22214). I found that the file actually does exist in S3, so this is highly likely a transient error. This PR adds a retry mechanism to avoid the issue.
This commit is contained in:
parent
531e215921
commit
640d92c385
1 changed file with 13 additions and 1 deletion
@@ -53,7 +53,19 @@ def load_dataset(client, data_dir, s3_bucket, nbytes, npartitions):
        f"s3://{s3_bucket}/df-{num_bytes_per_partition}-{i}.parquet.gzip"
        for i in range(npartitions)
    ]

    df = None
    max_retry = 3
    retry = 0
    while df is None and retry < max_retry:
        try:
            df = dd.read_parquet(filenames)
        except FileNotFoundError as e:
            print(f"Failed to load a file. {e}")
            # Wait a little bit before retrying.
            time.sleep(30)
            retry += 1

    return df
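For context, the retry loop above can also be factored into a small standalone helper. This is a minimal sketch, assuming dask is installed; the helper name read_parquet_with_retry and the max_retry/wait_s defaults are illustrative and not part of the Ray codebase.

import time

import dask.dataframe as dd


def read_parquet_with_retry(filenames, max_retry=3, wait_s=30):
    """Retry dd.read_parquet when S3 transiently reports a missing file.

    Hypothetical helper mirroring the retry loop in this commit;
    the name and defaults are illustrative.
    """
    for attempt in range(max_retry):
        try:
            return dd.read_parquet(filenames)
        except FileNotFoundError as e:
            # The file exists in S3, so treat the error as transient.
            print(f"Failed to load a file (attempt {attempt + 1}/{max_retry}). {e}")
            if attempt + 1 < max_retry:
                # Wait a little bit before retrying.
                time.sleep(wait_s)
    return None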