mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
It seems like the S3 read sometimes fails (#22214). I found out the file actually does exist in S3, so it is highly likely a transient error. This PR adds a retry mechanism to avoid the issue.
It seems like the S3 read sometimes fails (#22214). I found out the file actually does exist in S3, so it is highly likely a transient error. This PR adds a retry mechanism to avoid the issue.
This commit is contained in:
parent
531e215921
commit
640d92c385
1 changed file with 13 additions and 1 deletion
|
@ -53,7 +53,19 @@ def load_dataset(client, data_dir, s3_bucket, nbytes, npartitions):
|
||||||
f"s3://{s3_bucket}/df-{num_bytes_per_partition}-{i}.parquet.gzip"
|
f"s3://{s3_bucket}/df-{num_bytes_per_partition}-{i}.parquet.gzip"
|
||||||
for i in range(npartitions)
|
for i in range(npartitions)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
df = None
|
||||||
|
max_retry = 3
|
||||||
|
retry = 0
|
||||||
|
while not df and retry < max_retry:
|
||||||
|
try:
|
||||||
df = dd.read_parquet(filenames)
|
df = dd.read_parquet(filenames)
|
||||||
|
except FileNotFoundError as e:
|
||||||
|
print(f"Failed to load a file. {e}")
|
||||||
|
# Wait a little bit before retrying.
|
||||||
|
time.sleep(30)
|
||||||
|
retry += 1
|
||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue