mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
[Data][Split] Fix split ownership (#27149)
fb54679
introduced a bug by calling ray.put in the remote _split_single_block. This changes the ownership from driver to the worker who runs _split_single_block, which breaks dataset's lineage requirement and failed the chaos test.
To fix the issue we need to ensure the split block refs are created by the driver, which we can achieved by creating the block_refs as part of function returns.
This commit is contained in:
parent
df124d0ad5
commit
0d49901651
2 changed files with 89 additions and 46 deletions
|
@ -1,6 +1,6 @@
|
||||||
import itertools
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterable, Tuple, List
|
from typing import Union, Iterable, Tuple, List
|
||||||
|
|
||||||
import ray
|
import ray
|
||||||
from ray.data._internal.block_list import BlockList
|
from ray.data._internal.block_list import BlockList
|
||||||
|
@ -93,9 +93,23 @@ def _split_single_block(
|
||||||
block: Block,
|
block: Block,
|
||||||
meta: BlockMetadata,
|
meta: BlockMetadata,
|
||||||
split_indices: List[int],
|
split_indices: List[int],
|
||||||
) -> Tuple[int, BlockPartition]:
|
) -> Tuple[Union[Tuple[int, List[BlockMetadata]], Block], ...]:
|
||||||
"""Split the provided block at the given indices."""
|
"""Split the provided block at the given indices.
|
||||||
split_result = []
|
|
||||||
|
Args:
|
||||||
|
block_id: the id of this block in the block list.
|
||||||
|
block: block to be split.
|
||||||
|
meta: metadata of the block, we expect meta.num is valid.
|
||||||
|
split_indices: the indices where the block should be split.
|
||||||
|
Returns:
|
||||||
|
returns block_id, split blocks metadata, and a list of blocks
|
||||||
|
in the following form. We return blocks in this way
|
||||||
|
so that the owner of blocks could be the caller(driver)
|
||||||
|
instead of worker itself.
|
||||||
|
Tuple(block_id, split_blocks_meta), block0, block1 ...
|
||||||
|
"""
|
||||||
|
split_meta = []
|
||||||
|
split_blocks = []
|
||||||
block_accessor = BlockAccessor.for_block(block)
|
block_accessor = BlockAccessor.for_block(block)
|
||||||
prev_index = 0
|
prev_index = 0
|
||||||
# append one more entry at the last so we don't
|
# append one more entry at the last so we don't
|
||||||
|
@ -106,16 +120,19 @@ def _split_single_block(
|
||||||
stats = BlockExecStats.builder()
|
stats = BlockExecStats.builder()
|
||||||
split_block = block_accessor.slice(prev_index, index, copy=True)
|
split_block = block_accessor.slice(prev_index, index, copy=True)
|
||||||
accessor = BlockAccessor.for_block(split_block)
|
accessor = BlockAccessor.for_block(split_block)
|
||||||
split_meta = BlockMetadata(
|
_meta = BlockMetadata(
|
||||||
num_rows=accessor.num_rows(),
|
num_rows=accessor.num_rows(),
|
||||||
size_bytes=accessor.size_bytes(),
|
size_bytes=accessor.size_bytes(),
|
||||||
schema=meta.schema,
|
schema=meta.schema,
|
||||||
input_files=meta.input_files,
|
input_files=meta.input_files,
|
||||||
exec_stats=stats.build(),
|
exec_stats=stats.build(),
|
||||||
)
|
)
|
||||||
split_result.append((ray.put(split_block), split_meta))
|
split_meta.append(_meta)
|
||||||
|
split_blocks.append(split_block)
|
||||||
prev_index = index
|
prev_index = index
|
||||||
return (block_id, split_result)
|
results = [(block_id, split_meta)]
|
||||||
|
results.extend(split_blocks)
|
||||||
|
return tuple(results)
|
||||||
|
|
||||||
|
|
||||||
def _drop_empty_block_split(block_split_indices: List[int], num_rows: int) -> List[int]:
|
def _drop_empty_block_split(block_split_indices: List[int], num_rows: int) -> List[int]:
|
||||||
|
@ -145,8 +162,10 @@ def _split_all_blocks(
|
||||||
blocks_with_metadata = block_list.get_blocks_with_metadata()
|
blocks_with_metadata = block_list.get_blocks_with_metadata()
|
||||||
all_blocks_split_results: List[BlockPartition] = [None] * len(blocks_with_metadata)
|
all_blocks_split_results: List[BlockPartition] = [None] * len(blocks_with_metadata)
|
||||||
|
|
||||||
split_single_block_futures = []
|
per_block_split_metadata_futures = []
|
||||||
|
per_block_split_block_refs = []
|
||||||
|
|
||||||
|
# tracking splitted blocks for gc.
|
||||||
blocks_splitted = []
|
blocks_splitted = []
|
||||||
for block_id, block_split_indices in enumerate(per_block_split_indices):
|
for block_id, block_split_indices in enumerate(per_block_split_indices):
|
||||||
(block_ref, meta) = blocks_with_metadata[block_id]
|
(block_ref, meta) = blocks_with_metadata[block_id]
|
||||||
|
@ -158,19 +177,27 @@ def _split_all_blocks(
|
||||||
all_blocks_split_results[block_id] = [(block_ref, meta)]
|
all_blocks_split_results[block_id] = [(block_ref, meta)]
|
||||||
else:
|
else:
|
||||||
# otherwise call split remote function.
|
# otherwise call split remote function.
|
||||||
split_single_block_futures.append(
|
object_refs = split_single_block.options(
|
||||||
split_single_block.options(scheduling_strategy="SPREAD").remote(
|
scheduling_strategy="SPREAD", num_returns=2 + len(block_split_indices)
|
||||||
block_id,
|
).remote(
|
||||||
block_ref,
|
block_id,
|
||||||
meta,
|
block_ref,
|
||||||
block_split_indices,
|
meta,
|
||||||
)
|
block_split_indices,
|
||||||
)
|
)
|
||||||
|
per_block_split_metadata_futures.append(object_refs[0])
|
||||||
|
per_block_split_block_refs.append(object_refs[1:])
|
||||||
|
|
||||||
blocks_splitted.append(block_ref)
|
blocks_splitted.append(block_ref)
|
||||||
if split_single_block_futures:
|
|
||||||
split_single_block_results = ray.get(split_single_block_futures)
|
if per_block_split_metadata_futures:
|
||||||
for block_id, block_split_result in split_single_block_results:
|
# only get metadata.
|
||||||
all_blocks_split_results[block_id] = block_split_result
|
per_block_split_metadata = ray.get(per_block_split_metadata_futures)
|
||||||
|
for (block_id, meta), block_refs in zip(
|
||||||
|
per_block_split_metadata, per_block_split_block_refs
|
||||||
|
):
|
||||||
|
assert len(meta) == len(block_refs)
|
||||||
|
all_blocks_split_results[block_id] = zip(block_refs, meta)
|
||||||
|
|
||||||
# We make a copy for the blocks that have been splitted, so the input blocks
|
# We make a copy for the blocks that have been splitted, so the input blocks
|
||||||
# can be cleared if they are owned by consumer (consumer-owned blocks will
|
# can be cleared if they are owned by consumer (consumer-owned blocks will
|
||||||
|
|
|
@ -522,47 +522,63 @@ def _create_blocklist(blocks):
|
||||||
|
|
||||||
def test_split_single_block(ray_start_regular_shared):
|
def test_split_single_block(ray_start_regular_shared):
|
||||||
block = [1, 2, 3]
|
block = [1, 2, 3]
|
||||||
meta = _create_meta(3)
|
metadata = _create_meta(3)
|
||||||
|
|
||||||
block_id, splits = ray.get(
|
results = ray.get(
|
||||||
ray.remote(_split_single_block).remote(234, block, meta, [])
|
ray.remote(_split_single_block)
|
||||||
|
.options(num_returns=2)
|
||||||
|
.remote(234, block, metadata, [])
|
||||||
)
|
)
|
||||||
|
block_id, meta = results[0]
|
||||||
|
blocks = results[1:]
|
||||||
assert 234 == block_id
|
assert 234 == block_id
|
||||||
assert len(splits) == 1
|
assert len(blocks) == 1
|
||||||
assert ray.get(splits[0][0]) == [1, 2, 3]
|
assert blocks[0] == [1, 2, 3]
|
||||||
assert splits[0][1].num_rows == 3
|
assert meta[0].num_rows == 3
|
||||||
|
|
||||||
block_id, splits = ray.get(
|
results = ray.get(
|
||||||
ray.remote(_split_single_block).remote(234, block, meta, [1])
|
ray.remote(_split_single_block)
|
||||||
|
.options(num_returns=3)
|
||||||
|
.remote(234, block, metadata, [1])
|
||||||
)
|
)
|
||||||
|
block_id, meta = results[0]
|
||||||
|
blocks = results[1:]
|
||||||
assert 234 == block_id
|
assert 234 == block_id
|
||||||
assert len(splits) == 2
|
assert len(blocks) == 2
|
||||||
assert ray.get(splits[0][0]) == [1]
|
assert blocks[0] == [1]
|
||||||
assert splits[0][1].num_rows == 1
|
assert meta[0].num_rows == 1
|
||||||
assert ray.get(splits[1][0]) == [2, 3]
|
assert blocks[1] == [2, 3]
|
||||||
assert splits[1][1].num_rows == 2
|
assert meta[1].num_rows == 2
|
||||||
|
|
||||||
block_id, splits = ray.get(
|
results = ray.get(
|
||||||
ray.remote(_split_single_block).remote(234, block, meta, [0, 1, 1, 3])
|
ray.remote(_split_single_block)
|
||||||
|
.options(num_returns=6)
|
||||||
|
.remote(234, block, metadata, [0, 1, 1, 3])
|
||||||
)
|
)
|
||||||
|
block_id, meta = results[0]
|
||||||
|
blocks = results[1:]
|
||||||
assert 234 == block_id
|
assert 234 == block_id
|
||||||
assert len(splits) == 5
|
assert len(blocks) == 5
|
||||||
assert ray.get(splits[0][0]) == []
|
assert blocks[0] == []
|
||||||
assert ray.get(splits[1][0]) == [1]
|
assert blocks[1] == [1]
|
||||||
assert ray.get(splits[2][0]) == []
|
assert blocks[2] == []
|
||||||
assert ray.get(splits[3][0]) == [2, 3]
|
assert blocks[3] == [2, 3]
|
||||||
assert ray.get(splits[4][0]) == []
|
assert blocks[4] == []
|
||||||
|
|
||||||
block = []
|
block = []
|
||||||
meta = _create_meta(0)
|
metadata = _create_meta(0)
|
||||||
|
|
||||||
block_id, splits = ray.get(
|
results = ray.get(
|
||||||
ray.remote(_split_single_block).remote(234, block, meta, [0])
|
ray.remote(_split_single_block)
|
||||||
|
.options(num_returns=3)
|
||||||
|
.remote(234, block, metadata, [0])
|
||||||
)
|
)
|
||||||
|
block_id, meta = results[0]
|
||||||
|
blocks = results[1:]
|
||||||
assert 234 == block_id
|
assert 234 == block_id
|
||||||
assert len(splits) == 2
|
assert len(blocks) == 2
|
||||||
assert ray.get(splits[0][0]) == []
|
assert blocks[0] == []
|
||||||
assert ray.get(splits[1][0]) == []
|
assert blocks[1] == []
|
||||||
|
|
||||||
|
|
||||||
def test_drop_empty_block_split():
|
def test_drop_empty_block_split():
|
||||||
|
|
Loading…
Add table
Reference in a new issue