[Datasets] Clean Up docs around pipelining -> windowing rename (#19142)

This commit is contained in:
Ian Rodney 2021-10-06 11:07:55 -07:00 committed by GitHub
parent db1105fa83
commit 8cab8d3ae9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 6 deletions


@@ -1645,7 +1645,8 @@ class Dataset(Generic[T]):
         return DatasetPipeline(Iterable(self), length=times)

     def pipeline(self, *, parallelism: int = 10) -> "DatasetPipeline[T]":
-        raise DeprecationWarning("Use .window(n) instead of .pipeline(n)")
+        raise DeprecationWarning("Use .window(blocks_per_window=n) instead of "
+                                 ".pipeline(parallelism=n)")

     def window(self, *, blocks_per_window: int = 10) -> "DatasetPipeline[T]":
         """Convert this into a DatasetPipeline by windowing over data blocks.
@@ -1655,18 +1656,18 @@ class Dataset(Generic[T]):
         pipeline are evaluated incrementally per window of blocks as data is
         read from the output of the pipeline.

-        Pipelining execution allows for output to be read sooner without
+        Windowing execution allows for output to be read sooner without
         waiting for all transformations to fully execute, and can also improve
         efficiency if transforms use different resources (e.g., GPUs).

-        Without pipelining::
+        Without windowing::

            [preprocessing......]
                                  [inference.......]
                                                     [write........]
            Time ----------------------------------------------------------->

-        With pipelining::
+        With windowing::

            [prep1] [prep2] [prep3]
                    [infer1] [infer2] [infer3]
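The windowing semantics the docstring above describes can be sketched in plain Python. This is a hypothetical helper for illustration, not Ray's actual implementation: it groups a dataset's blocks into windows of at most `blocks_per_window` blocks, the unit over which downstream transforms then run incrementally.

```python
def split_into_windows(blocks, blocks_per_window=10):
    """Group a dataset's blocks into fixed-size windows.

    Mirrors the documented semantics of .window(blocks_per_window=n):
    each window holds at most `blocks_per_window` blocks, except
    possibly the last, which holds the remainder.
    """
    return [blocks[i:i + blocks_per_window]
            for i in range(0, len(blocks), blocks_per_window)]


# 25 blocks with blocks_per_window=10 -> windows of 10, 10, and 5 blocks
windows = split_into_windows(list(range(25)), blocks_per_window=10)
```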


@@ -245,8 +245,9 @@ class DatasetPipeline(Generic[T]):
         Changes the windowing of this pipeline to the specified size. For
         example, if the current pipeline has two blocks per dataset, and
-        `.window(4)` is requested, adjacent datasets will be merged until each
-        dataset is 4 blocks. If `.window(1)` was requested the datasets will
+        `.window(blocks_per_window=4)` is requested, adjacent datasets will
+        be merged until each dataset is 4 blocks. If
+        `.window(blocks_per_window=1)` was requested the datasets will
         be split into smaller windows.
Args:
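The merge/split behavior this docstring describes can be sketched as follows. This is a hypothetical helper, not the `DatasetPipeline` implementation: it flattens the pipeline's existing windows and regroups the blocks at the new size, so adjacent windows merge when the size grows and split when it shrinks.

```python
def rewindow(windows, blocks_per_window):
    """Re-group existing windows to a new blocks_per_window.

    Adjacent windows are merged (or split) so each resulting window
    holds blocks_per_window blocks, except possibly the last.
    """
    blocks = [block for window in windows for block in window]
    return [blocks[i:i + blocks_per_window]
            for i in range(0, len(blocks), blocks_per_window)]


# Two blocks per window, rewindowed to four: adjacent windows merge.
merged = rewindow([[1, 2], [3, 4], [5, 6]], blocks_per_window=4)

# Rewindowed to one: each window splits into single-block windows.
split = rewindow([[1, 2], [3, 4]], blocks_per_window=1)
```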