Add imports to docs examples to make the code more runnable. (#17240)

Clark Zinzow 2021-07-21 12:18:45 -06:00 committed by GitHub
parent afd59be8ca
commit b5194ca9f9

@@ -131,6 +131,8 @@ Get started by creating Datasets from synthetic data using ``ray.data.range()``
 .. code-block:: python
 
+    import ray
+
     # Create a Dataset of Python objects.
     ds = ray.data.range(10000)
     # -> Dataset(num_rows=10000, num_blocks=200, schema=<class 'int'>)
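For reference, a minimal runnable sketch of the example this hunk updates; the ``ray.init()`` call and the printed outputs are assumptions added here, not part of the docs page:

.. code-block:: python

    import ray

    ray.init()

    # Create a Dataset of Python objects.
    ds = ray.data.range(10000)
    print(ds.count())  # 10000
    print(ds.take(5))  # [0, 1, 2, 3, 4]
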
@@ -173,6 +175,9 @@ Finally, you can create a Dataset from existing data in the Ray object store or
 .. code-block:: python
 
+    import pandas as pd
+    import dask.dataframe as dd
+
     # Create a Dataset from a list of Pandas DataFrame objects.
     pdf = pd.DataFrame({"one": [1, 2, 3], "two": ["a", "b", "c"]})
     ds = ray.data.from_pandas([ray.put(pdf)])
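A runnable sketch of this example, assuming the Ray Datasets API current at the time of this commit, where ``from_pandas`` took a list of object refs (newer releases also accept DataFrames directly); the ``from_dask`` lines are an assumed illustration of why ``dask.dataframe`` is imported:

.. code-block:: python

    import ray
    import pandas as pd
    import dask.dataframe as dd

    ray.init()

    # Create a Dataset from a list of Pandas DataFrame objects.
    pdf = pd.DataFrame({"one": [1, 2, 3], "two": ["a", "b", "c"]})
    ds = ray.data.from_pandas([ray.put(pdf)])
    print(ds.take(3))

    # Create a Dataset from a Dask DataFrame.
    ddf = dd.from_pandas(pdf, npartitions=2)
    ds_from_dask = ray.data.from_dask(ddf)
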
@@ -239,7 +244,7 @@ By default, transformations are executed using Ray tasks. For transformations th
     class BatchInferModel:
         def __init__(self):
             self.model = ImageNetModel()
-        def __call__(self, batch: pandas.DataFrame) -> pandas.DataFrame:
+        def __call__(self, batch: pd.DataFrame) -> pd.DataFrame:
             return self.model(batch)
 
     ds = ray.data.read_binary_files("s3://bucket/image-dir")
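The hunk above uses a callable class so that expensive state (the model) is constructed once per long-lived actor. A self-contained sketch with a dummy stand-in for the hypothetical ``ImageNetModel`` and integer data instead of images; the batch type and printed values are assumptions for this Ray version:

.. code-block:: python

    import ray

    ray.init()

    class AddOffset:
        def __init__(self):
            # Expensive setup (e.g., loading a model) runs once per actor.
            self.offset = 100

        def __call__(self, batch):
            # For a simple int dataset in this Ray version, each batch is
            # a list of Python ints.
            return [x + self.offset for x in batch]

    ds = ray.data.range(1000)
    # compute="actors" runs the UDF in reusable actors instead of tasks.
    out = ds.map_batches(AddOffset, compute="actors", batch_size=128)
    print(out.take(3))  # [100, 101, 102]
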
@@ -283,7 +288,7 @@ Datasets can be split up into disjoint sub-datasets. Locality-aware splitting is
         def __init__(self, rank: int):
             pass
 
-        def train(self, shard: Dataset[int]) -> int:
+        def train(self, shard: ray.data.Dataset[int]) -> int:
             for batch in shard.iter_batches(batch_size=256):
                 pass
             return shard.count()
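A runnable sketch of locality-aware splitting across training actors; the ``Worker`` class body and the record counting are illustrative assumptions, not the docs' trainer:

.. code-block:: python

    import ray

    ray.init()

    @ray.remote
    class Worker:
        def __init__(self, rank: int):
            self.rank = rank

        def train(self, shard) -> int:
            # Count records batch by batch; a real trainer would fit a model.
            total = 0
            for batch in shard.iter_batches(batch_size=256):
                total += len(batch)
            return total

    workers = [Worker.remote(i) for i in range(4)]
    ds = ray.data.range(10000)
    # locality_hints co-locates each shard's blocks with its assigned actor.
    shards = ds.split(n=4, locality_hints=workers)
    print(ray.get([w.train.remote(s) for w, s in zip(workers, shards)]))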