Add imports to docs examples to make the code more runnable. (#17240)

Clark Zinzow 2021-07-21 12:18:45 -06:00 committed by GitHub
parent afd59be8ca
commit b5194ca9f9

@@ -131,6 +131,8 @@ Get started by creating Datasets from synthetic data using ``ray.data.range()``
 .. code-block:: python
 
+    import ray
+
     # Create a Dataset of Python objects.
     ds = ray.data.range(10000)
     # -> Dataset(num_rows=10000, num_blocks=200, schema=<class 'int'>)
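For reference, a minimal runnable sketch of the example this hunk updates; the ``ray.init()`` call and the printed outputs are assumptions added here, not part of the docs page:

.. code-block:: python

    import ray

    ray.init()

    # Create a Dataset of Python objects.
    ds = ray.data.range(10000)
    print(ds.count())  # 10000
    print(ds.take(5))  # [0, 1, 2, 3, 4]
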
@@ -173,6 +175,9 @@ Finally, you can create a Dataset from existing data in the Ray object store or
 .. code-block:: python
 
+    import pandas as pd
+    import dask.dataframe as dd
+
     # Create a Dataset from a list of Pandas DataFrame objects.
     pdf = pd.DataFrame({"one": [1, 2, 3], "two": ["a", "b", "c"]})
     ds = ray.data.from_pandas([ray.put(pdf)])
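A runnable sketch of this example, assuming the Ray Datasets API current at the time of this commit, where ``from_pandas`` took a list of object refs (newer releases also accept DataFrames directly); the ``from_dask`` lines are an assumed illustration of why ``dask.dataframe`` is imported:

.. code-block:: python

    import ray
    import pandas as pd
    import dask.dataframe as dd

    ray.init()

    # Create a Dataset from a list of Pandas DataFrame objects.
    pdf = pd.DataFrame({"one": [1, 2, 3], "two": ["a", "b", "c"]})
    ds = ray.data.from_pandas([ray.put(pdf)])
    print(ds.take(3))

    # Create a Dataset from a Dask DataFrame.
    ddf = dd.from_pandas(pdf, npartitions=2)
    ds_from_dask = ray.data.from_dask(ddf)
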
@@ -239,7 +244,7 @@ By default, transformations are executed using Ray tasks. For transformations th
     class BatchInferModel:
         def __init__(self):
             self.model = ImageNetModel()
-        def __call__(self, batch: pandas.DataFrame) -> pandas.DataFrame:
+        def __call__(self, batch: pd.DataFrame) -> pd.DataFrame:
             return self.model(batch)
 
     ds = ray.data.read_binary_files("s3://bucket/image-dir")
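The hunk above uses a callable class so that expensive state (the model) is constructed once per long-lived actor. A self-contained sketch with a dummy stand-in for the hypothetical ``ImageNetModel`` and integer data instead of images; the batch type and printed values are assumptions for this Ray version:

.. code-block:: python

    import ray

    ray.init()

    class AddOffset:
        def __init__(self):
            # Expensive setup (e.g., loading a model) runs once per actor.
            self.offset = 100

        def __call__(self, batch):
            # For a simple int dataset in this Ray version, each batch is
            # a list of Python ints.
            return [x + self.offset for x in batch]

    ds = ray.data.range(1000)
    # compute="actors" runs the UDF in reusable actors instead of tasks.
    out = ds.map_batches(AddOffset, compute="actors", batch_size=128)
    print(out.take(3))  # [100, 101, 102]
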
@@ -283,7 +288,7 @@ Datasets can be split up into disjoint sub-datasets. Locality-aware splitting is
         def __init__(self, rank: int):
             pass
 
-        def train(self, shard: Dataset[int]) -> int:
+        def train(self, shard: ray.data.Dataset[int]) -> int:
             for batch in shard.iter_batches(batch_size=256):
                 pass
             return shard.count()
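A runnable sketch of locality-aware splitting across training actors; the ``Worker`` class body and the record counting are illustrative assumptions, not the docs' trainer:

.. code-block:: python

    import ray

    ray.init()

    @ray.remote
    class Worker:
        def __init__(self, rank: int):
            self.rank = rank

        def train(self, shard) -> int:
            # Count records batch by batch; a real trainer would fit a model.
            total = 0
            for batch in shard.iter_batches(batch_size=256):
                total += len(batch)
            return total

    workers = [Worker.remote(i) for i in range(4)]
    ds = ray.data.range(10000)
    # locality_hints co-locates each shard's blocks with its assigned actor.
    shards = ds.split(n=4, locality_hints=workers)
    print(ray.get([w.train.remote(s) for w, s in zip(workers, shards)]))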