ray/streaming/python/context.py
2020-02-25 10:33:33 +08:00

168 lines
4.8 KiB
Python

from abc import ABC, abstractmethod
from ray.streaming.datastream import StreamSource
from ray.streaming.function import LocalFileSourceFunction
from ray.streaming.function import CollectionSourceFunction
from ray.streaming.function import SourceFunction
from ray.streaming.runtime.gateway_client import GatewayClient
class StreamingContext:
"""
Main entry point for ray streaming functionality.
A StreamingContext is also a wrapper of java
`org.ray.streaming.api.context.StreamingContext`
"""
class Builder:
def __init__(self):
self._options = {}
def option(self, key=None, value=None, conf=None):
"""
Sets a config option. Options set using this method are
automatically propagated to :class:`StreamingContext`'s own
configuration.
Args:
key: a key name string for configuration property
value: a value string for configuration property
conf: multi key-value pairs as a dict
Returns:
self
"""
if key is not None:
assert value
self._options[key] = str(value)
if conf is not None:
for k, v in conf.items():
self._options[k] = v
return self
def build(self):
"""
Creates a StreamingContext based on the options set in this
builder.
"""
ctx = StreamingContext()
ctx._gateway_client.with_config(self._options)
return ctx
def __init__(self):
self.__gateway_client = GatewayClient()
self._j_ctx = self._gateway_client.create_streaming_context()
def source(self, source_func: SourceFunction):
"""Create an input data stream with a SourceFunction
Args:
source_func: the SourceFunction used to create the data stream
Returns:
The data stream constructed from the source_func
"""
return StreamSource.build_source(self, source_func)
def from_values(self, *values):
"""Creates a data stream from values
Args:
values: The elements to create the data stream from.
Returns:
The data stream representing the given values
"""
return self.from_collection(values)
def from_collection(self, values):
"""Creates a data stream from the given non-empty collection.
Args:
values: The collection of elements to create the data stream from.
Returns:
The data stream representing the given collection.
"""
assert values, "values shouldn't be None or empty"
func = CollectionSourceFunction(values)
return self.source(func)
def read_text_file(self, filename: str):
"""Reads the given file line-by-line and creates a data stream that
contains a string with the contents of each such line."""
func = LocalFileSourceFunction(filename)
return self.source(func)
def submit(self, job_name: str):
"""Submit job for execution.
Args:
job_name: name of the job
Returns:
An JobSubmissionResult future
"""
self._gateway_client.execute(job_name)
# TODO return a JobSubmissionResult future
def execute(self, job_name: str):
"""Execute the job. This method will block until job finished.
Args:
job_name: name of the job
"""
# TODO support block to job finish
# job_submit_result = self.submit(job_name)
# job_submit_result.wait_finish()
raise Exception("Unsupported")
@property
def _gateway_client(self):
return self.__gateway_client
class RuntimeContext(ABC):
@abstractmethod
def get_task_id(self):
"""
Returns:
Task id of the parallel task.
"""
pass
@abstractmethod
def get_task_index(self):
"""
Gets the index of this parallel subtask. The index starts from 0
and goes up to parallelism-1 (parallelism as returned by
`get_parallelism()`).
Returns:
The index of the parallel subtask.
"""
pass
@abstractmethod
def get_parallelism(self):
"""
Returns:
The parallelism with which the parallel task runs.
"""
pass
class RuntimeContextImpl(RuntimeContext):
def __init__(self, task_id, task_index, parallelism):
self.task_id = task_id
self.task_index = task_index
self.parallelism = parallelism
def get_task_id(self):
return self.task_id
def get_task_index(self):
return self.task_index
def get_parallelism(self):
return self.parallelism