import enum
import logging

import ray
import ray.streaming.generated.remote_call_pb2 as remote_call_pb
import ray.streaming.operator as operator
import ray.streaming.partition as partition
from ray._raylet import ActorID
from ray.actor import ActorHandle
from ray.streaming.config import Config
from ray.streaming.generated.streaming_pb2 import Language

logger = logging.getLogger(__name__)


class NodeType(enum.Enum):
    """Role of a node in the dataflow graph.

    SOURCE: Sources are where your program reads its input from.

    TRANSFORM: Operators transform one or more DataStreams into a new
        DataStream. Programs can combine multiple transformations into
        sophisticated dataflow topologies.

    SINK: Sinks consume DataStreams and forward them to files, sockets,
        external systems, or print them.
    """
    SOURCE = 0
    TRANSFORM = 1
    SINK = 2


class ExecutionEdge:
    """Python-side view of an execution edge protobuf message.

    Attributes:
        source_execution_vertex_id: Copied from the protobuf message.
        target_execution_vertex_id: Copied from the protobuf message.
        partition: The object returned by ``partition.load_partition`` for
            the edge's partition bytes, or ``None`` when ``language`` is not
            ``Language.PYTHON`` or the partition bytes are empty.
    """

    def __init__(self, execution_edge_pb, language):
        """Build the edge from its protobuf message.

        Args:
            execution_edge_pb: Protobuf message exposing
                ``source_execution_vertex_id``,
                ``target_execution_vertex_id`` and ``partition``.
            language: ``Language`` value used to decide whether the
                partition bytes can be loaded in this process.
        """
        self.source_execution_vertex_id = \
            execution_edge_pb.source_execution_vertex_id
        self.target_execution_vertex_id = \
            execution_edge_pb.target_execution_vertex_id
        # Always define the attribute so that reading `edge.partition` on an
        # edge without a loadable partition yields None instead of raising
        # AttributeError.
        self.partition = None
        partition_bytes = execution_edge_pb.partition
        # Sink node doesn't have partition function,
        # so we only deserialize partition_bytes when it's not None or empty
        if language == Language.PYTHON and partition_bytes:
            self.partition = partition.load_partition(partition_bytes)


class ExecutionVertex:
    """Python-side view of an execution vertex protobuf message.

    Attributes:
        stream_operator: Operator loaded from the protobuf's ``operator``
            field when the vertex language is ``Language.PYTHON``;
            ``None`` otherwise.
        worker_actor: Actor handle rebuilt from the protobuf's
            ``worker_actor`` field, or ``None`` when that field is empty.

    The remaining attributes (ids, name, index, parallelism, container_id,
    build_time, language, config, resource) are copied from the protobuf
    fields of the same name.
    """
    worker_actor: ActorHandle

    def __init__(self, execution_vertex_pb):
        """Build the vertex from its protobuf message.

        Args:
            execution_vertex_pb: Protobuf message describing the vertex.
        """
        self.execution_vertex_id = execution_vertex_pb.execution_vertex_id
        self.execution_job_vertex_id = \
            execution_vertex_pb.execution_job_vertex_id
        self.execution_job_vertex_name = \
            execution_vertex_pb.execution_job_vertex_name
        self.execution_vertex_index = \
            execution_vertex_pb.execution_vertex_index
        self.parallelism = execution_vertex_pb.parallelism
        # Default to None so the attribute exists for non-Python vertices
        # too; otherwise reading it raises AttributeError.
        self.stream_operator = None
        if execution_vertex_pb.language == Language.PYTHON:
            # python operator descriptor
            operator_bytes = execution_vertex_pb.operator
            if execution_vertex_pb.chained:
                logger.info("Load chained operator")
                self.stream_operator = operator.load_chained_operator(
                    operator_bytes)
            else:
                logger.info("Load operator")
                self.stream_operator = operator.load_operator(operator_bytes)
        self.worker_actor = None
        if execution_vertex_pb.worker_actor:
            self.worker_actor = ray.actor.ActorHandle. \
                _deserialization_helper(execution_vertex_pb.worker_actor)
        self.container_id = execution_vertex_pb.container_id
        self.build_time = execution_vertex_pb.build_time
        self.language = execution_vertex_pb.language
        self.config = execution_vertex_pb.config
        self.resource = execution_vertex_pb.resource

    @property
    def execution_vertex_name(self):
        """Return "<job_vertex_id>_<job_vertex_name>_<vertex_id>"."""
        return "{}_{}_{}".format(self.execution_job_vertex_id,
                                 self.execution_job_vertex_name,
                                 self.execution_vertex_id)


class ExecutionVertexContext:
    """Current execution vertex together with its neighbours and edges.

    Built from a ``remote_call_pb.ExecutionVertexContext`` message. It wraps
    the current vertex, the upstream and downstream vertices, and the input
    and output edges in the classes defined in this module.
    """
    actor_id: ActorID
    execution_vertex: ExecutionVertex

    def __init__(
            self,
            execution_vertex_context_pb: remote_call_pb.ExecutionVertexContext
    ):
        """Wrap every vertex and edge of the protobuf context.

        Args:
            execution_vertex_context_pb: Protobuf message holding the
                current vertex, its neighbours and its edges.
        """
        self.execution_vertex = ExecutionVertex(
            execution_vertex_context_pb.current_execution_vertex)
        self.job_name = self.execution_vertex.config[Config.STREAMING_JOB_NAME]
        self.exe_vertex_name = self.execution_vertex.execution_vertex_name
        # NOTE: raises AttributeError when the current vertex carries no
        # worker actor, since `worker_actor` is None in that case.
        self.actor_id = self.execution_vertex.worker_actor._ray_actor_id
        self.upstream_execution_vertices = [
            ExecutionVertex(vertex) for vertex in
            execution_vertex_context_pb.upstream_execution_vertices
        ]
        self.downstream_execution_vertices = [
            ExecutionVertex(vertex) for vertex in
            execution_vertex_context_pb.downstream_execution_vertices
        ]
        # Edges are loaded using the language of the current vertex.
        self.input_execution_edges = [
            ExecutionEdge(edge, self.execution_vertex.language)
            for edge in execution_vertex_context_pb.input_execution_edges
        ]
        self.output_execution_edges = [
            ExecutionEdge(edge, self.execution_vertex.language)
            for edge in execution_vertex_context_pb.output_execution_edges
        ]

    def get_parallelism(self):
        """Return the parallelism of the current vertex."""
        return self.execution_vertex.parallelism

    def get_upstream_parallelism(self):
        """Return the first upstream vertex's parallelism, or 0 if none."""
        if self.upstream_execution_vertices:
            return self.upstream_execution_vertices[0].parallelism
        return 0

    def get_downstream_parallelism(self):
        """Return the first downstream vertex's parallelism, or 0 if none."""
        if self.downstream_execution_vertices:
            return self.downstream_execution_vertices[0].parallelism
        return 0

    @property
    def build_time(self):
        """Build time of the current vertex."""
        return self.execution_vertex.build_time

    @property
    def stream_operator(self):
        """Stream operator of the current vertex."""
        return self.execution_vertex.stream_operator

    @property
    def config(self):
        """Config of the current vertex."""
        return self.execution_vertex.config

    def get_task_id(self):
        """Return the execution vertex id of the current vertex."""
        return self.execution_vertex.execution_vertex_id

    @staticmethod
    def _find_worker_actor(execution_vertices, execution_vertex_id):
        """Return the worker actor of the first vertex with the given id.

        Args:
            execution_vertices: Iterable of ``ExecutionVertex`` to search.
            execution_vertex_id: Id to look for.

        Raises:
            Exception: If no vertex in ``execution_vertices`` has that id.
        """
        for execution_vertex in execution_vertices:
            if execution_vertex.execution_vertex_id == execution_vertex_id:
                return execution_vertex.worker_actor
        raise Exception(
            "Vertex {} does not exist!".format(execution_vertex_id))

    def get_source_actor_by_execution_vertex_id(self, execution_vertex_id):
        """Return the worker actor of the upstream vertex with this id.

        Raises:
            Exception: If no upstream vertex has ``execution_vertex_id``.
        """
        return self._find_worker_actor(self.upstream_execution_vertices,
                                       execution_vertex_id)

    def get_target_actor_by_execution_vertex_id(self, execution_vertex_id):
        """Return the worker actor of the downstream vertex with this id.

        Raises:
            Exception: If no downstream vertex has ``execution_vertex_id``.
        """
        return self._find_worker_actor(self.downstream_execution_vertices,
                                       execution_vertex_id)