[Streaming] Streaming data transfer java (#6474)

This commit is contained in:
Chaokun Yang 2019-12-22 10:56:05 +08:00 committed by Hao Chen
parent 1b14fbe179
commit 7bbfa85c66
146 changed files with 3923 additions and 786 deletions

8
.gitignore vendored
View file

@ -148,6 +148,14 @@ java/runtime/native_dependencies/
# streaming/python
streaming/python/generated/
streaming/java/streaming-runtime/src/main/java/org/ray/streaming/runtime/generated/
streaming/build/java
.clwb
streaming/**/.settings
streaming/java/**/target
streaming/java/**/.classpath
streaming/java/**/.project
streaming/java/**/*.log
# python virtual env
venv

View file

@ -35,11 +35,16 @@ matrix:
- ./java/test.sh
- os: linux
env: BAZEL_PYTHON_VERSION=PY3 PYTHON=3.5 PYTHONWARNINGS=ignore TESTSUITE=streaming
env:
- TESTSUITE=streaming
- JDK='Oracle JDK 8'
- RAY_INSTALL_JAVA=1
- BAZEL_PYTHON_VERSION=PY3
- PYTHON=3.5 PYTHONWARNINGS=ignore
install:
- python $TRAVIS_BUILD_DIR/ci/travis/determine_tests_to_run.py
- eval `python $TRAVIS_BUILD_DIR/ci/travis/determine_tests_to_run.py`
- if [ $RAY_CI_STREAMING_PYTHON_AFFECTED != "1" ]; then exit; fi
- if [[ $RAY_CI_STREAMING_PYTHON_AFFECTED != "1" && $RAY_CI_STREAMING_JAVA_AFFECTED != "1" ]]; then exit; fi
- ./ci/suppress_output ./ci/travis/install-bazel.sh
- ./ci/suppress_output ./ci/travis/install-dependencies.sh
- export PATH="$HOME/miniconda/bin:$PATH"
@ -47,7 +52,8 @@ matrix:
script:
# Streaming cpp test.
- if [ $RAY_CI_STREAMING_CPP_AFFECTED == "1" ]; then ./ci/suppress_output bash streaming/src/test/run_streaming_queue_test.sh; fi
- if [ RAY_CI_STREAMING_PYTHON_AFFECTED == "1" ]; then python -m pytest -v --durations=5 --timeout=300 python/ray/streaming/tests/; fi
- if [ $RAY_CI_STREAMING_PYTHON_AFFECTED == "1" ]; then python -m pytest -v --durations=5 --timeout=300 streaming/python/tests/; fi
- if [ $RAY_CI_STREAMING_JAVA_AFFECTED == "1" ]; then ./streaming/java/test.sh; fi
- os: linux
env: LINT=1 PYTHONWARNINGS=ignore

View file

@ -968,10 +968,23 @@ cc_binary(
"@bazel_tools//src/conditions:darwin": ["external/bazel_tools/tools/jdk/include/darwin"],
"//conditions:default": ["external/bazel_tools/tools/jdk/include/linux"],
}),
# Export ray ABI symbols, which can then be used by libstreaming_java.so. See `//:_raylet`.
linkopts = select({
"@bazel_tools//src/conditions:darwin": [
"-Wl,-exported_symbols_list,$(location //:src/ray/ray_exported_symbols.lds)",
],
"@bazel_tools//src/conditions:windows": [
],
"//conditions:default": [
"-Wl,--version-script,$(location //:src/ray/ray_version_script.lds)",
],
}),
linkshared = 1,
linkstatic = 1,
deps = [
"//:core_worker_lib",
"//:src/ray/ray_exported_symbols.lds",
"//:src/ray/ray_version_script.lds",
],
)

View file

@ -47,7 +47,7 @@ def define_java_module(
)
checkstyle_test(
name = "org_ray_ray_" + name + "-checkstyle",
target = "//java:org_ray_ray_" + name,
target = ":org_ray_ray_" + name,
config = "//java:checkstyle.xml",
suppressions = "//java:checkstyle-suppressions.xml",
size = "small",
@ -63,7 +63,7 @@ def define_java_module(
)
checkstyle_test(
name = "org_ray_ray_" + name + "_test-checkstyle",
target = "//java:org_ray_ray_" + name + "_test",
target = ":org_ray_ray_" + name + "_test",
config = "//java:checkstyle.xml",
suppressions = "//java:checkstyle-suppressions.xml",
size = "small",

View file

@ -1,4 +1,5 @@
load("@com_github_ray_project_ray//java:dependencies.bzl", "gen_java_deps")
load("@com_github_ray_project_ray//streaming/java:dependencies.bzl", "gen_streaming_java_deps")
load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps")
load("@com_github_jupp0r_prometheus_cpp//bazel:repositories.bzl", "prometheus_cpp_repositories")
load("@com_github_checkstyle_java//:repo.bzl", "checkstyle_deps")
@ -9,6 +10,7 @@ load("@rules_proto_grpc//:repositories.bzl", "rules_proto_grpc_toolchains")
def ray_deps_build_all():
gen_java_deps()
gen_streaming_java_deps()
checkstyle_deps()
boost_deps()
prometheus_cpp_repositories()

View file

@ -45,6 +45,6 @@ done
pushd $ROOT_DIR/../..
BAZEL_FILES="bazel/BUILD bazel/BUILD.plasma bazel/ray.bzl BUILD.bazel
streaming/BUILD.bazel WORKSPACE"
streaming/BUILD.bazel streaming/java/BUILD.bazel WORKSPACE"
buildifier -mode=$RUN_TYPE -diff_command="diff -u" $BAZEL_FILES
popd

View file

@ -40,6 +40,7 @@ if __name__ == "__main__":
RAY_CI_MACOS_WHEELS_AFFECTED = 0
RAY_CI_STREAMING_CPP_AFFECTED = 0
RAY_CI_STREAMING_PYTHON_AFFECTED = 0
RAY_CI_STREAMING_JAVA_AFFECTED = 0
if os.environ["TRAVIS_EVENT_TYPE"] == "pull_request":
@ -76,6 +77,7 @@ if __name__ == "__main__":
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
elif changed_file.startswith("java/"):
RAY_CI_JAVA_AFFECTED = 1
RAY_CI_STREAMING_JAVA_AFFECTED = 1
elif any(
changed_file.startswith(prefix)
for prefix in skip_prefix_list):
@ -91,11 +93,15 @@ if __name__ == "__main__":
RAY_CI_MACOS_WHEELS_AFFECTED = 1
RAY_CI_STREAMING_CPP_AFFECTED = 1
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
RAY_CI_STREAMING_JAVA_AFFECTED = 1
elif changed_file.startswith("streaming/src"):
RAY_CI_STREAMING_CPP_AFFECTED = 1
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
RAY_CI_STREAMING_JAVA_AFFECTED = 1
elif changed_file.startswith("streaming/python"):
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
elif changed_file.startswith("streaming/java"):
RAY_CI_STREAMING_JAVA_AFFECTED = 1
else:
RAY_CI_TUNE_AFFECTED = 1
RAY_CI_RLLIB_AFFECTED = 1
@ -105,6 +111,8 @@ if __name__ == "__main__":
RAY_CI_LINUX_WHEELS_AFFECTED = 1
RAY_CI_MACOS_WHEELS_AFFECTED = 1
RAY_CI_STREAMING_CPP_AFFECTED = 1
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
RAY_CI_STREAMING_JAVA_AFFECTED = 1
else:
RAY_CI_TUNE_AFFECTED = 1
RAY_CI_RLLIB_AFFECTED = 1
@ -114,6 +122,8 @@ if __name__ == "__main__":
RAY_CI_LINUX_WHEELS_AFFECTED = 1
RAY_CI_MACOS_WHEELS_AFFECTED = 1
RAY_CI_STREAMING_CPP_AFFECTED = 1
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
RAY_CI_STREAMING_JAVA_AFFECTED = 1
# Log the modified environment variables visible in console.
for output_stream in [sys.stdout, sys.stderr]:
@ -132,3 +142,5 @@ if __name__ == "__main__":
.format(RAY_CI_STREAMING_CPP_AFFECTED))
_print("export RAY_CI_STREAMING_PYTHON_AFFECTED={}"
.format(RAY_CI_STREAMING_PYTHON_AFFECTED))
_print("export RAY_CI_STREAMING_JAVA_AFFECTED={}"
.format(RAY_CI_STREAMING_JAVA_AFFECTED))

View file

@ -5,7 +5,6 @@ exports_files([
"testng.xml",
"checkstyle.xml",
"checkstyle-suppressions.xml",
"streaming/testng.xml",
])
all_modules = [
@ -13,7 +12,6 @@ all_modules = [
"runtime",
"test",
"tutorial",
"streaming",
]
java_import(
@ -25,14 +23,11 @@ java_import(
] + [
"all_tests_deploy.jar",
"all_tests_deploy-src.jar",
"streaming_tests_deploy.jar",
"streaming_tests_deploy-src.jar",
],
deps = [
":org_ray_ray_" + module for module in all_modules
] + [
":all_tests",
":streaming_tests",
],
)
@ -45,6 +40,7 @@ define_java_module(
"@maven//:com_sun_xml_bind_jaxb_core",
"@maven//:com_sun_xml_bind_jaxb_impl",
],
visibility = ["//visibility:public"]
)
define_java_module(
@ -79,7 +75,9 @@ define_java_module(
"@maven//:org_slf4j_slf4j_api",
"@maven//:org_slf4j_slf4j_log4j12",
"@maven//:redis_clients_jedis",
"@maven//:net_java_dev_jna_jna",
],
visibility = ["//visibility:public"]
)
define_java_module(
@ -107,28 +105,6 @@ define_java_module(
],
)
define_java_module(
name = "streaming",
deps = [
":org_ray_ray_api",
":org_ray_ray_runtime",
"@maven//:com_google_guava_guava",
"@maven//:org_slf4j_slf4j_api",
"@maven//:org_slf4j_slf4j_log4j12",
],
define_test_lib = True,
test_deps = [
":org_ray_ray_api",
":org_ray_ray_runtime",
":org_ray_ray_streaming",
"@maven//:com_beust_jcommander",
"@maven//:com_google_guava_guava",
"@maven//:org_slf4j_slf4j_api",
"@maven//:org_slf4j_slf4j_log4j12",
"@maven//:org_testng_testng",
],
)
java_binary(
name = "all_tests",
main_class = "org.testng.TestNG",
@ -140,16 +116,6 @@ java_binary(
],
)
java_binary(
name = "streaming_tests",
main_class = "org.testng.TestNG",
data = ["streaming/testng.xml"],
args = ["java/streaming/testng.xml"],
runtime_deps = [
":org_ray_ray_streaming_test",
],
)
java_proto_compile(
name = "common_java_proto",
deps = ["@//:common_proto"],
@ -236,7 +202,6 @@ genrule(
cp -f $(location //java:org_ray_ray_runtime_pom) $$WORK_DIR/java/runtime/pom.xml
cp -f $(location //java:org_ray_ray_tutorial_pom) $$WORK_DIR/java/tutorial/pom.xml
cp -f $(location //java:org_ray_ray_test_pom) $$WORK_DIR/java/test/pom.xml
cp -f $(location //java:org_ray_ray_streaming_pom) $$WORK_DIR/java/streaming/pom.xml
echo $$(date) > $@
""",
local = 1,

View file

@ -18,8 +18,9 @@ def gen_java_deps():
"org.slf4j:slf4j-log4j12:1.7.25",
"org.testng:testng:6.9.10",
"redis.clients:jedis:2.8.0",
"net.java.dev.jna:jna:5.5.0"
],
repositories = [
"https://repo1.maven.org/maven2",
"https://repo1.maven.org/maven2/",
],
)

View file

@ -12,7 +12,6 @@
<module>api</module>
<module>runtime</module>
<module>test</module>
<module>streaming</module>
<module>tutorial</module>
</modules>

View file

@ -52,6 +52,11 @@
<artifactId>fst</artifactId>
<version>2.57</version>
</dependency>
<dependency>
<groupId>net.java.dev.jna</groupId>
<artifactId>jna</artifactId>
<version>5.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>

View file

@ -1,10 +1,8 @@
package org.ray.runtime;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.FileUtils;
@ -22,7 +20,7 @@ import org.ray.runtime.runner.RunManager;
import org.ray.runtime.task.NativeTaskExecutor;
import org.ray.runtime.task.NativeTaskSubmitter;
import org.ray.runtime.task.TaskExecutor;
import org.ray.runtime.util.FileUtil;
import org.ray.runtime.util.JniUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -42,16 +40,11 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
static {
LOGGER.debug("Loading native libraries.");
// Load native libraries.
String[] libraries = new String[]{"core_worker_library_java"};
for (String library : libraries) {
String fileName = System.mapLibraryName(library);
try (FileUtil.TempFile libFile = FileUtil.getTempFileFromResource(fileName)) {
System.load(libFile.getFile().getAbsolutePath());
}
// Expose ray ABI symbols which may be depended on by other shared
// libraries such as libstreaming_java.so.
// See BUILD.bazel:libcore_worker_library_java.so
JniUtils.loadLibrary("core_worker_library_java", true);
LOGGER.debug("Native libraries loaded.");
}
RayConfig globalRayConfig = RayConfig.create();
resetLibraryPath(globalRayConfig);
@ -65,30 +58,9 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
}
private static void resetLibraryPath(RayConfig rayConfig) {
if (rayConfig.libraryPath.isEmpty()) {
return;
}
String path = System.getProperty("java.library.path");
if (Strings.isNullOrEmpty(path)) {
path = "";
} else {
path += ":";
}
path += String.join(":", rayConfig.libraryPath);
// This is a hack to reset library path at runtime,
// see https://stackoverflow.com/questions/15409223/.
System.setProperty("java.library.path", path);
// Set sys_paths to null so that java.library.path will be re-evaluated next time it is needed.
final Field sysPathsField;
try {
sysPathsField = ClassLoader.class.getDeclaredField("sys_paths");
sysPathsField.setAccessible(true);
sysPathsField.set(null, null);
} catch (NoSuchFieldException | IllegalAccessException e) {
LOGGER.error("Failed to set library path.", e);
}
String separator = System.getProperty("path.separator");
String libraryPath = String.join(separator, rayConfig.libraryPath);
JniUtils.resetLibraryPath(libraryPath);
}
public RayNativeRuntime(RayConfig rayConfig, FunctionManager functionManager) {

View file

@ -17,6 +17,9 @@ public class DefaultWorker {
public static void main(String[] args) {
try {
System.setProperty("ray.worker.mode", "WORKER");
// Set run-mode to `CLUSTER` explicitly, to prevent the DefaultWorker from receiving
// a wrong run-mode parameter through JVM options.
System.setProperty("ray.run-mode", "CLUSTER");
Thread.setDefaultUncaughtExceptionHandler((Thread t, Throwable e) -> {
LOGGER.error("Uncaught worker exception in thread {}: {}", t, e);
});

View file

@ -0,0 +1,84 @@
package org.ray.runtime.util;
import com.google.common.base.Strings;
import com.google.common.collect.Sets;
import com.sun.jna.NativeLibrary;
import java.lang.reflect.Field;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class JniUtils {
private static final Logger LOGGER = LoggerFactory.getLogger(JniUtils.class);
private static Set<String> loadedLibs = Sets.newHashSet();
/**
* Loads the native library specified by the <code>libraryName</code> argument.
* The <code>libraryName</code> argument must not contain any platform specific
* prefix, file extension or path.
*
* @param libraryName the name of the library.
*/
public static synchronized void loadLibrary(String libraryName) {
loadLibrary(libraryName, false);
}
/**
* Loads the native library specified by the <code>libraryName</code> argument.
* The <code>libraryName</code> argument must not contain any platform specific
* prefix, file extension or path.
*
* @param libraryName the name of the library.
* @param exportSymbols whether to export the library's symbols so that they can be used by other libraries.
*/
public static synchronized void loadLibrary(String libraryName, boolean exportSymbols) {
if (!loadedLibs.contains(libraryName)) {
LOGGER.debug("Loading native library {}.", libraryName);
// Load native library.
String fileName = System.mapLibraryName(libraryName);
String libPath = null;
try (FileUtil.TempFile libFile = FileUtil.getTempFileFromResource(fileName)) {
libPath = libFile.getFile().getAbsolutePath();
if (exportSymbols) {
// Expose library symbols using RTLD_GLOBAL so that they can be used by other
// shared libraries.
NativeLibrary.getInstance(libFile.getFile().getAbsolutePath());
}
System.load(libPath);
}
LOGGER.debug("Native library loaded.");
resetLibraryPath(libPath);
loadedLibs.add(libraryName);
}
}
/**
* This is a hack to reset the library path at runtime. Please don't use it outside of Ray.
*/
public static synchronized void resetLibraryPath(String libPath) {
if (Strings.isNullOrEmpty(libPath)) {
return;
}
String path = System.getProperty("java.library.path");
String separator = System.getProperty("path.separator");
if (Strings.isNullOrEmpty(path)) {
path = "";
} else {
path += separator;
}
path += String.join(separator, libPath);
// This is a hack to reset library path at runtime,
// see https://stackoverflow.com/questions/15409223/.
System.setProperty("java.library.path", path);
// Set sys_paths to null so that java.library.path will be re-evaluated next time it is needed.
final Field sysPathsField;
try {
sysPathsField = ClassLoader.class.getDeclaredField("sys_paths");
sysPathsField.setAccessible(true);
sysPathsField.set(null, null);
} catch (NoSuchFieldException | IllegalAccessException e) {
LOGGER.error("Failed to set library path.", e);
}
}
}
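
For context, here is a minimal usage sketch of the new JniUtils helper, assuming a caller that wants to expose the core worker library's symbols before loading a dependent library such as libstreaming_java.so. The loading order and the "streaming_java" library name are illustrative assumptions, not code from this commit.

```java
import org.ray.runtime.util.JniUtils;

public class NativeLoadingSketch {
  public static void main(String[] args) {
    // Export the core worker library's symbols (RTLD_GLOBAL via JNA) so that
    // dependent native libraries can resolve the ray ABI symbols at load time.
    JniUtils.loadLibrary("core_worker_library_java", true);
    // A dependent library such as libstreaming_java.so could then be loaded
    // without exporting its own symbols. The name "streaming_java" here is an
    // assumption for illustration only.
    JniUtils.loadLibrary("streaming_java");
  }
}
```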

View file

@ -1,21 +0,0 @@
package org.ray.streaming.api.partition;
import org.ray.streaming.api.function.Function;
/**
* Interface of the partitioning strategy.
* @param <T> Type of the input data.
*/
@FunctionalInterface
public interface Partition<T> extends Function {
/**
* Given a record and downstream tasks, determine which task(s) should receive the record.
*
* @param record The record.
* @param taskIds IDs of all downstream tasks.
* @return IDs of the downstream tasks that should receive the record.
*/
int[] partition(T record, int[] taskIds);
}

View file

@ -1,17 +0,0 @@
package org.ray.streaming.api.partition.impl;
import org.ray.streaming.api.partition.Partition;
/**
* Broadcast the record to all downstream tasks.
*/
public class BroadcastPartition<T> implements Partition<T> {
public BroadcastPartition() {
}
@Override
public int[] partition(T value, int[] taskIds) {
return taskIds;
}
}

View file

@ -1,20 +0,0 @@
package org.ray.streaming.cluster;
import java.util.ArrayList;
import java.util.List;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.streaming.core.runtime.StreamWorker;
public class ResourceManager {
public List<RayActor<StreamWorker>> createWorker(int workerNum) {
List<RayActor<StreamWorker>> workers = new ArrayList<>();
for (int i = 0; i < workerNum; i++) {
RayActor<StreamWorker> worker = Ray.createActor(StreamWorker::new);
workers.add(worker);
}
return workers;
}
}

View file

@ -1,101 +0,0 @@
package org.ray.streaming.core.processor;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.core.command.BatchInfo;
import org.ray.streaming.core.graph.ExecutionGraph;
import org.ray.streaming.core.graph.ExecutionNode;
import org.ray.streaming.core.graph.ExecutionNode.NodeType;
import org.ray.streaming.core.graph.ExecutionTask;
import org.ray.streaming.core.runtime.context.RuntimeContext;
import org.ray.streaming.message.Record;
import org.ray.streaming.operator.impl.MasterOperator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* MasterProcessor is responsible for overall control logic.
*/
public class MasterProcessor extends StreamProcessor<BatchInfo, MasterOperator> {
private static final Logger LOGGER = LoggerFactory.getLogger(MasterProcessor.class);
private Thread batchControllerThread;
private long maxBatch;
public MasterProcessor(MasterOperator masterOperator) {
super(masterOperator);
}
public void open(List<Collector> collectors, RuntimeContext runtimeContext,
ExecutionGraph executionGraph) {
super.open(collectors, runtimeContext);
this.maxBatch = runtimeContext.getMaxBatch();
startBatchController(executionGraph);
}
private void startBatchController(ExecutionGraph executionGraph) {
BatchController batchController = new BatchController(maxBatch, collectors);
List<Integer> sinkTasks = new ArrayList<>();
for (ExecutionNode executionNode : executionGraph.getExecutionNodeList()) {
if (executionNode.getNodeType() == NodeType.SINK) {
List<Integer> nodeTasks = executionNode.getExecutionTaskList().stream()
.map(ExecutionTask::getTaskId).collect(Collectors.toList());
sinkTasks.addAll(nodeTasks);
}
}
batchControllerThread = new Thread(batchController, "controller-thread");
batchControllerThread.start();
}
@Override
public void process(BatchInfo executionGraph) {
}
@Override
public void close() {
}
static class BatchController implements Runnable, Serializable {
private AtomicInteger batchId;
private List<Collector> collectors;
private Map<Integer, Integer> sinkBatchMap;
private Integer frequency;
private long maxBatch;
public BatchController(long maxBatch, List<Collector> collectors) {
this.batchId = new AtomicInteger(0);
this.maxBatch = maxBatch;
this.collectors = collectors;
// TODO(zhenxuanpan): Use config to set.
this.frequency = 1000;
}
@Override
public void run() {
while (batchId.get() < maxBatch) {
try {
Record record = new Record<>(new BatchInfo(batchId.getAndIncrement()));
for (Collector collector : collectors) {
collector.collect(record);
}
Thread.sleep(frequency);
} catch (Exception e) {
LOGGER.error(e.getMessage(), e);
}
}
}
}
}

View file

@ -1,25 +0,0 @@
package org.ray.streaming.core.processor;
import org.ray.streaming.operator.impl.SourceOperator;
/**
* The processor for the stream sources, containing a SourceOperator.
*
* @param <T> The type of source data.
*/
public class SourceProcessor<T> extends StreamProcessor<Long, SourceOperator<T>> {
public SourceProcessor(SourceOperator<T> operator) {
super(operator);
}
@Override
public void process(Long batchId) {
this.operator.process(batchId);
}
@Override
public void close() {
}
}

View file

@ -1,86 +0,0 @@
package org.ray.streaming.core.runtime;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.ray.api.annotation.RayRemote;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.core.command.BatchInfo;
import org.ray.streaming.core.graph.ExecutionEdge;
import org.ray.streaming.core.graph.ExecutionGraph;
import org.ray.streaming.core.graph.ExecutionNode;
import org.ray.streaming.core.graph.ExecutionNode.NodeType;
import org.ray.streaming.core.graph.ExecutionTask;
import org.ray.streaming.core.processor.MasterProcessor;
import org.ray.streaming.core.processor.StreamProcessor;
import org.ray.streaming.core.runtime.collector.RayCallCollector;
import org.ray.streaming.core.runtime.context.RayRuntimeContext;
import org.ray.streaming.core.runtime.context.RuntimeContext;
import org.ray.streaming.core.runtime.context.WorkerContext;
import org.ray.streaming.message.Message;
import org.ray.streaming.message.Record;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The stream worker, it is a ray actor.
*/
@RayRemote
public class StreamWorker implements Serializable {
private static final Logger LOGGER = LoggerFactory.getLogger(StreamWorker.class);
private int taskId;
private WorkerContext workerContext;
private StreamProcessor streamProcessor;
private NodeType nodeType;
public StreamWorker() {
}
public Boolean init(WorkerContext workerContext) {
this.workerContext = workerContext;
this.taskId = workerContext.getTaskId();
ExecutionGraph executionGraph = this.workerContext.getExecutionGraph();
ExecutionTask executionTask = executionGraph.getExecutionTaskByTaskId(taskId);
ExecutionNode executionNode = executionGraph.getExecutionNodeByTaskId(taskId);
this.nodeType = executionNode.getNodeType();
this.streamProcessor = executionNode.getStreamProcessor();
LOGGER.debug("Initializing StreamWorker, taskId: {}, operator: {}.", taskId, streamProcessor);
List<ExecutionEdge> executionEdges = executionNode.getExecutionEdgeList();
List<Collector> collectors = new ArrayList<>();
for (ExecutionEdge executionEdge : executionEdges) {
collectors.add(new RayCallCollector(taskId, executionEdge, executionGraph));
}
RuntimeContext runtimeContext = new RayRuntimeContext(executionTask, workerContext.getConfig(),
executionNode.getParallelism());
if (this.nodeType == NodeType.MASTER) {
((MasterProcessor) streamProcessor).open(collectors, runtimeContext, executionGraph);
} else {
this.streamProcessor.open(collectors, runtimeContext);
}
return true;
}
public Boolean process(Message message) {
LOGGER.debug("Processing message, taskId: {}, message: {}.", taskId, message);
if (nodeType == NodeType.SOURCE) {
Record record = message.getRecord(0);
BatchInfo batchInfo = (BatchInfo) record.getValue();
this.streamProcessor.process(batchInfo.getBatchId());
} else {
List<Record> records = message.getRecordList();
for (Record record : records) {
record.setBatchId(message.getBatchId());
record.setStream(message.getStream());
this.streamProcessor.process(record);
}
}
return true;
}
}

View file

@ -1,58 +0,0 @@
package org.ray.streaming.core.runtime.collector;
import java.util.Arrays;
import java.util.Map;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.partition.Partition;
import org.ray.streaming.core.graph.ExecutionEdge;
import org.ray.streaming.core.graph.ExecutionGraph;
import org.ray.streaming.core.runtime.StreamWorker;
import org.ray.streaming.message.Message;
import org.ray.streaming.message.Record;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The collector that emits data via Ray remote calls.
*/
public class RayCallCollector implements Collector<Record> {
private static final Logger LOGGER = LoggerFactory.getLogger(RayCallCollector.class);
private int taskId;
private String stream;
private Map<Integer, RayActor<StreamWorker>> taskId2Worker;
private int[] targetTaskIds;
private Partition partition;
public RayCallCollector(int taskId, ExecutionEdge executionEdge, ExecutionGraph executionGraph) {
this.taskId = taskId;
this.stream = executionEdge.getStream();
int targetNodeId = executionEdge.getTargetNodeId();
taskId2Worker = executionGraph
.getTaskId2WorkerByNodeId(targetNodeId);
targetTaskIds = Arrays.stream(taskId2Worker.keySet()
.toArray(new Integer[taskId2Worker.size()]))
.mapToInt(Integer::valueOf).toArray();
this.partition = executionEdge.getPartition();
LOGGER.debug("RayCallCollector constructed, taskId:{}, add stream:{}, partition:{}.",
taskId, stream, this.partition);
}
@Override
public void collect(Record record) {
int[] taskIds = this.partition.partition(record, targetTaskIds);
LOGGER.debug("Sending data from task {} to remote tasks {}, collector stream:{}, record:{}",
taskId, taskIds, stream, record);
Message message = new Message(taskId, record.getBatchId(), stream, record);
for (int targetTaskId : taskIds) {
RayActor<StreamWorker> streamWorker = this.taskId2Worker.get(targetTaskId);
// Use ray call to send message to downstream actor.
Ray.call(StreamWorker::process, streamWorker, message);
}
}
}

View file

@ -1,17 +0,0 @@
package org.ray.streaming.operator.impl;
import org.ray.streaming.operator.OperatorType;
import org.ray.streaming.operator.StreamOperator;
public class MasterOperator extends StreamOperator {
public MasterOperator() {
super(null);
}
@Override
public OperatorType getOpType() {
return OperatorType.MASTER;
}
}

View file

@ -1,93 +0,0 @@
package org.ray.streaming.schedule.impl;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.api.RayObject;
import org.ray.streaming.api.partition.impl.BroadcastPartition;
import org.ray.streaming.cluster.ResourceManager;
import org.ray.streaming.core.graph.ExecutionGraph;
import org.ray.streaming.core.graph.ExecutionNode;
import org.ray.streaming.core.graph.ExecutionNode.NodeType;
import org.ray.streaming.core.graph.ExecutionTask;
import org.ray.streaming.core.runtime.StreamWorker;
import org.ray.streaming.core.runtime.context.WorkerContext;
import org.ray.streaming.operator.impl.MasterOperator;
import org.ray.streaming.plan.Plan;
import org.ray.streaming.plan.PlanEdge;
import org.ray.streaming.plan.PlanVertex;
import org.ray.streaming.plan.VertexType;
import org.ray.streaming.schedule.IJobSchedule;
import org.ray.streaming.schedule.ITaskAssign;
public class JobScheduleImpl implements IJobSchedule {
private Plan plan;
private Map<String, Object> jobConfig;
private ResourceManager resourceManager;
private ITaskAssign taskAssign;
public JobScheduleImpl(Map<String, Object> jobConfig) {
this.resourceManager = new ResourceManager();
this.taskAssign = new TaskAssignImpl();
this.jobConfig = jobConfig;
}
/**
* Schedule physical plan to execution graph, and call streaming worker to init and run.
*/
@Override
public void schedule(Plan plan) {
this.plan = plan;
addJobMaster(plan);
List<RayActor<StreamWorker>> workers = this.resourceManager.createWorker(getPlanWorker());
ExecutionGraph executionGraph = this.taskAssign.assign(this.plan, workers);
List<ExecutionNode> executionNodes = executionGraph.getExecutionNodeList();
List<RayObject<Boolean>> waits = new ArrayList<>();
ExecutionTask masterTask = null;
for (ExecutionNode executionNode : executionNodes) {
List<ExecutionTask> executionTasks = executionNode.getExecutionTaskList();
for (ExecutionTask executionTask : executionTasks) {
if (executionNode.getNodeType() != NodeType.MASTER) {
Integer taskId = executionTask.getTaskId();
RayActor<StreamWorker> streamWorker = executionTask.getWorker();
waits.add(Ray.call(StreamWorker::init, streamWorker,
new WorkerContext(taskId, executionGraph, jobConfig)));
} else {
masterTask = executionTask;
}
}
}
Ray.wait(waits);
Integer masterId = masterTask.getTaskId();
RayActor<StreamWorker> masterWorker = masterTask.getWorker();
Ray.call(StreamWorker::init, masterWorker,
new WorkerContext(masterId, executionGraph, jobConfig)).get();
}
private void addJobMaster(Plan plan) {
int masterVertexId = 0;
int masterParallelism = 1;
PlanVertex masterVertex = new PlanVertex(masterVertexId, masterParallelism, VertexType.MASTER,
new MasterOperator());
plan.getPlanVertexList().add(masterVertex);
List<PlanVertex> planVertices = plan.getPlanVertexList();
for (PlanVertex planVertex : planVertices) {
if (planVertex.getVertexType() == VertexType.SOURCE) {
PlanEdge planEdge = new PlanEdge(masterVertexId, planVertex.getVertexId(),
new BroadcastPartition());
plan.getPlanEdgeList().add(planEdge);
}
}
}
private int getPlanWorker() {
List<PlanVertex> planVertexList = plan.getPlanVertexList();
return planVertexList.stream().map(vertex -> vertex.getParallelism()).reduce(0, Integer::sum);
}
}

View file

@ -1,10 +0,0 @@
package org.ray.streaming.util;
public class ConfigKey {
/**
* Maximum number of batches to run in a streaming job.
*/
public static final String STREAMING_MAX_BATCH_COUNT = "streaming.max.batch.count";
}

View file

@ -1,60 +0,0 @@
package org.ray.streaming.schedule.impl;
import org.ray.api.id.ActorId;
import org.ray.api.id.ObjectId;
import org.ray.runtime.actor.LocalModeRayActor;
import org.ray.streaming.api.partition.impl.RoundRobinPartition;
import org.ray.streaming.core.graph.ExecutionEdge;
import org.ray.streaming.core.graph.ExecutionGraph;
import org.ray.streaming.core.graph.ExecutionNode;
import org.ray.streaming.core.graph.ExecutionNode.NodeType;
import org.ray.streaming.core.runtime.StreamWorker;
import org.ray.streaming.plan.Plan;
import org.ray.streaming.plan.PlanBuilderTest;
import org.ray.streaming.schedule.ITaskAssign;
import java.util.ArrayList;
import java.util.List;
import org.ray.api.RayActor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
public class TaskAssignImplTest {
private static final Logger LOGGER = LoggerFactory.getLogger(TaskAssignImplTest.class);
@Test
public void testTaskAssignImpl() {
PlanBuilderTest planBuilderTest = new PlanBuilderTest();
Plan plan = planBuilderTest.buildDataSyncPlan();
List<RayActor<StreamWorker>> workers = new ArrayList<>();
for(int i = 0; i < plan.getPlanVertexList().size(); i++) {
workers.add(new LocalModeRayActor(ActorId.fromRandom(), ObjectId.fromRandom()));
}
ITaskAssign taskAssign = new TaskAssignImpl();
ExecutionGraph executionGraph = taskAssign.assign(plan, workers);
List<ExecutionNode> executionNodeList = executionGraph.getExecutionNodeList();
Assert.assertEquals(executionNodeList.size(), 2);
ExecutionNode sourceNode = executionNodeList.get(0);
Assert.assertEquals(sourceNode.getNodeType(), NodeType.SOURCE);
Assert.assertEquals(sourceNode.getExecutionTaskList().size(), 1);
Assert.assertEquals(sourceNode.getExecutionEdgeList().size(), 1);
List<ExecutionEdge> sourceExecutionEdges = sourceNode.getExecutionEdgeList();
Assert.assertEquals(sourceExecutionEdges.size(), 1);
ExecutionEdge source2Sink = sourceExecutionEdges.get(0);
Assert.assertEquals(source2Sink.getPartition().getClass(), RoundRobinPartition.class);
ExecutionNode sinkNode = executionNodeList.get(1);
Assert.assertEquals(sinkNode.getNodeType(), NodeType.SINK);
Assert.assertEquals(sinkNode.getExecutionTaskList().size(), 1);
Assert.assertEquals(sinkNode.getExecutionEdgeList().size(), 0);
}
}

View file

@ -34,9 +34,6 @@ echo "Running tests under single-process mode."
# bazel test //java:all_tests --jvmopt="-Dray.run-mode=SINGLE_PROCESS" --test_output="errors" || single_exit_code=$?
run_testng java -Dray.run-mode="SINGLE_PROCESS" -cp $ROOT_DIR/../bazel-bin/java/all_tests_deploy.jar org.testng.TestNG -d /tmp/ray_java_test_output $ROOT_DIR/testng.xml
echo "Running streaming tests."
run_testng java -cp $ROOT_DIR/../bazel-bin/java/streaming_tests_deploy.jar org.testng.TestNG -d /tmp/ray_java_test_output $ROOT_DIR/streaming/testng.xml
popd
pushd $ROOT_DIR

View file

@ -29,12 +29,14 @@ ray_files = [
"ray/core/src/ray/raylet/raylet_monitor",
"ray/core/src/ray/raylet/raylet",
"ray/dashboard/dashboard.py",
"ray/streaming/_streaming.so",
]
# These are the directories where automatically generated Python protobuf
# bindings are created.
generated_python_directories = [
"ray/core/generated",
"ray/streaming/generated",
]
optional_ray_files = []

View file

@ -25,3 +25,4 @@
*PyInit*
*init_raylet*
*Java*
*JNI_*

View file

@ -27,5 +27,6 @@ VERSION_1.0 {
*PyInit*;
*init_raylet*;
*Java*;
*JNI_*;
local: *;
};

View file

@ -235,6 +235,7 @@ genrule(
GENERATED_DIR=$$WORK_DIR/streaming/python/generated
rm -rf $$GENERATED_DIR
mkdir -p $$GENERATED_DIR
touch $$GENERATED_DIR/__init__.py
for f in $(locations //streaming:streaming_py_proto); do
cp $$f $$GENERATED_DIR
done
@ -243,3 +244,43 @@ genrule(
local = 1,
visibility = ["//visibility:public"],
)
# Streaming java
genrule(
name = "copy_jni_h",
srcs = ["@bazel_tools//tools/jdk:jni_header"],
outs = ["jni.h"],
cmd = "cp -f $< $@",
)
genrule(
name = "copy_jni_md_h",
srcs = select({
"@bazel_tools//src/conditions:windows": ["@bazel_tools//tools/jdk:jni_md_header-windows"],
"@bazel_tools//src/conditions:darwin": ["@bazel_tools//tools/jdk:jni_md_header-darwin"],
"//conditions:default": ["@bazel_tools//tools/jdk:jni_md_header-linux"],
}),
outs = ["jni_md.h"],
cmd = "cp -f $< $@",
)
cc_binary(
name = "libstreaming_java.so",
srcs = glob([
"src/lib/java/*.cc",
"src/lib/java/*.h",
]) + [
":jni.h", # needed for `include "jni.h"`
":jni_md.h",
],
includes = [
".", # needed for `include <jni.h>`
"src",
],
linkshared = 1,
linkstatic = 1,
visibility = ["//visibility:public"],
deps = [
":streaming_lib",
],
)

View file

@ -2,27 +2,32 @@
1. Build streaming java
* build ray
* `sh build.sh -l java`
* `cd java && mvn clean install -Dmaven.test.skip=true`
* `bazel build //java:gen_maven_deps`
* `cd java && mvn clean install -Dmaven.test.skip=true && cd ..`
* build streaming
* `cd ray/streaming/java && bazel build all_modules`
* `bazel build //streaming/java:gen_maven_deps`
* `mvn clean install -Dmaven.test.skip=true`
2. Build ray will build ray streaming python.
2. Building ray python also builds ray streaming python.
3. Run examples
```bash
# c++ test
cd streaming/ && bazel test ...
sh src/test/run_streaming_queue_test.sh
cd ..
```bash
# c++ test
cd streaming/ && bazel test ...
sh src/test/run_streaming_queue_test.sh
cd ..
# python test
cd python/ray/streaming/
pushd examples
python simple.py --input-file toy.txt
popd
pushd tests
pytest .
popd
```
# python test
pushd python/ray/streaming/
pushd examples
python simple.py --input-file toy.txt
popd
pushd tests
pytest .
popd
popd
# java test
cd streaming/java/streaming-runtime
mvn test
```

213
streaming/java/BUILD.bazel Normal file
View file

@ -0,0 +1,213 @@
load("//bazel:ray.bzl", "define_java_module")
load("@rules_proto_grpc//java:defs.bzl", "java_proto_compile")
exports_files([
"testng.xml",
])
all_modules = [
"streaming-api",
"streaming-runtime",
]
java_import(
name = "all_modules",
jars = [
"liborg_ray_ray_" + module + ".jar"
for module in all_modules
] + [
"liborg_ray_ray_" + module + "-src.jar"
for module in all_modules
] + [
"all_streaming_tests_deploy.jar",
"all_streaming_tests_deploy-src.jar",
],
deps = [
":org_ray_ray_" + module
for module in all_modules
] + [
":all_streaming_tests",
],
)
define_java_module(
name = "streaming-api",
define_test_lib = True,
test_deps = [
"//java:org_ray_ray_api",
":org_ray_ray_streaming-api",
"@ray_streaming_maven//:com_google_guava_guava",
"@ray_streaming_maven//:org_slf4j_slf4j_api",
"@ray_streaming_maven//:org_slf4j_slf4j_log4j12",
"@ray_streaming_maven//:org_testng_testng",
],
visibility = ["//visibility:public"],
deps = [
"@ray_streaming_maven//:com_google_guava_guava",
"@ray_streaming_maven//:org_slf4j_slf4j_api",
"@ray_streaming_maven//:org_slf4j_slf4j_log4j12",
],
)
# `//streaming:streaming_java` would be located in the jar's `streaming` directory,
# but we need it located in the jar's root path.
# resource_strip_prefix = "streaming" would place other resource files in the wrong path,
# so we copy the libs explicitly to strip the `streaming` path.
filegroup(
name = "java_native_deps",
srcs = [":streaming_java"],
)
filegroup(
name = "streaming_java",
srcs = select({
"@bazel_tools//src/conditions:darwin": [":streaming_java_darwin"],
"//conditions:default": [":streaming_java_linux"],
}),
visibility = ["//visibility:public"],
)
genrule(
name = "streaming_java_darwin",
srcs = ["//streaming:libstreaming_java.so"],
outs = ["libstreaming_java.dylib"],
cmd = "cp $< $@",
output_to_bindir = 1,
)
genrule(
name = "streaming_java_linux",
srcs = ["//streaming:libstreaming_java.so"],
outs = ["libstreaming_java.so"],
cmd = "cp $< $@",
output_to_bindir = 1,
)
define_java_module(
name = "streaming-runtime",
additional_resources = [
":java_native_deps",
],
additional_srcs = [
":all_java_proto",
],
define_test_lib = True,
exclude_srcs = [
"streaming-runtime/src/main/java/org/ray/streaming/runtime/generated/*.java",
],
test_deps = [
"//java:org_ray_ray_api",
"//java:org_ray_ray_runtime",
":org_ray_ray_streaming-api",
":org_ray_ray_streaming-runtime",
"@ray_streaming_maven//:com_google_guava_guava",
"@ray_streaming_maven//:org_slf4j_slf4j_api",
"@ray_streaming_maven//:org_slf4j_slf4j_log4j12",
"@ray_streaming_maven//:org_testng_testng",
],
visibility = ["//visibility:public"],
deps = [
":org_ray_ray_streaming-api",
"//java:org_ray_ray_api",
"//java:org_ray_ray_runtime",
"@ray_streaming_maven//:com_github_davidmoten_flatbuffers_java",
"@ray_streaming_maven//:com_google_guava_guava",
"@ray_streaming_maven//:com_google_protobuf_protobuf_java",
"@ray_streaming_maven//:org_slf4j_slf4j_api",
"@ray_streaming_maven//:org_slf4j_slf4j_log4j12",
],
)
java_binary(
name = "all_streaming_tests",
args = ["streaming/java/testng.xml"],
data = ["testng.xml"],
main_class = "org.testng.TestNG",
runtime_deps = [
":org_ray_ray_streaming-api_test",
":org_ray_ray_streaming-runtime",
":org_ray_ray_streaming-runtime_test",
"//java:org_ray_ray_runtime",
"@ray_streaming_maven//:com_beust_jcommander",
"@ray_streaming_maven//:org_testng_testng",
],
)
# proto buffer
java_proto_compile(
name = "streaming_java_proto",
deps = ["//streaming:streaming_proto"],
)
filegroup(
name = "all_java_proto",
srcs = [
":streaming_java_proto",
],
)
genrule(
name = "copy_pom_file",
srcs = [
"//streaming/java:org_ray_ray_" + module + "_pom"
for module in all_modules
],
outs = ["copy_pom_file.out"],
cmd = """
set -x
WORK_DIR=$$(pwd)
cp -f $(location //streaming/java:org_ray_ray_streaming-api_pom) $$WORK_DIR/streaming/java/streaming-api/pom.xml
cp -f $(location //streaming/java:org_ray_ray_streaming-runtime_pom) $$WORK_DIR/streaming/java/streaming-runtime/pom.xml
echo $$(date) > $@
""",
local = 1,
tags = ["no-cache"],
)
genrule(
name = "cp_java_generated",
srcs = [
":all_java_proto",
":copy_pom_file",
],
outs = ["cp_java_generated.out"],
cmd = """
set -x
WORK_DIR=$$(pwd)
GENERATED_DIR=$$WORK_DIR/streaming/java/streaming-runtime/src/main/java/org/ray/streaming/runtime/generated
rm -rf $$GENERATED_DIR
mkdir -p $$GENERATED_DIR
# Copy protobuf-generated files.
for f in $(locations //streaming/java:all_java_proto); do
unzip $$f -x META-INF/MANIFEST.MF -d $$WORK_DIR/streaming/java/streaming-runtime/src/main/java
done
echo $$(date) > $@
""",
local = 1,
tags = ["no-cache"],
)
# Generates the dependencies needed by maven.
genrule(
name = "gen_maven_deps",
srcs = [
":java_native_deps",
":cp_java_generated",
],
outs = ["gen_maven_deps.out"],
cmd = """
set -x
WORK_DIR=$$(pwd)
# Copy native dependencies.
NATIVE_DEPS_DIR=$$WORK_DIR/streaming/java/streaming-runtime/native_dependencies/
rm -rf $$NATIVE_DEPS_DIR
mkdir -p $$NATIVE_DEPS_DIR
for f in $(locations //streaming/java:java_native_deps); do
chmod +w $$f
cp $$f $$NATIVE_DEPS_DIR
done
echo $$(date) > $@
""",
local = 1,
tags = ["no-cache"],
)

View file

@ -0,0 +1,14 @@
<!DOCTYPE suppressions PUBLIC
"-//Puppy Crawl//DTD Suppressions 1.1//EN"
"http://www.puppycrawl.com/dtds/suppressions_1_1.dtd">
<suppressions>
<suppress checks="OperatorWrap" files=".*" />
<suppress checks="JavadocParagraph" files=".*" />
<suppress checks="SummaryJavadoc" files=".*" />
<suppress checks="AbbreviationAsWordInNameCheck" files=".*"/>
<suppress checks="ClassTypeParameterName" files="OneInputStreamTask.java"/>
<suppress checks="ClassTypeParameterName" files="StreamTask.java"/>
<!-- suppress check for flatbuffer-generated files. -->
<suppress checks=".*" files="org[\\/]ray[\\/]streaming[\\/]runtime[\\/]generated[\\/]" />
</suppressions>

View file

@ -0,0 +1,20 @@
load("@rules_jvm_external//:defs.bzl", "maven_install")
def gen_streaming_java_deps():
maven_install(
name = "ray_streaming_maven",
artifacts = [
"com.beust:jcommander:1.72",
"com.google.guava:guava:27.0.1-jre",
"com.github.davidmoten:flatbuffers-java:1.9.0.1",
"com.google.protobuf:protobuf-java:3.8.0",
"de.ruedigermoeller:fst:2.57",
"org.slf4j:slf4j-api:1.7.12",
"org.slf4j:slf4j-log4j12:1.7.25",
"org.apache.logging.log4j:log4j-core:2.8.2",
"org.testng:testng:6.9.10",
],
repositories = [
"https://repo1.maven.org/maven2/",
],
)

154
streaming/java/pom.xml Normal file
View file

@ -0,0 +1,154 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<groupId>org.ray</groupId>
<artifactId>ray-streaming</artifactId>
<version>0.1-SNAPSHOT</version>
<name>ray streaming</name>
<description>ray streaming</description>
<modules>
<module>streaming-api</module>
<module>streaming-runtime</module>
</modules>
<properties>
<java.version>1.8</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<projetct.version>0.1-SNAPSHOT</projetct.version>
</properties>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>27.0.1-jre</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.9.10</version>
</dependency>
</dependencies>
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.1</version>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
<encoding>${project.build.sourceEncoding}</encoding>
<compilerArgument>-parameters</compilerArgument>
<testCompilerArgument>-parameters</testCompilerArgument>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.0.1</version>
<executions>
<execution>
<id>attach-sources</id>
<phase>deploy</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.10</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-clean-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.2</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
<executions>
<execution>
<id>attach-javadocs</id>
<phase>deploy</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
<executions>
<execution>
<id>deploy</id>
<phase>deploy</phase>
<goals>
<goal>deploy</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
<version>3.0.0</version>
<executions>
<execution>
<id>validate</id>
<phase>validate</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
<configuration>
<configLocation>../../java/checkstyle.xml</configLocation>
<suppressionsLocation>checkstyle-suppressions.xml</suppressionsLocation>
<encoding>UTF-8</encoding>
<consoleOutput>true</consoleOutput>
<failsOnError>true</failsOnError>
<failOnViolation>true</failOnViolation>
<violationSeverity>warning</violationSeverity>
<outputFile>${project.build.directory}/checkstyle-errors.xml</outputFile>
<linkXRef>false</linkXRef>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

View file

@ -1,18 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- This file is auto-generated by Bazel from pom_template.xml, do not modify it. -->
<!-- This file is auto-generated by Bazel from pom_template.xml, do not modify it. -->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>ray-streaming</artifactId>
<groupId>org.ray</groupId>
<artifactId>ray-superpom</artifactId>
<version>0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>streaming</artifactId>
<name>ray streaming</name>
<description>ray streaming</description>
<artifactId>streaming-api</artifactId>
<name>ray streaming api</name>
<description>ray streaming api</description>
<packaging>jar</packaging>
@ -23,16 +23,6 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.ray</groupId>
<artifactId>ray-runtime</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>
<version>1.72</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>27.0.1-jre</version>

View file

@ -1,18 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
{auto_gen_header}
{auto_gen_header}
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>ray-streaming</artifactId>
<groupId>org.ray</groupId>
<artifactId>ray-superpom</artifactId>
<version>0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>streaming</artifactId>
<name>ray streaming</name>
<description>ray streaming</description>
<artifactId>streaming-api</artifactId>
<name>ray streaming api</name>
<description>ray streaming api</description>
<packaging>jar</packaging>
@ -22,11 +22,6 @@
<artifactId>ray-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.ray</groupId>
<artifactId>ray-runtime</artifactId>
<version>${project.version}</version>
</dependency>
{generated_bzl_deps}
{generated_bzl_deps}
</dependencies>
</project>

View file

@ -1,4 +1,4 @@
package org.ray.streaming.core.runtime.collector;
package org.ray.streaming.api.collector;
import java.util.List;
import org.ray.streaming.api.collector.Collector;

View file

@ -1,4 +1,4 @@
package org.ray.streaming.core.runtime.context;
package org.ray.streaming.api.context;
/**
* Encapsulate the runtime information of a streaming task.
@ -15,5 +15,4 @@ public interface RuntimeContext {
Long getMaxBatch();
}

View file

@ -1,17 +1,18 @@
package org.ray.streaming.api.context;
import com.google.common.base.Preconditions;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.ServiceLoader;
import java.util.concurrent.atomic.AtomicInteger;
import org.ray.api.Ray;
import org.ray.streaming.api.stream.StreamSink;
import org.ray.streaming.plan.Plan;
import org.ray.streaming.plan.PlanBuilder;
import org.ray.streaming.schedule.IJobSchedule;
import org.ray.streaming.schedule.impl.JobScheduleImpl;
import org.ray.streaming.schedule.JobScheduler;
/**
* Encapsulate the context information of a streaming Job.
@ -32,11 +33,10 @@ public class StreamingContext implements Serializable {
private StreamingContext() {
this.idGenerator = new AtomicInteger(0);
this.streamSinks = new ArrayList<>();
this.jobConfig = new HashMap();
this.jobConfig = new HashMap<>();
}
public static StreamingContext buildContext() {
Ray.init();
return new StreamingContext();
}
@ -48,8 +48,12 @@ public class StreamingContext implements Serializable {
this.plan = planBuilder.buildPlan();
plan.printPlan();
IJobSchedule jobSchedule = new JobScheduleImpl(jobConfig);
jobSchedule.schedule(plan);
ServiceLoader<JobScheduler> serviceLoader = ServiceLoader.load(JobScheduler.class);
Iterator<JobScheduler> iterator = serviceLoader.iterator();
Preconditions.checkArgument(iterator.hasNext(),
"No JobScheduler implementation has been provided.");
JobScheduler jobSchedule = iterator.next();
jobSchedule.schedule(plan, jobConfig);
}
public int generateId() {

View file

@ -11,7 +11,7 @@ public interface SourceFunction<T> extends Function {
void init(int parallel, int index);
void fetch(long batchId, SourceContext<T> ctx) throws Exception;
void run(SourceContext<T> ctx) throws Exception;
void close();

View file

@ -1,5 +1,6 @@
package org.ray.streaming.api.function.internal;
import java.util.ArrayList;
import java.util.Collection;
import org.ray.streaming.api.function.impl.SourceFunction;
@ -21,10 +22,12 @@ public class CollectionSourceFunction<T> implements SourceFunction<T> {
}
@Override
public void fetch(long batchId, SourceContext<T> ctx) throws Exception {
public void run(SourceContext<T> ctx) throws Exception {
for (T value : values) {
ctx.collect(value);
}
// Empty the collection so that elements are only emitted once.
values = new ArrayList<>();
}
@Override

View file

@ -0,0 +1,23 @@
package org.ray.streaming.api.partition;
import org.ray.streaming.api.function.Function;
/**
* Interface of the partitioning strategy.
*
* @param <T> Type of the input data.
*/
@FunctionalInterface
public interface Partition<T> extends Function {
/**
* Given a record and downstream partitions, determine which partition(s) should receive the
* record.
*
* @param record The record.
* @param numPartition The number of downstream partitions.
* @return IDs of the downstream partitions that should receive the record.
*/
int[] partition(T record, int numPartition);
}
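
As a hedged illustration of the new numPartition-based signature, the sketch below routes a few records through one of the bundled partition implementations. The partition count and record values are made up for the example; only the Partition and RoundRobinPartition classes come from this change.

```java
import java.util.Arrays;
import org.ray.streaming.api.partition.Partition;
import org.ray.streaming.api.partition.impl.RoundRobinPartition;

public class PartitionRoutingSketch {
  public static void main(String[] args) {
    Partition<String> partition = new RoundRobinPartition<>();
    int numPartition = 4;  // assumed number of downstream partitions
    // Each call returns the indices of the partitions that should receive the
    // record; a round-robin partition returns a single index per record.
    for (int i = 0; i < 3; i++) {
      int[] targets = partition.partition("record-" + i, numPartition);
      System.out.println(Arrays.toString(targets));
    }
  }
}
```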

View file

@ -0,0 +1,24 @@
package org.ray.streaming.api.partition.impl;
import java.util.stream.IntStream;
import org.ray.streaming.api.partition.Partition;
/**
* Broadcast the record to all downstream partitions.
*/
public class BroadcastPartition<T> implements Partition<T> {
private int[] partitions = new int[0];
public BroadcastPartition() {
}
@Override
public int[] partition(T value, int numPartition) {
if (partitions.length != numPartition) {
partitions = IntStream.rangeClosed(0, numPartition - 1).toArray();
}
return partitions;
}
}

View file

@ -10,11 +10,11 @@ import org.ray.streaming.message.KeyRecord;
* @param <T> Type of the input record.
*/
public class KeyPartition<K, T> implements Partition<KeyRecord<K, T>> {
private int[] partitions = new int[1];
@Override
public int[] partition(KeyRecord<K, T> keyRecord, int[] taskIds) {
int length = taskIds.length;
int taskId = taskIds[Math.abs(keyRecord.getKey().hashCode() % length)];
return new int[]{taskId};
public int[] partition(KeyRecord<K, T> keyRecord, int numPartition) {
partitions[0] = Math.abs(keyRecord.getKey().hashCode() % numPartition);
return partitions;
}
}

View file

@ -8,17 +8,17 @@ import org.ray.streaming.api.partition.Partition;
* @param <T> Type of the input record.
*/
public class RoundRobinPartition<T> implements Partition<T> {
private int seq;
private int[] partitions = new int[1];
public RoundRobinPartition() {
this.seq = 0;
}
@Override
public int[] partition(T value, int[] taskIds) {
int length = taskIds.length;
int taskId = taskIds[seq++ % length];
return new int[]{taskId};
public int[] partition(T value, int numPartition) {
seq = (seq + 1) % numPartition;
partitions[0] = seq;
return partitions;
}
}

View file

@ -9,9 +9,9 @@ import org.ray.streaming.operator.StreamOperator;
/**
* Represents a DataStream of two joined DataStream.
*
* @param <L> Lype of the data in the left stream.
* @param <R> Lype of the data in the right stream.
* @param <J> Lype of the data in the joined stream.
* @param <L> Type of the data in the left stream.
* @param <R> Type of the data in the right stream.
* @param <J> Type of the data in the joined stream.
*/
public class JoinStream<L, R, J> extends DataStream<L> {
@ -33,10 +33,10 @@ public class JoinStream<L, R, J> extends DataStream<L> {
/**
* Where clause of the join transformation.
*
* @param <L> Lype of the data in the left stream.
* @param <R> Lype of the data in the right stream.
* @param <J> Lype of the data in the joined stream.
* @param <K> Lype of the join key.
* @param <L> Type of the data in the left stream.
* @param <R> Type of the data in the right stream.
* @param <J> Type of the data in the joined stream.
* @param <K> Type of the join key.
*/
class Where<L, R, J, K> implements Serializable {
@ -56,10 +56,10 @@ public class JoinStream<L, R, J> extends DataStream<L> {
/**
* Equal clause of the join transformation.
*
* @param <L> Lype of the data in the left stream.
* @param <R> Lype of the data in the right stream.
* @param <J> Lype of the data in the joined stream.
* @param <K> Lype of the join key.
* @param <L> Type of the data in the left stream.
* @param <R> Type of the data in the right stream.
* @param <J> Type of the data in the joined stream.
* @param <K> Type of the join key.
*/
class Equal<L, R, J, K> implements Serializable {

View file

@ -4,20 +4,13 @@ import java.io.Serializable;
public class Record<T> implements Serializable {
protected transient String stream;
protected transient long batchId;
protected T value;
public Record(T value) {
this.value = value;
}
public Record(long batchId, T value) {
this.batchId = batchId;
this.value = value;
}
public T getValue() {
return value;
}
@ -34,14 +27,6 @@ public class Record<T> implements Serializable {
this.stream = stream;
}
public long getBatchId() {
return batchId;
}
public void setBatchId(long batchId) {
this.batchId = batchId;
}
@Override
public String toString() {
return value.toString();

View file

@ -3,7 +3,7 @@ package org.ray.streaming.operator;
import java.io.Serializable;
import java.util.List;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.core.runtime.context.RuntimeContext;
import org.ray.streaming.api.context.RuntimeContext;
public interface Operator extends Serializable {

View file

@ -2,7 +2,6 @@ package org.ray.streaming.operator;
public enum OperatorType {
MASTER,
SOURCE,
ONE_INPUT,
TWO_INPUT,

View file

@ -2,8 +2,8 @@ package org.ray.streaming.operator;
import java.util.List;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.context.RuntimeContext;
import org.ray.streaming.api.function.Function;
import org.ray.streaming.core.runtime.context.RuntimeContext;
import org.ray.streaming.message.KeyRecord;
import org.ray.streaming.message.Record;

View file

@ -1,10 +1,10 @@
package org.ray.streaming.operator.impl;
import java.util.List;
import org.ray.streaming.api.collector.CollectionCollector;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.context.RuntimeContext;
import org.ray.streaming.api.function.impl.FlatMapFunction;
import org.ray.streaming.core.runtime.collector.CollectionCollector;
import org.ray.streaming.core.runtime.context.RuntimeContext;
import org.ray.streaming.message.Record;
import org.ray.streaming.operator.OneInputOperator;
import org.ray.streaming.operator.StreamOperator;

View file

@ -4,8 +4,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.context.RuntimeContext;
import org.ray.streaming.api.function.impl.ReduceFunction;
import org.ray.streaming.core.runtime.context.RuntimeContext;
import org.ray.streaming.message.KeyRecord;
import org.ray.streaming.message.Record;
import org.ray.streaming.operator.OneInputOperator;

View file

@ -2,9 +2,9 @@ package org.ray.streaming.operator.impl;
import java.util.List;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.context.RuntimeContext;
import org.ray.streaming.api.function.impl.SourceFunction;
import org.ray.streaming.api.function.impl.SourceFunction.SourceContext;
import org.ray.streaming.core.runtime.context.RuntimeContext;
import org.ray.streaming.message.Record;
import org.ray.streaming.operator.OperatorType;
import org.ray.streaming.operator.StreamOperator;
@ -24,24 +24,20 @@ public class SourceOperator<T> extends StreamOperator<SourceFunction<T>> {
this.function.init(runtimeContext.getParallelism(), runtimeContext.getTaskIndex());
}
public void process(Long batchId) {
public void run() {
try {
this.sourceContext.setBatchId(batchId);
this.function.fetch(batchId, this.sourceContext);
this.function.run(this.sourceContext);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@Override
public OperatorType getOpType() {
return OperatorType.SOURCE;
}
class SourceContextImpl implements SourceContext<T> {
private long batchId;
private List<Collector> collectors;
public SourceContextImpl(List<Collector> collectors) {
@@ -51,12 +47,9 @@ public class SourceOperator<T> extends StreamOperator<SourceFunction<T>> {
@Override
public void collect(T t) throws Exception {
for (Collector collector : collectors) {
collector.collect(new Record(batchId, t));
collector.collect(new Record(t));
}
}
private void setBatchId(long batchId) {
this.batchId = batchId;
}
}
}
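
The batch-driven process(Long) entry point gives way to run(). Below is a hypothetical source written only against the calls SourceOperator makes above (init, run, and SourceContext.collect); the close() method and the throws clause on run() are assumptions inferred from the surrounding try/catch, not shown in this diff.

import org.ray.streaming.api.function.impl.SourceFunction;

public class RangeSource implements SourceFunction<Integer> {
  private int taskIndex;

  @Override
  public void init(int parallelism, int index) {
    // Remember which parallel instance this is; a real source would use it to split its input.
    this.taskIndex = index;
  }

  @Override
  public void run(SourceContext<Integer> ctx) throws Exception {
    // Emit a small fixed range; SourceOperator wraps every collected value in a Record.
    for (int i = 0; i < 10; i++) {
      ctx.collect(i);
    }
  }

  @Override
  public void close() {
    // Assumed lifecycle hook; nothing to release in this sketch.
  }
}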

View file

@@ -1,17 +1,19 @@
package org.ray.streaming.schedule;
import java.util.Map;
import org.ray.streaming.plan.Plan;
/**
* Interface of the job scheduler.
*/
public interface IJobSchedule {
public interface JobScheduler {
/**
* Assign the logical plan to a physical execution graph, and schedule the job to run.
*
* @param plan The logical plan.
* @param conf The job configuration.
*/
void schedule(Plan plan);
void schedule(Plan plan, Map<String, Object> conf);
}
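
As a rough sketch of the renamed interface, here is a hypothetical no-op implementation (not part of this commit) that only logs the request; a real scheduler would translate the Plan into a physical graph and start worker actors.

import java.util.Map;
import org.ray.streaming.plan.Plan;
import org.ray.streaming.schedule.JobScheduler;

/** Hypothetical example implementation; real scheduling logic omitted. */
public class LoggingJobScheduler implements JobScheduler {
  @Override
  public void schedule(Plan plan, Map<String, Object> conf) {
    // A real implementation would build the execution graph and submit workers here.
    System.out.println("Scheduling plan " + plan + " with conf " + conf);
  }
}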

View file

@@ -0,0 +1,44 @@
package org.ray.streaming.util;
public class Config {
/**
* Maximum number of batches to run in a streaming job.
*/
public static final String STREAMING_BATCH_MAX_COUNT = "streaming.batch.max.count";
/**
* Batch frequency in milliseconds.
*/
public static final String STREAMING_BATCH_FREQUENCY = "streaming.batch.frequency";
public static final long STREAMING_BATCH_FREQUENCY_DEFAULT = 1000;
public static final String STREAMING_JOB_NAME = "streaming.job.name";
public static final String STREAMING_OP_NAME = "streaming.op_name";
public static final String TASK_JOB_ID = "streaming.task_job_id";
public static final String STREAMING_WORKER_NAME = "streaming.worker_name";
// channel
public static final String CHANNEL_TYPE = "channel_type";
public static final String MEMORY_CHANNEL = "memory_channel";
public static final String NATIVE_CHANNEL = "native_channel";
public static final String DEFAULT_CHANNEL_TYPE = NATIVE_CHANNEL;
public static final String CHANNEL_SIZE = "channel_size";
public static final String CHANNEL_SIZE_DEFAULT = String.valueOf((long)Math.pow(10, 8));
public static final String IS_RECREATE = "streaming.is_recreate";
// Return from DataReader.getBundle if only empty messages were read in this interval.
public static final String TIMER_INTERVAL_MS = "timer_interval_ms";
public static final String READ_TIMEOUT_MS = "read_timeout_ms";
public static final String DEFAULT_READ_TIMEOUT_MS = "10";
public static final String STREAMING_RING_BUFFER_CAPACITY = "streaming.ring_buffer_capacity";
// Write an empty message if there is no data to be written in this interval.
public static final String STREAMING_EMPTY_MESSAGE_INTERVAL = "streaming.empty_message_interval";
// operator type
public static final String OPERATOR_TYPE = "operator_type";
}
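
A small, hedged sketch of how these keys might be looked up from a job configuration map, using only the constants defined above; the map itself and the chosen values are hypothetical.

import java.util.HashMap;
import java.util.Map;
import org.ray.streaming.util.Config;

public class ConfigExample {
  public static void main(String[] args) {
    Map<String, Object> conf = new HashMap<>();
    // Switch to in-memory channels, e.g. for single-process tests.
    conf.put(Config.CHANNEL_TYPE, Config.MEMORY_CHANNEL);

    String channelType =
        (String) conf.getOrDefault(Config.CHANNEL_TYPE, Config.DEFAULT_CHANNEL_TYPE);
    long channelSize = Long.parseLong(
        (String) conf.getOrDefault(Config.CHANNEL_SIZE, Config.CHANNEL_SIZE_DEFAULT));
    System.out.println("channel type: " + channelType + ", channel size: " + channelSize);
  }
}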

View file

@@ -0,0 +1,106 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- This file is auto-generated by Bazel from pom_template.xml, do not modify it. -->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>ray-streaming</artifactId>
<groupId>org.ray</groupId>
<version>0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>streaming-runtime</artifactId>
<name>ray streaming runtime</name>
<description>ray streaming runtime</description>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.ray</groupId>
<artifactId>ray-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.ray</groupId>
<artifactId>ray-runtime</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.ray</groupId>
<artifactId>streaming-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.github.davidmoten</groupId>
<artifactId>flatbuffers-java</artifactId>
<version>1.9.0.1</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>27.0.1-jre</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>3.8.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.9.10</version>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
<resource>
<directory>native_dependencies</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy-dependencies-to-build</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${basedir}/../../build/java</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.3.1</version>
<configuration>
<outputDirectory>${basedir}/../../build/java</outputDirectory>
</configuration>
</plugin>
</plugins>
</build>
</project>

View file

@@ -0,0 +1,77 @@
<?xml version="1.0" encoding="UTF-8"?>
{auto_gen_header}
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>ray-streaming</artifactId>
<groupId>org.ray</groupId>
<version>0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>streaming-runtime</artifactId>
<name>ray streaming runtime</name>
<description>ray streaming runtime</description>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.ray</groupId>
<artifactId>ray-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.ray</groupId>
<artifactId>ray-runtime</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.ray</groupId>
<artifactId>streaming-api</artifactId>
<version>${project.version}</version>
</dependency>
{generated_bzl_deps}
</dependencies>
<build>
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
<resource>
<directory>native_dependencies</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy-dependencies-to-build</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${basedir}/../../build/java</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.3.1</version>
<configuration>
<outputDirectory>${basedir}/../../build/java</outputDirectory>
</configuration>
</plugin>
</plugins>
</build>
</project>

View file

@@ -0,0 +1,24 @@
package org.ray.streaming.runtime.cluster;
import java.util.ArrayList;
import java.util.List;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.streaming.runtime.worker.JobWorker;
/**
* ResourceManager manages the worker resources (JobWorker actors) of a streaming job.
*/
public class ResourceManager {
public List<RayActor<JobWorker>> createWorkers(int workerNum) {
List<RayActor<JobWorker>> workers = new ArrayList<>();
for (int i = 0; i < workerNum; i++) {
RayActor<JobWorker> worker = Ray.createActor(JobWorker::new);
workers.add(worker);
}
return workers;
}
}
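
A hedged usage sketch: Ray.init() and Ray.shutdown() are assumed to come from the public org.ray.api.Ray API and to bracket any actor creation; the worker count of 4 is an arbitrary example value.

import java.util.List;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.streaming.runtime.cluster.ResourceManager;
import org.ray.streaming.runtime.worker.JobWorker;

public class ResourceManagerExample {
  public static void main(String[] args) {
    Ray.init();
    try {
      // One JobWorker actor is created per parallel task.
      List<RayActor<JobWorker>> workers = new ResourceManager().createWorkers(4);
      System.out.println("Created " + workers.size() + " workers");
    } finally {
      Ray.shutdown();
    }
  }
}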

View file

@@ -0,0 +1,40 @@
package org.ray.streaming.runtime.core.collector;
import java.nio.ByteBuffer;
import java.util.Collection;
import org.ray.runtime.util.Serializer;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.partition.Partition;
import org.ray.streaming.message.Record;
import org.ray.streaming.runtime.transfer.ChannelID;
import org.ray.streaming.runtime.transfer.DataWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class OutputCollector implements Collector<Record> {
private static final Logger LOGGER = LoggerFactory.getLogger(OutputCollector.class);
private Partition partition;
private DataWriter writer;
private ChannelID[] outputQueues;
public OutputCollector(Collection<String> outputQueueIds,
DataWriter writer,
Partition partition) {
this.outputQueues = outputQueueIds.stream().map(ChannelID::from).toArray(ChannelID[]::new);
this.writer = writer;
this.partition = partition;
LOGGER.debug("OutputCollector constructed, outputQueueIds:{}, partition:{}.",
outputQueueIds, this.partition);
}
@Override
public void collect(Record record) {
int[] partitions = this.partition.partition(record, outputQueues.length);
ByteBuffer msgBuffer = ByteBuffer.wrap(Serializer.encode(record));
for (int partition : partitions) {
writer.write(outputQueues[partition], msgBuffer);
}
}
}
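
OutputCollector fans each record out to whichever channels the Partition selects. Below is a hypothetical round-robin partition written against the int[] partition(record, numPartition) shape used in collect() above; the exact Partition interface declaration is not shown in this diff, so treat the signature as an assumption.

import org.ray.streaming.api.partition.Partition;
import org.ray.streaming.message.Record;

public class RoundRobinPartition implements Partition<Record> {
  private int seq = 0;

  @Override
  public int[] partition(Record record, int numPartition) {
    // Route each record to exactly one downstream channel, cycling through all of them.
    seq = (seq + 1) % numPartition;
    return new int[]{seq};
  }
}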

View file

@@ -1,4 +1,4 @@
package org.ray.streaming.core.command;
package org.ray.streaming.runtime.core.command;
import java.io.Serializable;

View file

@@ -1,6 +1,7 @@
package org.ray.streaming.core.graph;
package org.ray.streaming.runtime.core.graph;
import java.io.Serializable;
import org.ray.streaming.api.partition.Partition;
/**

View file

@@ -1,25 +1,47 @@
package org.ray.streaming.core.graph;
package org.ray.streaming.runtime.core.graph;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.ray.api.RayActor;
import org.ray.streaming.core.runtime.StreamWorker;
import org.ray.streaming.runtime.worker.JobWorker;
/**
* Physical execution graph.
*/
public class ExecutionGraph implements Serializable {
private long buildTime;
private List<ExecutionNode> executionNodeList;
private List<RayActor<JobWorker>> sourceWorkers = new ArrayList<>();
private List<RayActor<JobWorker>> sinkWorkers = new ArrayList<>();
public ExecutionGraph(List<ExecutionNode> executionNodes) {
this.executionNodeList = executionNodes;
for (ExecutionNode executionNode : executionNodeList) {
if (executionNode.getNodeType() == ExecutionNode.NodeType.SOURCE) {
List<RayActor<JobWorker>> actors = executionNode.getExecutionTasks().stream()
.map(ExecutionTask::getWorker).collect(Collectors.toList());
sourceWorkers.addAll(actors);
}
if (executionNode.getNodeType() == ExecutionNode.NodeType.SINK) {
List<RayActor<JobWorker>> actors = executionNode.getExecutionTasks().stream()
.map(ExecutionTask::getWorker).collect(Collectors.toList());
sinkWorkers.addAll(actors);
}
}
buildTime = System.currentTimeMillis();
}
public void addExectionNode(ExecutionNode executionNode) {
this.executionNodeList.add(executionNode);
public List<RayActor<JobWorker>> getSourceWorkers() {
return sourceWorkers;
}
public List<RayActor<JobWorker>> getSinkWorkers() {
return sinkWorkers;
}
public List<ExecutionNode> getExecutionNodeList() {
@@ -28,7 +50,7 @@ public class ExecutionGraph implements Serializable {
public ExecutionTask getExecutionTaskByTaskId(int taskId) {
for (ExecutionNode executionNode : executionNodeList) {
for (ExecutionTask executionTask : executionNode.getExecutionTaskList()) {
for (ExecutionTask executionTask : executionNode.getExecutionTasks()) {
if (executionTask.getTaskId() == taskId) {
return executionTask;
}
@@ -48,7 +70,7 @@ public class ExecutionGraph implements Serializable {
public ExecutionNode getExecutionNodeByTaskId(int taskId) {
for (ExecutionNode executionNode : executionNodeList) {
for (ExecutionTask executionTask : executionNode.getExecutionTaskList()) {
for (ExecutionTask executionTask : executionNode.getExecutionTasks()) {
if (executionTask.getTaskId() == taskId) {
return executionNode;
}
@@ -57,11 +79,11 @@ public class ExecutionGraph implements Serializable {
throw new RuntimeException("Task " + taskId + " does not exist!");
}
public Map<Integer, RayActor<StreamWorker>> getTaskId2WorkerByNodeId(int nodeId) {
public Map<Integer, RayActor<JobWorker>> getTaskId2WorkerByNodeId(int nodeId) {
for (ExecutionNode executionNode : executionNodeList) {
if (executionNode.getNodeId() == nodeId) {
Map<Integer, RayActor<StreamWorker>> taskId2Worker = new HashMap<>();
for (ExecutionTask executionTask : executionNode.getExecutionTaskList()) {
Map<Integer, RayActor<JobWorker>> taskId2Worker = new HashMap<>();
for (ExecutionTask executionTask : executionNode.getExecutionTasks()) {
taskId2Worker.put(executionTask.getTaskId(), executionTask.getWorker());
}
return taskId2Worker;
@@ -70,4 +92,7 @@ public class ExecutionGraph implements Serializable {
throw new RuntimeException("Node " + nodeId + " does not exist!");
}
public long getBuildTime() {
return buildTime;
}
}
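
A brief sketch of how a scheduler might consume the new accessors; the executionNodes list is assumed to come from the task-assignment step elsewhere in the runtime.

import java.util.List;
import org.ray.api.RayActor;
import org.ray.streaming.runtime.core.graph.ExecutionGraph;
import org.ray.streaming.runtime.core.graph.ExecutionNode;
import org.ray.streaming.runtime.worker.JobWorker;

public class GraphInspector {
  /** Prints a summary of the physical graph using the accessors added above. */
  static void summarize(List<ExecutionNode> executionNodes) {
    ExecutionGraph graph = new ExecutionGraph(executionNodes);
    List<RayActor<JobWorker>> sources = graph.getSourceWorkers();
    List<RayActor<JobWorker>> sinks = graph.getSinkWorkers();
    System.out.println("sources=" + sources.size() + ", sinks=" + sinks.size()
        + ", builtAt=" + graph.getBuildTime());
  }
}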

View file

@@ -1,10 +1,10 @@
package org.ray.streaming.core.graph;
package org.ray.streaming.runtime.core.graph;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.ray.streaming.core.processor.StreamProcessor;
import org.ray.streaming.plan.VertexType;
import org.ray.streaming.runtime.core.processor.StreamProcessor;
/**
* A node in the physical execution graph.
@@ -15,14 +15,16 @@ public class ExecutionNode implements Serializable {
private int parallelism;
private NodeType nodeType;
private StreamProcessor streamProcessor;
private List<ExecutionTask> executionTaskList;
private List<ExecutionEdge> executionEdgeList;
private List<ExecutionTask> executionTasks;
private List<ExecutionEdge> inputsEdges;
private List<ExecutionEdge> outputEdges;
public ExecutionNode(int nodeId, int parallelism) {
this.nodeId = nodeId;
this.parallelism = parallelism;
this.executionTaskList = new ArrayList<>();
this.executionEdgeList = new ArrayList<>();
this.executionTasks = new ArrayList<>();
this.inputsEdges = new ArrayList<>();
this.outputEdges = new ArrayList<>();
}
public int getNodeId() {
@@ -41,24 +43,32 @@
this.parallelism = parallelism;
}
public List<ExecutionTask> getExecutionTaskList() {
return executionTaskList;
public List<ExecutionTask> getExecutionTasks() {
return executionTasks;
}
public void setExecutionTaskList(List<ExecutionTask> executionTaskList) {
this.executionTaskList = executionTaskList;
public void setExecutionTasks(List<ExecutionTask> executionTasks) {
this.executionTasks = executionTasks;
}
public List<ExecutionEdge> getExecutionEdgeList() {
return executionEdgeList;
public List<ExecutionEdge> getOutputEdges() {
return outputEdges;
}
public void setExecutionEdgeList(List<ExecutionEdge> executionEdgeList) {
this.executionEdgeList = executionEdgeList;
public void setOutputEdges(List<ExecutionEdge> outputEdges) {
this.outputEdges = outputEdges;
}
public void addExecutionEdge(ExecutionEdge executionEdge) {
this.executionEdgeList.add(executionEdge);
this.outputEdges.add(executionEdge);
}
public void addInputEdge(ExecutionEdge executionEdge) {
this.inputsEdges.add(executionEdge);
}
public List<ExecutionEdge> getInputsEdges() {
return inputsEdges;
}
public StreamProcessor getStreamProcessor() {
@@ -75,9 +85,6 @@
public void setNodeType(VertexType vertexType) {
switch (vertexType) {
case MASTER:
this.nodeType = NodeType.MASTER;
break;
case SOURCE:
this.nodeType = NodeType.SOURCE;
break;
@@ -89,8 +96,18 @@
}
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("ExecutionNode{");
sb.append("nodeId=").append(nodeId);
sb.append(", parallelism=").append(parallelism);
sb.append(", nodeType=").append(nodeType);
sb.append(", streamProcessor=").append(streamProcessor);
sb.append('}');
return sb.toString();
}
public enum NodeType {
MASTER,
SOURCE,
PROCESS,
SINK,
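
For illustration, a hedged sketch of building one node with the renamed task accessors. The ExecutionNode and ExecutionTask constructors and setNodeType/setExecutionTasks come from the diffs above and below; the worker actors passed in are assumptions supplied by the caller.

import java.util.Arrays;
import org.ray.api.RayActor;
import org.ray.streaming.plan.VertexType;
import org.ray.streaming.runtime.core.graph.ExecutionNode;
import org.ray.streaming.runtime.core.graph.ExecutionTask;
import org.ray.streaming.runtime.worker.JobWorker;

public class NodeBuilderExample {
  /** Builds a source node with two parallel tasks backed by the given worker actors. */
  static ExecutionNode buildSourceNode(RayActor<JobWorker> w0, RayActor<JobWorker> w1) {
    ExecutionNode node = new ExecutionNode(0, 2);
    node.setNodeType(VertexType.SOURCE);
    node.setExecutionTasks(Arrays.asList(
        new ExecutionTask(0, 0, w0),
        new ExecutionTask(1, 1, w1)));
    return node;
  }
}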

View file

@@ -1,21 +1,22 @@
package org.ray.streaming.core.graph;
package org.ray.streaming.runtime.core.graph;
import java.io.Serializable;
import org.ray.api.RayActor;
import org.ray.streaming.core.runtime.StreamWorker;
import org.ray.streaming.runtime.worker.JobWorker;
/**
* ExecutionTask is the minimal execution unit.
*
* <p>
* An ExecutionNode has n ExecutionTasks if parallelism is n.
*/
public class ExecutionTask implements Serializable {
private int taskId;
private int taskIndex;
private RayActor<StreamWorker> worker;
private RayActor<JobWorker> worker;
public ExecutionTask(int taskId, int taskIndex, RayActor<StreamWorker> worker) {
public ExecutionTask(int taskId, int taskIndex, RayActor<JobWorker> worker) {
this.taskId = taskId;
this.taskIndex = taskIndex;
this.worker = worker;
@@ -37,11 +38,11 @@ public class ExecutionTask implements Serializable {
this.taskIndex = taskIndex;
}
public RayActor<StreamWorker> getWorker() {
public RayActor<JobWorker> getWorker() {
return worker;
}
public void setWorker(RayActor<StreamWorker> worker) {
public void setWorker(RayActor<JobWorker> worker) {
this.worker = worker;
}
}

View file

@@ -1,4 +1,4 @@
package org.ray.streaming.core.processor;
package org.ray.streaming.runtime.core.processor;
import org.ray.streaming.message.Record;
import org.ray.streaming.operator.OneInputOperator;

View file

@@ -1,4 +1,4 @@
package org.ray.streaming.core.processor;
package org.ray.streaming.runtime.core.processor;
import org.ray.streaming.operator.OneInputOperator;
import org.ray.streaming.operator.OperatorType;
@@ -18,8 +18,6 @@ public class ProcessBuilder {
LOGGER.info("Building StreamProcessor, operator type = {}, operator = {}.", type,
streamOperator.getClass().getSimpleName().toString());
switch (type) {
case MASTER:
return new MasterProcessor(null);
case SOURCE:
return new SourceProcessor<>((SourceOperator) streamOperator);
case ONE_INPUT:

Some files were not shown because too many files have changed in this diff.