mirror of
https://github.com/vale981/ray
synced 2025-03-04 09:31:43 -05:00
[Streaming]Farewell : remove all of streaming related from ray repo. (#21770)
New repo url is https://github.com/ray-project/mobius Co-authored-by: 林濯 <lingxuzn.zlx@antgroup.com>
This commit is contained in:
parent
2da2ac52ce
commit
ec62d7f510
412 changed files with 13509 additions and 36499 deletions
|
@ -3,7 +3,6 @@
|
|||
[
|
||||
"RAY_CI_LINUX_WHEELS_AFFECTED",
|
||||
"RAY_CI_JAVA_AFFECTED",
|
||||
"RAY_CI_STREAMING_JAVA_AFFECTED",
|
||||
]
|
||||
commands:
|
||||
# Build the wheels and jars
|
||||
|
@ -41,7 +40,6 @@
|
|||
[
|
||||
"RAY_CI_LINUX_WHEELS_AFFECTED",
|
||||
"RAY_CI_JAVA_AFFECTED",
|
||||
"RAY_CI_STREAMING_JAVA_AFFECTED",
|
||||
]
|
||||
commands:
|
||||
# Build the debug wheels
|
||||
|
@ -61,7 +59,6 @@
|
|||
# [
|
||||
# "RAY_CI_LINUX_WHEELS_AFFECTED",
|
||||
# "RAY_CI_JAVA_AFFECTED",
|
||||
# "RAY_CI_STREAMING_JAVA_AFFECTED",
|
||||
# ]
|
||||
# commands:
|
||||
# # Build the asan wheels
|
||||
|
@ -156,24 +153,6 @@
|
|||
commands:
|
||||
- RAY_bootstrap_with_gcs=1 RAY_gcs_grpc_based_pubsub=1 RAY_gcs_storage=memory ./java/test.sh
|
||||
|
||||
- label: ":java: Streaming"
|
||||
conditions:
|
||||
["RAY_CI_STREAMING_PYTHON_AFFECTED", "RAY_CI_STREAMING_JAVA_AFFECTED", "RAY_CI_PYTHON_AFFECTED"]
|
||||
commands:
|
||||
- bazel test --config=ci $(./scripts/bazel_export_options)
|
||||
//streaming:all
|
||||
- bash streaming/src/test/run_streaming_queue_test.sh
|
||||
|
||||
- label: ":java: :redis: Streaming"
|
||||
conditions:
|
||||
["RAY_CI_STREAMING_PYTHON_AFFECTED", "RAY_CI_STREAMING_JAVA_AFFECTED", "RAY_CI_PYTHON_AFFECTED"]
|
||||
commands:
|
||||
- bazel test --config=ci
|
||||
--test_env=RAY_gcs_grpc_based_pubsub=1
|
||||
--test_env=RAY_bootstrap_with_gcs=1
|
||||
--test_env=RAY_gcs_storage=memory $(./scripts/bazel_export_options) //streaming:all
|
||||
- RAY_bootstrap_with_gcs=1 RAY_gcs_grpc_based_pubsub=1 RAY_gcs_storage=memory bash streaming/src/test/run_streaming_queue_test.sh
|
||||
|
||||
- label: ":cpp: Ray CPP Worker"
|
||||
conditions: [ "RAY_CI_CPP_AFFECTED" ]
|
||||
commands:
|
||||
|
|
1
.flake8
1
.flake8
|
@ -1,7 +1,6 @@
|
|||
[flake8]
|
||||
exclude =
|
||||
python/ray/core/generated/
|
||||
streaming/python/generated
|
||||
doc/source/conf.py
|
||||
python/ray/cloudpickle/
|
||||
python/ray/thirdparty_files/
|
||||
|
|
10
.gitignore
vendored
10
.gitignore
vendored
|
@ -169,20 +169,10 @@ dependency-reduced-pom.xml
|
|||
# Cpp
|
||||
cpp/example/thirdparty/
|
||||
|
||||
# streaming/python
|
||||
streaming/python/generated/
|
||||
streaming/java/streaming-runtime/src/main/java/io/ray/streaming/runtime/generated/
|
||||
streaming/build/java
|
||||
.clwb
|
||||
streaming/**/.settings
|
||||
streaming/java/**/target
|
||||
streaming/java/**/.classpath
|
||||
streaming/java/**/.project
|
||||
streaming/java/**/*.log
|
||||
|
||||
# pom.xml files generated from pom_template.xml
|
||||
java/**/pom.xml
|
||||
streaming/java/**/pom.xml
|
||||
|
||||
# python virtual env
|
||||
venv
|
||||
|
|
51
BUILD.bazel
51
BUILD.bazel
|
@ -450,18 +450,18 @@ cc_library(
|
|||
],
|
||||
)
|
||||
|
||||
# This header is used to warp some streaming code so we can reduce suspicious
|
||||
# This header is used to warp some internal code so we can reduce suspicious
|
||||
# symbols export.
|
||||
cc_library(
|
||||
name = "exported_streaming_internal",
|
||||
name = "exported_internal",
|
||||
srcs = glob(
|
||||
[
|
||||
"src/ray/streaming/streaming.cc",
|
||||
"src/ray/internal/internal.cc",
|
||||
],
|
||||
),
|
||||
hdrs = glob(
|
||||
[
|
||||
"src/ray/streaming/streaming.h",
|
||||
"src/ray/internal/internal.h",
|
||||
],
|
||||
),
|
||||
copts = COPTS,
|
||||
|
@ -674,7 +674,7 @@ cc_library(
|
|||
],
|
||||
}),
|
||||
strip_include_prefix = "src",
|
||||
visibility = ["//streaming:__subpackages__"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":agent_manager_rpc",
|
||||
":gcs",
|
||||
|
@ -722,7 +722,7 @@ cc_library(
|
|||
],
|
||||
}),
|
||||
strip_include_prefix = "src",
|
||||
visibility = ["//streaming:__subpackages__"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":agent_manager_rpc",
|
||||
":node_manager_fbs",
|
||||
|
@ -2337,7 +2337,7 @@ pyx_library(
|
|||
),
|
||||
deps = [
|
||||
"//:core_worker_lib",
|
||||
"//:exported_streaming_internal",
|
||||
"//:exported_internal",
|
||||
"//:global_state_accessor_lib",
|
||||
"//:ray_util",
|
||||
"//:raylet_lib",
|
||||
|
@ -2348,33 +2348,6 @@ pyx_library(
|
|||
],
|
||||
)
|
||||
|
||||
pyx_library(
|
||||
name = "_streaming",
|
||||
srcs = glob([
|
||||
"python/ray/streaming/_streaming.pyx",
|
||||
"python/ray/__init__.py",
|
||||
"python/ray/_raylet.pxd",
|
||||
"python/ray/includes/*.pxd",
|
||||
"python/ray/includes/*.pxi",
|
||||
"python/ray/streaming/__init__.pxd",
|
||||
"python/ray/streaming/includes/*.pxd",
|
||||
"python/ray/streaming/includes/*.pxi",
|
||||
]),
|
||||
cc_kwargs = dict(
|
||||
srcs = PYX_SRCS,
|
||||
copts = COPTS + PYX_COPTS,
|
||||
),
|
||||
deps = [
|
||||
"//streaming:streaming_lib",
|
||||
"@com_github_grpc_grpc//:grpc++",
|
||||
"@com_google_absl//absl/container:flat_hash_map",
|
||||
"@com_google_absl//absl/container:flat_hash_set",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
"@com_google_absl//absl/time",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "libcore_worker_library_java.so",
|
||||
srcs = glob([
|
||||
|
@ -2398,7 +2371,7 @@ cc_binary(
|
|||
visibility = ["//java:__subpackages__"],
|
||||
deps = [
|
||||
"//:core_worker_lib",
|
||||
"//:exported_streaming_internal",
|
||||
"//:exported_internal",
|
||||
"//:global_state_accessor_lib",
|
||||
"//:src/ray/ray_exported_symbols.lds",
|
||||
"//:src/ray/ray_version_script.lds",
|
||||
|
@ -2491,12 +2464,6 @@ copy_to_workspace(
|
|||
dstdir = "python/ray",
|
||||
)
|
||||
|
||||
copy_to_workspace(
|
||||
name = "cp_streaming",
|
||||
srcs = ["python/ray/streaming/_streaming.so"],
|
||||
dstdir = "python/ray/streaming",
|
||||
)
|
||||
|
||||
copy_to_workspace(
|
||||
name = "cp_all_py_proto",
|
||||
srcs = [":all_py_proto"],
|
||||
|
@ -2535,7 +2502,6 @@ genrule(
|
|||
srcs = [
|
||||
":cp_all_py_proto",
|
||||
":cp_serve_py_proto",
|
||||
"//streaming:copy_streaming_py_proto",
|
||||
],
|
||||
outs = ["install_py_proto.out"],
|
||||
cmd = """
|
||||
|
@ -2555,7 +2521,6 @@ genrule(
|
|||
name = "ray_pkg",
|
||||
srcs = [
|
||||
":cp_raylet_so",
|
||||
":cp_streaming",
|
||||
":python_sources",
|
||||
":install_py_proto",
|
||||
":cp_redis",
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
|
||||
load("@com_github_ray_project_ray//java:dependencies.bzl", "gen_java_deps")
|
||||
load("@com_github_ray_project_ray//streaming/java:dependencies.bzl", "gen_streaming_java_deps")
|
||||
load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps")
|
||||
load("@com_github_jupp0r_prometheus_cpp//bazel:repositories.bzl", "prometheus_cpp_repositories")
|
||||
load("@com_github_grpc_grpc//third_party/py:python_configure.bzl", "python_configure")
|
||||
|
@ -12,7 +11,6 @@ load("@com_github_johnynek_bazel_jar_jar//:jar_jar.bzl", "jar_jar_repositories")
|
|||
def ray_deps_build_all():
|
||||
bazel_skylib_workspace()
|
||||
gen_java_deps()
|
||||
gen_streaming_java_deps()
|
||||
boost_deps()
|
||||
prometheus_cpp_repositories()
|
||||
python_configure(name = "local_config_python")
|
||||
|
|
|
@ -45,6 +45,6 @@ done
|
|||
|
||||
pushd "$ROOT_DIR"/../..
|
||||
BAZEL_FILES=(bazel/BUILD bazel/ray.bzl BUILD.bazel java/BUILD.bazel \
|
||||
cpp/BUILD.bazel cpp/example/BUILD.bazel streaming/BUILD.bazel streaming/java/BUILD.bazel WORKSPACE)
|
||||
cpp/BUILD.bazel cpp/example/BUILD.bazel WORKSPACE)
|
||||
buildifier -mode=$RUN_TYPE -diff_command="diff -u" "${BAZEL_FILES[@]}"
|
||||
popd
|
||||
|
|
|
@ -88,9 +88,6 @@ if __name__ == "__main__":
|
|||
RAY_CI_PYTHON_AFFECTED = 0
|
||||
RAY_CI_LINUX_WHEELS_AFFECTED = 0
|
||||
RAY_CI_MACOS_WHEELS_AFFECTED = 0
|
||||
RAY_CI_STREAMING_CPP_AFFECTED = 0
|
||||
RAY_CI_STREAMING_PYTHON_AFFECTED = 0
|
||||
RAY_CI_STREAMING_JAVA_AFFECTED = 0
|
||||
RAY_CI_DASHBOARD_AFFECTED = 0
|
||||
RAY_CI_DOCKER_AFFECTED = 0
|
||||
RAY_CI_DOC_AFFECTED = 0
|
||||
|
@ -179,7 +176,6 @@ if __name__ == "__main__":
|
|||
RAY_CI_DASHBOARD_AFFECTED = 1
|
||||
RAY_CI_LINUX_WHEELS_AFFECTED = 1
|
||||
RAY_CI_MACOS_WHEELS_AFFECTED = 1
|
||||
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
|
||||
RAY_CI_DOC_AFFECTED = 1
|
||||
# Python changes might impact cross language stack in Java.
|
||||
# Java also depends on Python CLI to manage processes.
|
||||
|
@ -189,7 +185,6 @@ if __name__ == "__main__":
|
|||
RAY_CI_PYTHON_DEPENDENCIES_AFFECTED = 1
|
||||
elif changed_file.startswith("java/"):
|
||||
RAY_CI_JAVA_AFFECTED = 1
|
||||
RAY_CI_STREAMING_JAVA_AFFECTED = 1
|
||||
elif changed_file.startswith("cpp/"):
|
||||
RAY_CI_CPP_AFFECTED = 1
|
||||
elif changed_file.startswith("docker/"):
|
||||
|
@ -218,19 +213,8 @@ if __name__ == "__main__":
|
|||
RAY_CI_PYTHON_AFFECTED = 1
|
||||
RAY_CI_LINUX_WHEELS_AFFECTED = 1
|
||||
RAY_CI_MACOS_WHEELS_AFFECTED = 1
|
||||
RAY_CI_STREAMING_CPP_AFFECTED = 1
|
||||
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
|
||||
RAY_CI_STREAMING_JAVA_AFFECTED = 1
|
||||
RAY_CI_DASHBOARD_AFFECTED = 1
|
||||
RAY_CI_DOC_AFFECTED = 1
|
||||
elif changed_file.startswith("streaming/src"):
|
||||
RAY_CI_STREAMING_CPP_AFFECTED = 1
|
||||
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
|
||||
RAY_CI_STREAMING_JAVA_AFFECTED = 1
|
||||
elif changed_file.startswith("streaming/python"):
|
||||
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
|
||||
elif changed_file.startswith("streaming/java"):
|
||||
RAY_CI_STREAMING_JAVA_AFFECTED = 1
|
||||
else:
|
||||
RAY_CI_TUNE_AFFECTED = 1
|
||||
RAY_CI_SGD_AFFECTED = 1
|
||||
|
@ -244,9 +228,6 @@ if __name__ == "__main__":
|
|||
RAY_CI_DOC_AFFECTED = 1
|
||||
RAY_CI_LINUX_WHEELS_AFFECTED = 1
|
||||
RAY_CI_MACOS_WHEELS_AFFECTED = 1
|
||||
RAY_CI_STREAMING_CPP_AFFECTED = 1
|
||||
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
|
||||
RAY_CI_STREAMING_JAVA_AFFECTED = 1
|
||||
RAY_CI_DASHBOARD_AFFECTED = 1
|
||||
else:
|
||||
RAY_CI_TUNE_AFFECTED = 1
|
||||
|
@ -262,9 +243,6 @@ if __name__ == "__main__":
|
|||
RAY_CI_DOC_AFFECTED = 1
|
||||
RAY_CI_LINUX_WHEELS_AFFECTED = 1
|
||||
RAY_CI_MACOS_WHEELS_AFFECTED = 1
|
||||
RAY_CI_STREAMING_CPP_AFFECTED = 1
|
||||
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
|
||||
RAY_CI_STREAMING_JAVA_AFFECTED = 1
|
||||
RAY_CI_DASHBOARD_AFFECTED = 1
|
||||
|
||||
# Log the modified environment variables visible in console.
|
||||
|
@ -284,12 +262,6 @@ if __name__ == "__main__":
|
|||
"RAY_CI_PYTHON_AFFECTED={}".format(RAY_CI_PYTHON_AFFECTED),
|
||||
"RAY_CI_LINUX_WHEELS_AFFECTED={}".format(RAY_CI_LINUX_WHEELS_AFFECTED),
|
||||
"RAY_CI_MACOS_WHEELS_AFFECTED={}".format(RAY_CI_MACOS_WHEELS_AFFECTED),
|
||||
"RAY_CI_STREAMING_CPP_AFFECTED={}".format(
|
||||
RAY_CI_STREAMING_CPP_AFFECTED),
|
||||
"RAY_CI_STREAMING_PYTHON_AFFECTED={}".format(
|
||||
RAY_CI_STREAMING_PYTHON_AFFECTED),
|
||||
"RAY_CI_STREAMING_JAVA_AFFECTED={}".format(
|
||||
RAY_CI_STREAMING_JAVA_AFFECTED),
|
||||
"RAY_CI_DOCKER_AFFECTED={}".format(RAY_CI_DOCKER_AFFECTED),
|
||||
"RAY_CI_PYTHON_DEPENDENCIES_AFFECTED={}".format(
|
||||
RAY_CI_PYTHON_DEPENDENCIES_AFFECTED),
|
||||
|
|
|
@ -312,8 +312,6 @@ java_binary(
|
|||
"//java:io_ray_ray_api",
|
||||
"//java:io_ray_ray_runtime",
|
||||
"//java:io_ray_ray_serve",
|
||||
"//streaming/java:io_ray_ray_streaming-api",
|
||||
"//streaming/java:io_ray_ray_streaming-runtime",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -335,7 +333,6 @@ genrule(
|
|||
srcs = [
|
||||
"//java:ray_dist_shaded.jar",
|
||||
"//java:gen_maven_deps",
|
||||
"//streaming/java:gen_maven_deps",
|
||||
],
|
||||
outs = ["ray_java_pkg.out"],
|
||||
cmd = """
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
../../streaming/python/
|
26
python/ray/streaming/__init__.py
Normal file
26
python/ray/streaming/__init__.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
# flake8: noqa
|
||||
# Ray should be imported before streaming
|
||||
import ray
|
||||
|
||||
|
||||
# Redirect ray.streaming.* to raystreaming, so user can keep
|
||||
# their original habit.
|
||||
def _update_modules():
|
||||
try:
|
||||
import raystreaming
|
||||
import raystreaming.context
|
||||
import sys
|
||||
ray_streaming_module_name = raystreaming.__name__
|
||||
ray_streaming_modules = {}
|
||||
for mod_name, module in sys.modules.items():
|
||||
if mod_name.startswith(ray_streaming_module_name):
|
||||
ray_streaming_modules[mod_name.replace(
|
||||
"raystreaming", "ray.streaming")] = module
|
||||
sys.modules.update(ray_streaming_modules)
|
||||
except Exception as e:
|
||||
print("import raystreaming error: ", e)
|
||||
|
||||
|
||||
_update_modules()
|
||||
|
||||
__all__ = ["StreamingContext"]
|
|
@ -12,12 +12,12 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ray/streaming/streaming.h"
|
||||
#include "ray/internal/internal.h"
|
||||
|
||||
#include "ray/core_worker/core_worker.h"
|
||||
|
||||
namespace ray {
|
||||
namespace streaming {
|
||||
namespace internal {
|
||||
|
||||
using ray::core::CoreWorkerProcess;
|
||||
using ray::core::TaskOptions;
|
||||
|
@ -52,5 +52,5 @@ std::vector<rpc::ObjectReference> SendInternal(const ActorID &peer_actor_id,
|
|||
}
|
||||
return result.value();
|
||||
}
|
||||
} // namespace streaming
|
||||
} // namespace internal
|
||||
} // namespace ray
|
|
@ -17,10 +17,10 @@
|
|||
#include "ray/common/id.h"
|
||||
#include "ray/core_worker/common.h"
|
||||
|
||||
// This header is used to warp some streaming code so we can reduce suspicious
|
||||
// This header is used to warp some internal code so we can reduce suspicious
|
||||
// symbols export.
|
||||
namespace ray {
|
||||
namespace streaming {
|
||||
namespace internal {
|
||||
|
||||
using ray::core::RayFunction;
|
||||
|
||||
|
@ -33,5 +33,5 @@ std::vector<rpc::ObjectReference> SendInternal(const ActorID &peer_actor_id,
|
|||
std::shared_ptr<LocalMemoryBuffer> buffer,
|
||||
RayFunction &function, int return_num);
|
||||
|
||||
} // namespace streaming
|
||||
} // namespace internal
|
||||
} // namespace ray
|
|
@ -1,443 +0,0 @@
|
|||
# Bazel build
|
||||
# C/C++ documentation: https://docs.bazel.build/versions/master/be/c-cpp.html
|
||||
|
||||
load("@rules_proto_grpc//python:defs.bzl", "python_proto_compile")
|
||||
load("//bazel:ray.bzl", "COPTS", "copy_to_workspace")
|
||||
|
||||
proto_library(
|
||||
name = "streaming_proto",
|
||||
srcs = ["src/protobuf/streaming.proto"],
|
||||
strip_import_prefix = "src",
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "streaming_queue_proto",
|
||||
srcs = ["src/protobuf/streaming_queue.proto"],
|
||||
strip_import_prefix = "src",
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "remote_call_proto",
|
||||
srcs = ["src/protobuf/remote_call.proto"],
|
||||
strip_import_prefix = "src",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"streaming_proto",
|
||||
"@com_google_protobuf//:any_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_proto_library(
|
||||
name = "streaming_cc_proto",
|
||||
deps = [":streaming_proto"],
|
||||
)
|
||||
|
||||
cc_proto_library(
|
||||
name = "streaming_queue_cc_proto",
|
||||
deps = ["streaming_queue_proto"],
|
||||
)
|
||||
|
||||
# Use `linkshared` to ensure ray related symbols are not packed into streaming libs
|
||||
# to avoid duplicate symbols. In runtime we expose ray related symbols, which can
|
||||
# be linked into streaming libs by dynamic linker. See bazel rule `//:_raylet`
|
||||
cc_binary(
|
||||
name = "ray_util.so",
|
||||
copts = COPTS,
|
||||
linkshared = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
deps = ["//:ray_util"],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "ray_common.so",
|
||||
copts = COPTS,
|
||||
linkshared = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
deps = ["//:ray_common"],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "stats_lib.so",
|
||||
copts = COPTS,
|
||||
linkshared = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
deps = ["//:stats_lib"],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "core_worker_lib.so",
|
||||
copts = COPTS,
|
||||
linkshared = 1,
|
||||
deps = ["//:core_worker_lib"],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "exported_streaming_internal.so",
|
||||
copts = COPTS,
|
||||
linkshared = 1,
|
||||
deps = ["//:exported_streaming_internal"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_util",
|
||||
srcs = glob([
|
||||
"src/util/*.cc",
|
||||
]),
|
||||
hdrs = glob([
|
||||
"src/util/*.h",
|
||||
]),
|
||||
copts = COPTS,
|
||||
includes = ["src"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"ray_common.so",
|
||||
"ray_util.so",
|
||||
"@boost//:any",
|
||||
"@com_google_googletest//:gtest",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_metrics",
|
||||
srcs = glob([
|
||||
"src/metrics/*.cc",
|
||||
]),
|
||||
hdrs = glob([
|
||||
"src/metrics/*.h",
|
||||
]),
|
||||
copts = COPTS,
|
||||
strip_include_prefix = "src",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"stats_lib.so",
|
||||
":streaming_config",
|
||||
":streaming_util",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_config",
|
||||
srcs = glob([
|
||||
"src/config/*.cc",
|
||||
]),
|
||||
hdrs = glob([
|
||||
"src/config/*.h",
|
||||
]),
|
||||
copts = COPTS,
|
||||
strip_include_prefix = "src",
|
||||
deps = [
|
||||
"ray_common.so",
|
||||
":streaming_cc_proto",
|
||||
":streaming_util",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_message",
|
||||
srcs = glob([
|
||||
"src/message/*.cc",
|
||||
]),
|
||||
hdrs = glob([
|
||||
"src/message/*.h",
|
||||
]),
|
||||
copts = COPTS,
|
||||
strip_include_prefix = "src",
|
||||
deps = [
|
||||
"ray_common.so",
|
||||
":streaming_config",
|
||||
":streaming_util",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_queue",
|
||||
srcs = glob([
|
||||
"src/queue/*.cc",
|
||||
]),
|
||||
hdrs = glob([
|
||||
"src/queue/*.h",
|
||||
]),
|
||||
copts = COPTS,
|
||||
strip_include_prefix = "src",
|
||||
deps = [
|
||||
"ray_common.so",
|
||||
"ray_util.so",
|
||||
":streaming_config",
|
||||
":streaming_message",
|
||||
":streaming_queue_cc_proto",
|
||||
":streaming_util",
|
||||
"@boost//:asio",
|
||||
"@boost//:thread",
|
||||
] + select({
|
||||
"@bazel_tools//src/conditions:windows": [
|
||||
# TODO(mehrdadn): This is to resolve symbols on Windows for now. Should remove this later. (See d7f8d18.)
|
||||
"//:core_worker_lib",
|
||||
"//:exported_streaming_internal",
|
||||
],
|
||||
"//conditions:default": [
|
||||
"core_worker_lib.so",
|
||||
"exported_streaming_internal.so",
|
||||
],
|
||||
}),
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_channel",
|
||||
srcs = glob(["src/channel/*.cc"]),
|
||||
hdrs = glob(["src/channel/*.h"]),
|
||||
copts = COPTS,
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":streaming_common",
|
||||
":streaming_message",
|
||||
":streaming_queue",
|
||||
":streaming_ring_buffer",
|
||||
":streaming_util",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_reliability",
|
||||
srcs = glob(["src/reliability/*.cc"]),
|
||||
hdrs = glob(["src/reliability/*.h"]),
|
||||
copts = COPTS,
|
||||
includes = ["src/"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":streaming_channel",
|
||||
":streaming_message",
|
||||
":streaming_util",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_ring_buffer",
|
||||
srcs = glob(["src/ring_buffer/*.cc"]),
|
||||
hdrs = glob(["src/ring_buffer/*.h"]),
|
||||
copts = COPTS,
|
||||
includes = ["src/"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"core_worker_lib.so",
|
||||
":ray_common.so",
|
||||
":ray_util.so",
|
||||
":streaming_message",
|
||||
"@boost//:circular_buffer",
|
||||
"@boost//:thread",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_common",
|
||||
srcs = glob(["src/common/*.cc"]),
|
||||
hdrs = glob(["src/common/*.h"]),
|
||||
copts = COPTS,
|
||||
includes = ["src/"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_lib",
|
||||
srcs = glob([
|
||||
"src/*.cc",
|
||||
]),
|
||||
hdrs = glob([
|
||||
"src/*.h",
|
||||
"src/queue/*.h",
|
||||
"src/test/*.h",
|
||||
]),
|
||||
copts = COPTS,
|
||||
strip_include_prefix = "src",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"ray_common.so",
|
||||
"ray_util.so",
|
||||
":streaming_channel",
|
||||
":streaming_common",
|
||||
":streaming_config",
|
||||
":streaming_message",
|
||||
":streaming_metrics",
|
||||
":streaming_queue",
|
||||
":streaming_reliability",
|
||||
":streaming_util",
|
||||
],
|
||||
)
|
||||
|
||||
test_common_deps = [
|
||||
"//:exported_streaming_internal",
|
||||
":streaming_lib",
|
||||
"//:ray_common",
|
||||
"//:ray_util",
|
||||
"//:core_worker_lib",
|
||||
]
|
||||
|
||||
# streaming queue mock actor binary
|
||||
cc_binary(
|
||||
name = "streaming_test_worker",
|
||||
srcs = glob(["src/test/*.h"]) + [
|
||||
"src/test/mock_actor.cc",
|
||||
],
|
||||
copts = COPTS,
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
# use src/test/run_streaming_queue_test.sh to run this test
|
||||
cc_binary(
|
||||
name = "streaming_queue_tests",
|
||||
srcs = glob(["src/test/*.h"]) + [
|
||||
"src/test/streaming_queue_tests.cc",
|
||||
],
|
||||
copts = COPTS,
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "streaming_message_ring_buffer_tests",
|
||||
srcs = [
|
||||
"src/test/ring_buffer_tests.cc",
|
||||
],
|
||||
copts = COPTS,
|
||||
tags = ["team:ant-group"],
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "barrier_helper_tests",
|
||||
srcs = [
|
||||
"src/test/barrier_helper_tests.cc",
|
||||
],
|
||||
copts = COPTS,
|
||||
tags = ["team:ant-group"],
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "streaming_message_serialization_tests",
|
||||
srcs = [
|
||||
"src/test/message_serialization_tests.cc",
|
||||
],
|
||||
copts = COPTS,
|
||||
tags = ["team:ant-group"],
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "streaming_mock_transfer",
|
||||
srcs = [
|
||||
"src/test/mock_transfer_tests.cc",
|
||||
],
|
||||
copts = COPTS,
|
||||
tags = ["team:ant-group"],
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "streaming_util_tests",
|
||||
srcs = [
|
||||
"src/test/streaming_util_tests.cc",
|
||||
],
|
||||
copts = COPTS,
|
||||
tags = ["team:ant-group"],
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "streaming_perf_tests",
|
||||
srcs = [
|
||||
"src/test/streaming_perf_tests.cc",
|
||||
],
|
||||
copts = COPTS,
|
||||
tags = ["team:ant-group"],
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "event_service_tests",
|
||||
srcs = [
|
||||
"src/test/event_service_tests.cc",
|
||||
],
|
||||
copts = COPTS,
|
||||
tags = ["team:ant-group"],
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "queue_protobuf_tests",
|
||||
srcs = [
|
||||
"src/test/queue_protobuf_tests.cc",
|
||||
],
|
||||
tags = ["team:ant-group"],
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "data_writer_tests",
|
||||
srcs = [
|
||||
"src/test/data_writer_tests.cc",
|
||||
],
|
||||
copts = COPTS,
|
||||
tags = ["team:ant-group"],
|
||||
deps = test_common_deps,
|
||||
)
|
||||
|
||||
python_proto_compile(
|
||||
name = "streaming_py_proto",
|
||||
deps = [":streaming_proto"],
|
||||
)
|
||||
|
||||
python_proto_compile(
|
||||
name = "remote_call_py_proto",
|
||||
deps = [":remote_call_proto"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all_py_proto",
|
||||
srcs = [
|
||||
":remote_call_py_proto",
|
||||
":streaming_py_proto",
|
||||
],
|
||||
)
|
||||
|
||||
copy_to_workspace(
|
||||
name = "cp_all_py_proto",
|
||||
srcs = [":all_py_proto"],
|
||||
dstdir = "streaming/python/generated",
|
||||
)
|
||||
|
||||
genrule(
|
||||
name = "copy_streaming_py_proto",
|
||||
srcs = [
|
||||
":cp_all_py_proto",
|
||||
],
|
||||
outs = [
|
||||
"copy_streaming_py_proto.out",
|
||||
],
|
||||
cmd = """
|
||||
GENERATED_DIR="streaming/python/generated"
|
||||
mkdir -p "$$GENERATED_DIR"
|
||||
touch "$$GENERATED_DIR/__init__.py"
|
||||
# Use this `sed` command to change the import path in the generated file.
|
||||
sed -i -E 's/from streaming.src.protobuf/from ./' "$$GENERATED_DIR/remote_call_pb2.py"
|
||||
sed -i -E 's/from protobuf/from ./' "$$GENERATED_DIR/remote_call_pb2.py"
|
||||
date > $@
|
||||
""",
|
||||
local = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "libstreaming_java.so",
|
||||
srcs = glob([
|
||||
"src/lib/java/*.cc",
|
||||
"src/lib/java/*.h",
|
||||
]),
|
||||
copts = COPTS,
|
||||
linkshared = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":streaming_lib",
|
||||
"@bazel_tools//tools/jdk:jni",
|
||||
],
|
||||
)
|
|
@ -1,232 +0,0 @@
|
|||
|
||||
Ray Streaming
|
||||
=============
|
||||
|
||||
Ray Streaming is a streaming data processing framework built on ray. It will be helpful for you to build jobs dealing with real-time data.
|
||||
|
||||
Key Features
|
||||
------------
|
||||
|
||||
|
||||
#.
|
||||
**Cross Language**. Based on Ray's multi-language actor, Ray Streaming can also run in multiple
|
||||
languages(only Python and Java is supported currently) with high efficiency. You can implement your
|
||||
operator in different languages and run them in one job.
|
||||
|
||||
#.
|
||||
**Single Node Failover**. We designed a special failover mechanism that only needs to rollback the
|
||||
failed node it's own, in most cases, to recover the job. This will be a huge benefit if your job is
|
||||
sensitive about failure recovery time. In other frameworks like Flink, instead, the entire job should
|
||||
be restarted once a node has failure.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Python
|
||||
^^^^^^
|
||||
|
||||
.. code-block:: Python
|
||||
|
||||
import ray
|
||||
from ray.streaming import StreamingContext
|
||||
|
||||
ctx = StreamingContext.Builder() \
|
||||
.build()
|
||||
ctx.read_text_file(__file__) \
|
||||
.set_parallelism(1) \
|
||||
.flat_map(lambda x: x.split()) \
|
||||
.map(lambda x: (x, 1)) \
|
||||
.key_by(lambda x: x[0]) \
|
||||
.reduce(lambda old_value, new_value:
|
||||
(old_value[0], old_value[1] + new_value[1])) \
|
||||
.filter(lambda x: "ray" not in x) \
|
||||
.sink(lambda x: print("result", x))
|
||||
ctx.submit("word_count")
|
||||
|
||||
Java
|
||||
^^^^
|
||||
|
||||
.. code-block:: Java
|
||||
|
||||
StreamingContext context = StreamingContext.buildContext();
|
||||
List<String> text = Collections.singletonList("hello world");
|
||||
DataStreamSource.fromCollection(context, text)
|
||||
.flatMap((FlatMapFunction<String, WordAndCount>) (value, collector) -> {
|
||||
String[] records = value.split(" ");
|
||||
for (String record : records) {
|
||||
collector.collect(new WordAndCount(record, 1));
|
||||
}
|
||||
})
|
||||
.filter(pair -> !pair.word.contains("world"))
|
||||
.keyBy(pair -> pair.word)
|
||||
.reduce((oldValue, newValue) ->
|
||||
new WordAndCount(oldValue.word, oldValue.count + newValue.count))
|
||||
.sink(result -> System.out.println("sink result=" + result));
|
||||
context.execute("testWordCount");
|
||||
|
||||
Use Java Operators in Python
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. code-block:: Python
|
||||
|
||||
import ray
|
||||
from ray.streaming import StreamingContext
|
||||
|
||||
ctx = StreamingContext.Builder().build()
|
||||
ctx.from_values("a", "b", "c") \
|
||||
.as_java_stream() \
|
||||
.map("io.ray.streaming.runtime.demo.HybridStreamTest$Mapper1") \
|
||||
.filter("io.ray.streaming.runtime.demo.HybridStreamTest$Filter1") \
|
||||
.as_python_stream() \
|
||||
.sink(lambda x: print("result", x))
|
||||
ctx.submit("HybridStreamTest")
|
||||
|
||||
Use Python Operators in Java
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. code-block:: Java
|
||||
|
||||
StreamingContext context = StreamingContext.buildContext();
|
||||
DataStreamSource<String> streamSource =
|
||||
DataStreamSource.fromCollection(context, Arrays.asList("a", "b", "c"));
|
||||
streamSource
|
||||
.map(x -> x + x)
|
||||
.asPythonStream()
|
||||
.map("ray.streaming.tests.test_hybrid_stream", "map_func1")
|
||||
.filter("ray.streaming.tests.test_hybrid_stream", "filter_func1")
|
||||
.asJavaStream()
|
||||
.sink(value -> System.out.println("HybridStream sink=" + value));
|
||||
context.execute("HybridStreamTestJob");
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
Python
|
||||
^^^^^^
|
||||
|
||||
Ray Streaming is packaged together with Ray, install Ray with: ``pip install ray``\ ,
|
||||
this wheel contains all dependencies your need to run Python streaming, including Java operators supporting.
|
||||
|
||||
Java
|
||||
^^^^
|
||||
|
||||
Import Ray Streaming using maven:
|
||||
|
||||
.. code-block:: xml
|
||||
|
||||
<dependency>
|
||||
<artifactId>ray-api</artifactId>
|
||||
<groupId>io.ray</groupId>
|
||||
<version>1.0.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<artifactId>ray-runtime</artifactId>
|
||||
<groupId>io.ray</groupId>
|
||||
<version>1.0.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<artifactId>streaming-api</artifactId>
|
||||
<groupId>io.ray</groupId>
|
||||
<version>1.0.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<artifactId>streaming-runtime</artifactId>
|
||||
<groupId>io.ray</groupId>
|
||||
<version>1.0.1</version>
|
||||
</dependency>
|
||||
|
||||
Internal Design
|
||||
---------------
|
||||
|
||||
Overall Architecture
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
||||
.. image:: assets/architecture.jpg
|
||||
:target: assets/architecture.jpg
|
||||
:alt: architecture
|
||||
|
||||
|
||||
Ray Streaming is built on Ray. We use Ray's actor to run everything, and use Ray's direct call for communication.
|
||||
|
||||
There are two main types of actor: job master and job worker.
|
||||
|
||||
When you execute ``context.submit()`` in your driver, we'll first create a job master, then job master will create all job workers needed to run your operator. Then job master will be responsible to coordinate all workers, including checkpoint, failover, etc.
|
||||
|
||||
Check `Ray Streaming Proposal <https://docs.google.com/document/d/1EubVMFSFJqNLmbNztnYKj6m0VMzg3a8ZVQZg-mgbLQ0>`_
|
||||
to get more detailed information about the overall design.
|
||||
|
||||
Fault Tolerance Mechanism
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
As mentioned above, different from other frameworks, We designed a special failover mechanism that only needs to rollback the failed node it's own, in most cases, to recover the job. The main idea to achieve this feature is saving messages for each node, and replay them from upstream when node has failure.
|
||||
|
||||
Check `Fault Tolerance Proposal <https://docs.google.com/document/d/1NKjGr7fi-45cEzWA-N_wJ5CoUgaJfnsW9YeWsSg1shY>`_
|
||||
for more detailed information about our fault tolerance mechanism.
|
||||
|
||||
Development Guides
|
||||
------------------
|
||||
|
||||
|
||||
#.
|
||||
Build streaming java
|
||||
|
||||
|
||||
* build ray
|
||||
|
||||
* ``bazel build //java:gen_maven_deps``
|
||||
* ``cd java && mvn clean install -Dmaven.test.skip=true && cd ..``
|
||||
|
||||
* build streaming
|
||||
|
||||
* ``bazel build //streaming/java:gen_maven_deps``
|
||||
* ``mvn clean install -Dmaven.test.skip=true``
|
||||
|
||||
#.
|
||||
Build ray python will build ray streaming python.
|
||||
|
||||
#.
|
||||
Run examples
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# c++ test
|
||||
cd streaming/ && bazel test ...
|
||||
sh src/test/run_streaming_queue_test.sh
|
||||
cd ..
|
||||
|
||||
# python test
|
||||
pushd python/ray/streaming/
|
||||
pushd examples
|
||||
python simple.py --input-file toy.txt
|
||||
popd
|
||||
pushd tests
|
||||
pytest .
|
||||
popd
|
||||
popd
|
||||
|
||||
# java test
|
||||
cd streaming/java/streaming-runtime
|
||||
mvn test
|
||||
|
||||
|
||||
More Information
|
||||
----------------
|
||||
|
||||
|
||||
* `Ray Streaming implementation plan <https://github.com/ray-project/ray/issues/6184>`_
|
||||
* `Fault Tolerance Proposal <https://docs.google.com/document/d/1NKjGr7fi-45cEzWA-N_wJ5CoUgaJfnsW9YeWsSg1shY>`_
|
||||
* `Data Transfer Proposal <https://docs.google.com/document/d/1cpGr40e9N8knmynqUnnrKhbNnz_6ucn5I2Koq2p4Xp8>`_
|
||||
* `Ray Streaming Proposal <https://docs.google.com/document/d/1EubVMFSFJqNLmbNztnYKj6m0VMzg3a8ZVQZg-mgbLQ0>`_
|
||||
* `Open Source Plan <https://docs.google.com/document/d/1fHFpPgXy853z0m--BZ_L1wmQADf2KXDrLQ1ebFlHLws>`_
|
||||
|
||||
Getting Involved
|
||||
----------------
|
||||
|
||||
- `Community Slack`_: Join our Slack workspace.
|
||||
- `GitHub Discussions`_: For discussions about development, questions about usage, and feature requests.
|
||||
- `GitHub Issues`_: For reporting bugs.
|
||||
|
||||
.. _`GitHub Discussions`: https://github.com/ray-project/ray/discussions
|
||||
.. _`GitHub Issues`: https://github.com/ray-project/ray/issues
|
||||
.. _`Community Slack`: https://forms.gle/9TSdDYUgxYs8SA9e8
|
Binary file not shown.
Before Width: | Height: | Size: 76 KiB |
|
@ -1,268 +0,0 @@
|
|||
load("//bazel:ray.bzl", "define_java_module")
|
||||
load("//bazel:ray.bzl", "native_java_binary")
|
||||
load("//bazel:ray.bzl", "native_java_library")
|
||||
load("@rules_proto_grpc//java:defs.bzl", "java_proto_compile")
|
||||
|
||||
exports_files([
|
||||
"testng.xml",
|
||||
])
|
||||
|
||||
all_modules = [
|
||||
"streaming-state",
|
||||
"streaming-api",
|
||||
"streaming-runtime",
|
||||
]
|
||||
|
||||
java_import(
|
||||
name = "all_modules",
|
||||
jars = [
|
||||
"libio_ray_ray_" + module + ".jar"
|
||||
for module in all_modules
|
||||
] + [
|
||||
"libio_ray_ray_" + module + "-src.jar"
|
||||
for module in all_modules
|
||||
] + [
|
||||
"all_streaming_tests_deploy.jar",
|
||||
"all_streaming_tests_deploy-src.jar",
|
||||
],
|
||||
deps = [
|
||||
":io_ray_ray_" + module
|
||||
for module in all_modules
|
||||
] + [
|
||||
":all_streaming_tests",
|
||||
],
|
||||
)
|
||||
|
||||
define_java_module(
|
||||
name = "streaming-api",
|
||||
define_test_lib = True,
|
||||
test_deps = [
|
||||
"//java:io_ray_ray_api",
|
||||
":io_ray_ray_streaming-state",
|
||||
":io_ray_ray_streaming-api",
|
||||
"@maven//:com_google_guava_guava",
|
||||
"@maven//:org_apache_logging_log4j_log4j_api",
|
||||
"@maven//:org_apache_logging_log4j_log4j_core",
|
||||
"@maven//:org_apache_logging_log4j_log4j_slf4j_impl",
|
||||
"@maven//:org_slf4j_slf4j_api",
|
||||
"@maven//:org_testng_testng",
|
||||
"@maven//:org_yaml_snakeyaml",
|
||||
"@ray_streaming_maven//:org_apache_commons_commons_lang3",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":io_ray_ray_streaming-state",
|
||||
"//java:io_ray_ray_api",
|
||||
"//java:io_ray_ray_runtime",
|
||||
"@maven//:com_google_guava_guava",
|
||||
"@maven//:org_apache_logging_log4j_log4j_api",
|
||||
"@maven//:org_apache_logging_log4j_log4j_core",
|
||||
"@maven//:org_apache_logging_log4j_log4j_slf4j_impl",
|
||||
"@maven//:org_slf4j_slf4j_api",
|
||||
"@ray_streaming_maven//:com_google_code_findbugs_jsr305",
|
||||
"@ray_streaming_maven//:com_google_code_gson_gson",
|
||||
"@ray_streaming_maven//:org_apache_commons_commons_lang3",
|
||||
],
|
||||
)
|
||||
|
||||
define_java_module(
|
||||
name = "streaming-state",
|
||||
define_test_lib = True,
|
||||
test_deps = [
|
||||
":io_ray_ray_streaming-state",
|
||||
"@maven//:com_google_guava_guava",
|
||||
"@maven//:org_apache_logging_log4j_log4j_api",
|
||||
"@maven//:org_apache_logging_log4j_log4j_core",
|
||||
"@maven//:org_apache_logging_log4j_log4j_slf4j_impl",
|
||||
"@maven//:org_slf4j_slf4j_api",
|
||||
"@maven//:org_testng_testng",
|
||||
"@maven//:de_ruedigermoeller_fst",
|
||||
"@maven//:org_yaml_snakeyaml",
|
||||
"@ray_streaming_maven//:org_apache_commons_commons_lang3",
|
||||
"@ray_streaming_maven//:org_mockito_mockito_all",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"@maven//:com_google_guava_guava",
|
||||
"@maven//:de_ruedigermoeller_fst",
|
||||
"@maven//:org_apache_logging_log4j_log4j_api",
|
||||
"@maven//:org_apache_logging_log4j_log4j_core",
|
||||
"@maven//:org_apache_logging_log4j_log4j_slf4j_impl",
|
||||
"@maven//:org_slf4j_slf4j_api",
|
||||
"@ray_streaming_maven//:org_apache_commons_commons_lang3",
|
||||
],
|
||||
)
|
||||
|
||||
native_java_library("streaming-runtime", "streaming_java", "//streaming:libstreaming_java.so")
|
||||
|
||||
filegroup(
|
||||
name = "java_native_deps",
|
||||
srcs = [":streaming_java"],
|
||||
)
|
||||
|
||||
define_java_module(
|
||||
name = "streaming-runtime",
|
||||
additional_resources = [
|
||||
":java_native_deps",
|
||||
],
|
||||
additional_srcs = [
|
||||
":all_java_proto",
|
||||
],
|
||||
define_test_lib = True,
|
||||
exclude_srcs = [
|
||||
"streaming-runtime/src/main/java/io/ray/streaming/runtime/generated/*.java",
|
||||
],
|
||||
test_deps = [
|
||||
"//java:io_ray_ray_api",
|
||||
"//java:io_ray_ray_runtime",
|
||||
":io_ray_ray_streaming-state",
|
||||
":io_ray_ray_streaming-api",
|
||||
":io_ray_ray_streaming-runtime",
|
||||
"@maven//:com_google_guava_guava",
|
||||
"@maven//:de_ruedigermoeller_fst",
|
||||
"@maven//:org_apache_logging_log4j_log4j_api",
|
||||
"@maven//:org_apache_logging_log4j_log4j_core",
|
||||
"@maven//:org_apache_logging_log4j_log4j_slf4j_impl",
|
||||
"@maven//:org_slf4j_slf4j_api",
|
||||
"@maven//:org_testng_testng",
|
||||
"@maven//:org_yaml_snakeyaml",
|
||||
"@ray_streaming_maven//:com_google_code_findbugs_jsr305",
|
||||
"@ray_streaming_maven//:org_aeonbits_owner_owner",
|
||||
"@ray_streaming_maven//:org_apache_commons_commons_lang3",
|
||||
"@ray_streaming_maven//:org_mockito_mockito_all",
|
||||
"@ray_streaming_maven//:org_powermock_powermock_api_mockito",
|
||||
"@ray_streaming_maven//:org_powermock_powermock_module_testng",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":io_ray_ray_streaming-api",
|
||||
":io_ray_ray_streaming-state",
|
||||
"//java:io_ray_ray_api",
|
||||
"//java:io_ray_ray_runtime",
|
||||
"@maven//:com_google_guava_guava",
|
||||
"@maven//:com_google_protobuf_protobuf_java",
|
||||
"@maven//:commons_io_commons_io",
|
||||
"@maven//:de_ruedigermoeller_fst",
|
||||
"@maven//:org_apache_commons_commons_lang3",
|
||||
"@maven//:org_apache_logging_log4j_log4j_api",
|
||||
"@maven//:org_apache_logging_log4j_log4j_core",
|
||||
"@maven//:org_apache_logging_log4j_log4j_slf4j_impl",
|
||||
"@maven//:org_msgpack_msgpack_core",
|
||||
"@maven//:org_slf4j_slf4j_api",
|
||||
"@ray_streaming_maven//:com_github_davidmoten_flatbuffers_java",
|
||||
"@ray_streaming_maven//:com_google_code_findbugs_jsr305",
|
||||
"@ray_streaming_maven//:commons_collections_commons_collections",
|
||||
"@ray_streaming_maven//:org_aeonbits_owner_owner",
|
||||
],
|
||||
)
|
||||
|
||||
java_binary(
|
||||
name = "all_streaming_tests",
|
||||
args = ["streaming/java/testng.xml"],
|
||||
data = ["testng.xml"],
|
||||
main_class = "org.testng.TestNG",
|
||||
runtime_deps = [
|
||||
":io_ray_ray_streaming-api_test",
|
||||
":io_ray_ray_streaming-runtime",
|
||||
":io_ray_ray_streaming-runtime_test",
|
||||
":io_ray_ray_streaming-state",
|
||||
"//java:io_ray_ray_runtime",
|
||||
"@maven//:org_testng_testng",
|
||||
"@maven//:org_yaml_snakeyaml",
|
||||
"@ray_streaming_maven//:org_mockito_mockito_all",
|
||||
"@ray_streaming_maven//:org_powermock_powermock_api_mockito",
|
||||
"@ray_streaming_maven//:org_powermock_powermock_module_testng",
|
||||
],
|
||||
)
|
||||
|
||||
# proto buffer
|
||||
java_proto_compile(
|
||||
name = "streaming_java_proto",
|
||||
deps = ["//streaming:streaming_proto"],
|
||||
)
|
||||
|
||||
java_proto_compile(
|
||||
name = "remote_call_java_proto",
|
||||
deps = ["//streaming:remote_call_proto"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all_java_proto",
|
||||
srcs = [
|
||||
":remote_call_java_proto",
|
||||
":streaming_java_proto",
|
||||
],
|
||||
)
|
||||
|
||||
genrule(
|
||||
name = "copy_pom_file",
|
||||
srcs = [
|
||||
"//streaming/java:io_ray_ray_" + module + "_pom"
|
||||
for module in all_modules
|
||||
],
|
||||
outs = ["copy_pom_file.out"],
|
||||
cmd = """
|
||||
WORK_DIR="$$(pwd)"
|
||||
cp -f $(location //streaming/java:io_ray_ray_streaming-api_pom) "$$WORK_DIR/streaming/java/streaming-api/pom.xml"
|
||||
cp -f $(location //streaming/java:io_ray_ray_streaming-runtime_pom) "$$WORK_DIR/streaming/java/streaming-runtime/pom.xml"
|
||||
cp -f $(location //streaming/java:io_ray_ray_streaming-state_pom) "$$WORK_DIR/streaming/java/streaming-state/pom.xml"
|
||||
date > $@
|
||||
""",
|
||||
local = 1,
|
||||
tags = ["no-cache"],
|
||||
)
|
||||
|
||||
genrule(
|
||||
name = "cp_java_generated",
|
||||
srcs = [
|
||||
":all_java_proto",
|
||||
":copy_pom_file",
|
||||
],
|
||||
outs = ["cp_java_generated.out"],
|
||||
cmd = """
|
||||
WORK_DIR="$$(pwd)"
|
||||
GENERATED_DIR="$$WORK_DIR/streaming/java/streaming-runtime/src/main/java/io/ray/streaming/runtime/generated"
|
||||
rm -rf "$$GENERATED_DIR"
|
||||
mkdir -p "$$GENERATED_DIR"
|
||||
# Copy protobuf-generated files.
|
||||
for f in $(locations //streaming/java:all_java_proto); do
|
||||
unzip -q -o "$$f" -x META-INF/MANIFEST.MF -d "$$WORK_DIR/streaming/java/streaming-runtime/src/main/java"
|
||||
done
|
||||
# remove third party protobuf
|
||||
rm -rf $$WORK_DIR/streaming/java/streaming-runtime/src/main/java/com/google/protobuf/
|
||||
date > $@
|
||||
""",
|
||||
local = 1,
|
||||
tags = ["no-cache"],
|
||||
)
|
||||
|
||||
# Generates the dependencies needed by maven.
|
||||
genrule(
|
||||
name = "gen_maven_deps",
|
||||
srcs = [
|
||||
":java_native_deps",
|
||||
":cp_java_generated",
|
||||
],
|
||||
outs = ["gen_maven_deps.out"],
|
||||
cmd = """
|
||||
WORK_DIR="$${PWD}"
|
||||
# Copy native dependencies.
|
||||
OS_NAME=""
|
||||
case "$${OSTYPE}" in
|
||||
linux*) OS_NAME="linux";;
|
||||
darwin*) OS_NAME="darwin";;
|
||||
*) echo "$${OSTYPE} is not supported currently"; exit 1;;
|
||||
esac
|
||||
NATIVE_DEPS_DIR="$$WORK_DIR/streaming/java/streaming-runtime/native_dependencies/native/$$OS_NAME"
|
||||
rm -rf "$$NATIVE_DEPS_DIR"
|
||||
mkdir -p "$$NATIVE_DEPS_DIR"
|
||||
for f in $(locations //streaming/java:java_native_deps); do
|
||||
chmod +w "$$f"
|
||||
cp "$$f" "$$NATIVE_DEPS_DIR"
|
||||
done
|
||||
date > $@
|
||||
""",
|
||||
local = 1,
|
||||
tags = ["no-cache"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
|
@ -1,17 +0,0 @@
|
|||
<!DOCTYPE suppressions PUBLIC
|
||||
"-//Puppy Crawl//DTD Suppressions 1.1//EN"
|
||||
"http://www.puppycrawl.com/dtds/suppressions_1_1.dtd">
|
||||
|
||||
<suppressions>
|
||||
<suppress checks="OperatorWrap" files=".*"/>
|
||||
<suppress checks="JavadocParagraph" files=".*"/>
|
||||
<suppress checks="SummaryJavadoc" files=".*"/>
|
||||
<suppress checks="AbbreviationAsWordInNameCheck" files=".*"/>
|
||||
<suppress checks="ClassTypeParameterName" files="OneInputStreamTask.java"/>
|
||||
<suppress checks="ClassTypeParameterName" files="StreamTask.java"/>
|
||||
<!-- suppress check for flatbuffer-generated files. -->
|
||||
<suppress checks=".*" files="io[\\/]ray[\\/]streaming[\\/]runtime[\\/]generated[\\/]"/>
|
||||
|
||||
<!-- suppress indention check for lambdas-->
|
||||
<suppress checks="Indentation" files="FailoverCoordinator.java"/>
|
||||
</suppressions>
|
|
@ -1,22 +0,0 @@
|
|||
load("@rules_jvm_external//:defs.bzl", "maven_install")
|
||||
|
||||
def gen_streaming_java_deps():
|
||||
maven_install(
|
||||
name = "ray_streaming_maven",
|
||||
artifacts = [
|
||||
"com.google.code.findbugs:jsr305:3.0.2",
|
||||
"com.google.code.gson:gson:2.8.5",
|
||||
"com.github.davidmoten:flatbuffers-java:1.9.0.1",
|
||||
"org.apache.commons:commons-lang3:3.4",
|
||||
"org.aeonbits.owner:owner:1.0.10",
|
||||
"org.mockito:mockito-all:1.10.19",
|
||||
"org.apache.commons:commons-lang3:3.3.2",
|
||||
"org.mockito:mockito-all:1.10.19",
|
||||
"org.powermock:powermock-module-testng:1.6.6",
|
||||
"org.powermock:powermock-api-mockito:1.6.6",
|
||||
"commons-collections:commons-collections:3.2.2",
|
||||
],
|
||||
repositories = [
|
||||
"https://repo1.maven.org/maven2/",
|
||||
],
|
||||
)
|
|
@ -1,42 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
set -x
|
||||
|
||||
cd "$(dirname "$0")"
|
||||
|
||||
bazel build all_streaming_tests_deploy.jar
|
||||
|
||||
function generate_one()
|
||||
{
|
||||
file=${1//./_}.h
|
||||
javah -classpath ../../bazel-bin/streaming/java/all_streaming_tests_deploy.jar "$1"
|
||||
|
||||
# prepend licence first
|
||||
cat <<EOF > ../src/lib/java/"$file"
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
EOF
|
||||
# then append the generated header file
|
||||
cat "$file" >> ../src/lib/java/"$file"
|
||||
rm -f "$file"
|
||||
}
|
||||
|
||||
generate_one io.ray.streaming.runtime.transfer.channel.ChannelId
|
||||
generate_one io.ray.streaming.runtime.transfer.DataReader
|
||||
generate_one io.ray.streaming.runtime.transfer.DataWriter
|
||||
generate_one io.ray.streaming.runtime.transfer.TransferHandler
|
||||
|
||||
rm -f io_ray_streaming_*.h
|
|
@ -1,273 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>io.ray</groupId>
|
||||
<artifactId>ray-streaming</artifactId>
|
||||
<version>2.0.0-SNAPSHOT</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>Ray Project Streaming Parent POM</name>
|
||||
<description>A streaming framework built on ray</description>
|
||||
<url>https://github.com/ray-project/ray</url>
|
||||
|
||||
<licenses>
|
||||
<license>
|
||||
<name>The Apache License, Version 2.0</name>
|
||||
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
|
||||
</license>
|
||||
</licenses>
|
||||
|
||||
<scm>
|
||||
<url>https://github.com/ray-project/ray</url>
|
||||
<connection>git@github.com:ray-project/ray.git</connection>
|
||||
<developerConnection>scm:git:ssh://github.com:ray-project/ray.git</developerConnection>
|
||||
</scm>
|
||||
|
||||
<developers>
|
||||
<developer>
|
||||
<organizationUrl>https://ray.io</organizationUrl>
|
||||
</developer>
|
||||
</developers>
|
||||
|
||||
<distributionManagement>
|
||||
<snapshotRepository>
|
||||
<id>ossrh</id>
|
||||
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
|
||||
</snapshotRepository>
|
||||
<repository>
|
||||
<id>ossrh</id>
|
||||
<url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
|
||||
</repository>
|
||||
</distributionManagement>
|
||||
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>spring</id>
|
||||
<url>https://repo.spring.io/plugins-release/</url>
|
||||
</repository>
|
||||
<repository>
|
||||
<id>central</id>
|
||||
<url>https://repo1.maven.org/maven2/</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
<modules>
|
||||
<module>streaming-api</module>
|
||||
<module>streaming-runtime</module>
|
||||
<module>streaming-state</module>
|
||||
</modules>
|
||||
|
||||
<properties>
|
||||
<java.version>1.8</java.version>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<project.version>2.0.0-SNAPSHOT</project.version>
|
||||
<mockito.version>1.10.19</mockito.version>
|
||||
<fst.version>2.57</fst.version>
|
||||
</properties>
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>release</id>
|
||||
<activation>
|
||||
<property>
|
||||
<name>release</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</activation>
|
||||
<properties>
|
||||
<output.directory>${basedir}</output.directory>
|
||||
</properties>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-gpg-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-javadoc-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
</profiles>
|
||||
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.testng</groupId>
|
||||
<artifactId>testng</artifactId>
|
||||
<version>7.3.0</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.yaml</groupId>
|
||||
<artifactId>snakeyaml</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
<build>
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.6.1</version>
|
||||
<configuration>
|
||||
<source>${java.version}</source>
|
||||
<target>${java.version}</target>
|
||||
<encoding>${project.build.sourceEncoding}</encoding>
|
||||
<compilerArgument>-parameters</compilerArgument>
|
||||
<testCompilerArgument>-parameters</testCompilerArgument>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
<version>3.0.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-sources</id>
|
||||
<goals>
|
||||
<goal>jar</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-javadoc-plugin</artifactId>
|
||||
<version>3.1.0</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-javadocs</id>
|
||||
<goals>
|
||||
<goal>jar</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<doclint>none</doclint>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<version>2.10</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-clean-plugin</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>2.2</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-deploy-plugin</artifactId>
|
||||
<version>2.8.2</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>deploy</id>
|
||||
<phase>deploy</phase>
|
||||
<goals>
|
||||
<goal>deploy</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-checkstyle-plugin</artifactId>
|
||||
<version>3.1.0</version>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.puppycrawl.tools</groupId>
|
||||
<artifactId>checkstyle</artifactId>
|
||||
<version>8.19</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>validate</id>
|
||||
<phase>validate</phase>
|
||||
<goals>
|
||||
<goal>check</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<configLocation>../../java/checkstyle.xml</configLocation>
|
||||
<suppressionsLocation>checkstyle-suppressions.xml</suppressionsLocation>
|
||||
<encoding>UTF-8</encoding>
|
||||
<consoleOutput>true</consoleOutput>
|
||||
<failsOnError>true</failsOnError>
|
||||
<failOnViolation>true</failOnViolation>
|
||||
<violationSeverity>warning</violationSeverity>
|
||||
<outputFile>${project.build.directory}/checkstyle-errors.xml</outputFile>
|
||||
<linkXRef>false</linkXRef>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>com.diffplug.spotless</groupId>
|
||||
<artifactId>spotless-maven-plugin</artifactId>
|
||||
<version>2.5.0</version>
|
||||
<configuration>
|
||||
<java>
|
||||
<excludes>
|
||||
<exclude>**/runtime/generated/**/*.*</exclude>
|
||||
</excludes>
|
||||
<googleJavaFormat>
|
||||
<version>1.7</version>
|
||||
<style>GOOGLE</style>
|
||||
</googleJavaFormat>
|
||||
</java>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-gpg-plugin</artifactId>
|
||||
<version>1.6</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>sign-artifacts</id>
|
||||
<goals>
|
||||
<goal>sign</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<!-- Prevent `gpg` from using pinentry programs -->
|
||||
<gpgArguments>
|
||||
<arg>--pinentry-mode</arg>
|
||||
<arg>loopback</arg>
|
||||
</gpgArguments>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-checkstyle-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>com.diffplug.spotless</groupId>
|
||||
<artifactId>spotless-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
86
streaming/java/streaming-api/pom.xml
Executable file
86
streaming/java/streaming-api/pom.xml
Executable file
|
@ -0,0 +1,86 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- This file is auto-generated by Bazel from pom_template.xml, do not modify it. -->
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<parent>
|
||||
<artifactId>ray-streaming</artifactId>
|
||||
<groupId>io.ray</groupId>
|
||||
<version>2.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>streaming-api</artifactId>
|
||||
<name>ray streaming api</name>
|
||||
<description>ray streaming api</description>
|
||||
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>io.ray</groupId>
|
||||
<artifactId>ray-api</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.ray</groupId>
|
||||
<artifactId>ray-runtime</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.ray</groupId>
|
||||
<artifactId>streaming-state</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.code.findbugs</groupId>
|
||||
<artifactId>jsr305</artifactId>
|
||||
<version>3.0.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.code.gson</groupId>
|
||||
<artifactId>gson</artifactId>
|
||||
<version>2.8.5</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<version>30.1-android</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-lang3</artifactId>
|
||||
<version>3.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-api</artifactId>
|
||||
<version>2.17.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-core</artifactId>
|
||||
<version>2.17.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-slf4j-impl</artifactId>
|
||||
<version>2.17.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<version>1.7.25</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.testng</groupId>
|
||||
<artifactId>testng</artifactId>
|
||||
<version>7.3.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.yaml</groupId>
|
||||
<artifactId>snakeyaml</artifactId>
|
||||
<version>1.26</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
|
@ -1,37 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
{auto_gen_header}
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<parent>
|
||||
<artifactId>ray-streaming</artifactId>
|
||||
<groupId>io.ray</groupId>
|
||||
<version>2.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>streaming-api</artifactId>
|
||||
<name>ray streaming api</name>
|
||||
<description>ray streaming api</description>
|
||||
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>io.ray</groupId>
|
||||
<artifactId>ray-api</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.ray</groupId>
|
||||
<artifactId>ray-runtime</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.ray</groupId>
|
||||
<artifactId>streaming-state</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
{generated_bzl_deps}
|
||||
</dependencies>
|
||||
</project>
|
|
@ -1,6 +0,0 @@
|
|||
package io.ray.streaming.api;
|
||||
|
||||
public enum Language {
|
||||
JAVA,
|
||||
PYTHON
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
package io.ray.streaming.api.collector;
|
||||
|
||||
import io.ray.streaming.message.Record;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Combination of multiple collectors.
|
||||
*
|
||||
* @param <T> The type of output data.
|
||||
*/
|
||||
public class CollectionCollector<T> implements Collector<T> {
|
||||
|
||||
private List<Collector> collectorList;
|
||||
|
||||
public CollectionCollector(List<Collector> collectorList) {
|
||||
this.collectorList = collectorList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(T value) {
|
||||
for (Collector collector : collectorList) {
|
||||
collector.collect(new Record(value));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
package io.ray.streaming.api.collector;
|
||||
|
||||
/**
|
||||
* The collector that collects data from an upstream operator, and emits data to downstream
|
||||
* operators.
|
||||
*
|
||||
* @param <T> Type of the data to collect.
|
||||
*/
|
||||
public interface Collector<T> {
|
||||
|
||||
void collect(T value);
|
||||
}
|
|
@ -1,28 +0,0 @@
|
|||
package io.ray.streaming.api.context;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.ray.api.Ray;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
class ClusterStarter {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ClusterStarter.class);
|
||||
|
||||
static synchronized void startCluster(boolean isLocal) {
|
||||
Preconditions.checkArgument(!Ray.isInitialized());
|
||||
if (!isLocal) {
|
||||
System.setProperty("ray.run-mode", "CLUSTER");
|
||||
} else {
|
||||
System.setProperty("ray.run-mode", "SINGLE_PROCESS");
|
||||
}
|
||||
|
||||
Ray.init();
|
||||
}
|
||||
|
||||
public static synchronized void stopCluster() {
|
||||
// Disconnect to the cluster.
|
||||
Ray.shutdown();
|
||||
System.clearProperty("ray.run-mode");
|
||||
}
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
package io.ray.streaming.api.context;
|
||||
|
||||
import io.ray.streaming.state.backend.KeyStateBackend;
|
||||
import io.ray.streaming.state.keystate.desc.ListStateDescriptor;
|
||||
import io.ray.streaming.state.keystate.desc.MapStateDescriptor;
|
||||
import io.ray.streaming.state.keystate.desc.ValueStateDescriptor;
|
||||
import io.ray.streaming.state.keystate.state.ListState;
|
||||
import io.ray.streaming.state.keystate.state.MapState;
|
||||
import io.ray.streaming.state.keystate.state.ValueState;
|
||||
import java.util.Map;
|
||||
|
||||
/** Encapsulate the runtime information of a streaming task. */
|
||||
public interface RuntimeContext {
|
||||
|
||||
int getTaskId();
|
||||
|
||||
int getTaskIndex();
|
||||
|
||||
int getParallelism();
|
||||
|
||||
/** Returns config of current function */
|
||||
Map<String, String> getConfig();
|
||||
|
||||
/** Returns config of the job */
|
||||
Map<String, String> getJobConfig();
|
||||
|
||||
Long getCheckpointId();
|
||||
|
||||
void setCheckpointId(long checkpointId);
|
||||
|
||||
void setCurrentKey(Object key);
|
||||
|
||||
KeyStateBackend getKeyStateBackend();
|
||||
|
||||
void setKeyStateBackend(KeyStateBackend keyStateBackend);
|
||||
|
||||
<T> ValueState<T> getValueState(ValueStateDescriptor<T> stateDescriptor);
|
||||
|
||||
<T> ListState<T> getListState(ListStateDescriptor<T> stateDescriptor);
|
||||
|
||||
<S, T> MapState<S, T> getMapState(MapStateDescriptor<S, T> stateDescriptor);
|
||||
}
|
|
@ -1,98 +0,0 @@
|
|||
package io.ray.streaming.api.context;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.ray.api.Ray;
|
||||
import io.ray.streaming.api.stream.StreamSink;
|
||||
import io.ray.streaming.client.JobClient;
|
||||
import io.ray.streaming.jobgraph.JobGraph;
|
||||
import io.ray.streaming.jobgraph.JobGraphBuilder;
|
||||
import io.ray.streaming.jobgraph.JobGraphOptimizer;
|
||||
import io.ray.streaming.util.Config;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.ServiceLoader;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/** Encapsulate the context information of a streaming Job. */
|
||||
public class StreamingContext implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(StreamingContext.class);
|
||||
|
||||
private transient AtomicInteger idGenerator;
|
||||
|
||||
  /** The sinks of this streaming job. */
  private List<StreamSink> streamSinks;

  /** The user custom streaming job configuration. */
  private Map<String, String> jobConfig;

  /** The logic plan. */
  private JobGraph jobGraph;

  // Private: instances are obtained through buildContext().
  private StreamingContext() {
    this.idGenerator = new AtomicInteger(0);
    this.streamSinks = new ArrayList<>();
    this.jobConfig = new HashMap<>();
  }

  /** Create a new, empty streaming context. */
  public static StreamingContext buildContext() {
    return new StreamingContext();
  }

  /** Construct job DAG, and execute the job. */
  public void execute(String jobName) {
    // Build the logical job graph from the registered sinks, then optimize it.
    JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(this.streamSinks, jobName);
    JobGraph originalJobGraph = jobGraphBuilder.build();
    this.jobGraph = new JobGraphOptimizer(originalJobGraph).optimize();
    jobGraph.printJobGraph();
    LOG.info("JobGraph digraph\n{}", jobGraph.generateDigraph());

    // Start a Ray cluster only if the caller has not already initialized one.
    if (!Ray.isInitialized()) {
      if (Config.MEMORY_CHANNEL.equalsIgnoreCase(jobConfig.get(Config.CHANNEL_TYPE))) {
        // Memory channel implies a single-process local cluster.
        ClusterStarter.startCluster(true);
        LOG.info("Created local cluster for job {}.", jobName);
      } else {
        ClusterStarter.startCluster(false);
        LOG.info("Created multi process cluster for job {}.", jobName);
      }
      // Shutdown hook is installed only for clusters this context started itself.
      Runtime.getRuntime().addShutdownHook(new Thread(StreamingContext.this::stop));
    } else {
      LOG.info("Reuse existing cluster.");
    }

    // Locate a JobClient implementation via the ServiceLoader SPI and submit the graph.
    ServiceLoader<JobClient> serviceLoader = ServiceLoader.load(JobClient.class);
    Iterator<JobClient> iterator = serviceLoader.iterator();
    Preconditions.checkArgument(
        iterator.hasNext(), "No JobClient implementation has been provided.");
    JobClient jobClient = iterator.next();
    jobClient.submit(jobGraph, jobConfig);
  }

  /** Generate a context-unique, monotonically increasing id (used e.g. as stream ids). */
  public int generateId() {
    return this.idGenerator.incrementAndGet();
  }

  /** Register a sink; sinks are the roots from which the job graph is built in execute(). */
  public void addSink(StreamSink streamSink) {
    streamSinks.add(streamSink);
  }

  public List<StreamSink> getStreamSinks() {
    return streamSinks;
  }

  /**
   * Replace the job configuration.
   *
   * <p>NOTE(review): stores the caller's map reference without copying — later external mutation
   * would be visible here.
   */
  public void withConfig(Map<String, String> jobConfig) {
    this.jobConfig = jobConfig;
  }

  /** Stop the Ray cluster, if one is currently initialized. */
  public void stop() {
    if (Ray.isInitialized()) {
      ClusterStarter.stopCluster();
    }
  }
}
|
|
@ -1,28 +0,0 @@
|
|||
package io.ray.streaming.api.function;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/** Interface of streaming functions. */
public interface Function extends Serializable {

  /**
   * This method will be called periodically by the framework; you should return a serializable
   * object which represents function state. The framework will help you serialize this object,
   * save it to storage, and load it back in fail-over through {@link
   * Function#loadCheckpoint(Serializable)}.
   *
   * @return A serializable object which represents function state.
   */
  default Serializable saveCheckpoint() {
    return null;
  }

  /**
   * This method will be called by the framework when a worker died and has been restarted. We will
   * pass the last object you returned in {@link Function#saveCheckpoint()} when doing checkpoint;
   * you are responsible for loading this object back into your function.
   *
   * @param checkpointObject the last object you returned in {@link Function#saveCheckpoint()}
   */
  default void loadCheckpoint(Serializable checkpointObject) {}
}
|
|
@ -1,23 +0,0 @@
|
|||
package io.ray.streaming.api.function;
|
||||
|
||||
import io.ray.streaming.api.context.RuntimeContext;
|
||||
|
||||
/**
 * An interface for all user-defined functions to define the life cycle methods of the functions,
 * and access the task context where the functions get executed.
 */
public interface RichFunction extends Function {

  /**
   * Initialization method for user function which called before the first call to the user
   * function.
   *
   * @param runtimeContext runtime context
   */
  void open(RuntimeContext runtimeContext);

  /**
   * Tear-down method for the user function which called after the last call to the user function.
   */
  void close();
}
|
|
@ -1,23 +0,0 @@
|
|||
package io.ray.streaming.api.function.impl;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * Interface of aggregate functions.
 *
 * @param <I> Type of the input data.
 * @param <A> Type of the intermediate data.
 * @param <O> Type of the output data.
 */
public interface AggregateFunction<I, A, O> extends Function {

  /** Create a new, empty accumulator. */
  A createAccumulator();

  /** Fold one input value into the accumulator (void return: the accumulator is mutated). */
  void add(I value, A accumulator);

  /** Extract the final output value from an accumulator. */
  O getResult(A accumulator);

  /** Merge two accumulators into one. */
  A merge(A a, A b);

  /** Remove (retract) a previously added value from the accumulator. */
  void retract(A acc, I value);
}
|
|
@ -1,21 +0,0 @@
|
|||
package io.ray.streaming.api.function.impl;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * A filter function is a predicate applied individually to each record. The predicate decides
 * whether to keep the element, or to discard it.
 *
 * @param <T> type of the input data.
 */
@FunctionalInterface
public interface FilterFunction<T> extends Function {

  /**
   * The filter function that evaluates the predicate.
   *
   * @param value The value to be filtered.
   * @return True for values that should be retained, false for values to be filtered out.
   * @throws Exception if the predicate cannot be evaluated.
   */
  boolean filter(T value) throws Exception;
}
|
|
@ -1,16 +0,0 @@
|
|||
package io.ray.streaming.api.function.impl;
|
||||
|
||||
import io.ray.streaming.api.collector.Collector;
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * Interface of flat-map functions.
 *
 * @param <T> Type of the input data.
 * @param <R> Type of the output data.
 */
@FunctionalInterface
public interface FlatMapFunction<T, R> extends Function {

  /** Transform one input value into zero or more output values, emitted via the collector. */
  void flatMap(T value, Collector<R> collector);
}
|
|
@ -1,16 +0,0 @@
|
|||
package io.ray.streaming.api.function.impl;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * Interface of join functions.
 *
 * @param <T> Type of the left input data.
 * @param <O> Type of the right input data.
 * @param <R> Type of the output data.
 */
@FunctionalInterface
public interface JoinFunction<T, O, R> extends Function {

  /** Combine one left element and one right element into a joined result. */
  R join(T left, O right);
}
|
|
@ -1,15 +0,0 @@
|
|||
package io.ray.streaming.api.function.impl;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * Interface of key-by functions.
 *
 * @param <T> Type of the input data.
 * @param <K> Type of the key-by field.
 */
@FunctionalInterface
public interface KeyFunction<T, K> extends Function {

  /** Extract the partitioning key from an input value. */
  K keyBy(T value);
}
|
|
@ -1,15 +0,0 @@
|
|||
package io.ray.streaming.api.function.impl;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * Interface of map functions.
 *
 * @param <T> type of the input data.
 * @param <R> type of the output data.
 */
@FunctionalInterface
public interface MapFunction<T, R> extends Function {

  /** Transform one input value into exactly one output value. */
  R map(T value);
}
|
|
@ -1,14 +0,0 @@
|
|||
package io.ray.streaming.api.function.impl;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * Interface of process functions.
 *
 * @param <T> Type of the input data.
 */
@FunctionalInterface
public interface ProcessFunction<T> extends Function {

  /** Process one input value; produces no return value. */
  void process(T value);
}
|
|
@ -1,14 +0,0 @@
|
|||
package io.ray.streaming.api.function.impl;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * Interface of reduce functions.
 *
 * @param <T> Type of the input data.
 */
@FunctionalInterface
public interface ReduceFunction<T> extends Function {

  /** Combine the previously reduced value with a newly arrived value. */
  T reduce(T oldValue, T newValue);
}
|
|
@ -1,14 +0,0 @@
|
|||
package io.ray.streaming.api.function.impl;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * Interface of sink functions.
 *
 * @param <T> Type of the sink data.
 */
@FunctionalInterface
public interface SinkFunction<T> extends Function {

  /** Consume one value at the end of the pipeline (write to storage, external system, ...). */
  void sink(T value);
}
|
|
@ -1,22 +0,0 @@
|
|||
package io.ray.streaming.api.function.impl;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * Interface of Source functions.
 *
 * @param <T> Type of the data output by the source.
 */
public interface SourceFunction<T> extends Function {

  /**
   * Called once before fetching starts.
   *
   * @param parallelism total number of parallel source instances
   * @param index index of this instance among the parallel instances
   */
  void init(int parallelism, int index);

  /**
   * Produce data by pushing elements into the given context.
   *
   * <p>NOTE(review): the invocation cadence (once vs. repeatedly) is defined by the runtime that
   * drives this function — confirm against the worker implementation.
   */
  void fetch(SourceContext<T> ctx) throws Exception;

  /** Release any resources held by this source. */
  void close();

  /** Handle used by {@link #fetch} to emit elements downstream. */
  interface SourceContext<T> {

    void collect(T element) throws Exception;
  }
}
|
|
@ -1,36 +0,0 @@
|
|||
package io.ray.streaming.api.function.internal;
|
||||
|
||||
import io.ray.streaming.api.function.impl.SourceFunction;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* The SourceFunction that fetch data from a Java Collection object.
|
||||
*
|
||||
* @param <T> Type of the data output by the source.
|
||||
*/
|
||||
public class CollectionSourceFunction<T> implements SourceFunction<T> {
|
||||
|
||||
private Collection<T> values;
|
||||
private boolean finished = false;
|
||||
|
||||
public CollectionSourceFunction(Collection<T> values) {
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(int totalParallel, int currentIndex) {}
|
||||
|
||||
@Override
|
||||
public void fetch(SourceContext<T> ctx) throws Exception {
|
||||
if (finished) {
|
||||
return;
|
||||
}
|
||||
for (T value : values) {
|
||||
ctx.collect(value);
|
||||
}
|
||||
finished = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
}
|
|
@ -1,40 +0,0 @@
|
|||
package io.ray.streaming.api.function.internal;
|
||||
|
||||
import io.ray.streaming.api.context.RuntimeContext;
|
||||
import io.ray.streaming.api.function.Function;
|
||||
import io.ray.streaming.api.function.RichFunction;
|
||||
|
||||
/** A util class for {@link Function} */
|
||||
public class Functions {
|
||||
|
||||
private static class DefaultRichFunction implements RichFunction {
|
||||
|
||||
private final Function function;
|
||||
|
||||
private DefaultRichFunction(Function function) {
|
||||
this.function = function;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(RuntimeContext runtimeContext) {}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
|
||||
public Function getFunction() {
|
||||
return function;
|
||||
}
|
||||
}
|
||||
|
||||
public static RichFunction wrap(Function function) {
|
||||
if (function instanceof RichFunction) {
|
||||
return (RichFunction) function;
|
||||
} else {
|
||||
return new DefaultRichFunction(function);
|
||||
}
|
||||
}
|
||||
|
||||
public static RichFunction emptyFunction() {
|
||||
return new DefaultRichFunction(null);
|
||||
}
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
package io.ray.streaming.api.partition;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
|
||||
/**
 * Interface of the partitioning strategy.
 *
 * @param <T> Type of the input data.
 */
@FunctionalInterface
public interface Partition<T> extends Function {

  /**
   * Given a record and downstream partitions, determine which partition(s) should receive the
   * record.
   *
   * <p>NOTE(review): implementations in this package reuse the returned array across calls, so
   * callers should not retain or mutate it.
   *
   * @param record The record.
   * @param numPartition num of partitions
   * @return IDs of the downstream partitions that should receive the record.
   */
  int[] partition(T record, int numPartition);
}
|
|
@ -1,20 +0,0 @@
|
|||
package io.ray.streaming.api.partition.impl;
|
||||
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/** Broadcast the record to all downstream partitions. */
|
||||
public class BroadcastPartition<T> implements Partition<T> {
|
||||
|
||||
private int[] partitions = new int[0];
|
||||
|
||||
public BroadcastPartition() {}
|
||||
|
||||
@Override
|
||||
public int[] partition(T value, int numPartition) {
|
||||
if (partitions.length != numPartition) {
|
||||
partitions = IntStream.rangeClosed(0, numPartition - 1).toArray();
|
||||
}
|
||||
return partitions;
|
||||
}
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
package io.ray.streaming.api.partition.impl;
|
||||
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
|
||||
/**
|
||||
* Default partition for operator if the operator can be chained with succeeding operators.
|
||||
* Partition will be set to {@link RoundRobinPartition} if the operator can't be chiained with
|
||||
* succeeding operators.
|
||||
*
|
||||
* @param <T> Type of the input record.
|
||||
*/
|
||||
public class ForwardPartition<T> implements Partition<T> {
|
||||
|
||||
private int[] partitions = new int[] {0};
|
||||
|
||||
@Override
|
||||
public int[] partition(T record, int numPartition) {
|
||||
return partitions;
|
||||
}
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
package io.ray.streaming.api.partition.impl;
|
||||
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import io.ray.streaming.message.KeyRecord;
|
||||
|
||||
/**
 * Partition the record by the key.
 *
 * @param <K> Type of the partition key.
 * @param <T> Type of the input record.
 */
public class KeyPartition<K, T> implements Partition<KeyRecord<K, T>> {

  // Reused single-element result array: avoids allocating on every record.
  private int[] partitions = new int[1];

  @Override
  public int[] partition(KeyRecord<K, T> keyRecord, int numPartition) {
    // Modulo before Math.abs keeps the result in [0, numPartition) even for
    // Integer.MIN_VALUE hash codes.
    partitions[0] = Math.abs(keyRecord.getKey().hashCode() % numPartition);
    return partitions;
  }
}
|
|
@ -1,25 +0,0 @@
|
|||
package io.ray.streaming.api.partition.impl;
|
||||
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
|
||||
/**
|
||||
* Partition record to downstream tasks in a round-robin matter.
|
||||
*
|
||||
* @param <T> Type of the input record.
|
||||
*/
|
||||
public class RoundRobinPartition<T> implements Partition<T> {
|
||||
|
||||
private int seq;
|
||||
private int[] partitions = new int[1];
|
||||
|
||||
public RoundRobinPartition() {
|
||||
this.seq = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] partition(T value, int numPartition) {
|
||||
seq = (seq + 1) % numPartition;
|
||||
partitions[0] = seq;
|
||||
return partitions;
|
||||
}
|
||||
}
|
|
@ -1,202 +0,0 @@
|
|||
package io.ray.streaming.api.stream;
|
||||
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.api.context.StreamingContext;
|
||||
import io.ray.streaming.api.function.impl.FilterFunction;
|
||||
import io.ray.streaming.api.function.impl.FlatMapFunction;
|
||||
import io.ray.streaming.api.function.impl.KeyFunction;
|
||||
import io.ray.streaming.api.function.impl.MapFunction;
|
||||
import io.ray.streaming.api.function.impl.SinkFunction;
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import io.ray.streaming.api.partition.impl.BroadcastPartition;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import io.ray.streaming.operator.impl.FilterOperator;
|
||||
import io.ray.streaming.operator.impl.FlatMapOperator;
|
||||
import io.ray.streaming.operator.impl.KeyByOperator;
|
||||
import io.ray.streaming.operator.impl.MapOperator;
|
||||
import io.ray.streaming.operator.impl.SinkOperator;
|
||||
import io.ray.streaming.python.stream.PythonDataStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Represents a stream of data.
 *
 * <p>This class defines all the streaming operations.
 *
 * @param <T> Type of data in the stream.
 */
public class DataStream<T> extends Stream<DataStream<T>, T> {

  /** Create a source-like stream directly from a context and operator. */
  public DataStream(StreamingContext streamingContext, StreamOperator streamOperator) {
    super(streamingContext, streamOperator);
  }

  /** Create a stream from a context and operator with an explicit partition strategy. */
  public DataStream(
      StreamingContext streamingContext, StreamOperator streamOperator, Partition<T> partition) {
    super(streamingContext, streamOperator, partition);
  }

  /** Create a stream derived from an upstream input stream. */
  public <R> DataStream(DataStream<R> input, StreamOperator streamOperator) {
    super(input, streamOperator);
  }

  /** Create a stream derived from an upstream input stream with an explicit partition. */
  public <R> DataStream(
      DataStream<R> input, StreamOperator streamOperator, Partition<T> partition) {
    super(input, streamOperator, partition);
  }

  /**
   * Create a java stream that reference passed python stream. Changes in new stream will be
   * reflected in referenced stream and vice versa
   */
  public DataStream(PythonDataStream referencedStream) {
    super(referencedStream);
  }

  /**
   * Apply a map function to this stream.
   *
   * @param mapFunction The map function.
   * @param <R> Type of data returned by the map function.
   * @return A new DataStream.
   */
  public <R> DataStream<R> map(MapFunction<T, R> mapFunction) {
    return new DataStream<>(this, new MapOperator<>(mapFunction));
  }

  /**
   * Apply a flat-map function to this stream.
   *
   * @param flatMapFunction The FlatMapFunction
   * @param <R> Type of data returned by the flatmap function.
   * @return A new DataStream
   */
  public <R> DataStream<R> flatMap(FlatMapFunction<T, R> flatMapFunction) {
    return new DataStream<>(this, new FlatMapOperator<>(flatMapFunction));
  }

  /**
   * Apply a filter function to this stream.
   *
   * @param filterFunction the predicate deciding which records to keep.
   * @return A new DataStream with only the retained records.
   */
  public DataStream<T> filter(FilterFunction<T> filterFunction) {
    return new DataStream<>(this, new FilterOperator<>(filterFunction));
  }

  /**
   * Apply union transformations to this stream by merging {@link DataStream} outputs of the same
   * type with each other.
   *
   * @param stream The DataStream to union output with.
   * @param others The other DataStreams to union output with.
   * @return A new UnionStream.
   */
  @SafeVarargs
  public final DataStream<T> union(DataStream<T> stream, DataStream<T>... others) {
    List<DataStream<T>> streams = new ArrayList<>();
    streams.add(stream);
    streams.addAll(Arrays.asList(others));
    return union(streams);
  }

  /**
   * Apply union transformations to this stream by merging {@link DataStream} outputs of the same
   * type with each other.
   *
   * @param streams The DataStreams to union output with.
   * @return A new UnionStream.
   */
  public final DataStream<T> union(List<DataStream<T>> streams) {
    // If this stream is already a union, fold the new streams into it instead of nesting.
    if (this instanceof UnionStream) {
      UnionStream<T> unionStream = (UnionStream<T>) this;
      streams.forEach(unionStream::addStream);
      return unionStream;
    } else {
      return new UnionStream<>(this, streams);
    }
  }

  /**
   * Apply a join transformation to this stream, with another stream.
   *
   * @param other Another stream.
   * @param <O> The type of the other stream data.
   * @param <R> The type of the data in the joined stream.
   * @return A new JoinStream.
   */
  public <O, R> JoinStream<T, O, R> join(DataStream<O> other) {
    return new JoinStream<>(this, other);
  }

  public <R> DataStream<R> process() {
    // TODO(zhenxuanpan): Need to add processFunction.
    // NOTE(review): creates a stream with a null operator and a raw DataStream type;
    // this method is a stub until a ProcessFunction parameter is added.
    return new DataStream(this, null);
  }

  /**
   * Apply a sink function and get a StreamSink.
   *
   * @param sinkFunction The sink function.
   * @return A new StreamSink.
   */
  public DataStreamSink<T> sink(SinkFunction<T> sinkFunction) {
    return new DataStreamSink<>(this, new SinkOperator<>(sinkFunction));
  }

  /**
   * Apply a key-by function to this stream.
   *
   * @param keyFunction the key function.
   * @param <K> The type of the key.
   * @return A new KeyDataStream.
   */
  public <K> KeyDataStream<K, T> keyBy(KeyFunction<T, K> keyFunction) {
    checkPartitionCall();
    return new KeyDataStream<>(this, new KeyByOperator<>(keyFunction));
  }

  /**
   * Apply broadcast to this stream.
   *
   * @return This stream.
   */
  public DataStream<T> broadcast() {
    checkPartitionCall();
    return setPartition(new BroadcastPartition<>());
  }

  /**
   * Apply a partition to this stream.
   *
   * @param partition The partitioning strategy.
   * @return This stream.
   */
  public DataStream<T> partitionBy(Partition<T> partition) {
    checkPartitionCall();
    return setPartition(partition);
  }

  /**
   * If parent stream is a python stream, we can't call partition related methods in the java
   * stream.
   */
  private void checkPartitionCall() {
    if (getInputStream() != null && getInputStream().getLanguage() == Language.PYTHON) {
      throw new RuntimeException(
          "Partition related methods can't be called on a "
              + "java stream if parent stream is a python stream.");
    }
  }

  /**
   * Convert this stream as a python stream. The converted stream and this stream are the same
   * logical stream, which has same stream id. Changes in converted stream will be reflected in this
   * stream and vice versa.
   */
  public PythonDataStream asPythonStream() {
    return new PythonDataStream(this);
  }

  @Override
  public Language getLanguage() {
    return Language.JAVA;
  }
}
|
|
@ -1,22 +0,0 @@
|
|||
package io.ray.streaming.api.stream;
|
||||
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.operator.impl.SinkOperator;
|
||||
|
||||
/**
 * Represents a sink of the DataStream.
 *
 * @param <T> Type of the input data of this sink.
 */
public class DataStreamSink<T> extends StreamSink<T> {

  public DataStreamSink(DataStream input, SinkOperator sinkOperator) {
    super(input, sinkOperator);
    // Register with the context: sinks are the roots used to build the job graph.
    getStreamingContext().addSink(this);
  }

  @Override
  public Language getLanguage() {
    return Language.JAVA;
  }
}
|
|
@ -1,37 +0,0 @@
|
|||
package io.ray.streaming.api.stream;
|
||||
|
||||
import io.ray.streaming.api.context.StreamingContext;
|
||||
import io.ray.streaming.api.function.impl.SourceFunction;
|
||||
import io.ray.streaming.api.function.internal.CollectionSourceFunction;
|
||||
import io.ray.streaming.operator.impl.SourceOperatorImpl;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
 * Represents a source of the DataStream.
 *
 * @param <T> The type of StreamSource data.
 */
public class DataStreamSource<T> extends DataStream<T> implements StreamSource<T> {

  // Private: instances are created through the static factories below.
  private DataStreamSource(StreamingContext streamingContext, SourceFunction<T> sourceFunction) {
    super(streamingContext, new SourceOperatorImpl<>(sourceFunction));
  }

  /**
   * Build a DataStreamSource from a user-provided source function.
   *
   * @param context Stream context.
   * @param sourceFunction the function producing the source data.
   * @param <T> The type of source data.
   * @return A DataStreamSource.
   */
  public static <T> DataStreamSource<T> fromSource(
      StreamingContext context, SourceFunction<T> sourceFunction) {
    return new DataStreamSource<>(context, sourceFunction);
  }

  /**
   * Build a DataStreamSource source from a collection.
   *
   * @param context Stream context.
   * @param values A collection of values.
   * @param <T> The type of source data.
   * @return A DataStreamSource.
   */
  public static <T> DataStreamSource<T> fromCollection(
      StreamingContext context, Collection<T> values) {
    return new DataStreamSource<>(context, new CollectionSourceFunction<>(values));
  }
}
|
|
@ -1,80 +0,0 @@
|
|||
package io.ray.streaming.api.stream;
|
||||
|
||||
import io.ray.streaming.api.function.impl.JoinFunction;
|
||||
import io.ray.streaming.api.function.impl.KeyFunction;
|
||||
import io.ray.streaming.operator.impl.JoinOperator;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Represents a DataStream of two joined DataStream.
 *
 * <p>Usage: {@code left.join(right).where(leftKey).equalTo(rightKey).with(joinFunction)}.
 *
 * @param <L> Type of the data in the left stream.
 * @param <R> Type of the data in the right stream.
 * @param <O> Type of the data in the joined stream.
 */
public class JoinStream<L, R, O> extends DataStream<L> {

  private final DataStream<R> rightStream;

  public JoinStream(DataStream<L> leftStream, DataStream<R> rightStream) {
    // The join function is supplied later via where(...).equalTo(...).with(...).
    super(leftStream, new JoinOperator<>());
    this.rightStream = rightStream;
  }

  public DataStream<R> getRightStream() {
    return rightStream;
  }

  /** Apply key-by to the left join stream. */
  public <K> Where<K> where(KeyFunction<L, K> keyFunction) {
    return new Where<>(this, keyFunction);
  }

  /**
   * Where clause of the join transformation.
   *
   * <p>Non-static inner class: it needs the outer class's type parameters.
   *
   * @param <K> Type of the join key.
   */
  class Where<K> implements Serializable {

    private JoinStream<L, R, O> joinStream;
    private KeyFunction<L, K> leftKeyByFunction;

    Where(JoinStream<L, R, O> joinStream, KeyFunction<L, K> leftKeyByFunction) {
      this.joinStream = joinStream;
      this.leftKeyByFunction = leftKeyByFunction;
    }

    /** Specify the key of the right stream that must match the left key. */
    public Equal<K> equalTo(KeyFunction<R, K> rightKeyFunction) {
      return new Equal<>(joinStream, leftKeyByFunction, rightKeyFunction);
    }
  }

  /**
   * Equal clause of the join transformation.
   *
   * @param <K> Type of the join key.
   */
  class Equal<K> implements Serializable {

    private JoinStream<L, R, O> joinStream;
    private KeyFunction<L, K> leftKeyByFunction;
    private KeyFunction<R, K> rightKeyByFunction;

    Equal(
        JoinStream<L, R, O> joinStream,
        KeyFunction<L, K> leftKeyByFunction,
        KeyFunction<R, K> rightKeyByFunction) {
      this.joinStream = joinStream;
      this.leftKeyByFunction = leftKeyByFunction;
      this.rightKeyByFunction = rightKeyByFunction;
    }

    /** Complete the join by installing the join function on the underlying operator. */
    @SuppressWarnings("unchecked")
    public DataStream<O> with(JoinFunction<L, R, O> joinFunction) {
      JoinOperator joinOperator = (JoinOperator) joinStream.getOperator();
      joinOperator.setFunction(joinFunction);
      // The join stream itself is returned, re-typed to the output element type.
      return (DataStream<O>) joinStream;
    }
  }
}
|
|
@ -1,63 +0,0 @@
|
|||
package io.ray.streaming.api.stream;
|
||||
|
||||
import io.ray.streaming.api.function.impl.AggregateFunction;
|
||||
import io.ray.streaming.api.function.impl.ReduceFunction;
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import io.ray.streaming.api.partition.impl.KeyPartition;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import io.ray.streaming.operator.impl.ReduceOperator;
|
||||
import io.ray.streaming.python.stream.PythonDataStream;
|
||||
import io.ray.streaming.python.stream.PythonKeyDataStream;
|
||||
|
||||
/**
 * Represents a DataStream returned by a key-by operation.
 *
 * @param <K> Type of the key.
 * @param <T> Type of the data.
 */
@SuppressWarnings("unchecked")
public class KeyDataStream<K, T> extends DataStream<T> {

  public KeyDataStream(DataStream<T> input, StreamOperator streamOperator) {
    // Key-by streams always use key-based partitioning downstream.
    super(input, streamOperator, (Partition<T>) new KeyPartition<K, T>());
  }

  /**
   * Create a java stream that reference passed python stream. Changes in new stream will be
   * reflected in referenced stream and vice versa
   */
  public KeyDataStream(PythonDataStream referencedStream) {
    super(referencedStream);
  }

  /**
   * Apply a reduce function to this stream.
   *
   * @param reduceFunction The reduce function.
   * @return A new DataStream.
   */
  public DataStream<T> reduce(ReduceFunction reduceFunction) {
    return new DataStream<>(this, new ReduceOperator(reduceFunction));
  }

  /**
   * Apply an aggregate Function to this stream.
   *
   * <p>NOTE(review): the aggregate function is not wired into an operator yet — the new stream is
   * created with a null operator, so this method is a stub.
   *
   * @param aggregateFunction The aggregate function
   * @param <A> The type of aggregated intermediate data.
   * @param <O> The type of result data.
   * @return A new DataStream.
   */
  public <A, O> DataStream<O> aggregate(AggregateFunction<T, A, O> aggregateFunction) {
    return new DataStream<>(this, null);
  }

  /**
   * Convert this stream as a python stream. The converted stream and this stream are the same
   * logical stream, which has same stream id. Changes in converted stream will be reflected in this
   * stream and vice versa.
   */
  public PythonKeyDataStream asPythonStream() {
    return new PythonKeyDataStream(this);
  }
}
|
|
@ -1,192 +0,0 @@
|
|||
package io.ray.streaming.api.stream;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.api.context.StreamingContext;
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import io.ray.streaming.api.partition.impl.ForwardPartition;
|
||||
import io.ray.streaming.operator.ChainStrategy;
|
||||
import io.ray.streaming.operator.Operator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import io.ray.streaming.python.PythonPartition;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Abstract base class of all stream types.
|
||||
*
|
||||
* @param <S> Type of stream class
|
||||
* @param <T> Type of the data in the stream.
|
||||
*/
|
||||
public abstract class Stream<S extends Stream<S, T>, T> implements Serializable {
|
||||
|
||||
private final int id;
|
||||
private final StreamingContext streamingContext;
|
||||
private final Stream inputStream;
|
||||
private final StreamOperator operator;
|
||||
private int parallelism = 1;
|
||||
private Map<String, String> config = new HashMap<>();
|
||||
private Partition<T> partition;
|
||||
private Stream originalStream;
|
||||
|
||||
public Stream(StreamingContext streamingContext, StreamOperator streamOperator) {
|
||||
this(streamingContext, null, streamOperator, getForwardPartition(streamOperator));
|
||||
}
|
||||
|
||||
public Stream(
|
||||
StreamingContext streamingContext, StreamOperator streamOperator, Partition<T> partition) {
|
||||
this(streamingContext, null, streamOperator, partition);
|
||||
}
|
||||
|
||||
public Stream(Stream inputStream, StreamOperator streamOperator) {
|
||||
this(
|
||||
inputStream.getStreamingContext(),
|
||||
inputStream,
|
||||
streamOperator,
|
||||
getForwardPartition(streamOperator));
|
||||
}
|
||||
|
||||
public Stream(Stream inputStream, StreamOperator streamOperator, Partition<T> partition) {
|
||||
this(inputStream.getStreamingContext(), inputStream, streamOperator, partition);
|
||||
}
|
||||
|
||||
protected Stream(
|
||||
StreamingContext streamingContext,
|
||||
Stream inputStream,
|
||||
StreamOperator streamOperator,
|
||||
Partition<T> partition) {
|
||||
this.streamingContext = streamingContext;
|
||||
this.inputStream = inputStream;
|
||||
this.operator = streamOperator;
|
||||
this.partition = partition;
|
||||
this.id = streamingContext.generateId();
|
||||
if (inputStream != null) {
|
||||
this.parallelism = inputStream.getParallelism();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a proxy stream of original stream. Changes in new stream will be reflected in original
|
||||
* stream and vice versa
|
||||
*/
|
||||
protected Stream(Stream originalStream) {
|
||||
this.originalStream = originalStream;
|
||||
this.id = originalStream.getId();
|
||||
this.streamingContext = originalStream.getStreamingContext();
|
||||
this.inputStream = originalStream.getInputStream();
|
||||
this.operator = originalStream.getOperator();
|
||||
Preconditions.checkNotNull(operator);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private static <T> Partition<T> getForwardPartition(Operator operator) {
|
||||
switch (operator.getLanguage()) {
|
||||
case PYTHON:
|
||||
return (Partition<T>) PythonPartition.ForwardPartition;
|
||||
case JAVA:
|
||||
return new ForwardPartition<>();
|
||||
default:
|
||||
throw new UnsupportedOperationException("Unsupported language " + operator.getLanguage());
|
||||
}
|
||||
}
|
||||
|
||||
public int getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public StreamingContext getStreamingContext() {
|
||||
return streamingContext;
|
||||
}
|
||||
|
||||
public Stream getInputStream() {
|
||||
return inputStream;
|
||||
}
|
||||
|
||||
public StreamOperator getOperator() {
|
||||
return operator;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private S self() {
|
||||
return (S) this;
|
||||
}
|
||||
|
||||
public int getParallelism() {
|
||||
return originalStream != null ? originalStream.getParallelism() : parallelism;
|
||||
}
|
||||
|
||||
public S setParallelism(int parallelism) {
|
||||
if (originalStream != null) {
|
||||
originalStream.setParallelism(parallelism);
|
||||
} else {
|
||||
this.parallelism = parallelism;
|
||||
}
|
||||
return self();
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public Partition<T> getPartition() {
|
||||
return originalStream != null ? originalStream.getPartition() : partition;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected S setPartition(Partition<T> partition) {
|
||||
if (originalStream != null) {
|
||||
originalStream.setPartition(partition);
|
||||
} else {
|
||||
this.partition = partition;
|
||||
}
|
||||
return self();
|
||||
}
|
||||
|
||||
public S withConfig(Map<String, String> config) {
|
||||
config.forEach(this::withConfig);
|
||||
return self();
|
||||
}
|
||||
|
||||
public S withConfig(String key, String value) {
|
||||
if (isProxyStream()) {
|
||||
originalStream.withConfig(key, value);
|
||||
} else {
|
||||
this.config.put(key, value);
|
||||
}
|
||||
return self();
|
||||
}
|
||||
|
||||
/** Returns the configuration map of this stream (the original stream's map for proxies). */
@SuppressWarnings("unchecked")
public Map<String, String> getConfig() {
  return isProxyStream() ? originalStream.getConfig() : config;
}

/** Returns true if this stream is a proxy view over another stream. */
public boolean isProxyStream() {
  return originalStream != null;
}

/**
 * Returns the stream this proxy wraps.
 *
 * @throws IllegalArgumentException if this stream is not a proxy stream
 */
public Stream getOriginalStream() {
  Preconditions.checkArgument(isProxyStream());
  return originalStream;
}
|
||||
|
||||
/**
 * Set chain strategy for this stream.
 *
 * <p>Not allowed on proxy streams: the strategy lives on the operator, which a proxy shares with
 * its original stream.
 *
 * @return this stream, for call chaining
 */
public S withChainStrategy(ChainStrategy chainStrategy) {
  Preconditions.checkArgument(!isProxyStream());
  operator.setChainStrategy(chainStrategy);
  return self();
}

/**
 * Disable chain for this stream.
 *
 * @return this stream, for call chaining
 */
public S disableChain() {
  return withChainStrategy(ChainStrategy.NEVER);
}
|
||||
|
||||
/**
 * Set the partition function of this {@link Stream} so that output elements are forwarded to next
 * operator locally.
 */
public S forward() {
  return setPartition(getForwardPartition(operator));
}

/** Returns the language (JAVA or PYTHON) this stream's operator runs in. */
public abstract Language getLanguage();
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
package io.ray.streaming.api.stream;
|
||||
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
/**
|
||||
* Represents a sink of the Stream.
|
||||
*
|
||||
* @param <T> Type of the input data of this sink.
|
||||
*/
|
||||
public abstract class StreamSink<T> extends Stream<StreamSink<T>, T> {
|
||||
|
||||
public StreamSink(Stream inputStream, StreamOperator streamOperator) {
|
||||
super(inputStream, streamOperator);
|
||||
}
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
package io.ray.streaming.api.stream;
|
||||
|
||||
/**
 * Marker interface identifying a stream as a source (entry point) of the streaming job.
 *
 * @param <T> The type of StreamSource data.
 */
public interface StreamSource<T> {}
|
|
@ -1,38 +0,0 @@
|
|||
package io.ray.streaming.api.stream;
|
||||
|
||||
import io.ray.streaming.operator.impl.UnionOperator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Represents a union DataStream.
|
||||
*
|
||||
* <p>This stream does not create a physical operation, it only affects how upstream data are
|
||||
* connected to downstream data.
|
||||
*
|
||||
* @param <T> The type of union data.
|
||||
*/
|
||||
public class UnionStream<T> extends DataStream<T> {
|
||||
|
||||
private List<DataStream<T>> unionStreams;
|
||||
|
||||
public UnionStream(DataStream<T> input, List<DataStream<T>> streams) {
|
||||
// Union stream does not create a physical operation, so we don't have to set partition
|
||||
// function for it.
|
||||
super(input, new UnionOperator());
|
||||
this.unionStreams = new ArrayList<>();
|
||||
streams.forEach(this::addStream);
|
||||
}
|
||||
|
||||
void addStream(DataStream<T> stream) {
|
||||
if (stream instanceof UnionStream) {
|
||||
this.unionStreams.addAll(((UnionStream<T>) stream).getUnionStreams());
|
||||
} else {
|
||||
this.unionStreams.add(stream);
|
||||
}
|
||||
}
|
||||
|
||||
public List<DataStream<T>> getUnionStreams() {
|
||||
return unionStreams;
|
||||
}
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
package io.ray.streaming.client;
|
||||
|
||||
import io.ray.streaming.jobgraph.JobGraph;
|
||||
import java.util.Map;
|
||||
|
||||
/** Interface of the job client. */
public interface JobClient {

  /**
   * Submit job with logical plan to run.
   *
   * @param jobGraph The logical plan.
   * @param conf job configuration key/value pairs handed to the runtime
   */
  void submit(JobGraph jobGraph, Map<String, String> conf);
}
|
|
@ -1,54 +0,0 @@
|
|||
package io.ray.streaming.jobgraph;
|
||||
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import java.io.Serializable;
|
||||
|
||||
/** Job edge is connection and partition rules of upstream and downstream execution nodes. */
|
||||
public class JobEdge implements Serializable {
|
||||
|
||||
private int srcVertexId;
|
||||
private int targetVertexId;
|
||||
private Partition partition;
|
||||
|
||||
public JobEdge(int srcVertexId, int targetVertexId, Partition partition) {
|
||||
this.srcVertexId = srcVertexId;
|
||||
this.targetVertexId = targetVertexId;
|
||||
this.partition = partition;
|
||||
}
|
||||
|
||||
public int getSrcVertexId() {
|
||||
return srcVertexId;
|
||||
}
|
||||
|
||||
public void setSrcVertexId(int srcVertexId) {
|
||||
this.srcVertexId = srcVertexId;
|
||||
}
|
||||
|
||||
public int getTargetVertexId() {
|
||||
return targetVertexId;
|
||||
}
|
||||
|
||||
public void setTargetVertexId(int targetVertexId) {
|
||||
this.targetVertexId = targetVertexId;
|
||||
}
|
||||
|
||||
public Partition getPartition() {
|
||||
return partition;
|
||||
}
|
||||
|
||||
public void setPartition(Partition partition) {
|
||||
this.partition = partition;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Edge("
|
||||
+ "from:"
|
||||
+ srcVertexId
|
||||
+ "-"
|
||||
+ targetVertexId
|
||||
+ "-"
|
||||
+ this.partition.getClass()
|
||||
+ ")";
|
||||
}
|
||||
}
|
|
@ -1,139 +0,0 @@
|
|||
package io.ray.streaming.jobgraph;
|
||||
|
||||
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
|
||||
|
||||
/** Job graph, the logical plan of streaming job. */
|
||||
public class JobGraph implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(JobGraph.class);
|
||||
|
||||
private final String jobName;
|
||||
private final Map<String, String> jobConfig;
|
||||
private List<JobVertex> jobVertices;
|
||||
private List<JobEdge> jobEdges;
|
||||
private String digraph;
|
||||
|
||||
public JobGraph(String jobName, Map<String, String> jobConfig) {
|
||||
this.jobName = jobName;
|
||||
this.jobConfig = jobConfig;
|
||||
this.jobVertices = new ArrayList<>();
|
||||
this.jobEdges = new ArrayList<>();
|
||||
}
|
||||
|
||||
public JobGraph(
|
||||
String jobName,
|
||||
Map<String, String> jobConfig,
|
||||
List<JobVertex> jobVertices,
|
||||
List<JobEdge> jobEdges) {
|
||||
this.jobName = jobName;
|
||||
this.jobConfig = jobConfig;
|
||||
this.jobVertices = jobVertices;
|
||||
this.jobEdges = jobEdges;
|
||||
generateDigraph();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate direct-graph(made up of a set of vertices and connected by edges) by current job graph
|
||||
* for simple log printing.
|
||||
*
|
||||
* @return Digraph in string type.
|
||||
*/
|
||||
public String generateDigraph() {
|
||||
StringBuilder digraph = new StringBuilder();
|
||||
digraph.append("digraph ").append(jobName).append(" ").append(" {");
|
||||
|
||||
for (JobEdge jobEdge : jobEdges) {
|
||||
String srcNode = null;
|
||||
String targetNode = null;
|
||||
for (JobVertex jobVertex : jobVertices) {
|
||||
if (jobEdge.getSrcVertexId() == jobVertex.getVertexId()) {
|
||||
srcNode = jobVertex.getVertexId() + "-" + jobVertex.getStreamOperator().getName();
|
||||
} else if (jobEdge.getTargetVertexId() == jobVertex.getVertexId()) {
|
||||
targetNode = jobVertex.getVertexId() + "-" + jobVertex.getStreamOperator().getName();
|
||||
}
|
||||
}
|
||||
digraph.append(System.getProperty("line.separator"));
|
||||
digraph.append(String.format(" \"%s\" -> \"%s\"", srcNode, targetNode));
|
||||
}
|
||||
digraph.append(System.getProperty("line.separator")).append("}");
|
||||
|
||||
this.digraph = digraph.toString();
|
||||
return this.digraph;
|
||||
}
|
||||
|
||||
public void addVertex(JobVertex vertex) {
|
||||
this.jobVertices.add(vertex);
|
||||
}
|
||||
|
||||
public void addEdge(JobEdge jobEdge) {
|
||||
this.jobEdges.add(jobEdge);
|
||||
}
|
||||
|
||||
public List<JobVertex> getJobVertices() {
|
||||
return jobVertices;
|
||||
}
|
||||
|
||||
public List<JobVertex> getSourceVertices() {
|
||||
return jobVertices.stream()
|
||||
.filter(v -> v.getVertexType() == VertexType.SOURCE)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public List<JobVertex> getSinkVertices() {
|
||||
return jobVertices.stream()
|
||||
.filter(v -> v.getVertexType() == VertexType.SINK)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public JobVertex getVertex(int vertexId) {
|
||||
return jobVertices.stream().filter(v -> v.getVertexId() == vertexId).findFirst().get();
|
||||
}
|
||||
|
||||
public List<JobEdge> getJobEdges() {
|
||||
return jobEdges;
|
||||
}
|
||||
|
||||
public Set<JobEdge> getVertexInputEdges(int vertexId) {
|
||||
return jobEdges.stream()
|
||||
.filter(jobEdge -> jobEdge.getTargetVertexId() == vertexId)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
public Set<JobEdge> getVertexOutputEdges(int vertexId) {
|
||||
return jobEdges.stream()
|
||||
.filter(jobEdge -> jobEdge.getSrcVertexId() == vertexId)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
public String getDigraph() {
|
||||
return digraph;
|
||||
}
|
||||
|
||||
public String getJobName() {
|
||||
return jobName;
|
||||
}
|
||||
|
||||
public Map<String, String> getJobConfig() {
|
||||
return jobConfig;
|
||||
}
|
||||
|
||||
public void printJobGraph() {
|
||||
if (!LOG.isInfoEnabled()) {
|
||||
return;
|
||||
}
|
||||
LOG.info("Printing job graph:");
|
||||
for (JobVertex jobVertex : jobVertices) {
|
||||
LOG.info(jobVertex.toString());
|
||||
}
|
||||
for (JobEdge jobEdge : jobEdges) {
|
||||
LOG.info(jobEdge.toString());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,117 +0,0 @@
|
|||
package io.ray.streaming.jobgraph;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.ray.streaming.api.stream.DataStream;
|
||||
import io.ray.streaming.api.stream.JoinStream;
|
||||
import io.ray.streaming.api.stream.Stream;
|
||||
import io.ray.streaming.api.stream.StreamSink;
|
||||
import io.ray.streaming.api.stream.StreamSource;
|
||||
import io.ray.streaming.api.stream.UnionStream;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import io.ray.streaming.python.stream.PythonDataStream;
|
||||
import io.ray.streaming.python.stream.PythonUnionStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class JobGraphBuilder {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(JobGraphBuilder.class);
|
||||
|
||||
private JobGraph jobGraph;
|
||||
|
||||
private AtomicInteger edgeIdGenerator;
|
||||
private List<StreamSink> streamSinkList;
|
||||
|
||||
public JobGraphBuilder(List<StreamSink> streamSinkList) {
|
||||
this(streamSinkList, "job_" + System.currentTimeMillis());
|
||||
}
|
||||
|
||||
public JobGraphBuilder(List<StreamSink> streamSinkList, String jobName) {
|
||||
this(streamSinkList, jobName, new HashMap<>());
|
||||
}
|
||||
|
||||
public JobGraphBuilder(
|
||||
List<StreamSink> streamSinkList, String jobName, Map<String, String> jobConfig) {
|
||||
this.jobGraph = new JobGraph(jobName, jobConfig);
|
||||
this.streamSinkList = streamSinkList;
|
||||
this.edgeIdGenerator = new AtomicInteger(0);
|
||||
}
|
||||
|
||||
public JobGraph build() {
|
||||
for (StreamSink streamSink : streamSinkList) {
|
||||
processStream(streamSink);
|
||||
}
|
||||
return this.jobGraph;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void processStream(Stream stream) {
|
||||
while (stream.isProxyStream()) {
|
||||
// Proxy stream and original stream are the same logical stream, both refer to the
|
||||
// same data flow transformation. We should skip proxy stream to avoid applying same
|
||||
// transformation multiple times.
|
||||
LOG.debug("Skip proxy stream {} of id {}", stream, stream.getId());
|
||||
stream = stream.getOriginalStream();
|
||||
}
|
||||
StreamOperator streamOperator = stream.getOperator();
|
||||
Preconditions.checkArgument(
|
||||
stream.getLanguage() == streamOperator.getLanguage(),
|
||||
"Reference stream should be skipped.");
|
||||
int vertexId = stream.getId();
|
||||
int parallelism = stream.getParallelism();
|
||||
Map<String, String> config = stream.getConfig();
|
||||
JobVertex jobVertex;
|
||||
if (stream instanceof StreamSink) {
|
||||
jobVertex = new JobVertex(vertexId, parallelism, VertexType.SINK, streamOperator, config);
|
||||
Stream parentStream = stream.getInputStream();
|
||||
int inputVertexId = parentStream.getId();
|
||||
JobEdge jobEdge = new JobEdge(inputVertexId, vertexId, parentStream.getPartition());
|
||||
this.jobGraph.addEdge(jobEdge);
|
||||
processStream(parentStream);
|
||||
} else if (stream instanceof StreamSource) {
|
||||
jobVertex = new JobVertex(vertexId, parallelism, VertexType.SOURCE, streamOperator, config);
|
||||
} else if (stream instanceof DataStream || stream instanceof PythonDataStream) {
|
||||
jobVertex =
|
||||
new JobVertex(vertexId, parallelism, VertexType.TRANSFORMATION, streamOperator, config);
|
||||
Stream parentStream = stream.getInputStream();
|
||||
int inputVertexId = parentStream.getId();
|
||||
JobEdge jobEdge = new JobEdge(inputVertexId, vertexId, parentStream.getPartition());
|
||||
this.jobGraph.addEdge(jobEdge);
|
||||
processStream(parentStream);
|
||||
|
||||
// process union stream
|
||||
List<Stream> streams = new ArrayList<>();
|
||||
if (stream instanceof UnionStream) {
|
||||
streams.addAll(((UnionStream) stream).getUnionStreams());
|
||||
}
|
||||
if (stream instanceof PythonUnionStream) {
|
||||
streams.addAll(((PythonUnionStream) stream).getUnionStreams());
|
||||
}
|
||||
for (Stream otherStream : streams) {
|
||||
JobEdge otherEdge = new JobEdge(otherStream.getId(), vertexId, otherStream.getPartition());
|
||||
this.jobGraph.addEdge(otherEdge);
|
||||
processStream(otherStream);
|
||||
}
|
||||
|
||||
// process join stream
|
||||
if (stream instanceof JoinStream) {
|
||||
DataStream rightStream = ((JoinStream) stream).getRightStream();
|
||||
this.jobGraph.addEdge(
|
||||
new JobEdge(rightStream.getId(), vertexId, rightStream.getPartition()));
|
||||
processStream(rightStream);
|
||||
}
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Unsupported stream: " + stream);
|
||||
}
|
||||
this.jobGraph.addVertex(jobVertex);
|
||||
}
|
||||
|
||||
private int getEdgeId() {
|
||||
return this.edgeIdGenerator.incrementAndGet();
|
||||
}
|
||||
}
|
|
@ -1,204 +0,0 @@
|
|||
package io.ray.streaming.jobgraph;
|
||||
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import io.ray.streaming.api.partition.impl.ForwardPartition;
|
||||
import io.ray.streaming.api.partition.impl.RoundRobinPartition;
|
||||
import io.ray.streaming.operator.ChainStrategy;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import io.ray.streaming.operator.chain.ChainedOperator;
|
||||
import io.ray.streaming.python.PythonOperator;
|
||||
import io.ray.streaming.python.PythonOperator.ChainedPythonOperator;
|
||||
import io.ray.streaming.python.PythonPartition;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
/**
 * Optimize job graph by chaining some operators so that some operators can be run in the same
 * thread.
 */
public class JobGraphOptimizer {

  private final JobGraph jobGraph;
  // Vertices already reached by the depth-first merge traversal.
  private Set<JobVertex> visited = new HashSet<>();
  // vertex id -> vertex
  private Map<Integer, JobVertex> vertexMap;
  // vertex -> its outgoing edges in the original (un-merged) graph
  private Map<JobVertex, Set<JobEdge>> outputEdgesMap;
  // head vertex id of a merged chain -> (merged vertex, the vertices merged into it)
  // (the key is the chain HEAD's id: see mergeAndAddVertex, which keys on
  // mergedVertex.getVertexId() == headVertex.getVertexId())
  private Map<Integer, Pair<JobVertex, List<JobVertex>>> mergedVertexMap;

  public JobGraphOptimizer(JobGraph jobGraph) {
    this.jobGraph = jobGraph;
    vertexMap =
        jobGraph.getJobVertices().stream()
            .collect(Collectors.toMap(JobVertex::getVertexId, Function.identity()));
    outputEdgesMap =
        vertexMap.keySet().stream()
            .collect(
                Collectors.toMap(
                    id -> vertexMap.get(id),
                    id -> new HashSet<>(jobGraph.getVertexOutputEdges(id))));
    mergedVertexMap = new HashMap<>();
  }

  /** Returns a new job graph in which chainable vertices are merged into single vertices. */
  public JobGraph optimize() {
    // Deep-first traverse nodes from source to sink to merge vertices that can be chained
    // together.
    jobGraph
        .getSourceVertices()
        .forEach(
            vertex -> {
              List<JobVertex> verticesToMerge = new ArrayList<>();
              verticesToMerge.add(vertex);
              mergeVerticesRecursively(vertex, verticesToMerge);
            });

    List<JobVertex> vertices =
        mergedVertexMap.values().stream().map(Pair::getLeft).collect(Collectors.toList());

    return new JobGraph(jobGraph.getJobName(), jobGraph.getJobConfig(), vertices, createEdges());
  }

  // Extends the current chain with `vertex` and keeps walking its outputs. When an output edge
  // cannot be chained, the accumulated chain is sealed (mergeAndAddVertex) and a new chain is
  // started at the successor.
  private void mergeVerticesRecursively(JobVertex vertex, List<JobVertex> verticesToMerge) {
    if (!visited.contains(vertex)) {
      visited.add(vertex);
      Set<JobEdge> outputEdges = outputEdgesMap.get(vertex);
      if (outputEdges.isEmpty()) {
        // Sink of the traversal: seal the accumulated chain.
        mergeAndAddVertex(verticesToMerge);
      } else {
        outputEdges.forEach(
            edge -> {
              JobVertex succeedingVertex = vertexMap.get(edge.getTargetVertexId());
              if (canBeChained(vertex, succeedingVertex, edge)) {
                verticesToMerge.add(succeedingVertex);
                mergeVerticesRecursively(succeedingVertex, verticesToMerge);
              } else {
                mergeAndAddVertex(verticesToMerge);
                // Start a fresh chain at the non-chainable successor.
                List<JobVertex> newMergedVertices = new ArrayList<>();
                newMergedVertices.add(succeedingVertex);
                mergeVerticesRecursively(succeedingVertex, newMergedVertices);
              }
            });
      }
    }
  }

  // Merges the collected vertices into one JobVertex (chaining their operators) and records it in
  // mergedVertexMap under the chain head's vertex id.
  private void mergeAndAddVertex(List<JobVertex> verticesToMerge) {
    JobVertex mergedVertex;
    JobVertex headVertex = verticesToMerge.get(0);
    Language language = headVertex.getLanguage();
    if (verticesToMerge.size() == 1) {
      // no chain
      mergedVertex = headVertex;
    } else {
      List<StreamOperator> operators =
          verticesToMerge.stream()
              .map(v -> vertexMap.get(v.getVertexId()).getStreamOperator())
              .collect(Collectors.toList());
      List<Map<String, String>> configs =
          verticesToMerge.stream()
              .map(v -> vertexMap.get(v.getVertexId()).getConfig())
              .collect(Collectors.toList());
      StreamOperator operator;
      if (language == Language.JAVA) {
        operator = ChainedOperator.newChainedOperator(operators, configs);
      } else {
        List<PythonOperator> pythonOperators =
            operators.stream().map(o -> (PythonOperator) o).collect(Collectors.toList());
        operator = new ChainedPythonOperator(pythonOperators, configs);
      }
      // chained operator config is placed into `ChainedOperator`.
      mergedVertex =
          new JobVertex(
              headVertex.getVertexId(),
              headVertex.getParallelism(),
              headVertex.getVertexType(),
              operator,
              new HashMap<>());
    }

    mergedVertexMap.put(mergedVertex.getVertexId(), Pair.of(mergedVertex, verticesToMerge));
  }

  // Re-creates edges between merged vertices: for each chain, the tail vertex's original output
  // edges become edges from the merged vertex to the downstream chain's merged (head) vertex.
  private List<JobEdge> createEdges() {
    List<JobEdge> edges = new ArrayList<>();
    mergedVertexMap.forEach(
        (id, pair) -> {
          JobVertex mergedVertex = pair.getLeft();
          List<JobVertex> mergedVertices = pair.getRight();
          JobVertex tailVertex = mergedVertices.get(mergedVertices.size() - 1);
          // input edge will be set up in input vertices
          if (outputEdgesMap.containsKey(tailVertex)) {
            outputEdgesMap
                .get(tailVertex)
                .forEach(
                    edge -> {
                      Pair<JobVertex, List<JobVertex>> downstreamPair =
                          mergedVertexMap.get(edge.getTargetVertexId());
                      // change ForwardPartition to RoundRobinPartition.
                      Partition partition = changePartition(edge.getPartition());
                      JobEdge newEdge =
                          new JobEdge(
                              mergedVertex.getVertexId(),
                              downstreamPair.getLeft().getVertexId(),
                              partition);
                      edges.add(newEdge);
                    });
          }
        });
    return edges;
  }

  /** Change ForwardPartition to RoundRobinPartition. */
  private Partition changePartition(Partition partition) {
    if (partition instanceof PythonPartition) {
      PythonPartition pythonPartition = (PythonPartition) partition;
      // Only name-based (not binary-serialized) forward partitions can be recognized and swapped.
      if (!pythonPartition.isConstructedFromBinary()
          && pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS)) {
        return PythonPartition.RoundRobinPartition;
      } else {
        return partition;
      }
    } else {
      if (partition instanceof ForwardPartition) {
        return new RoundRobinPartition();
      } else {
        return partition;
      }
    }
  }

  // Two vertices can be chained only when the edge is the sole link between them, their
  // parallelism and language match, their chain strategies allow it, and the edge uses a
  // forward partition.
  private boolean canBeChained(
      JobVertex precedingVertex, JobVertex succeedingVertex, JobEdge edge) {
    if (jobGraph.getVertexOutputEdges(precedingVertex.getVertexId()).size() > 1
        || jobGraph.getVertexInputEdges(succeedingVertex.getVertexId()).size() > 1) {
      return false;
    }
    if (precedingVertex.getParallelism() != succeedingVertex.getParallelism()) {
      return false;
    }
    if (precedingVertex.getStreamOperator().getChainStrategy() == ChainStrategy.NEVER
        || succeedingVertex.getStreamOperator().getChainStrategy() == ChainStrategy.NEVER
        || succeedingVertex.getStreamOperator().getChainStrategy() == ChainStrategy.HEAD) {
      return false;
    }
    if (precedingVertex.getLanguage() != succeedingVertex.getLanguage()) {
      return false;
    }
    Partition partition = edge.getPartition();
    if (!(partition instanceof PythonPartition)) {
      return partition instanceof ForwardPartition;
    } else {
      PythonPartition pythonPartition = (PythonPartition) partition;
      return !pythonPartition.isConstructedFromBinary()
          && pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS);
    }
  }
}
|
|
@ -1,72 +0,0 @@
|
|||
package io.ray.streaming.jobgraph;
|
||||
|
||||
import com.google.common.base.MoreObjects;
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
/** Job vertex is a cell node where logic is executed. */
|
||||
public class JobVertex implements Serializable {
|
||||
|
||||
private int vertexId;
|
||||
private int parallelism;
|
||||
private VertexType vertexType;
|
||||
private Language language;
|
||||
private StreamOperator streamOperator;
|
||||
private Map<String, String> config;
|
||||
|
||||
public JobVertex(
|
||||
int vertexId,
|
||||
int parallelism,
|
||||
VertexType vertexType,
|
||||
StreamOperator streamOperator,
|
||||
Map<String, String> config) {
|
||||
this.vertexId = vertexId;
|
||||
this.parallelism = parallelism;
|
||||
this.vertexType = vertexType;
|
||||
this.streamOperator = streamOperator;
|
||||
this.language = streamOperator.getLanguage();
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
public int getVertexId() {
|
||||
return vertexId;
|
||||
}
|
||||
|
||||
public int getParallelism() {
|
||||
return parallelism;
|
||||
}
|
||||
|
||||
public StreamOperator getStreamOperator() {
|
||||
return streamOperator;
|
||||
}
|
||||
|
||||
public VertexType getVertexType() {
|
||||
return vertexType;
|
||||
}
|
||||
|
||||
public Language getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public Map<String, String> getConfig() {
|
||||
return config;
|
||||
}
|
||||
|
||||
public void setConfig(Map<String, String> config) {
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
.add("vertexId", vertexId)
|
||||
.add("parallelism", parallelism)
|
||||
.add("vertexType", vertexType)
|
||||
.add("language", language)
|
||||
.add("streamOperator", streamOperator)
|
||||
.add("config", config)
|
||||
.toString();
|
||||
}
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
package io.ray.streaming.jobgraph;
|
||||
|
||||
/** Different roles for a node. */
public enum VertexType {
  /** Entry node that produces records into the job. */
  SOURCE,
  /** Intermediate node that transforms records. */
  TRANSFORMATION,
  /** Terminal node that consumes records. */
  SINK,
}
|
|
@ -1,41 +0,0 @@
|
|||
package io.ray.streaming.message;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
public class KeyRecord<K, T> extends Record<T> {
|
||||
|
||||
private K key;
|
||||
|
||||
public KeyRecord(K key, T value) {
|
||||
super(value);
|
||||
this.key = key;
|
||||
}
|
||||
|
||||
public K getKey() {
|
||||
return key;
|
||||
}
|
||||
|
||||
public void setKey(K key) {
|
||||
this.key = key;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
if (!super.equals(o)) {
|
||||
return false;
|
||||
}
|
||||
KeyRecord<?, ?> keyRecord = (KeyRecord<?, ?>) o;
|
||||
return Objects.equals(key, keyRecord.key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(super.hashCode(), key);
|
||||
}
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
package io.ray.streaming.message;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
 * A single data element flowing through the pipeline, optionally tagged with the name of the
 * stream it belongs to. The stream tag is transient and therefore not serialized.
 */
public class Record<T> implements Serializable {

  // Name of the stream this record travels on; not part of the serialized form.
  protected transient String stream;
  protected T value;

  public Record(T value) {
    this.value = value;
  }

  public T getValue() {
    return value;
  }

  public void setValue(T value) {
    this.value = value;
  }

  public String getStream() {
    return stream;
  }

  public void setStream(String stream) {
    this.stream = stream;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    Record<?> record = (Record<?>) o;
    return Objects.equals(stream, record.stream) && Objects.equals(value, record.value);
  }

  @Override
  public int hashCode() {
    return Objects.hash(stream, value);
  }

  @Override
  public String toString() {
    // String.valueOf avoids the NPE the previous `value.toString()` threw for a null value;
    // null values are legal — both the constructor and equals() accept them.
    return String.valueOf(value);
  }
}
|
|
@ -1,14 +0,0 @@
|
|||
package io.ray.streaming.operator;
|
||||
|
||||
/** Chain strategy for streaming operators. Chained operators are run in the same thread. */
public enum ChainStrategy {
  /**
   * The operator won't be chained with preceding operators, but maybe chained with succeeding
   * operators.
   */
  HEAD,
  /** Operators will be chained together when possible. */
  ALWAYS,
  /** The operator won't be chained with any operator. */
  NEVER
}
|
|
@ -1,12 +0,0 @@
|
|||
package io.ray.streaming.operator;
|
||||
|
||||
import io.ray.streaming.message.Record;
|
||||
|
||||
/** An operator that consumes a single input stream. */
public interface OneInputOperator<T> extends Operator {

  /**
   * Processes one input record.
   *
   * @param record the record to process
   * @throws Exception if processing fails
   */
  void processElement(Record<T> record) throws Exception;

  /** One-input operators always report {@link OperatorType#ONE_INPUT}. */
  default OperatorType getOpType() {
    return OperatorType.ONE_INPUT;
  }
}
|
|
@ -1,33 +0,0 @@
|
|||
package io.ray.streaming.operator;
|
||||
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.api.collector.Collector;
|
||||
import io.ray.streaming.api.context.RuntimeContext;
|
||||
import io.ray.streaming.api.function.Function;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
/** Base contract for all streaming operators. */
public interface Operator extends Serializable {

  /** Returns the display name of this operator. */
  String getName();

  /** Initializes the operator with its downstream collectors and the runtime context. */
  void open(List<Collector> collectors, RuntimeContext runtimeContext);

  /** Finish hook; presumably invoked by the runtime before {@link #close()} — confirm. */
  void finish();

  /** Releases resources held by this operator. */
  void close();

  /** Returns the user function wrapped by this operator. */
  Function getFunction();

  /** Returns the language this operator runs in. */
  Language getLanguage();

  /** Returns the operator's input arity. */
  OperatorType getOpType();

  /** Returns how this operator may be chained with its neighbors. */
  ChainStrategy getChainStrategy();

  /** See {@link Function#saveCheckpoint()}. */
  Serializable saveCheckpoint();

  /** See {@link Function#loadCheckpoint(Serializable)}. */
  void loadCheckpoint(Serializable checkpointObject);
}
|
|
@ -1,7 +0,0 @@
|
|||
package io.ray.streaming.operator;
|
||||
|
||||
/** Input arity of an operator. */
public enum OperatorType {
  /** Operator with no stream input (a source). */
  SOURCE,
  /** Operator consuming a single input stream. */
  ONE_INPUT,
  /** Operator consuming two input streams (e.g. a join). */
  TWO_INPUT,
}
|
|
@ -1,14 +0,0 @@
|
|||
package io.ray.streaming.operator;
|
||||
|
||||
import io.ray.streaming.api.function.impl.SourceFunction.SourceContext;
|
||||
|
||||
/** An operator that produces records into the job rather than consuming an input stream. */
public interface SourceOperator<T> extends Operator {

  /** Fetches data from the underlying source; presumably called repeatedly by the runtime. */
  void fetch();

  /** Returns the context through which this source emits records. */
  SourceContext<T> getSourceContext();

  /** Source operators always report {@link OperatorType#SOURCE}. */
  default OperatorType getOpType() {
    return OperatorType.SOURCE;
  }
}
|
|
@ -1,97 +0,0 @@
|
|||
package io.ray.streaming.operator;
|
||||
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.api.collector.Collector;
|
||||
import io.ray.streaming.api.context.RuntimeContext;
|
||||
import io.ray.streaming.api.function.Function;
|
||||
import io.ray.streaming.api.function.RichFunction;
|
||||
import io.ray.streaming.api.function.internal.Functions;
|
||||
import io.ray.streaming.message.KeyRecord;
|
||||
import io.ray.streaming.message.Record;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public abstract class StreamOperator<F extends Function> implements Operator {
|
||||
|
||||
protected final String name;
|
||||
protected F function;
|
||||
protected RichFunction richFunction;
|
||||
protected List<Collector> collectorList;
|
||||
protected RuntimeContext runtimeContext;
|
||||
private ChainStrategy chainStrategy = ChainStrategy.ALWAYS;
|
||||
|
||||
protected StreamOperator() {
|
||||
this.name = getClass().getSimpleName();
|
||||
}
|
||||
|
||||
protected StreamOperator(F function) {
|
||||
this();
|
||||
setFunction(function);
|
||||
}
|
||||
|
||||
public void setFunction(F function) {
|
||||
this.function = function;
|
||||
this.richFunction = Functions.wrap(function);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
|
||||
this.collectorList = collectorList;
|
||||
this.runtimeContext = runtimeContext;
|
||||
richFunction.open(runtimeContext);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish() {}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
richFunction.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Function getFunction() {
|
||||
return function;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Language getLanguage() {
|
||||
return Language.JAVA;
|
||||
}
|
||||
|
||||
protected void collect(Record record) {
|
||||
for (Collector collector : this.collectorList) {
|
||||
collector.collect(record);
|
||||
}
|
||||
}
|
||||
|
||||
protected void collect(KeyRecord keyRecord) {
|
||||
for (Collector collector : this.collectorList) {
|
||||
collector.collect(keyRecord);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Serializable saveCheckpoint() {
|
||||
return function.saveCheckpoint();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void loadCheckpoint(Serializable checkpointObject) {
|
||||
function.loadCheckpoint(checkpointObject);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setChainStrategy(ChainStrategy chainStrategy) {
|
||||
this.chainStrategy = chainStrategy;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ChainStrategy getChainStrategy() {
|
||||
return chainStrategy;
|
||||
}
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
package io.ray.streaming.operator;
|
||||
|
||||
import io.ray.streaming.message.Record;
|
||||
|
||||
public interface TwoInputOperator<T, O> extends Operator {
|
||||
|
||||
void processElement(Record<T> record1, Record<O> record2);
|
||||
|
||||
default OperatorType getOpType() {
|
||||
return OperatorType.TWO_INPUT;
|
||||
}
|
||||
}
|
|
@ -1,187 +0,0 @@
|
|||
package io.ray.streaming.operator.chain;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.api.collector.Collector;
|
||||
import io.ray.streaming.api.context.RuntimeContext;
|
||||
import io.ray.streaming.api.function.Function;
|
||||
import io.ray.streaming.api.function.impl.SourceFunction.SourceContext;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.Operator;
|
||||
import io.ray.streaming.operator.OperatorType;
|
||||
import io.ray.streaming.operator.SourceOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import io.ray.streaming.operator.TwoInputOperator;
|
||||
import java.io.Serializable;
|
||||
import java.lang.reflect.Proxy;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/** Abstract base class for chained operators. */
|
||||
public abstract class ChainedOperator extends StreamOperator<Function> {
|
||||
|
||||
protected final List<StreamOperator> operators;
|
||||
protected final Operator headOperator;
|
||||
protected final Operator tailOperator;
|
||||
private final List<Map<String, String>> configs;
|
||||
|
||||
public ChainedOperator(List<StreamOperator> operators, List<Map<String, String>> configs) {
|
||||
Preconditions.checkArgument(
|
||||
operators.size() >= 2, "Need at lease two operators to be chained together");
|
||||
operators.stream()
|
||||
.skip(1)
|
||||
.forEach(operator -> Preconditions.checkArgument(operator instanceof OneInputOperator));
|
||||
this.operators = operators;
|
||||
this.configs = configs;
|
||||
this.headOperator = operators.get(0);
|
||||
this.tailOperator = operators.get(operators.size() - 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
|
||||
// Dont' call super.open() as we `open` every operator separately.
|
||||
List<ForwardCollector> succeedingCollectors =
|
||||
operators.stream()
|
||||
.skip(1)
|
||||
.map(operator -> new ForwardCollector((OneInputOperator) operator))
|
||||
.collect(Collectors.toList());
|
||||
for (int i = 0; i < operators.size() - 1; i++) {
|
||||
StreamOperator operator = operators.get(i);
|
||||
List<ForwardCollector> forwardCollectors =
|
||||
Collections.singletonList(succeedingCollectors.get(i));
|
||||
operator.open(forwardCollectors, createRuntimeContext(runtimeContext, i));
|
||||
}
|
||||
// tail operator send data to downstream using provided collectors.
|
||||
tailOperator.open(collectorList, createRuntimeContext(runtimeContext, operators.size() - 1));
|
||||
}
|
||||
|
||||
@Override
|
||||
public OperatorType getOpType() {
|
||||
return headOperator.getOpType();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Language getLanguage() {
|
||||
return headOperator.getLanguage();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return operators.stream().map(Operator::getName).collect(Collectors.joining(" -> ", "[", "]"));
|
||||
}
|
||||
|
||||
public List<StreamOperator> getOperators() {
|
||||
return operators;
|
||||
}
|
||||
|
||||
public Operator getHeadOperator() {
|
||||
return headOperator;
|
||||
}
|
||||
|
||||
public Operator getTailOperator() {
|
||||
return tailOperator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Serializable saveCheckpoint() {
|
||||
Serializable[] checkpoints = new Serializable[operators.size()];
|
||||
for (int i = 0; i < operators.size(); ++i) {
|
||||
checkpoints[i] = operators.get(i).saveCheckpoint();
|
||||
}
|
||||
return checkpoints;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void loadCheckpoint(Serializable checkpointObject) {
|
||||
Serializable[] checkpoints = (Serializable[]) checkpointObject;
|
||||
for (int i = 0; i < operators.size(); ++i) {
|
||||
operators.get(i).loadCheckpoint(checkpoints[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private RuntimeContext createRuntimeContext(RuntimeContext runtimeContext, int index) {
|
||||
return (RuntimeContext)
|
||||
Proxy.newProxyInstance(
|
||||
runtimeContext.getClass().getClassLoader(),
|
||||
new Class[] {RuntimeContext.class},
|
||||
(proxy, method, methodArgs) -> {
|
||||
if (method.getName().equals("getConfig")) {
|
||||
return configs.get(index);
|
||||
} else {
|
||||
return method.invoke(runtimeContext, methodArgs);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static ChainedOperator newChainedOperator(
|
||||
List<StreamOperator> operators, List<Map<String, String>> configs) {
|
||||
switch (operators.get(0).getOpType()) {
|
||||
case SOURCE:
|
||||
return new ChainedSourceOperator(operators, configs);
|
||||
case ONE_INPUT:
|
||||
return new ChainedOneInputOperator(operators, configs);
|
||||
case TWO_INPUT:
|
||||
return new ChainedTwoInputOperator(operators, configs);
|
||||
default:
|
||||
throw new IllegalArgumentException(
|
||||
"Unsupported operator type " + operators.get(0).getOpType());
|
||||
}
|
||||
}
|
||||
|
||||
static class ChainedSourceOperator<T> extends ChainedOperator implements SourceOperator<T> {
|
||||
|
||||
private final SourceOperator<T> sourceOperator;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
ChainedSourceOperator(List<StreamOperator> operators, List<Map<String, String>> configs) {
|
||||
super(operators, configs);
|
||||
sourceOperator = (SourceOperator<T>) headOperator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fetch() {
|
||||
sourceOperator.fetch();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SourceContext<T> getSourceContext() {
|
||||
return sourceOperator.getSourceContext();
|
||||
}
|
||||
}
|
||||
|
||||
static class ChainedOneInputOperator<T> extends ChainedOperator implements OneInputOperator<T> {
|
||||
|
||||
private final OneInputOperator<T> inputOperator;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
ChainedOneInputOperator(List<StreamOperator> operators, List<Map<String, String>> configs) {
|
||||
super(operators, configs);
|
||||
inputOperator = (OneInputOperator<T>) headOperator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
inputOperator.processElement(record);
|
||||
}
|
||||
}
|
||||
|
||||
static class ChainedTwoInputOperator<L, R> extends ChainedOperator
|
||||
implements TwoInputOperator<L, R> {
|
||||
|
||||
private final TwoInputOperator<L, R> inputOperator;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
ChainedTwoInputOperator(List<StreamOperator> operators, List<Map<String, String>> configs) {
|
||||
super(operators, configs);
|
||||
inputOperator = (TwoInputOperator<L, R>) headOperator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<L> record1, Record<R> record2) {
|
||||
inputOperator.processElement(record1, record2);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,24 +0,0 @@
|
|||
package io.ray.streaming.operator.chain;
|
||||
|
||||
import io.ray.streaming.api.collector.Collector;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
|
||||
class ForwardCollector implements Collector<Record> {
|
||||
|
||||
private final OneInputOperator succeedingOperator;
|
||||
|
||||
ForwardCollector(OneInputOperator succeedingOperator) {
|
||||
this.succeedingOperator = succeedingOperator;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void collect(Record record) {
|
||||
try {
|
||||
succeedingOperator.processElement(record);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
package io.ray.streaming.operator.impl;
|
||||
|
||||
import io.ray.streaming.api.function.impl.FilterFunction;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class FilterOperator<T> extends StreamOperator<FilterFunction<T>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
public FilterOperator(FilterFunction<T> filterFunction) {
|
||||
super(filterFunction);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
if (this.function.filter(record.getValue())) {
|
||||
this.collect(record);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
package io.ray.streaming.operator.impl;
|
||||
|
||||
import io.ray.streaming.api.collector.CollectionCollector;
|
||||
import io.ray.streaming.api.collector.Collector;
|
||||
import io.ray.streaming.api.context.RuntimeContext;
|
||||
import io.ray.streaming.api.function.impl.FlatMapFunction;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import java.util.List;
|
||||
|
||||
public class FlatMapOperator<T, R> extends StreamOperator<FlatMapFunction<T, R>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
private CollectionCollector collectionCollector;
|
||||
|
||||
public FlatMapOperator(FlatMapFunction<T, R> flatMapFunction) {
|
||||
super(flatMapFunction);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
|
||||
super.open(collectorList, runtimeContext);
|
||||
this.collectionCollector = new CollectionCollector(collectorList);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
this.function.flatMap(record.getValue(), (Collector<R>) collectionCollector);
|
||||
}
|
||||
}
|
|
@ -1,35 +0,0 @@
|
|||
package io.ray.streaming.operator.impl;
|
||||
|
||||
import io.ray.streaming.api.function.impl.JoinFunction;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.ChainStrategy;
|
||||
import io.ray.streaming.operator.OperatorType;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import io.ray.streaming.operator.TwoInputOperator;
|
||||
|
||||
/**
|
||||
* Join operator
|
||||
*
|
||||
* @param <L> Type of the data in the left stream.
|
||||
* @param <R> Type of the data in the right stream.
|
||||
* @param <K> Type of the data in the join key.
|
||||
* @param <O> Type of the data in the joined stream.
|
||||
*/
|
||||
public class JoinOperator<L, R, K, O> extends StreamOperator<JoinFunction<L, R, O>>
|
||||
implements TwoInputOperator<L, R> {
|
||||
|
||||
public JoinOperator() {}
|
||||
|
||||
public JoinOperator(JoinFunction<L, R, O> function) {
|
||||
super(function);
|
||||
setChainStrategy(ChainStrategy.HEAD);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<L> record1, Record<R> record2) {}
|
||||
|
||||
@Override
|
||||
public OperatorType getOpType() {
|
||||
return OperatorType.TWO_INPUT;
|
||||
}
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
package io.ray.streaming.operator.impl;
|
||||
|
||||
import io.ray.streaming.api.function.impl.KeyFunction;
|
||||
import io.ray.streaming.message.KeyRecord;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class KeyByOperator<T, K> extends StreamOperator<KeyFunction<T, K>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
public KeyByOperator(KeyFunction<T, K> keyFunction) {
|
||||
super(keyFunction);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
K key = this.function.keyBy(record.getValue());
|
||||
collect(new KeyRecord<>(key, record.getValue()));
|
||||
}
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
package io.ray.streaming.operator.impl;
|
||||
|
||||
import io.ray.streaming.api.function.impl.MapFunction;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class MapOperator<T, R> extends StreamOperator<MapFunction<T, R>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
public MapOperator(MapFunction<T, R> mapFunction) {
|
||||
super(mapFunction);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
this.collect(new Record<R>(this.function.map(record.getValue())));
|
||||
}
|
||||
}
|
|
@ -1,46 +0,0 @@
|
|||
package io.ray.streaming.operator.impl;
|
||||
|
||||
import io.ray.streaming.api.collector.Collector;
|
||||
import io.ray.streaming.api.context.RuntimeContext;
|
||||
import io.ray.streaming.api.function.impl.ReduceFunction;
|
||||
import io.ray.streaming.message.KeyRecord;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.ChainStrategy;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class ReduceOperator<K, T> extends StreamOperator<ReduceFunction<T>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
private Map<K, T> reduceState;
|
||||
|
||||
public ReduceOperator(ReduceFunction<T> reduceFunction) {
|
||||
super(reduceFunction);
|
||||
setChainStrategy(ChainStrategy.HEAD);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
|
||||
super.open(collectorList, runtimeContext);
|
||||
this.reduceState = new HashMap<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
KeyRecord<K, T> keyRecord = (KeyRecord<K, T>) record;
|
||||
K key = keyRecord.getKey();
|
||||
T value = keyRecord.getValue();
|
||||
if (reduceState.containsKey(key)) {
|
||||
T oldValue = reduceState.get(key);
|
||||
T newValue = this.function.reduce(oldValue, value);
|
||||
reduceState.put(key, newValue);
|
||||
collect(new Record(newValue));
|
||||
} else {
|
||||
reduceState.put(key, value);
|
||||
collect(record);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
package io.ray.streaming.operator.impl;
|
||||
|
||||
import io.ray.streaming.api.function.impl.SinkFunction;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class SinkOperator<T> extends StreamOperator<SinkFunction<T>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
public SinkOperator(SinkFunction<T> sinkFunction) {
|
||||
super(sinkFunction);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
this.function.sink(record.getValue());
|
||||
}
|
||||
}
|
|
@ -1,65 +0,0 @@
|
|||
package io.ray.streaming.operator.impl;
|
||||
|
||||
import io.ray.streaming.api.collector.Collector;
|
||||
import io.ray.streaming.api.context.RuntimeContext;
|
||||
import io.ray.streaming.api.function.impl.SourceFunction;
|
||||
import io.ray.streaming.api.function.impl.SourceFunction.SourceContext;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.ChainStrategy;
|
||||
import io.ray.streaming.operator.OperatorType;
|
||||
import io.ray.streaming.operator.SourceOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import java.util.List;
|
||||
|
||||
public class SourceOperatorImpl<T> extends StreamOperator<SourceFunction<T>>
|
||||
implements SourceOperator {
|
||||
|
||||
private SourceContextImpl sourceContext;
|
||||
|
||||
public SourceOperatorImpl(SourceFunction<T> function) {
|
||||
super(function);
|
||||
setChainStrategy(ChainStrategy.HEAD);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
|
||||
super.open(collectorList, runtimeContext);
|
||||
this.sourceContext = new SourceContextImpl(collectorList);
|
||||
this.function.init(runtimeContext.getParallelism(), runtimeContext.getTaskIndex());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fetch() {
|
||||
try {
|
||||
this.function.fetch(this.sourceContext);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public SourceContext getSourceContext() {
|
||||
return sourceContext;
|
||||
}
|
||||
|
||||
@Override
|
||||
public OperatorType getOpType() {
|
||||
return OperatorType.SOURCE;
|
||||
}
|
||||
|
||||
class SourceContextImpl implements SourceContext<T> {
|
||||
|
||||
private List<Collector> collectors;
|
||||
|
||||
public SourceContextImpl(List<Collector> collectors) {
|
||||
this.collectors = collectors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(T t) throws Exception {
|
||||
for (Collector collector : collectors) {
|
||||
collector.collect(new Record<>(t));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
package io.ray.streaming.operator.impl;
|
||||
|
||||
import io.ray.streaming.api.function.Function;
|
||||
import io.ray.streaming.api.function.internal.Functions;
|
||||
import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class UnionOperator<T> extends StreamOperator<Function> implements OneInputOperator<T> {
|
||||
|
||||
public UnionOperator() {
|
||||
super(Functions.emptyFunction());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) {
|
||||
collect(record);
|
||||
}
|
||||
}
|
|
@ -1,124 +0,0 @@
|
|||
package io.ray.streaming.python;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.ray.streaming.api.function.Function;
|
||||
import java.util.StringJoiner;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/**
|
||||
* Represents a user defined python function.
|
||||
*
|
||||
* <p>Python worker can use information in this class to create a function object.
|
||||
*
|
||||
* <p>If this object is constructed from serialized python function, python worker can deserialize
|
||||
* it to create python function directly. If this object is constructed from moduleName and
|
||||
* className/functionName, python worker will use `importlib` to load python function.
|
||||
*
|
||||
* <p>If the python data stream api is invoked from python, `function` will be not null.
|
||||
*
|
||||
* <p>If the python data stream api is invoked from java, `moduleName` and `functionName` will be
|
||||
* not null.
|
||||
*
|
||||
* <p>
|
||||
*/
|
||||
public class PythonFunction implements Function {
|
||||
|
||||
public enum FunctionInterface {
|
||||
SOURCE_FUNCTION("SourceFunction"),
|
||||
MAP_FUNCTION("MapFunction"),
|
||||
FLAT_MAP_FUNCTION("FlatMapFunction"),
|
||||
FILTER_FUNCTION("FilterFunction"),
|
||||
KEY_FUNCTION("KeyFunction"),
|
||||
REDUCE_FUNCTION("ReduceFunction"),
|
||||
SINK_FUNCTION("SinkFunction");
|
||||
|
||||
private String functionInterface;
|
||||
|
||||
/** @param functionInterface function class name in `ray.streaming.function` module. */
|
||||
FunctionInterface(String functionInterface) {
|
||||
this.functionInterface = functionInterface;
|
||||
}
|
||||
}
|
||||
|
||||
// null if this function is constructed from moduleName/functionName.
|
||||
private final byte[] function;
|
||||
// null if this function is constructed from serialized python function.
|
||||
private final String moduleName;
|
||||
// null if this function is constructed from serialized python function.
|
||||
private final String functionName;
|
||||
/**
|
||||
* FunctionInterface can be used to validate python function, and look up operator class from
|
||||
* FunctionInterface.
|
||||
*/
|
||||
private String functionInterface;
|
||||
|
||||
/**
|
||||
* Create a {@link PythonFunction} from a serialized streaming python function.
|
||||
*
|
||||
* @param function serialized streaming python function from python driver.
|
||||
*/
|
||||
public PythonFunction(byte[] function) {
|
||||
Preconditions.checkNotNull(function);
|
||||
this.function = function;
|
||||
this.moduleName = null;
|
||||
this.functionName = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link PythonFunction} from a moduleName and streaming function name.
|
||||
*
|
||||
* @param moduleName module name of streaming function.
|
||||
* @param functionName function name of streaming function. {@code functionName} is the name of a
|
||||
* python function, or class name of subclass of `ray.streaming.function.`
|
||||
*/
|
||||
public PythonFunction(String moduleName, String functionName) {
|
||||
Preconditions.checkArgument(StringUtils.isNotBlank(moduleName));
|
||||
Preconditions.checkArgument(StringUtils.isNotBlank(functionName));
|
||||
this.function = null;
|
||||
this.moduleName = moduleName;
|
||||
this.functionName = functionName;
|
||||
}
|
||||
|
||||
public void setFunctionInterface(FunctionInterface functionInterface) {
|
||||
this.functionInterface = functionInterface.functionInterface;
|
||||
}
|
||||
|
||||
public byte[] getFunction() {
|
||||
return function;
|
||||
}
|
||||
|
||||
public String getModuleName() {
|
||||
return moduleName;
|
||||
}
|
||||
|
||||
public String getFunctionName() {
|
||||
return functionName;
|
||||
}
|
||||
|
||||
public String getFunctionInterface() {
|
||||
return functionInterface;
|
||||
}
|
||||
|
||||
public String toSimpleString() {
|
||||
if (function != null) {
|
||||
return "binary function";
|
||||
} else {
|
||||
return String.format("%s-%s.%s", functionInterface, moduleName, functionName);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringJoiner stringJoiner =
|
||||
new StringJoiner(", ", PythonFunction.class.getSimpleName() + "[", "]");
|
||||
if (function != null) {
|
||||
stringJoiner.add("function=binary function");
|
||||
} else {
|
||||
stringJoiner
|
||||
.add("moduleName='" + moduleName + "'")
|
||||
.add("functionName='" + functionName + "'");
|
||||
}
|
||||
stringJoiner.add("functionInterface='" + functionInterface + "'");
|
||||
return stringJoiner.toString();
|
||||
}
|
||||
}
|
|
@ -1,171 +0,0 @@
|
|||
package io.ray.streaming.python;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.api.context.RuntimeContext;
|
||||
import io.ray.streaming.api.function.Function;
|
||||
import io.ray.streaming.operator.Operator;
|
||||
import io.ray.streaming.operator.OperatorType;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.StringJoiner;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/** Represents a {@link StreamOperator} that wraps python {@link PythonFunction}. */
|
||||
@SuppressWarnings("unchecked")
|
||||
public class PythonOperator extends StreamOperator {
|
||||
|
||||
private final String moduleName;
|
||||
private final String className;
|
||||
|
||||
public PythonOperator(String moduleName, String className) {
|
||||
super(null);
|
||||
this.moduleName = moduleName;
|
||||
this.className = className;
|
||||
}
|
||||
|
||||
public PythonOperator(PythonFunction function) {
|
||||
super(function);
|
||||
this.moduleName = null;
|
||||
this.className = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Language getLanguage() {
|
||||
return Language.PYTHON;
|
||||
}
|
||||
|
||||
public String getModuleName() {
|
||||
return moduleName;
|
||||
}
|
||||
|
||||
public String getClassName() {
|
||||
return className;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(List list, RuntimeContext runtimeContext) {
|
||||
throwUnsupportedException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish() {
|
||||
throwUnsupportedException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
throwUnsupportedException();
|
||||
}
|
||||
|
||||
void throwUnsupportedException() {
|
||||
StackTraceElement[] trace = Thread.currentThread().getStackTrace();
|
||||
Preconditions.checkState(trace.length >= 2);
|
||||
StackTraceElement traceElement = trace[2];
|
||||
String msg =
|
||||
String.format(
|
||||
"Method %s.%s shouldn't be called.",
|
||||
traceElement.getClassName(), traceElement.getMethodName());
|
||||
throw new UnsupportedOperationException(msg);
|
||||
}
|
||||
|
||||
@Override
|
||||
public OperatorType getOpType() {
|
||||
String msg = String.format("Methods of %s shouldn't be called.", getClass().getSimpleName());
|
||||
throw new UnsupportedOperationException(msg);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append(PythonOperator.class.getSimpleName()).append("[");
|
||||
if (function != null) {
|
||||
builder.append(((PythonFunction) function).toSimpleString());
|
||||
} else {
|
||||
builder.append(moduleName).append(".").append(className);
|
||||
}
|
||||
return builder.append("]").toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringJoiner stringJoiner =
|
||||
new StringJoiner(", ", PythonOperator.class.getSimpleName() + "[", "]");
|
||||
if (function != null) {
|
||||
stringJoiner.add("function='" + function + "'");
|
||||
} else {
|
||||
stringJoiner.add("moduleName='" + moduleName + "'").add("className='" + className + "'");
|
||||
}
|
||||
return stringJoiner.toString();
|
||||
}
|
||||
|
||||
public static class ChainedPythonOperator extends PythonOperator {
|
||||
|
||||
private final List<PythonOperator> operators;
|
||||
private final PythonOperator headOperator;
|
||||
private final PythonOperator tailOperator;
|
||||
private final List<Map<String, String>> configs;
|
||||
|
||||
public ChainedPythonOperator(
|
||||
List<PythonOperator> operators, List<Map<String, String>> configs) {
|
||||
super(null);
|
||||
Preconditions.checkArgument(!operators.isEmpty());
|
||||
this.operators = operators;
|
||||
this.configs = configs;
|
||||
this.headOperator = operators.get(0);
|
||||
this.tailOperator = operators.get(operators.size() - 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public OperatorType getOpType() {
|
||||
return headOperator.getOpType();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Language getLanguage() {
|
||||
return Language.PYTHON;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return operators.stream()
|
||||
.map(Operator::getName)
|
||||
.collect(Collectors.joining(" -> ", "[", "]"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getModuleName() {
|
||||
throwUnsupportedException();
|
||||
return null; // impossible
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getClassName() {
|
||||
throwUnsupportedException();
|
||||
return null; // impossible
|
||||
}
|
||||
|
||||
@Override
|
||||
public Function getFunction() {
|
||||
throwUnsupportedException();
|
||||
return null; // impossible
|
||||
}
|
||||
|
||||
public List<PythonOperator> getOperators() {
|
||||
return operators;
|
||||
}
|
||||
|
||||
public PythonOperator getHeadOperator() {
|
||||
return headOperator;
|
||||
}
|
||||
|
||||
public PythonOperator getTailOperator() {
|
||||
return tailOperator;
|
||||
}
|
||||
|
||||
public List<Map<String, String>> getConfigs() {
|
||||
return configs;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,89 +0,0 @@
|
|||
package io.ray.streaming.python;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import java.util.StringJoiner;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/**
|
||||
* Represents a python partition function.
|
||||
*
|
||||
* <p>Python worker can create a partition object using information in this PythonPartition.
|
||||
*
|
||||
* <p>If this object is constructed from serialized python partition, python worker can deserialize
|
||||
* it to create python partition directly. If this object is constructed from moduleName and
|
||||
* className/functionName, python worker will use `importlib` to load python partition function.
|
||||
*
|
||||
* <p>
|
||||
*/
|
||||
public class PythonPartition implements Partition<Object> {
|
||||
|
||||
public static final PythonPartition BroadcastPartition =
|
||||
new PythonPartition("ray.streaming.partition", "BroadcastPartition");
|
||||
public static final PythonPartition KeyPartition =
|
||||
new PythonPartition("ray.streaming.partition", "KeyPartition");
|
||||
public static final PythonPartition RoundRobinPartition =
|
||||
new PythonPartition("ray.streaming.partition", "RoundRobinPartition");
|
||||
public static final String FORWARD_PARTITION_CLASS = "ForwardPartition";
|
||||
public static final PythonPartition ForwardPartition =
|
||||
new PythonPartition("ray.streaming.partition", FORWARD_PARTITION_CLASS);
|
||||
|
||||
private byte[] partition;
|
||||
private String moduleName;
|
||||
private String functionName;
|
||||
|
||||
public PythonPartition(byte[] partition) {
|
||||
Preconditions.checkNotNull(partition);
|
||||
this.partition = partition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a python partition from a moduleName and partition function name
|
||||
*
|
||||
* @param moduleName module name of python partition
|
||||
* @param functionName function/class name of the partition function.
|
||||
*/
|
||||
public PythonPartition(String moduleName, String functionName) {
|
||||
Preconditions.checkArgument(StringUtils.isNotBlank(moduleName));
|
||||
Preconditions.checkArgument(StringUtils.isNotBlank(functionName));
|
||||
this.moduleName = moduleName;
|
||||
this.functionName = functionName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] partition(Object record, int numPartition) {
|
||||
String msg =
|
||||
String.format("partition method of %s shouldn't be called.", getClass().getSimpleName());
|
||||
throw new UnsupportedOperationException(msg);
|
||||
}
|
||||
|
||||
public byte[] getPartition() {
|
||||
return partition;
|
||||
}
|
||||
|
||||
public String getModuleName() {
|
||||
return moduleName;
|
||||
}
|
||||
|
||||
public String getFunctionName() {
|
||||
return functionName;
|
||||
}
|
||||
|
||||
public boolean isConstructedFromBinary() {
|
||||
return partition != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringJoiner stringJoiner =
|
||||
new StringJoiner(", ", PythonPartition.class.getSimpleName() + "[", "]");
|
||||
if (partition != null) {
|
||||
stringJoiner.add("partition=binary partition");
|
||||
} else {
|
||||
stringJoiner
|
||||
.add("moduleName='" + moduleName + "'")
|
||||
.add("functionName='" + functionName + "'");
|
||||
}
|
||||
return stringJoiner.toString();
|
||||
}
|
||||
}
|
|
@ -1,202 +0,0 @@
|
|||
package io.ray.streaming.python.stream;
|
||||
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.api.context.StreamingContext;
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import io.ray.streaming.api.stream.DataStream;
|
||||
import io.ray.streaming.api.stream.Stream;
|
||||
import io.ray.streaming.python.PythonFunction;
|
||||
import io.ray.streaming.python.PythonFunction.FunctionInterface;
|
||||
import io.ray.streaming.python.PythonOperator;
|
||||
import io.ray.streaming.python.PythonPartition;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/** Represents a stream of data whose transformations will be executed in python. */
|
||||
public class PythonDataStream extends Stream<PythonDataStream, Object> implements PythonStream {
|
||||
|
||||
protected PythonDataStream(StreamingContext streamingContext, PythonOperator pythonOperator) {
|
||||
super(streamingContext, pythonOperator);
|
||||
}
|
||||
|
||||
protected PythonDataStream(
|
||||
StreamingContext streamingContext,
|
||||
PythonOperator pythonOperator,
|
||||
Partition<Object> partition) {
|
||||
super(streamingContext, pythonOperator, partition);
|
||||
}
|
||||
|
||||
public PythonDataStream(PythonDataStream input, PythonOperator pythonOperator) {
|
||||
super(input, pythonOperator);
|
||||
}
|
||||
|
||||
public PythonDataStream(
|
||||
PythonDataStream input, PythonOperator pythonOperator, Partition<Object> partition) {
|
||||
super(input, pythonOperator, partition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a python stream that reference passed java stream. Changes in new stream will be
|
||||
* reflected in referenced stream and vice versa
|
||||
*/
|
||||
public PythonDataStream(DataStream referencedStream) {
|
||||
super(referencedStream);
|
||||
}
|
||||
|
||||
public PythonDataStream map(String moduleName, String funcName) {
|
||||
return map(new PythonFunction(moduleName, funcName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a map function to this stream.
|
||||
*
|
||||
* @param func The python MapFunction.
|
||||
* @return A new PythonDataStream.
|
||||
*/
|
||||
public PythonDataStream map(PythonFunction func) {
|
||||
func.setFunctionInterface(FunctionInterface.MAP_FUNCTION);
|
||||
return new PythonDataStream(this, new PythonOperator(func));
|
||||
}
|
||||
|
||||
public PythonDataStream flatMap(String moduleName, String funcName) {
|
||||
return flatMap(new PythonFunction(moduleName, funcName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a flat-map function to this stream.
|
||||
*
|
||||
* @param func The python FlapMapFunction.
|
||||
* @return A new PythonDataStream
|
||||
*/
|
||||
public PythonDataStream flatMap(PythonFunction func) {
|
||||
func.setFunctionInterface(FunctionInterface.FLAT_MAP_FUNCTION);
|
||||
return new PythonDataStream(this, new PythonOperator(func));
|
||||
}
|
||||
|
||||
public PythonDataStream filter(String moduleName, String funcName) {
|
||||
return filter(new PythonFunction(moduleName, funcName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a filter function to this stream.
|
||||
*
|
||||
* @param func The python FilterFunction.
|
||||
* @return A new PythonDataStream that contains only the elements satisfying the given filter
|
||||
* predicate.
|
||||
*/
|
||||
public PythonDataStream filter(PythonFunction func) {
|
||||
func.setFunctionInterface(FunctionInterface.FILTER_FUNCTION);
|
||||
return new PythonDataStream(this, new PythonOperator(func));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply union transformations to this stream by merging {@link PythonDataStream} outputs of the
|
||||
* same type with each other.
|
||||
*
|
||||
* @param stream The DataStream to union output with.
|
||||
* @param others The other DataStreams to union output with.
|
||||
* @return A new UnionStream.
|
||||
*/
|
||||
public final PythonDataStream union(PythonDataStream stream, PythonDataStream... others) {
|
||||
List<PythonDataStream> streams = new ArrayList<>();
|
||||
streams.add(stream);
|
||||
streams.addAll(Arrays.asList(others));
|
||||
return union(streams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply union transformations to this stream by merging {@link PythonDataStream} outputs of the
|
||||
* same type with each other.
|
||||
*
|
||||
* @param streams The DataStreams to union output with.
|
||||
* @return A new UnionStream.
|
||||
*/
|
||||
public final PythonDataStream union(List<PythonDataStream> streams) {
|
||||
if (this instanceof PythonUnionStream) {
|
||||
PythonUnionStream unionStream = (PythonUnionStream) this;
|
||||
streams.forEach(unionStream::addStream);
|
||||
return unionStream;
|
||||
} else {
|
||||
return new PythonUnionStream(this, streams);
|
||||
}
|
||||
}
|
||||
|
||||
public PythonStreamSink sink(String moduleName, String funcName) {
|
||||
return sink(new PythonFunction(moduleName, funcName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a sink function and get a StreamSink.
|
||||
*
|
||||
* @param func The python SinkFunction.
|
||||
* @return A new StreamSink.
|
||||
*/
|
||||
public PythonStreamSink sink(PythonFunction func) {
|
||||
func.setFunctionInterface(FunctionInterface.SINK_FUNCTION);
|
||||
return new PythonStreamSink(this, new PythonOperator(func));
|
||||
}
|
||||
|
||||
public PythonKeyDataStream keyBy(String moduleName, String funcName) {
|
||||
return keyBy(new PythonFunction(moduleName, funcName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a key-by function to this stream.
|
||||
*
|
||||
* @param func the python keyFunction.
|
||||
* @return A new KeyDataStream.
|
||||
*/
|
||||
public PythonKeyDataStream keyBy(PythonFunction func) {
|
||||
checkPartitionCall();
|
||||
func.setFunctionInterface(FunctionInterface.KEY_FUNCTION);
|
||||
return new PythonKeyDataStream(this, new PythonOperator(func));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply broadcast to this stream.
|
||||
*
|
||||
* @return This stream.
|
||||
*/
|
||||
public PythonDataStream broadcast() {
|
||||
checkPartitionCall();
|
||||
return setPartition(PythonPartition.BroadcastPartition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a partition to this stream.
|
||||
*
|
||||
* @param partition The partitioning strategy.
|
||||
* @return This stream.
|
||||
*/
|
||||
public PythonDataStream partitionBy(PythonPartition partition) {
|
||||
checkPartitionCall();
|
||||
return setPartition(partition);
|
||||
}
|
||||
|
||||
/**
|
||||
* If parent stream is a python stream, we can't call partition related methods in the java
|
||||
* stream.
|
||||
*/
|
||||
private void checkPartitionCall() {
|
||||
if (getInputStream() != null && getInputStream().getLanguage() == Language.JAVA) {
|
||||
throw new RuntimeException(
|
||||
"Partition related methods can't be called on a "
|
||||
+ "python stream if parent stream is a java stream.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert this stream as a java stream. The converted stream and this stream are the same logical
|
||||
* stream, which has same stream id. Changes in converted stream will be reflected in this stream
|
||||
* and vice versa.
|
||||
*/
|
||||
public DataStream<Object> asJavaStream() {
|
||||
return new DataStream<>(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Language getLanguage() {
|
||||
return Language.PYTHON;
|
||||
}
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
package io.ray.streaming.python.stream;
|
||||
|
||||
import io.ray.streaming.api.stream.DataStream;
|
||||
import io.ray.streaming.api.stream.KeyDataStream;
|
||||
import io.ray.streaming.operator.ChainStrategy;
|
||||
import io.ray.streaming.python.PythonFunction;
|
||||
import io.ray.streaming.python.PythonFunction.FunctionInterface;
|
||||
import io.ray.streaming.python.PythonOperator;
|
||||
import io.ray.streaming.python.PythonPartition;
|
||||
|
||||
/** Represents a python DataStream returned by a key-by operation. */
|
||||
@SuppressWarnings("unchecked")
|
||||
public class PythonKeyDataStream extends PythonDataStream implements PythonStream {
|
||||
|
||||
public PythonKeyDataStream(PythonDataStream input, PythonOperator pythonOperator) {
|
||||
super(input, pythonOperator, PythonPartition.KeyPartition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a python stream that reference passed python stream. Changes in new stream will be
|
||||
* reflected in referenced stream and vice versa
|
||||
*/
|
||||
public PythonKeyDataStream(DataStream referencedStream) {
|
||||
super(referencedStream);
|
||||
}
|
||||
|
||||
public PythonDataStream reduce(String moduleName, String funcName) {
|
||||
return reduce(new PythonFunction(moduleName, funcName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a reduce function to this stream.
|
||||
*
|
||||
* @param func The reduce function.
|
||||
* @return A new DataStream.
|
||||
*/
|
||||
public PythonDataStream reduce(PythonFunction func) {
|
||||
func.setFunctionInterface(FunctionInterface.REDUCE_FUNCTION);
|
||||
PythonDataStream stream = new PythonDataStream(this, new PythonOperator(func));
|
||||
stream.withChainStrategy(ChainStrategy.HEAD);
|
||||
return stream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert this stream as a java stream. The converted stream and this stream are the same logical
|
||||
* stream, which has same stream id. Changes in converted stream will be reflected in this stream
|
||||
* and vice versa.
|
||||
*/
|
||||
public KeyDataStream<Object, Object> asJavaStream() {
|
||||
return new KeyDataStream(this);
|
||||
}
|
||||
}
|
|
@ -1,4 +0,0 @@
|
|||
package io.ray.streaming.python.stream;
|
||||
|
||||
/**
 * A marker interface used to identify all python streams.
 *
 * <p>Declares no methods of its own; it only tags stream types whose transformations run in
 * python.
 */
public interface PythonStream {}
|
|
@ -1,19 +0,0 @@
|
|||
package io.ray.streaming.python.stream;
|
||||
|
||||
import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.api.stream.StreamSink;
|
||||
import io.ray.streaming.python.PythonOperator;
|
||||
|
||||
/** Represents a sink of the PythonStream. */
public class PythonStreamSink extends StreamSink implements PythonStream {

  /**
   * Create a sink that consumes {@code input} through {@code sinkOperator}.
   *
   * @param input the upstream python stream to consume
   * @param sinkOperator the python operator wrapping the sink function
   */
  public PythonStreamSink(PythonDataStream input, PythonOperator sinkOperator) {
    super(input, sinkOperator);
    // Make the streaming context aware of this sink.
    getStreamingContext().addSink(this);
  }

  @Override
  public Language getLanguage() {
    return Language.PYTHON;
  }
}
|
|
@ -1,23 +0,0 @@
|
|||
package io.ray.streaming.python.stream;
|
||||
|
||||
import io.ray.streaming.api.context.StreamingContext;
|
||||
import io.ray.streaming.api.stream.StreamSource;
|
||||
import io.ray.streaming.operator.ChainStrategy;
|
||||
import io.ray.streaming.python.PythonFunction;
|
||||
import io.ray.streaming.python.PythonFunction.FunctionInterface;
|
||||
import io.ray.streaming.python.PythonOperator;
|
||||
|
||||
/** Represents a source of the PythonStream. */
public class PythonStreamSource extends PythonDataStream implements StreamSource {

  // Private: instances are created only through the from(...) factory below, which sets the
  // function interface first.
  private PythonStreamSource(StreamingContext streamingContext, PythonFunction sourceFunction) {
    super(streamingContext, new PythonOperator(sourceFunction));
    // Mark the source operator as the head of an operator chain.
    withChainStrategy(ChainStrategy.HEAD);
  }

  /**
   * Create a python source stream.
   *
   * @param streamingContext the context the new source belongs to
   * @param sourceFunction the python function producing the source data; its function interface
   *     is set to SOURCE_FUNCTION here
   * @return a new PythonStreamSource
   */
  public static PythonStreamSource from(
      StreamingContext streamingContext, PythonFunction sourceFunction) {
    sourceFunction.setFunctionInterface(FunctionInterface.SOURCE_FUNCTION);
    return new PythonStreamSource(streamingContext, sourceFunction);
  }
}
|
|
@ -1,36 +0,0 @@
|
|||
package io.ray.streaming.python.stream;
|
||||
|
||||
import io.ray.streaming.python.PythonOperator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Represents a union DataStream.
|
||||
*
|
||||
* <p>This stream does not create a physical operation, it only affects how upstream data are
|
||||
* connected to downstream data.
|
||||
*/
|
||||
public class PythonUnionStream extends PythonDataStream {
|
||||
|
||||
private List<PythonDataStream> unionStreams;
|
||||
|
||||
public PythonUnionStream(PythonDataStream input, List<PythonDataStream> others) {
|
||||
// Union stream does not create a physical operation, so we don't have to set partition
|
||||
// function for it.
|
||||
super(input, new PythonOperator("ray.streaming.operator", "UnionOperator"));
|
||||
this.unionStreams = new ArrayList<>();
|
||||
others.forEach(this::addStream);
|
||||
}
|
||||
|
||||
void addStream(PythonDataStream stream) {
|
||||
if (stream instanceof PythonUnionStream) {
|
||||
this.unionStreams.addAll(((PythonUnionStream) stream).getUnionStreams());
|
||||
} else {
|
||||
this.unionStreams.add(stream);
|
||||
}
|
||||
}
|
||||
|
||||
public List<PythonDataStream> getUnionStreams() {
|
||||
return unionStreams;
|
||||
}
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
package io.ray.streaming.util;
|
||||
|
||||
/** Configuration keys (and a few string defaults) used by ray streaming. */
public class Config {

  /** Non-instantiable: this class is a constants holder only. */
  private Config() {}

  public static final String STREAMING_JOB_NAME = "streaming.job.name";
  public static final String STREAMING_OP_NAME = "streaming.op_name";
  public static final String STREAMING_WORKER_NAME = "streaming.worker_name";

  // channel
  public static final String CHANNEL_TYPE = "channel_type";
  public static final String MEMORY_CHANNEL = "memory_channel";
  public static final String NATIVE_CHANNEL = "native_channel";
  public static final String CHANNEL_SIZE = "channel_size";
  // 10^8, kept as a String so it can be used directly as a config value ("100000000").
  public static final String CHANNEL_SIZE_DEFAULT = String.valueOf((long) Math.pow(10, 8));
  public static final String IS_RECREATE = "streaming.is_recreate";
  // return from DataReader.getBundle if only empty message read in this interval.
  public static final String TIMER_INTERVAL_MS = "timer_interval_ms";
  public static final String READ_TIMEOUT_MS = "read_timeout_ms";
  public static final String DEFAULT_READ_TIMEOUT_MS = "10";

  public static final String STREAMING_RING_BUFFER_CAPACITY = "streaming.ring_buffer_capacity";
  // write an empty message if there is no data to be written in this
  // interval.
  public static final String STREAMING_EMPTY_MESSAGE_INTERVAL = "streaming.empty_message_interval";

  // operator type
  public static final String OPERATOR_TYPE = "operator_type";

  // flow control
  public static final String FLOW_CONTROL_TYPE = "streaming.flow_control_type";
  public static final String WRITER_CONSUMED_STEP = "streaming.writer.consumed_step";
  public static final String READER_CONSUMED_STEP = "streaming.reader.consumed_step";
}
|
|
@ -1,6 +0,0 @@
|
|||
log4j.rootLogger=INFO, stdout
|
||||
# Direct log messages to stdout
|
||||
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.stdout.Target=System.out
|
||||
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
|
|
@ -1,5 +0,0 @@
|
|||
ray {
|
||||
run-mode = SINGLE_PROCESS
|
||||
resources = "CPU:4"
|
||||
address = ""
|
||||
}
|
|
@ -1,40 +0,0 @@
|
|||
package io.ray.streaming.api.stream;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
|
||||
import io.ray.streaming.api.context.StreamingContext;
|
||||
import io.ray.streaming.operator.impl.MapOperator;
|
||||
import io.ray.streaming.python.stream.PythonDataStream;
|
||||
import io.ray.streaming.python.stream.PythonKeyDataStream;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public class StreamTest {
|
||||
|
||||
@Test
|
||||
public void testReferencedDataStream() {
|
||||
DataStream dataStream =
|
||||
new DataStream(StreamingContext.buildContext(), new MapOperator(value -> null));
|
||||
PythonDataStream pythonDataStream = dataStream.asPythonStream();
|
||||
DataStream javaStream = pythonDataStream.asJavaStream();
|
||||
assertEquals(dataStream.getId(), pythonDataStream.getId());
|
||||
assertEquals(dataStream.getId(), javaStream.getId());
|
||||
javaStream.setParallelism(10);
|
||||
assertEquals(dataStream.getParallelism(), pythonDataStream.getParallelism());
|
||||
assertEquals(dataStream.getParallelism(), javaStream.getParallelism());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReferencedKeyDataStream() {
|
||||
DataStream dataStream =
|
||||
new DataStream(StreamingContext.buildContext(), new MapOperator(value -> null));
|
||||
KeyDataStream keyDataStream = dataStream.keyBy(value -> null);
|
||||
PythonKeyDataStream pythonKeyDataStream = keyDataStream.asPythonStream();
|
||||
KeyDataStream javaKeyDataStream = pythonKeyDataStream.asJavaStream();
|
||||
assertEquals(keyDataStream.getId(), pythonKeyDataStream.getId());
|
||||
assertEquals(keyDataStream.getId(), javaKeyDataStream.getId());
|
||||
javaKeyDataStream.setParallelism(10);
|
||||
assertEquals(keyDataStream.getParallelism(), pythonKeyDataStream.getParallelism());
|
||||
assertEquals(keyDataStream.getParallelism(), javaKeyDataStream.getParallelism());
|
||||
}
|
||||
}
|
|
@ -1,93 +0,0 @@
|
|||
package io.ray.streaming.jobgraph;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import io.ray.streaming.api.context.StreamingContext;
|
||||
import io.ray.streaming.api.partition.impl.ForwardPartition;
|
||||
import io.ray.streaming.api.partition.impl.KeyPartition;
|
||||
import io.ray.streaming.api.stream.DataStream;
|
||||
import io.ray.streaming.api.stream.DataStreamSource;
|
||||
import io.ray.streaming.api.stream.StreamSink;
|
||||
import java.util.List;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class JobGraphBuilderTest {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(JobGraphBuilderTest.class);
|
||||
|
||||
@Test
|
||||
public void testDataSync() {
|
||||
JobGraph jobGraph = buildDataSyncJobGraph();
|
||||
List<JobVertex> jobVertexList = jobGraph.getJobVertices();
|
||||
List<JobEdge> jobEdgeList = jobGraph.getJobEdges();
|
||||
|
||||
Assert.assertEquals(jobVertexList.size(), 2);
|
||||
Assert.assertEquals(jobEdgeList.size(), 1);
|
||||
|
||||
JobEdge jobEdge = jobEdgeList.get(0);
|
||||
Assert.assertEquals(jobEdge.getPartition().getClass(), ForwardPartition.class);
|
||||
|
||||
JobVertex sinkVertex = jobVertexList.get(1);
|
||||
JobVertex sourceVertex = jobVertexList.get(0);
|
||||
Assert.assertEquals(sinkVertex.getVertexType(), VertexType.SINK);
|
||||
Assert.assertEquals(sourceVertex.getVertexType(), VertexType.SOURCE);
|
||||
}
|
||||
|
||||
public JobGraph buildDataSyncJobGraph() {
|
||||
StreamingContext streamingContext = StreamingContext.buildContext();
|
||||
DataStream<String> dataStream =
|
||||
DataStreamSource.fromCollection(streamingContext, Lists.newArrayList("a", "b", "c"));
|
||||
StreamSink streamSink = dataStream.sink(x -> LOG.info(x));
|
||||
JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(Lists.newArrayList(streamSink));
|
||||
|
||||
JobGraph jobGraph = jobGraphBuilder.build();
|
||||
return jobGraph;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testKeyByJobGraph() {
|
||||
JobGraph jobGraph = buildKeyByJobGraph();
|
||||
List<JobVertex> jobVertexList = jobGraph.getJobVertices();
|
||||
List<JobEdge> jobEdgeList = jobGraph.getJobEdges();
|
||||
|
||||
Assert.assertEquals(jobVertexList.size(), 3);
|
||||
Assert.assertEquals(jobEdgeList.size(), 2);
|
||||
|
||||
JobVertex source = jobVertexList.get(0);
|
||||
JobVertex map = jobVertexList.get(1);
|
||||
JobVertex sink = jobVertexList.get(2);
|
||||
|
||||
Assert.assertEquals(source.getVertexType(), VertexType.SOURCE);
|
||||
Assert.assertEquals(map.getVertexType(), VertexType.TRANSFORMATION);
|
||||
Assert.assertEquals(sink.getVertexType(), VertexType.SINK);
|
||||
|
||||
JobEdge keyBy2Sink = jobEdgeList.get(0);
|
||||
JobEdge source2KeyBy = jobEdgeList.get(1);
|
||||
|
||||
Assert.assertEquals(keyBy2Sink.getPartition().getClass(), KeyPartition.class);
|
||||
Assert.assertEquals(source2KeyBy.getPartition().getClass(), ForwardPartition.class);
|
||||
}
|
||||
|
||||
public JobGraph buildKeyByJobGraph() {
|
||||
StreamingContext streamingContext = StreamingContext.buildContext();
|
||||
DataStream<String> dataStream =
|
||||
DataStreamSource.fromCollection(streamingContext, Lists.newArrayList("1", "2", "3", "4"));
|
||||
StreamSink streamSink = dataStream.keyBy(x -> x).sink(x -> LOG.info(x));
|
||||
JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(Lists.newArrayList(streamSink));
|
||||
|
||||
JobGraph jobGraph = jobGraphBuilder.build();
|
||||
return jobGraph;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJobGraphViz() {
|
||||
JobGraph jobGraph = buildKeyByJobGraph();
|
||||
jobGraph.generateDigraph();
|
||||
String diGraph = jobGraph.getDigraph();
|
||||
LOG.info(diGraph);
|
||||
Assert.assertTrue(diGraph.contains("\"1-SourceOperatorImpl\" -> \"2-KeyByOperator\""));
|
||||
Assert.assertTrue(diGraph.contains("\"2-KeyByOperator\" -> \"3-SinkOperator\""));
|
||||
}
|
||||
}
|
|
@ -1,72 +0,0 @@
|
|||
package io.ray.streaming.jobgraph;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import io.ray.streaming.api.context.StreamingContext;
|
||||
import io.ray.streaming.api.stream.DataStream;
|
||||
import io.ray.streaming.api.stream.DataStreamSource;
|
||||
import io.ray.streaming.python.PythonFunction;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class JobGraphOptimizerTest {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(JobGraphOptimizerTest.class);
|
||||
|
||||
@Test
|
||||
public void testOptimize() {
|
||||
StreamingContext context = StreamingContext.buildContext();
|
||||
DataStream<Integer> source1 =
|
||||
DataStreamSource.fromCollection(context, Lists.newArrayList(1, 2, 3));
|
||||
DataStream<String> source2 =
|
||||
DataStreamSource.fromCollection(context, Lists.newArrayList("1", "2", "3"));
|
||||
DataStream<String> source3 =
|
||||
DataStreamSource.fromCollection(context, Lists.newArrayList("2", "3", "4"));
|
||||
source1
|
||||
.filter(x -> x > 1)
|
||||
.map(String::valueOf)
|
||||
.union(source2)
|
||||
.join(source3)
|
||||
.sink(x -> System.out.println("Sink " + x));
|
||||
JobGraph jobGraph = new JobGraphBuilder(context.getStreamSinks()).build();
|
||||
LOG.info("Digraph {}", jobGraph.generateDigraph());
|
||||
assertEquals(jobGraph.getJobVertices().size(), 8);
|
||||
|
||||
JobGraphOptimizer graphOptimizer = new JobGraphOptimizer(jobGraph);
|
||||
JobGraph optimizedJobGraph = graphOptimizer.optimize();
|
||||
optimizedJobGraph.printJobGraph();
|
||||
LOG.info("Optimized graph {}", optimizedJobGraph.generateDigraph());
|
||||
assertEquals(optimizedJobGraph.getJobVertices().size(), 5);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOptimizeHybridStream() {
|
||||
StreamingContext context = StreamingContext.buildContext();
|
||||
DataStream<Integer> source1 =
|
||||
DataStreamSource.fromCollection(context, Lists.newArrayList(1, 2, 3));
|
||||
DataStream<String> source2 =
|
||||
DataStreamSource.fromCollection(context, Lists.newArrayList("1", "2", "3"));
|
||||
source1
|
||||
.asPythonStream()
|
||||
.map(pyFunc(1))
|
||||
.filter(pyFunc(2))
|
||||
.union(source2.asPythonStream().filter(pyFunc(3)).map(pyFunc(4)))
|
||||
.asJavaStream()
|
||||
.sink(x -> System.out.println("Sink " + x));
|
||||
JobGraph jobGraph = new JobGraphBuilder(context.getStreamSinks()).build();
|
||||
LOG.info("Digraph {}", jobGraph.generateDigraph());
|
||||
assertEquals(jobGraph.getJobVertices().size(), 8);
|
||||
|
||||
JobGraphOptimizer graphOptimizer = new JobGraphOptimizer(jobGraph);
|
||||
JobGraph optimizedJobGraph = graphOptimizer.optimize();
|
||||
optimizedJobGraph.printJobGraph();
|
||||
LOG.info("Optimized graph {}", optimizedJobGraph.generateDigraph());
|
||||
assertEquals(optimizedJobGraph.getJobVertices().size(), 6);
|
||||
}
|
||||
|
||||
private PythonFunction pyFunc(int number) {
|
||||
return new PythonFunction("module", "func" + number);
|
||||
}
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
log4j.rootLogger=INFO, stdout
|
||||
# Direct log messages to stdout
|
||||
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.stdout.Target=System.out
|
||||
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
|
|
@ -1,3 +0,0 @@
|
|||
ray {
|
||||
run-mode = SINGLE_PROCESS
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue