[Java Worker] Support raylet on Java (#2479)

This commit is contained in:
Wang Qing 2018-08-02 08:52:49 +08:00 committed by Robert Nishihara
parent 9a479b3a63
commit e4f68ff8cf
41 changed files with 916 additions and 303 deletions

6
.gitignore vendored
View file

@ -150,6 +150,12 @@ build
.vscode/
*.iml
# Java
java/**/target
java/run
java/**/lib
java/**/.settings
java/**/.classpath
java/**/.project

View file

@ -85,8 +85,8 @@ public final class Ray extends Rpc {
if (cls.getConstructor() == null) {
System.err.println("class " + cls.getName()
+ " does not (actors must) have a constructor with no arguments");
RayLog.core.error("class " + cls.getName()
+ " does not (actors must) have a constructor with no arguments");
RayLog.core.error("class {} does not (actors must) have a constructor with no arguments",
cls.getName());
}
} catch (Exception e) {
System.exit(1);

View file

@ -19,7 +19,7 @@ public class RayConnector {
m.setAccessible(false);
return api;
} catch (ReflectiveOperationException | IllegalArgumentException | SecurityException e) {
RayLog.core.error("Load " + className + " class failed.", e);
RayLog.core.error("Load {} class failed.", className, e);
throw new Error("RayApi is not successfully initiated.");
}
}

View file

@ -19,4 +19,5 @@
<suppress checks="MethodTypeParameterNameCheck" files="Rpc.java"/>
<suppress checks="AbbreviationAsWordInNameCheck" files="UniqueID.java"/>
<suppress checks="TypeNameCheck" files="RayFunc_[0-9]_.+\.java"/>
<suppress checks=".*" files="[/\\]format[/\\]" />
</suppressions>

View file

@ -5,4 +5,6 @@ pkill -9 plasma_store
pkill -9 global_scheduler
pkill -9 redis-server
pkill -9 redis
pkill -9 raylet
ps aux | grep ray | awk '{system("kill "$2);}'
rm /tmp/raylet*

View file

@ -21,8 +21,9 @@ import org.ray.spi.PathConfig;
import org.ray.spi.RemoteFunctionManager;
import org.ray.spi.StateStoreProxy;
import org.ray.spi.impl.NativeRemoteFunctionManager;
import org.ray.spi.impl.NonRayletStateStoreProxyImpl;
import org.ray.spi.impl.RayletStateStoreProxyImpl;
import org.ray.spi.impl.RedisClient;
import org.ray.spi.impl.StateStoreProxyImpl;
import org.ray.util.FileUtil;
import org.ray.util.config.ConfigReader;
import org.ray.util.logger.RayLog;
@ -47,7 +48,7 @@ public class RayCli {
throw new RuntimeException("Ray head node start failed", e);
}
RayLog.core.info("Started Ray head node. Redis address: " + manager.info().redisAddress);
RayLog.core.info("Started Ray head node. Redis address: {}", manager.info().redisAddress);
return manager;
}
@ -74,7 +75,7 @@ public class RayCli {
// Init RayLog before using it.
RayLog.init(params.working_directory);
RayLog.core.info("Using IP address " + params.node_ip_address + " for this node.");
RayLog.core.info("Using IP address {} for this node.", params.node_ip_address);
RunManager manager;
if (cmdStart.head) {
manager = startRayHead(params, paths, config);
@ -152,7 +153,9 @@ public class RayCli {
KeyValueStoreLink kvStore = new RedisClient();
kvStore.setAddr(cmdSubmit.redisAddress);
StateStoreProxy stateStoreProxy = new StateStoreProxyImpl(kvStore);
StateStoreProxy stateStoreProxy = params.use_raylet
? new RayletStateStoreProxyImpl(kvStore)
: new NonRayletStateStoreProxyImpl(kvStore);
stateStoreProxy.initializeGlobalState();
RemoteFunctionManager functionManager = new NativeRemoteFunctionManager(kvStore);

View file

@ -117,7 +117,7 @@ public final class MethodId {
cls = Class
.forName(className, true, loader == null ? this.getClass().getClassLoader() : loader);
} catch (Throwable e) {
RayLog.core.error("Cannot load class " + className, e);
RayLog.core.error("Cannot load class {}", className, e);
return null;
}
@ -148,7 +148,7 @@ public final class MethodId {
if (methods.size() != 1) {
RayLog.core.error(
"Load method " + toString() + " failed as there are " + methods.size() + " definitions");
"Load method {} failed as there are {} definitions.", toString(), methods.size());
return null;
}

View file

@ -35,9 +35,9 @@ public class NetworkUtil {
return addr.getHostAddress();
}
}
RayLog.core.warn("you may need to correctly specify [ray.java] net_interface in config");
RayLog.core.warn("You need to correctly specify [ray.java] net_interface in config.");
} catch (Exception e) {
RayLog.core.error("Can't get our ip address, use 127.0.0.1 as default.", e);
RayLog.core.error("Can't get ip address, use 127.0.0.1 as default.", e);
}
return "127.0.0.1";

View file

@ -10,8 +10,8 @@ public class Sha1Digestor {
try {
return MessageDigest.getInstance("SHA1");
} catch (Exception e) {
RayLog.core.error("cannot get SHA1 MessageDigest", e);
throw new RuntimeException("cannot get SHA1 digest", e);
RayLog.core.error("Cannot get SHA1 MessageDigest", e);
throw new RuntimeException("Cannot get SHA1 digest", e);
}
});

View file

@ -63,7 +63,7 @@
<dependency>
<groupId>com.github.davidmoten</groupId>
<artifactId>flatbuffers-java</artifactId>
<version>1.7.0.1</version>
<version>1.9.0.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.beust/jcommander -->

View file

@ -47,12 +47,15 @@ declare -a nativeBinaries=(
"./src/plasma/plasma_manager"
"./src/local_scheduler/local_scheduler"
"./src/global_scheduler/global_scheduler"
"./src/ray/raylet/raylet"
"./src/ray/raylet/raylet_monitor"
)
declare -a nativeLibraries=(
"./src/common/redis_module/libray_redis_module.so"
"./src/local_scheduler/liblocal_scheduler_library_java.*"
"./src/plasma/libplasma_java.*"
"./src/ray/raylet/*lib.a"
)
declare -a javaBinaries=(

View file

@ -62,6 +62,8 @@ deploy = false
onebox_delay_seconds_before_run_app_logic = 0
use_raylet = false
; java class which main is served as the driver in a java worker
driver_class =
@ -123,6 +125,7 @@ store = %CONFIG_FILE_DIR%/../build/src/plasma/plasma_store
store_manager = %CONFIG_FILE_DIR%/../build/src/plasma/plasma_manager
local_scheduler = %CONFIG_FILE_DIR%/../build/src/local_scheduler/local_scheduler
global_scheduler = %CONFIG_FILE_DIR%/../build/src/global_scheduler/global_scheduler
raylet = %CONFIG_FILE_DIR%/../build/src/ray/raylet/raylet
python_dir = %CONFIG_FILE_DIR%/../build/
java_runtime_rewritten_jars_dir =
java_class_paths = ray.java.path.classes.source
@ -135,6 +138,7 @@ store = %CONFIG_FILE_DIR%/../build/src/plasma/plasma_store
store_manager = %CONFIG_FILE_DIR%/../build/src/plasma/plasma_manager
local_scheduler = %CONFIG_FILE_DIR%/../build/src/local_scheduler/local_scheduler
global_scheduler = %CONFIG_FILE_DIR%/../build/src/global_scheduler/global_scheduler
raylet = %CONFIG_FILE_DIR%/../build/src/ray/raylet/raylet
python_dir = %CONFIG_FILE_DIR%/../build/
java_runtime_rewritten_jars_dir =
java_class_paths = ray.java.path.classes.package
@ -147,6 +151,7 @@ store = %CONFIG_FILE_DIR%/native/bin/plasma_store
store_manager = %CONFIG_FILE_DIR%/native/bin/plasma_manager
local_scheduler = %CONFIG_FILE_DIR%/native/bin/local_scheduler
global_scheduler = %CONFIG_FILE_DIR%/native/bin/global_scheduler
raylet = %CONFIG_FILE_DIR%/native/bin/raylet
python_dir = %CONFIG_FILE_DIR%/python
java_runtime_rewritten_jars_dir = %CONFIG_FILE_DIR%/java/lib/
java_class_paths = ray.java.path.classes.deploy

View file

@ -1,5 +1,6 @@
package org.ray.core;
import com.google.common.collect.ImmutableList;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
@ -122,7 +123,13 @@ public abstract class RayRuntime implements RayApi {
functions = new LocalFunctionManager(remoteLoader);
localSchedulerProxy = new LocalSchedulerProxy(slink);
objectStoreProxy = new ObjectStoreProxy(plink);
if (!params.use_raylet) {
objectStoreProxy = new ObjectStoreProxy(plink);
} else {
objectStoreProxy = new ObjectStoreProxy(plink, slink);
}
worker = new Worker(localSchedulerProxy, functions);
}
@ -188,7 +195,9 @@ public abstract class RayRuntime implements RayApi {
public <T, TMT> void putRaw(UniqueID taskId, UniqueID objectId, T obj, TMT metadata) {
RayLog.core.info("Task " + taskId.toString() + " Object " + objectId.toString() + " put");
localSchedulerProxy.markTaskPutDependency(taskId, objectId);
if (!params.use_raylet) {
localSchedulerProxy.markTaskPutDependency(taskId, objectId);
}
objectStoreProxy.put(objectId, obj, metadata);
}
@ -274,22 +283,32 @@ public abstract class RayRuntime implements RayApi {
return worker.rpcWithReturnIndices(taskId, funcCls, lambda, returnCount, args);
}
private <T> List<T> doGet(List<UniqueID> objectIds, boolean isMetadata)
throws TaskExecutionException {
boolean wasBlocked = false;
UniqueID taskId = getCurrentTaskId();
try {
int numObjectIds = objectIds.size();
// Do an initial fetch for remote objects.
dividedFetch(objectIds);
List<List<UniqueID>> fetchBatches =
splitIntoBatches(objectIds, params.worker_fetch_request_size);
for (List<UniqueID> batch : fetchBatches) {
if (!params.use_raylet) {
objectStoreProxy.fetch(batch);
} else {
localSchedulerProxy.reconstructObjects(batch, true);
}
}
// Get the objects. We initially try to get the objects immediately.
List<Pair<T, GetStatus>> ret = objectStoreProxy
.get(objectIds, params.default_first_check_timeout_ms, isMetadata);
assert ret.size() == numObjectIds;
// mapping the object IDs that we haven't gotten yet to their original index in objectIds
// Mapping the object IDs that we haven't gotten yet to their original index in objectIds.
Map<UniqueID, Integer> unreadys = new HashMap<>();
for (int i = 0; i < numObjectIds; i++) {
if (ret.get(i).getRight() != GetStatus.SUCCESS) {
@ -301,15 +320,22 @@ public abstract class RayRuntime implements RayApi {
// Try reconstructing any objects we haven't gotten yet. Try to get them
// until at least PlasmaLink.GET_TIMEOUT_MS milliseconds passes, then repeat.
while (unreadys.size() > 0) {
for (UniqueID id : unreadys.keySet()) {
localSchedulerProxy.reconstructObject(id);
}
// Do another fetch for objects that aren't available locally yet, in case
// they were evicted since the last fetch.
List<UniqueID> unreadyList = new ArrayList<>(unreadys.keySet());
List<List<UniqueID>> reconstructBatches =
splitIntoBatches(unreadyList, params.worker_fetch_request_size);
dividedFetch(unreadyList);
for (List<UniqueID> batch : reconstructBatches) {
if (!params.use_raylet) {
for (UniqueID objectId : batch) {
localSchedulerProxy.reconstructObject(objectId, false);
}
// Do another fetch for objects that aren't available locally yet, in case
// they were evicted since the last fetch.
objectStoreProxy.fetch(batch);
} else {
localSchedulerProxy.reconstructObjects(batch, false);
}
}
List<Pair<T, GetStatus>> results = objectStoreProxy
.get(unreadyList, params.default_get_check_interval_ms, isMetadata);
@ -329,9 +355,11 @@ public abstract class RayRuntime implements RayApi {
RayLog.core
.debug("Task " + taskId + " Objects " + Arrays.toString(objectIds.toArray()) + " get");
List<T> finalRet = new ArrayList<>();
for (Pair<T, GetStatus> value : ret) {
finalRet.add(value.getLeft());
}
return finalRet;
} catch (TaskExecutionException e) {
RayLog.core.error("Task " + taskId + " Objects " + Arrays.toString(objectIds.toArray())
@ -344,68 +372,30 @@ public abstract class RayRuntime implements RayApi {
localSchedulerProxy.notifyUnblocked();
}
}
}
private <T> T doGet(UniqueID objectId, boolean isMetadata) throws TaskExecutionException {
ImmutableList<UniqueID> objectIds = ImmutableList.of(objectId);
List<T> results = doGet(objectIds, isMetadata);
boolean wasBlocked = false;
UniqueID taskId = getCurrentTaskId();
try {
// Do an initial fetch.
objectStoreProxy.fetch(objectId);
// Get the object. We initially try to get the object immediately.
Pair<T, GetStatus> ret = objectStoreProxy
.get(objectId, params.default_first_check_timeout_ms, isMetadata);
wasBlocked = (ret.getRight() != GetStatus.SUCCESS);
// Try reconstructing the object. Try to get it until at least PlasmaLink.GET_TIMEOUT_MS
// milliseconds passes, then repeat.
while (ret.getRight() != GetStatus.SUCCESS) {
RayLog.core.warn(
"Task " + taskId + " Object " + objectId.toString() + " get failed, reconstruct ...");
localSchedulerProxy.reconstructObject(objectId);
// Do another fetch
objectStoreProxy.fetch(objectId);
ret = objectStoreProxy.get(objectId, params.default_get_check_interval_ms,
isMetadata);//check the result every 5s, but it will return once available
}
RayLog.core.debug(
"Task " + taskId + " Object " + objectId.toString() + " get" + ", the result " + ret
.getLeft());
return ret.getLeft();
} catch (TaskExecutionException e) {
RayLog.core
.error("Task " + taskId + " Object " + objectId.toString() + " get with Exception", e);
throw e;
} finally {
// If the object was not able to get locally, let the local scheduler
// know that we're now unblocked.
if (wasBlocked) {
localSchedulerProxy.notifyUnblocked();
}
}
assert results.size() == 1;
return results.get(0);
}
// We divide the fetch into smaller fetches so as to not block the manager
// for a prolonged period of time in a single call.
private void dividedFetch(List<UniqueID> objectIds) {
int fetchSize = objectStoreProxy.getFetchSize();
private List<List<UniqueID>> splitIntoBatches(List<UniqueID> objectIds, int batchSize) {
List<List<UniqueID>> batches = new ArrayList<>();
int objectsSize = objectIds.size();
int numObjectIds = objectIds.size();
for (int i = 0; i < numObjectIds; i += fetchSize) {
int endIndex = i + fetchSize;
if (endIndex < numObjectIds) {
objectStoreProxy.fetch(objectIds.subList(i, endIndex));
} else {
objectStoreProxy.fetch(objectIds.subList(i, numObjectIds));
}
for (int i = 0; i < objectsSize; i += batchSize) {
int endIndex = i + batchSize;
List<UniqueID> batchIds = (endIndex < objectsSize)
? objectIds.subList(i, endIndex)
: objectIds.subList(i, objectsSize);
batches.add(batchIds);
}
return batches;
}
/**

View file

@ -112,6 +112,18 @@ public class RayParameters {
@AConfig(comment = "delay seconds under onebox before app logic for debugging")
public int onebox_delay_seconds_before_run_app_logic = 0;
@AConfig(comment = "whether to use raylet")
public boolean use_raylet = false;
@AConfig(comment = "raylet socket name (e.g., /tmp/raylet1111")
public String raylet_socket_name = "";
@AConfig(comment = "raylet rpc listen port")
public int raylet_port = 35567;
@AConfig(comment = "worker fetch request size")
public int worker_fetch_request_size = 10000;
public RayParameters(ConfigReader config) {
if (null != config) {
String networkInterface = config.getStringValue("ray.java", "network_interface", null,

View file

@ -1,5 +1,6 @@
package org.ray.spi;
import java.util.List;
import org.ray.api.UniqueID;
import org.ray.spi.model.TaskSpec;
@ -14,7 +15,11 @@ public interface LocalSchedulerLink {
void markTaskPutDependency(UniqueID taskId, UniqueID objectId);
void reconstructObject(UniqueID objectId);
void reconstructObject(UniqueID objectId, boolean fetchOnly);
void reconstructObjects(List<UniqueID> objectIds, boolean fetchOnly);
void notifyUnblocked();
List<byte[]> wait(byte[][] objectIds, int timeoutMs, int numReturns);
}

View file

@ -1,13 +1,17 @@
package org.ray.spi;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.ray.api.RayList;
import org.ray.api.RayMap;
import org.ray.api.RayObject;
import org.ray.api.RayObjects;
import org.ray.api.UniqueID;
import org.ray.api.WaitResult;
import org.ray.core.ArgumentsBuilder;
import org.ray.core.UniqueIdHelper;
import org.ray.core.WorkerContext;
@ -124,11 +128,44 @@ public class LocalSchedulerProxy {
scheduler.markTaskPutDependency(taskId, objectId);
}
public void reconstructObject(UniqueID objectId) {
scheduler.reconstructObject(objectId);
public void reconstructObject(UniqueID objectId, boolean fetchOnly) {
scheduler.reconstructObject(objectId, fetchOnly);
}
public void reconstructObjects(List<UniqueID> objectIds, boolean fetchOnly) {
scheduler.reconstructObjects(objectIds, fetchOnly);
}
public void notifyUnblocked() {
scheduler.notifyUnblocked();
}
private static byte[][] getIdBytes(List<UniqueID> objectIds) {
int size = objectIds.size();
byte[][] ids = new byte[size][];
for (int i = 0; i < size; i++) {
ids[i] = objectIds.get(i).getBytes();
}
return ids;
}
public <T> WaitResult<T> wait(RayList<T> waitfor, int numReturns, int timeout) {
List<UniqueID> ids = new ArrayList<>();
for (RayObject<T> obj : waitfor.Objects()) {
ids.add(obj.getId());
}
List<byte[]> readys = scheduler.wait(getIdBytes(ids), timeout, numReturns);
RayList<T> readyObjs = new RayList<>();
RayList<T> remainObjs = new RayList<>();
for (RayObject<T> obj : waitfor.Objects()) {
if (readys.contains(obj.getId().getBytes())) {
readyObjs.add(obj);
} else {
remainObjs.add(obj);
}
}
return new WaitResult<>(readyObjs, remainObjs);
}
}

View file

@ -10,6 +10,7 @@ import org.ray.api.UniqueID;
import org.ray.api.WaitResult;
import org.ray.core.Serializer;
import org.ray.core.WorkerContext;
import org.ray.spi.LocalSchedulerLink;
import org.ray.util.exception.TaskExecutionException;
/**
@ -19,12 +20,19 @@ import org.ray.util.exception.TaskExecutionException;
public class ObjectStoreProxy {
private final ObjectStoreLink store;
private final LocalSchedulerLink localSchedulerLink;
private final int getTimeoutMs = 1000;
public ObjectStoreProxy(ObjectStoreLink store) {
this.store = store;
this.localSchedulerLink = null;
}
public ObjectStoreProxy(ObjectStoreLink store, LocalSchedulerLink localSchedulerLink) {
this.store = store;
this.localSchedulerLink = localSchedulerLink;
}
public <T> Pair<T, GetStatus> get(UniqueID objectId, boolean isMetadata)
throws TaskExecutionException {
return get(objectId, getTimeoutMs, isMetadata);
@ -88,7 +96,12 @@ public class ObjectStoreProxy {
for (RayObject<T> obj : waitfor.Objects()) {
ids.add(obj.getId());
}
List<byte[]> readys = store.wait(getIdBytes(ids), timeout, numReturns);
List<byte[]> readys;
if (localSchedulerLink == null) {
readys = store.wait(getIdBytes(ids), timeout, numReturns);
} else {
readys = localSchedulerLink.wait(getIdBytes(ids), timeout, numReturns);
}
RayList<T> readyObjs = new RayList<>();
RayList<T> remainObjs = new RayList<>();
@ -103,19 +116,14 @@ public class ObjectStoreProxy {
return new WaitResult<>(readyObjs, remainObjs);
}
public void fetch(UniqueID objectId) {
store.fetch(objectId.getBytes());
}
public void fetch(List<UniqueID> objectIds) {
store.fetch(getIdBytes(objectIds));
if (localSchedulerLink == null) {
store.fetch(getIdBytes(objectIds));
} else {
localSchedulerLink.reconstructObjects(objectIds, true);
}
}
public int getFetchSize() {
return 10000;
}
public enum GetStatus {
SUCCESS, FAILED
}

View file

@ -37,6 +37,9 @@ public class PathConfig {
@AConfig(comment = "path to global scheduler")
public String global_scheduler;
@AConfig(comment = "path to raylet")
public String raylet;
@AConfig(comment = "path to python directory")
public String python_dir;

View file

@ -8,9 +8,11 @@ public class AddressInfo {
public String managerName;
public String storeName;
public String schedulerName;
public String rayletSocketName;
public int managerPort;
public int workerCount;
public String managerRpcAddr;
public String storeRpcAddr;
public String schedulerRpcAddr;
public String rayletRpcAddr;
}

View file

@ -1,5 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

View file

@ -1,5 +1,6 @@
package org.ray.spi.impl;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.ray.api.UniqueID;
@ -74,7 +75,12 @@ public class MockLocalScheduler implements LocalSchedulerLink {
}
@Override
public void reconstructObject(UniqueID objectId) {
public void reconstructObject(UniqueID objectId, boolean fetchOnly) {
}
@Override
public void reconstructObjects(List<UniqueID> objectIds, boolean fetchOnly) {
}
@ -82,4 +88,9 @@ public class MockLocalScheduler implements LocalSchedulerLink {
public void notifyUnblocked() {
}
@Override
public List<byte[]> wait(byte[][] objectIds, int timeoutMs, int numReturns) {
return store.wait(objectIds, timeoutMs, numReturns);
}
}

View file

@ -25,8 +25,9 @@ import org.ray.spi.RemoteFunctionManager;
import org.ray.spi.StateStoreProxy;
import org.ray.spi.impl.DefaultLocalSchedulerClient;
import org.ray.spi.impl.NativeRemoteFunctionManager;
import org.ray.spi.impl.NonRayletStateStoreProxyImpl;
import org.ray.spi.impl.RayletStateStoreProxyImpl;
import org.ray.spi.impl.RedisClient;
import org.ray.spi.impl.StateStoreProxyImpl;
import org.ray.spi.model.AddressInfo;
import org.ray.util.exception.TaskExecutionException;
import org.ray.util.logger.RayLog;
@ -62,14 +63,19 @@ public class RayNativeRuntime extends RayRuntime {
throw new Error("Redis address must be configured under Worker mode.");
}
startOnebox(params, pathConfig);
initStateStore(params.redis_address);
initStateStore(params.redis_address, params.use_raylet);
} else {
initStateStore(params.redis_address);
initStateStore(params.redis_address, params.use_raylet);
if (!isWorker) {
List<AddressInfo> nodes = stateStoreProxy.getAddressInfo(params.node_ip_address, 5);
List<AddressInfo> nodes = stateStoreProxy.getAddressInfo(
params.node_ip_address, params.redis_address, 5);
params.object_store_name = nodes.get(0).storeName;
params.object_store_manager_name = nodes.get(0).managerName;
params.local_scheduler_name = nodes.get(0).schedulerName;
if (!params.use_raylet) {
params.object_store_manager_name = nodes.get(0).managerName;
params.local_scheduler_name = nodes.get(0).schedulerName;
} else {
params.raylet_socket_name = nodes.get(0).rayletSocketName;
}
}
}
@ -101,23 +107,45 @@ public class RayNativeRuntime extends RayRuntime {
.getIntegerValue("ray", "plasma_default_release_delay", 0,
"how many release requests should be delayed in plasma client");
ObjectStoreLink plink = new PlasmaClient(params.object_store_name, params
.object_store_manager_name, releaseDelay);
if (!params.use_raylet) {
ObjectStoreLink plink = new PlasmaClient(params.object_store_name,
params.object_store_manager_name, releaseDelay);
LocalSchedulerLink slink = new DefaultLocalSchedulerClient(
params.local_scheduler_name,
WorkerContext.currentWorkerId(),
UniqueID.nil,
isWorker,
WorkerContext.currentTask().taskId,
0
);
LocalSchedulerLink slink = new DefaultLocalSchedulerClient(
params.local_scheduler_name,
WorkerContext.currentWorkerId(),
UniqueID.nil,
isWorker,
WorkerContext.currentTask().taskId,
0,
false
);
init(slink, plink, funcMgr, pathConfig);
init(slink, plink, funcMgr, pathConfig);
// register
registerWorker(isWorker, params.node_ip_address, params.object_store_name,
params.object_store_manager_name, params.local_scheduler_name);
// register
registerWorker(isWorker, params.node_ip_address, params.object_store_name,
params.object_store_manager_name, params.local_scheduler_name);
} else {
ObjectStoreLink plink = new PlasmaClient(params.object_store_name, "", releaseDelay);
LocalSchedulerLink slink = new DefaultLocalSchedulerClient(
params.raylet_socket_name,
WorkerContext.currentWorkerId(),
UniqueID.nil,
isWorker,
WorkerContext.currentTask().taskId,
0,
true
);
init(slink, plink, funcMgr, pathConfig);
// register
registerWorker(isWorker, params.node_ip_address, params.object_store_name,
params.raylet_socket_name);
}
}
RayLog.core.info("RayNativeRuntime start with "
@ -152,19 +180,44 @@ public class RayNativeRuntime extends RayRuntime {
params.object_store_name = manager.info().localStores.get(0).storeName;
params.object_store_manager_name = manager.info().localStores.get(0).managerName;
params.local_scheduler_name = manager.info().localStores.get(0).schedulerName;
params.raylet_socket_name = manager.info().localStores.get(0).rayletSocketName;
//params.node_ip_address = NetworkUtil.getIpAddress();
}
private void initStateStore(String redisAddress) throws Exception {
private void initStateStore(String redisAddress, boolean useRaylet) throws Exception {
kvStore = new RedisClient();
kvStore.setAddr(redisAddress);
stateStoreProxy = new StateStoreProxyImpl(kvStore);
stateStoreProxy = useRaylet
? new RayletStateStoreProxyImpl(kvStore)
: new NonRayletStateStoreProxyImpl(kvStore);
//stateStoreProxy.setStore(kvStore);
stateStoreProxy.initializeGlobalState();
}
private void registerWorker(boolean isWorker, String nodeIpAddress, String storeName,
String managerName, String schedulerName) {
String rayletSocketName) {
Map<String, String> workerInfo = new HashMap<>();
String workerId = new String(WorkerContext.currentWorkerId().getBytes());
if (!isWorker) {
workerInfo.put("node_ip_address", nodeIpAddress);
workerInfo.put("driver_id", workerId);
workerInfo.put("start_time", String.valueOf(System.currentTimeMillis()));
workerInfo.put("plasma_store_socket", storeName);
workerInfo.put("raylet_socket", rayletSocketName);
workerInfo.put("name", System.getProperty("user.dir"));
//TODO: worker.redis_client.hmset(b"Drivers:" + worker.workerId, driver_info)
kvStore.hmset("Drivers:" + workerId, workerInfo);
} else {
workerInfo.put("node_ip_address", nodeIpAddress);
workerInfo.put("plasma_store_socket", storeName);
workerInfo.put("raylet_socket", rayletSocketName);
//TODO: b"Workers:" + worker.workerId,
kvStore.hmset("Workers:" + workerId, workerInfo);
}
}
private void registerWorker(boolean isWorker, String nodeIpAddress, String storeName,
String managerName, String schedulerName) {
Map<String, String> workerInfo = new HashMap<>();
String workerId = new String(WorkerContext.currentWorkerId().getBytes());
if (!isWorker) {

View file

@ -0,0 +1,79 @@
package org.ray.format.gcs;
// automatically generated by the FlatBuffers compiler, do not modify
import java.nio.*;
import java.lang.*;
import com.google.flatbuffers.*;
@SuppressWarnings("unused")
public final class ClientTableData extends Table {
public static ClientTableData getRootAsClientTableData(ByteBuffer _bb) { return getRootAsClientTableData(_bb, new ClientTableData()); }
public static ClientTableData getRootAsClientTableData(ByteBuffer _bb, ClientTableData obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
public void __init(int _i, ByteBuffer _bb) { bb_pos = _i; bb = _bb; }
public ClientTableData __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
public String clientId() { int o = __offset(4); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer clientIdAsByteBuffer() { return __vector_as_bytebuffer(4, 1); }
public ByteBuffer clientIdInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 4, 1); }
public String nodeManagerAddress() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer nodeManagerAddressAsByteBuffer() { return __vector_as_bytebuffer(6, 1); }
public ByteBuffer nodeManagerAddressInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); }
public String rayletSocketName() { int o = __offset(8); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer rayletSocketNameAsByteBuffer() { return __vector_as_bytebuffer(8, 1); }
public ByteBuffer rayletSocketNameInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 8, 1); }
public String objectStoreSocketName() { int o = __offset(10); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer objectStoreSocketNameAsByteBuffer() { return __vector_as_bytebuffer(10, 1); }
public ByteBuffer objectStoreSocketNameInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 10, 1); }
public int nodeManagerPort() { int o = __offset(12); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
public int objectManagerPort() { int o = __offset(14); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
public boolean isInsertion() { int o = __offset(16); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
public String resourcesTotalLabel(int j) { int o = __offset(18); return o != 0 ? __string(__vector(o) + j * 4) : null; }
public int resourcesTotalLabelLength() { int o = __offset(18); return o != 0 ? __vector_len(o) : 0; }
public double resourcesTotalCapacity(int j) { int o = __offset(20); return o != 0 ? bb.getDouble(__vector(o) + j * 8) : 0; }
public int resourcesTotalCapacityLength() { int o = __offset(20); return o != 0 ? __vector_len(o) : 0; }
public ByteBuffer resourcesTotalCapacityAsByteBuffer() { return __vector_as_bytebuffer(20, 8); }
public ByteBuffer resourcesTotalCapacityInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 20, 8); }
public static int createClientTableData(FlatBufferBuilder builder,
int client_idOffset,
int node_manager_addressOffset,
int raylet_socket_nameOffset,
int object_store_socket_nameOffset,
int node_manager_port,
int object_manager_port,
boolean is_insertion,
int resources_total_labelOffset,
int resources_total_capacityOffset) {
builder.startObject(9);
ClientTableData.addResourcesTotalCapacity(builder, resources_total_capacityOffset);
ClientTableData.addResourcesTotalLabel(builder, resources_total_labelOffset);
ClientTableData.addObjectManagerPort(builder, object_manager_port);
ClientTableData.addNodeManagerPort(builder, node_manager_port);
ClientTableData.addObjectStoreSocketName(builder, object_store_socket_nameOffset);
ClientTableData.addRayletSocketName(builder, raylet_socket_nameOffset);
ClientTableData.addNodeManagerAddress(builder, node_manager_addressOffset);
ClientTableData.addClientId(builder, client_idOffset);
ClientTableData.addIsInsertion(builder, is_insertion);
return ClientTableData.endClientTableData(builder);
}
public static void startClientTableData(FlatBufferBuilder builder) { builder.startObject(9); }
public static void addClientId(FlatBufferBuilder builder, int clientIdOffset) { builder.addOffset(0, clientIdOffset, 0); }
public static void addNodeManagerAddress(FlatBufferBuilder builder, int nodeManagerAddressOffset) { builder.addOffset(1, nodeManagerAddressOffset, 0); }
public static void addRayletSocketName(FlatBufferBuilder builder, int rayletSocketNameOffset) { builder.addOffset(2, rayletSocketNameOffset, 0); }
public static void addObjectStoreSocketName(FlatBufferBuilder builder, int objectStoreSocketNameOffset) { builder.addOffset(3, objectStoreSocketNameOffset, 0); }
public static void addNodeManagerPort(FlatBufferBuilder builder, int nodeManagerPort) { builder.addInt(4, nodeManagerPort, 0); }
public static void addObjectManagerPort(FlatBufferBuilder builder, int objectManagerPort) { builder.addInt(5, objectManagerPort, 0); }
public static void addIsInsertion(FlatBufferBuilder builder, boolean isInsertion) { builder.addBoolean(6, isInsertion, false); }
public static void addResourcesTotalLabel(FlatBufferBuilder builder, int resourcesTotalLabelOffset) { builder.addOffset(7, resourcesTotalLabelOffset, 0); }
public static int createResourcesTotalLabelVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
public static void startResourcesTotalLabelVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
public static void addResourcesTotalCapacity(FlatBufferBuilder builder, int resourcesTotalCapacityOffset) { builder.addOffset(8, resourcesTotalCapacityOffset, 0); }
public static int createResourcesTotalCapacityVector(FlatBufferBuilder builder, double[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addDouble(data[i]); return builder.endVector(); }
public static void startResourcesTotalCapacityVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); }
public static int endClientTableData(FlatBufferBuilder builder) {
int o = builder.endObject();
return o;
}
}

View file

@ -35,7 +35,7 @@ public class RunInfo {
public enum ProcessType {
PT_WORKER, PT_LOCAL_SCHEDULER, PT_PLASMA_MANAGER, PT_PLASMA_STORE,
PT_GLOBAL_SCHEDULER, PT_REDIS_SERVER, PT_WEB_UI,
PT_GLOBAL_SCHEDULER, PT_REDIS_SERVER, PT_WEB_UI, PT_RAYLET,
PT_DRIVER
}
}

View file

@ -48,7 +48,7 @@ public class RunManager {
private static boolean killProcess(Process p) {
if (p.isAlive()) {
p.destroyForcibly();
p.destroy();
return true;
} else {
return false;
@ -307,7 +307,7 @@ public class RunManager {
redisClient.close();
// start global scheduler
if (params.include_global_scheduler) {
if (params.include_global_scheduler && !params.use_raylet) {
startGlobalScheduler(params.working_directory + "/globalScheduler",
params.redis_address, params.node_ip_address, params.redirect, params.cleanup);
}
@ -340,49 +340,70 @@ public class RunManager {
}
}
// start object stores
for (int i = 0; i < params.num_local_schedulers; i++) {
AddressInfo info = new AddressInfo();
// store
startObjectStore(i, info, params.working_directory + "/store",
AddressInfo info = new AddressInfo();
if (params.use_raylet) {
// Start object store
int rpcPort = params.object_store_rpc_port;
String storeName = "/tmp/plasma_store" + rpcPort;
startObjectStore(0, info, params.working_directory + "/store",
params.redis_address, params.node_ip_address, params.redirect, params.cleanup);
// store manager
startObjectManager(i, info,
params.working_directory + "/storeManager", params.redis_address,
params.node_ip_address, params.redirect, params.cleanup);
//Start raylet
startRaylet(storeName, info, params.num_cpus[0],params.num_gpus[0],
params.num_workers,params.working_directory + "/raylet",
params.redis_address, params.node_ip_address, params.redirect, params.cleanup);
runInfo.localStores.add(info);
}
} else {
for (int i = 0; i < params.num_local_schedulers; i++) {
// Start object stores
startObjectStore(i, info, params.working_directory + "/store",
params.redis_address, params.node_ip_address, params.redirect, params.cleanup);
// start local scheduler
for (int i = 0; i < params.num_local_schedulers; i++) {
int workerCount = 0;
startObjectManager(i, info,
params.working_directory + "/storeManager", params.redis_address,
params.node_ip_address, params.redirect, params.cleanup);
if (params.start_workers_from_local_scheduler) {
workerCount = localNumWorkers[i];
localNumWorkers[i] = 0;
// Start local scheduler
int workerCount = 0;
if (params.start_workers_from_local_scheduler) {
workerCount = localNumWorkers[i];
localNumWorkers[i] = 0;
}
startLocalScheduler(i, info,
params.num_cpus[i], params.num_gpus[i], workerCount,
params.working_directory + "/localsc", params.redis_address,
params.node_ip_address, params.redirect, params.cleanup);
runInfo.localStores.add(info);
}
startLocalScheduler(i, runInfo.localStores.get(i),
params.num_cpus[i], params.num_gpus[i], workerCount,
params.working_directory + "/localScheduler", params.redis_address,
params.node_ip_address, params.redirect, params.cleanup);
}
// start local workers
for (int i = 0; i < params.num_local_schedulers; i++) {
runInfo.localStores.get(i).workerCount = localNumWorkers[i];
for (int j = 0; j < localNumWorkers[i]; j++) {
startWorker(runInfo.localStores.get(i).storeName,
runInfo.localStores.get(i).managerName, runInfo.localStores.get(i).schedulerName,
params.working_directory + "/worker" + i + "." + j, params.redis_address,
params.node_ip_address, UniqueID.nil, "",
params.redirect, params.cleanup);
if (!params.use_raylet) {
for (int i = 0; i < params.num_local_schedulers; i++) {
AddressInfo localStores = runInfo.localStores.get(i);
localStores.workerCount = localNumWorkers[i];
for (int j = 0; j < localNumWorkers[i]; j++) {
startWorker(localStores.storeName, localStores.managerName, localStores.schedulerName,
params.working_directory + "/worker" + i + "." + j, params.redis_address,
params.node_ip_address, UniqueID.nil, "", params.redirect, params.cleanup);
}
}
}
HashSet<RunInfo.ProcessType> excludeTypes = new HashSet<>();
if (!params.use_raylet) {
excludeTypes.add(RunInfo.ProcessType.PT_RAYLET);
} else {
excludeTypes.add(RunInfo.ProcessType.PT_LOCAL_SCHEDULER);
excludeTypes.add(RunInfo.ProcessType.PT_GLOBAL_SCHEDULER);
excludeTypes.add(RunInfo.ProcessType.PT_PLASMA_MANAGER);
}
if (!checkAlive(excludeTypes)) {
cleanup(true);
throw new RuntimeException("Start Ray processes failed");
@ -622,8 +643,8 @@ public class RunManager {
cmd += " -m " + info.managerName;
String workerCmd = null;
workerCmd = buildWorkerCommand(true, info.storeName, info.managerName, name, UniqueID.nil,
"", workDir + rpcPort, ip, redisAddress);
workerCmd = buildWorkerCommand(true, info.storeName, info.managerName, name,
UniqueID.nil, "", workDir + rpcPort, ip, redisAddress);
cmd += " -w \"" + workerCmd + "\"";
if (redisAddress.length() > 0) {
@ -656,6 +677,82 @@ public class RunManager {
}
}
private void startRaylet(String storeName, AddressInfo info, int numCpus,
int numGpus, int numWorkers, String workDir,
String redisAddress, String ip, boolean redirect,
boolean cleanup) {
int rpcPort = params.raylet_port;
String rayletSocketName = "/tmp/raylet" + rpcPort;
String filePath = paths.raylet;
String workerCmd = null;
workerCmd = buildWorkerCommandRaylet(info.storeName, rayletSocketName, UniqueID.nil,
"", workDir + rpcPort, ip, redisAddress);
int sep = redisAddress.indexOf(':');
assert (sep != -1);
String gcsIp = redisAddress.substring(0, sep);
String gcsPort = redisAddress.substring(sep + 1);
String resourceArgument = "GPU," + numGpus + ",CPU," + numCpus;
String[] cmds = new String[]{filePath, rayletSocketName, storeName, ip, gcsIp,
gcsPort, "" + numWorkers, workerCmd, resourceArgument};
Process p = startProcess(cmds, null, RunInfo.ProcessType.PT_RAYLET,
workDir + rpcPort, redisAddress, ip, redirect, cleanup);
if (p != null && p.isAlive()) {
try {
TimeUnit.MILLISECONDS.sleep(100);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
if (p == null || !p.isAlive()) {
info.rayletSocketName = "";
info.rayletRpcAddr = "";
throw new RuntimeException("Failed to start raylet process.");
} else {
info.rayletSocketName = rayletSocketName;
info.rayletRpcAddr = ip + ":" + rpcPort;
}
}
private String buildWorkerCommandRaylet(String storeName, String rayletSocketName,
UniqueID actorId, String actorClass, String workDir,
String ip, String redisAddress) {
String workerConfigs = "ray.java.start.object_store_name=" + storeName
+ ";ray.java.start.raylet_socket_name=" + rayletSocketName
+ ";ray.java.start.worker_mode=WORKER;ray.java.start.use_raylet=true";
workerConfigs += ";ray.java.start.deploy=" + params.deploy;
if (!actorId.equals(UniqueID.nil)) {
workerConfigs += ";ray.java.start.actor_id=" + actorId;
}
if (!actorClass.equals("")) {
workerConfigs += ";ray.java.start.driver_class=" + actorClass;
}
String jvmArgs = "";
jvmArgs += " -Dlogging.path=" + params.working_directory + "/logs/workers";
jvmArgs += " -Dlogging.file.name=core-*pid_suffix*";
return buildJavaProcessCommand(
RunInfo.ProcessType.PT_WORKER,
"org.ray.runner.worker.DefaultWorker",
"",
workerConfigs,
jvmArgs,
workDir,
ip,
redisAddress,
null
);
}
private String buildWorkerCommand(boolean isFromLocalScheduler, String storeName,
String storeManagerName, String localSchedulerName,
UniqueID actorId, String actorClass, String workDir, String

View file

@ -103,6 +103,15 @@ public interface KeyValueStoreLink {
*/
List<String> lrange(final String key, final long start, final long end);
/**
* Return the set of elements of the sorted set stored at the specified key.
* @param key The specified key you want to query.
* @param start The start index of the range.
* @param end The end index of the range.
* @return The set of elements you queried.
*/
Set<byte[]> zrange(byte[] key, long start, long end);
/**
* Rpush.
* @return Integer reply, specifically, the number of elements inside the list after the push
@ -123,4 +132,7 @@ public interface KeyValueStoreLink {
Long publish(byte[] channel, byte[] message);
Object getImpl();
byte[] sendCommand(String command, int commandType, byte[] objectId);
}

View file

@ -31,5 +31,7 @@ public interface StateStoreProxy {
* getAddressInfo.
* @return list of address information
*/
List<AddressInfo> getAddressInfo(final String nodeIpAddress, int numRetries);
List<AddressInfo> getAddressInfo(final String nodeIpAddress,
final String redisAddress,
int numRetries);
}

View file

@ -0,0 +1,124 @@
package org.ray.spi.impl;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.ray.spi.KeyValueStoreLink;
import org.ray.spi.StateStoreProxy;
import org.ray.spi.model.AddressInfo;
import org.ray.util.logger.RayLog;
/**
* Base class used to interface with the Ray control state.
*/
public abstract class BaseStateStoreProxyImpl implements StateStoreProxy {
public KeyValueStoreLink rayKvStore;
public ArrayList<KeyValueStoreLink> shardStoreList = new ArrayList<>();
public BaseStateStoreProxyImpl(KeyValueStoreLink rayKvStore) {
this.rayKvStore = rayKvStore;
}
@Override
public void setStore(KeyValueStoreLink rayKvStore) {
this.rayKvStore = rayKvStore;
}
@Override
public synchronized void initializeGlobalState() throws Exception {
String es;
checkConnected();
String s = rayKvStore.get("NumRedisShards", null);
if (s == null) {
throw new Exception("NumRedisShards not found in redis.");
}
int numRedisShards = Integer.parseInt(s);
if (numRedisShards < 1) {
es = String.format("Expected at least one Redis shard, found %d", numRedisShards);
throw new Exception(es);
}
List<String> ipAddressPorts = rayKvStore.lrange("RedisShards", 0, -1);
Set<String> distinctIpAddress = new HashSet<String>(ipAddressPorts);
if (distinctIpAddress.size() != numRedisShards) {
es = String.format("Expected %d Redis shard addresses, found2 %d.", numRedisShards,
distinctIpAddress.size());
throw new Exception(es);
}
shardStoreList.clear();
for (String ipPort : distinctIpAddress) {
shardStoreList.add(new RedisClient(ipPort));
}
}
public void checkConnected() throws Exception {
rayKvStore.checkConnected();
}
@Override
public synchronized Set<String> keys(final String pattern) {
Set<String> allKeys = new HashSet<>();
Set<String> tmpKey;
for (KeyValueStoreLink ashardStoreList : shardStoreList) {
tmpKey = ashardStoreList.keys(pattern);
allKeys.addAll(tmpKey);
}
return allKeys;
}
@Override
public List<AddressInfo> getAddressInfo(final String nodeIpAddress,
final String redisAddress,
int numRetries) {
int count = 0;
while (count < numRetries) {
try {
return doGetAddressInfo(nodeIpAddress, redisAddress);
} catch (Exception e) {
try {
RayLog.core.warn("Error occurred in BaseStateStoreProxyImpl getAddressInfo, "
+ (numRetries - count) + " retries remaining", e);
TimeUnit.MILLISECONDS.sleep(1000);
} catch (InterruptedException ie) {
RayLog.core.error("error at BaseStateStoreProxyImpl getAddressInfo", e);
throw new RuntimeException(e);
}
}
count++;
}
throw new RuntimeException("cannot get address info from state store");
}
/**
* Get address info of one node from primary redis.
* This method only tries to get address info once, without any retry.
*
* @param nodeIpAddress Usually local ip address.
* @param redisAddress The primary redis address.
* @return A list of SchedulerInfo which contains node manager or local scheduler address info.
* @throws Exception No redis client exception.
*/
protected abstract List<AddressInfo> doGetAddressInfo(final String nodeIpAddress,
final String redisAddress) throws Exception;
protected String charsetDecode(byte[] bs, String charset) throws UnsupportedEncodingException {
return new String(bs, charset);
}
protected byte[] charsetEncode(String str, String charset) throws UnsupportedEncodingException {
if (str != null) {
return str.getBytes(charset);
}
return null;
}
}

View file

@ -24,20 +24,44 @@ public class DefaultLocalSchedulerClient implements LocalSchedulerLink {
return bb;
});
private long client = 0;
boolean useRaylet = false;
public DefaultLocalSchedulerClient(String schedulerSockName, UniqueID clientId, UniqueID actorId,
boolean isWorker, UniqueID driverId, long numGpus) {
public DefaultLocalSchedulerClient(String schedulerSockName, UniqueID clientId,
UniqueID actorId, boolean isWorker, UniqueID driverId,
long numGpus, boolean useRaylet) {
client = _init(schedulerSockName, clientId.getBytes(), actorId.getBytes(), isWorker,
driverId.getBytes(), numGpus);
driverId.getBytes(), numGpus, useRaylet);
this.useRaylet = useRaylet;
}
private static native long _init(String localSchedulerSocket, byte[] workerId, byte[] actorId,
boolean isWorker, byte[] driverTaskId, long numGpus);
private static native long _init(String localSchedulerSocket, byte[] workerId,
byte[] actorId, boolean isWorker, byte[] driverTaskId,
long numGpus, boolean useRaylet);
private static native byte[] _computePutId(long client, byte[] taskId, int putIndex);
private static native void _task_done(long client);
private static native boolean[] _waitObject(long conn, byte[][] objectIds,
int numReturns, int timeout, boolean waitLocal);
@Override
public List<byte[]> wait(byte[][] objectIds, int timeoutMs, int numReturns) {
assert (useRaylet == true);
boolean[] readys = _waitObject(client, objectIds, numReturns, timeoutMs, false);
List<byte[]> ret = new ArrayList<>();
for (int i = 0; i < readys.length; i++) {
if (readys[i]) {
ret.add(objectIds[i]);
}
}
assert (ret.size() == readys.length);
return ret;
}
@Override
public void submitTask(TaskSpec task) {
ByteBuffer info = taskSpec2Info(task);
@ -45,12 +69,13 @@ public class DefaultLocalSchedulerClient implements LocalSchedulerLink {
if (!task.actorId.isNil()) {
a = task.cursorId.getBytes();
}
_submitTask(client, a, info, info.position(), info.remaining());
_submitTask(client, a, info, info.position(), info.remaining(), useRaylet);
}
@Override
public TaskSpec getTaskTodo() {
byte[] bytes = _getTaskTodo(client);
byte[] bytes = _getTaskTodo(client, useRaylet);
assert (null != bytes);
ByteBuffer bb = ByteBuffer.wrap(bytes);
return taskInfo2Spec(bb);
@ -62,8 +87,16 @@ public class DefaultLocalSchedulerClient implements LocalSchedulerLink {
}
@Override
public void reconstructObject(UniqueID objectId) {
_reconstruct_object(client, objectId.getBytes());
public void reconstructObject(UniqueID objectId, boolean fetchOnly) {
List<UniqueID> objects = new ArrayList<>();
objects.add(objectId);
_reconstruct_objects(client, getIdBytes(objects), fetchOnly);
}
@Override
public void reconstructObjects(List<UniqueID> objectIds, boolean fetchOnly) {
RayLog.core.info("reconstruct objects {}", objectIds);
_reconstruct_objects(client, getIdBytes(objectIds), fetchOnly);
}
@Override
@ -73,12 +106,13 @@ public class DefaultLocalSchedulerClient implements LocalSchedulerLink {
private static native void _notify_unblocked(long client);
private static native void _reconstruct_object(long client, byte[] objectId);
private static native void _reconstruct_objects(long client, byte[][] objectIds,
boolean fetchOnly);
private static native void _put_object(long client, byte[] taskId, byte[] objectId);
// return TaskInfo (in FlatBuffer)
private static native byte[] _getTaskTodo(long client);
private static native byte[] _getTaskTodo(long client, boolean useRaylet);
public static TaskSpec taskInfo2Spec(ByteBuffer bb) {
bb.order(ByteOrder.LITTLE_ENDIAN);
@ -162,7 +196,10 @@ public class DefaultLocalSchedulerClient implements LocalSchedulerLink {
idOffsets[k] = fbb.createString(task.args[i].ids.get(k).toByteBuffer());
}
objectIdOffset = fbb.createVectorOfTables(idOffsets);
} else {
objectIdOffset = fbb.createVectorOfTables(new int[0]);
}
if (task.args[i].data != null) {
dataOffset = fbb.createString(ByteBuffer.wrap(task.args[i].data));
}
@ -214,8 +251,17 @@ public class DefaultLocalSchedulerClient implements LocalSchedulerLink {
}
// task -> TaskInfo (with FlatBuffer)
private static native void _submitTask(long client, byte[] cursorId, /*Direct*/ByteBuffer task,
int pos, int sz);
protected static native void _submitTask(long client, byte[] cursorId, /*Direct*/ByteBuffer task,
int pos, int sz, boolean useRaylet);
private static byte[][] getIdBytes(List<UniqueID> objectIds) {
int size = objectIds.size();
byte[][] ids = new byte[size][];
for (int i = 0; i < size; i++) {
ids[i] = objectIds.get(i).getBytes();
}
return ids;
}
public void destroy() {
_destroy(client);

View file

@ -1,96 +1,18 @@
package org.ray.spi.impl;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.ray.spi.KeyValueStoreLink;
import org.ray.spi.StateStoreProxy;
import org.ray.spi.model.AddressInfo;
import org.ray.util.logger.RayLog;
/**
* A class used to interface with the Ray control state.
* A class used to interface with the Ray control state for non-raylet.
*/
public class StateStoreProxyImpl implements StateStoreProxy {
public KeyValueStoreLink rayKvStore;
public ArrayList<KeyValueStoreLink> shardStoreList = new ArrayList<>();
public StateStoreProxyImpl(KeyValueStoreLink rayKvStore) {
this.rayKvStore = rayKvStore;
}
public void setStore(KeyValueStoreLink rayKvStore) {
this.rayKvStore = rayKvStore;
}
public synchronized void initializeGlobalState() throws Exception {
String es;
checkConnected();
String s = rayKvStore.get("NumRedisShards", null);
if (s == null) {
throw new Exception("NumRedisShards not found in redis.");
}
int numRedisShards = Integer.parseInt(s);
if (numRedisShards < 1) {
es = String.format("Expected at least one Redis shard, found %d", numRedisShards);
throw new Exception(es);
}
List<String> ipAddressPorts = rayKvStore.lrange("RedisShards", 0, -1);
if (ipAddressPorts.size() != numRedisShards) {
es = String.format("Expected %d Redis shard addresses, found %d.", numRedisShards,
ipAddressPorts.size());
throw new Exception(es);
}
shardStoreList.clear();
for (String ipPort : ipAddressPorts) {
shardStoreList.add(new RedisClient(ipPort));
}
}
public void checkConnected() throws Exception {
rayKvStore.checkConnected();
}
public synchronized Set<String> keys(final String pattern) {
Set<String> allKeys = new HashSet<>();
Set<String> tmpKey;
for (KeyValueStoreLink ashardStoreList : shardStoreList) {
tmpKey = ashardStoreList.keys(pattern);
allKeys.addAll(tmpKey);
}
return allKeys;
}
public List<AddressInfo> getAddressInfo(final String nodeIpAddress, int numRetries) {
int count = 0;
while (count < numRetries) {
try {
return getAddressInfoHelper(nodeIpAddress);
} catch (Exception e) {
try {
RayLog.core.warn("Error occurred in StateStoreProxyImpl getAddressInfo, "
+ (numRetries - count) + " retries remaining", e);
TimeUnit.MILLISECONDS.sleep(1000);
} catch (InterruptedException ie) {
RayLog.core.error("error at StateStoreProxyImpl getAddressInfo", e);
throw new RuntimeException(e);
}
}
count++;
}
throw new RuntimeException("cannot get address info from state store");
public class NonRayletStateStoreProxyImpl extends BaseStateStoreProxyImpl {
public NonRayletStateStoreProxyImpl(KeyValueStoreLink rayKvStore) {
super(rayKvStore);
}
/*
@ -108,9 +30,11 @@ public class StateStoreProxyImpl implements StateStoreProxy {
* "manager_socket_name"(op)
* "local_scheduler_socket_name"(op)
*/
public List<AddressInfo> getAddressInfoHelper(final String nodeIpAddress) throws Exception {
@Override
public List<AddressInfo> doGetAddressInfo(final String nodeIpAddress,
final String redisAddress) throws Exception {
if (this.rayKvStore == null) {
throw new Exception("no redis client when use getAddressInfoHelper");
throw new Exception("no redis client when use doGetAddressInfo");
}
List<AddressInfo> schedulerInfo = new ArrayList<>();
@ -136,13 +60,13 @@ public class StateStoreProxyImpl implements StateStoreProxy {
} else if (!info.containsKey("client_type".getBytes())) {
throw new Exception("no client_type in any client");
}
if (charsetDecode(info.get("node_ip_address".getBytes()), "US-ASCII")
.equals(nodeIpAddress)) {
String clientType = charsetDecode(info.get("client_type".getBytes()), "US-ASCII");
if (clientType.equals("plasma_manager")) {
if ("plasma_manager".equals(clientType)) {
plasmaManager.add(info);
} else if (clientType.equals("local_scheduler")) {
} else if ("local_scheduler".equals(clientType)) {
localScheduler.add(info);
}
}
@ -157,9 +81,9 @@ public class StateStoreProxyImpl implements StateStoreProxy {
for (int i = 0; i < plasmaManager.size(); i++) {
AddressInfo si = new AddressInfo();
si.storeName = charsetDecode(plasmaManager.get(i).get("store_socket_name".getBytes()),
"US-ASCII");
"US-ASCII");
si.managerName = charsetDecode(plasmaManager.get(i).get("manager_socket_name".getBytes()),
"US-ASCII");
"US-ASCII");
byte[] rpc = plasmaManager.get(i).get("manager_rpc_name".getBytes());
if (rpc != null) {
@ -188,14 +112,4 @@ public class StateStoreProxyImpl implements StateStoreProxy {
return schedulerInfo;
}
private String charsetDecode(byte[] bs, String charset) throws UnsupportedEncodingException {
return new String(bs, charset);
}
private byte[] charsetEncode(String str, String charset) throws UnsupportedEncodingException {
if (str != null) {
return str.getBytes(charset);
}
return null;
}
}

View file

@ -0,0 +1,62 @@
package org.ray.spi.impl;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import org.ray.api.UniqueID;
import org.ray.format.gcs.ClientTableData;
import org.ray.spi.KeyValueStoreLink;
import org.ray.spi.model.AddressInfo;
import org.ray.util.NetworkUtil;
/**
* A class used to interface with the Ray control state for raylet.
*/
public class RayletStateStoreProxyImpl extends BaseStateStoreProxyImpl {
public RayletStateStoreProxyImpl(KeyValueStoreLink rayKvStore) {
super(rayKvStore);
}
@Override
public List<AddressInfo> doGetAddressInfo(final String nodeIpAddress,
final String redisAddress) throws Exception {
if (this.rayKvStore == null) {
throw new Exception("no redis client when use doGetAddressInfo");
}
List<AddressInfo> schedulerInfo = new ArrayList<>();
byte[] prefix = "CLIENT".getBytes();
byte[] postfix = UniqueID.genNil().getBytes();
byte[] clientKey = new byte[prefix.length + postfix.length];
System.arraycopy(prefix, 0, clientKey, 0, prefix.length);
System.arraycopy(postfix, 0, clientKey, prefix.length, postfix.length);
Set<byte[]> clients = rayKvStore.zrange(clientKey, 0, -1);
for (byte[] clientMessage : clients) {
ByteBuffer bb = ByteBuffer.wrap(clientMessage);
ClientTableData client = ClientTableData.getRootAsClientTableData(bb);
String clientNodeIpAddress = client.nodeManagerAddress();
String localIpAddress = NetworkUtil.getIpAddress(null);
String redisIpAddress = redisAddress.substring(0, redisAddress.indexOf(':'));
boolean headNodeAddress = "127.0.0.1".equals(clientNodeIpAddress)
&& Objects.equals(redisIpAddress, localIpAddress);
boolean notHeadNodeAddress = Objects.equals(clientNodeIpAddress, nodeIpAddress);
if (headNodeAddress || notHeadNodeAddress) {
AddressInfo si = new AddressInfo();
si.storeName = client.objectStoreSocketName();
si.rayletSocketName = client.rayletSocketName();
si.managerRpcAddr = client.nodeManagerAddress();
si.managerPort = client.nodeManagerPort();
schedulerInfo.add(si);
}
}
return schedulerInfo;
}
}

View file

@ -13,6 +13,7 @@ public class RedisClient implements KeyValueStoreLink {
private String redisAddress;
private JedisPool jedisPool;
private int handle = 0;
public RedisClient() {
}
@ -171,6 +172,13 @@ public class RedisClient implements KeyValueStoreLink {
}
}
@Override
public Set<byte[]> zrange(byte[] key, long start, long end) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.zrange(key, start, end);
}
}
@Override
public Long rpush(String key, String... strings) {
try (Jedis jedis = jedisPool.getResource()) {
@ -203,4 +211,20 @@ public class RedisClient implements KeyValueStoreLink {
public Object getImpl() {
return jedisPool;
}
@Override
public byte[] sendCommand(String command, int commandType, byte[] objectId) {
if (handle == 0) {
String[] ipPort = redisAddress.split(":");
handle = connect(ipPort[0], Integer.parseInt(ipPort[1]));
}
return execute_command(handle, command, commandType, objectId);
}
private static native int connect(String redisAddress, int port);
private static native void disconnect(int handle);
private static native byte[] execute_command(int handle,
String command, int commandType, byte[] objectId);
}

View file

@ -10,7 +10,17 @@ mvn clean install -Dmaven.test.skip
check_style=$(mvn checkstyle:check)
echo "${check_style}"
[[ ${check_style} =~ "BUILD FAILURE" ]] && exit 1
# test non-raylet
sed -i 's/^use_raylet.*$/use_raylet = false/g' $ROOT_DIR/../java/ray.config.ini
mvn_test=$(mvn test)
echo "${mvn_test}"
[[ ${mvn_test} =~ "BUILD SUCCESS" ]] || exit 1
popd
# test raylet
sed -i 's/^use_raylet.*$/use_raylet = true/g' $ROOT_DIR/../java/ray.config.ini
mvn_test=$(mvn test)
echo "${mvn_test}"
[[ ${mvn_test} =~ "BUILD SUCCESS" ]] || exit 1
popd

View file

@ -43,15 +43,15 @@ Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1init(JNIEnv *env,
jbyteArray actorId,
jboolean isWorker,
jbyteArray driverId,
jlong numGpus) {
jlong numGpus,
jboolean useRaylet) {
// native private static long _init(String localSchedulerSocket,
// byte[] workerId, byte[] actorId, boolean isWorker, long numGpus);
UniqueIdFromJByteArray worker_id(env, wid);
UniqueIdFromJByteArray driver_id(env, driverId);
const char *nativeString = env->GetStringUTFChars(sockName, JNI_FALSE);
bool use_raylet = false;
auto client = LocalSchedulerConnection_init(
nativeString, *worker_id.PID, isWorker, *driver_id.PID, use_raylet);
nativeString, *worker_id.PID, isWorker, *driver_id.PID, useRaylet);
env->ReleaseStringUTFChars(sockName, nativeString);
return reinterpret_cast<jlong>(client);
}
@ -69,21 +69,30 @@ Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1submitTask(
jbyteArray cursorId,
jobject buff,
jint pos,
jint sz) {
jint sz,
jboolean useRaylet) {
// task -> TaskInfo (with FlatBuffer)
// native private static void _submitTask(long client, /*Direct*/ByteBuffer
// task);
auto client = reinterpret_cast<LocalSchedulerConnection *>(c);
TaskSpec *task =
reinterpret_cast<char *>(env->GetDirectBufferAddress(buff)) + pos;
std::vector<ObjectID> execution_dependencies;
if (cursorId != nullptr) {
UniqueIdFromJByteArray cursor_id(env, cursorId);
execution_dependencies.push_back(*cursor_id.PID);
}
TaskExecutionSpec taskExecutionSpec =
TaskExecutionSpec(execution_dependencies, task, sz);
local_scheduler_submit(client, taskExecutionSpec);
if (!useRaylet) {
TaskSpec *task =
reinterpret_cast<char *>(env->GetDirectBufferAddress(buff)) + pos;
TaskExecutionSpec taskExecutionSpec =
TaskExecutionSpec(execution_dependencies, task, sz);
local_scheduler_submit(client, taskExecutionSpec);
} else {
auto data =
reinterpret_cast<char *>(env->GetDirectBufferAddress(buff)) + pos;
ray::raylet::TaskSpecification task_spec(std::string(data, sz));
local_scheduler_submit_raylet(client, execution_dependencies, task_spec);
}
}
/*
@ -92,15 +101,19 @@ Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1submitTask(
* Signature: (J)[B
*/
JNIEXPORT jbyteArray JNICALL
Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1getTaskTodo(JNIEnv *env,
jclass,
jlong c) {
Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1getTaskTodo(
JNIEnv *env,
jclass,
jlong c,
jboolean useRaylet) {
// native private static ByteBuffer _getTaskTodo(long client);
auto client = reinterpret_cast<LocalSchedulerConnection *>(c);
int64_t task_size = 0;
// TODO: handle actor failure later
TaskSpec *spec = local_scheduler_get_task(client, &task_size);
TaskSpec *spec = !useRaylet
? local_scheduler_get_task(client, &task_size)
: local_scheduler_get_task_raylet(client, &task_size);
jbyteArray result;
result = env->NewByteArray(task_size);
@ -178,20 +191,29 @@ Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1task_1done(JNIEnv *,
/*
* Class: org_ray_spi_impl_DefaultLocalSchedulerClient
* Method: _reconstruct_object
* Method: _reconstruct_objects
* Signature: (J[B)V
*/
JNIEXPORT void JNICALL
Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1reconstruct_1object(
Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1reconstruct_1objects(
JNIEnv *env,
jclass,
jlong c,
jbyteArray oid) {
// native private static void _reconstruct_object(long client, byte[]
// objectId);
UniqueIdFromJByteArray o(env, oid);
jobjectArray oids,
jboolean fetch_only) {
// native private static void _reconstruct_objects(long client, byte[][]
// objectIds, boolean fetchOnly);
std::vector<ObjectID> object_ids;
auto len = env->GetArrayLength(oids);
for (int i = 0; i < len; i++) {
jbyteArray oid = (jbyteArray) env->GetObjectArrayElement(oids, i);
UniqueIdFromJByteArray o(env, oid);
object_ids.push_back(*o.PID);
env->DeleteLocalRef(oid);
}
auto client = reinterpret_cast<LocalSchedulerConnection *>(c);
local_scheduler_reconstruct_objects(client, {*o.PID});
local_scheduler_reconstruct_objects(client, object_ids, fetch_only);
}
/*
@ -227,6 +249,55 @@ Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1put_1object(
local_scheduler_put_object(client, *t.PID, *o.PID);
}
JNIEXPORT jbooleanArray JNICALL
Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1waitObject(
JNIEnv *env,
jclass,
jlong c,
jobjectArray oids,
jint num_returns,
jint timeout_ms,
jboolean wait_local) {
std::vector<ObjectID> object_ids;
auto len = env->GetArrayLength(oids);
for (int i = 0; i < len; i++) {
jbyteArray oid = (jbyteArray) env->GetObjectArrayElement(oids, i);
UniqueIdFromJByteArray o(env, oid);
object_ids.push_back(*o.PID);
env->DeleteLocalRef(oid);
}
auto client = reinterpret_cast<LocalSchedulerConnection *>(c);
// Invoke wait.
std::pair<std::vector<ObjectID>, std::vector<ObjectID>> result =
local_scheduler_wait(client, object_ids, num_returns, timeout_ms,
static_cast<bool>(wait_local));
// Convert result to java object.
jboolean putValue = true;
jbooleanArray resultArray = env->NewBooleanArray(object_ids.size());
for (uint i = 0; i < result.first.size(); ++i) {
for (uint j = 0; j < object_ids.size(); ++j) {
if (result.first[i] == object_ids[j]) {
env->SetBooleanArrayRegion(resultArray, j, 1, &putValue);
break;
}
}
}
putValue = false;
for (uint i = 0; i < result.second.size(); ++i) {
for (uint j = 0; j < object_ids.size(); ++j) {
if (result.second[i] == object_ids[j]) {
env->SetBooleanArrayRegion(resultArray, j, 1, &putValue);
break;
}
}
}
return resultArray;
}
#ifdef __cplusplus
}
#endif

View file

@ -20,7 +20,8 @@ Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1init(JNIEnv *,
jbyteArray,
jboolean,
jbyteArray,
jlong);
jlong,
jboolean);
/*
* Class: org_ray_spi_impl_DefaultLocalSchedulerClient
@ -34,7 +35,8 @@ Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1submitTask(JNIEnv *,
jbyteArray,
jobject,
jint,
jint);
jint,
jboolean);
/*
* Class: org_ray_spi_impl_DefaultLocalSchedulerClient
@ -44,7 +46,8 @@ Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1submitTask(JNIEnv *,
JNIEXPORT jbyteArray JNICALL
Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1getTaskTodo(JNIEnv *,
jclass,
jlong);
jlong,
jboolean);
/*
* Class: org_ray_spi_impl_DefaultLocalSchedulerClient
@ -80,15 +83,16 @@ Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1task_1done(JNIEnv *,
/*
* Class: org_ray_spi_impl_DefaultLocalSchedulerClient
* Method: _reconstruct_object
* Method: _reconstruct_objects
* Signature: (J[B)V
*/
JNIEXPORT void JNICALL
Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1reconstruct_1object(
Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1reconstruct_1objects(
JNIEnv *,
jclass,
jlong,
jbyteArray);
jobjectArray,
jboolean);
/*
* Class: org_ray_spi_impl_DefaultLocalSchedulerClient
@ -112,6 +116,20 @@ Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1put_1object(JNIEnv *,
jbyteArray,
jbyteArray);
/*
* Class: org_ray_spi_impl_DefaultLocalSchedulerClient
* Method: _waitObject
* Signature: (J[[BIIZ)[Z
*/
JNIEXPORT jbooleanArray JNICALL
Java_org_ray_spi_impl_DefaultLocalSchedulerClient__1waitObject(JNIEnv *,
jclass,
jlong,
jobjectArray,
jint,
jint,
jboolean);
#ifdef __cplusplus
}
#endif

View file

@ -69,7 +69,7 @@ void local_scheduler_log_event(LocalSchedulerConnection *conn,
}
void local_scheduler_submit(LocalSchedulerConnection *conn,
TaskExecutionSpec &execution_spec) {
const TaskExecutionSpec &execution_spec) {
flatbuffers::FlatBufferBuilder fbb;
auto execution_dependencies =
to_flatbuf(fbb, execution_spec.ExecutionDependencies());
@ -86,7 +86,7 @@ void local_scheduler_submit(LocalSchedulerConnection *conn,
void local_scheduler_submit_raylet(
LocalSchedulerConnection *conn,
const std::vector<ObjectID> &execution_dependencies,
ray::raylet::TaskSpecification task_spec) {
const ray::raylet::TaskSpecification &task_spec) {
flatbuffers::FlatBufferBuilder fbb;
auto execution_dependencies_message = to_flatbuf(fbb, execution_dependencies);
auto message = ray::local_scheduler::protocol::CreateSubmitTaskRequest(

View file

@ -65,7 +65,7 @@ void LocalSchedulerConnection_free(LocalSchedulerConnection *conn);
* @return Void.
*/
void local_scheduler_submit(LocalSchedulerConnection *conn,
TaskExecutionSpec &execution_spec);
const TaskExecutionSpec &execution_spec);
/// Submit a task using the raylet code path.
///
@ -76,7 +76,7 @@ void local_scheduler_submit(LocalSchedulerConnection *conn,
void local_scheduler_submit_raylet(
LocalSchedulerConnection *conn,
const std::vector<ObjectID> &execution_dependencies,
ray::raylet::TaskSpecification task_spec);
const ray::raylet::TaskSpecification &task_spec);
/**
* Notify the local scheduler that this client is disconnecting gracefully. This

View file

@ -39,11 +39,10 @@ int main(int argc, char *argv[]) {
RayConfig::instance().num_workers_per_process();
// Use a default worker that can execute empty tasks with dependencies.
std::stringstream worker_command_stream(worker_command);
std::string token;
while (getline(worker_command_stream, token, ' ')) {
node_manager_config.worker_command.push_back(token);
}
std::istringstream iss(worker_command);
std::vector<std::string> results(std::istream_iterator<std::string>{iss},
std::istream_iterator<std::string>());
node_manager_config.worker_command.swap(results);
node_manager_config.heartbeat_period_ms =
RayConfig::instance().heartbeat_timeout_milliseconds();
@ -84,8 +83,12 @@ int main(int argc, char *argv[]) {
// Destroy the Raylet on a SIGTERM. The pointer to main_service is
// guaranteed to be valid since this function will run the event loop
// instead of returning immediately.
auto handler = [&main_service](const boost::system::error_code &error,
int signal_number) { main_service.stop(); };
// We should stop the service and remove the local socket file.
auto handler = [&main_service, &raylet_socket_name](
const boost::system::error_code &error, int signal_number) {
main_service.stop();
remove(raylet_socket_name.c_str());
};
boost::asio::signal_set signals(main_service, SIGTERM);
signals.async_wait(handler);

View file

@ -37,6 +37,10 @@ TaskSpecification::TaskSpecification(const flatbuffers::String &string) {
AssignSpecification(reinterpret_cast<const uint8_t *>(string.data()), string.size());
}
TaskSpecification::TaskSpecification(const std::string &string) {
AssignSpecification(reinterpret_cast<const uint8_t *>(string.data()), string.size());
}
TaskSpecification::TaskSpecification(
const UniqueID &driver_id, const TaskID &parent_task_id, int64_t parent_counter,
const FunctionID &function_id,

View file

@ -109,6 +109,12 @@ class TaskSpecification {
int64_t num_returns,
const std::unordered_map<std::string, double> &required_resources);
/// Deserialize a task specification from a flatbuffer's string data.
///
/// \param string The string data for a serialized task specification
/// flatbuffer.
TaskSpecification(const std::string &string);
~TaskSpecification() {}
/// Serialize the TaskSpecification to a flatbuffer.

View file

@ -121,6 +121,7 @@ void WorkerPool::RegisterWorker(std::shared_ptr<Worker> worker) {
auto pid = worker->Pid();
RAY_LOG(DEBUG) << "Registering worker with pid " << pid;
registered_workers_.push_back(std::move(worker));
auto it = starting_worker_processes_.find(pid);
RAY_CHECK(it != starting_worker_processes_.end());
it->second--;