2016-03-25 13:46:12 -07:00
|
|
|
// This file defines the gRPC interface between the scheduler, object stores and
|
|
|
|
// workers. These are used for communication over the network.
|
|
|
|
|
|
|
|
// Terminology:
|
|
|
|
// Worker: A cluster consists of multiple worker processes (typically one
|
|
|
|
// per core) which execute tasks that can access objects from object stores.
|
|
|
|
// Object store: Typically there is one object store per node which holds the
|
|
|
|
// objects locally stored on that node.
|
|
|
|
// Scheduler: The scheduler process keeps track of a mapping from object
|
|
|
|
// references to object stores, orchestrates data transfer between object
|
|
|
|
// stores and assigns tasks to workers.
|
|
|
|
|
2016-02-07 15:50:02 -08:00
|
|
|
syntax = "proto3";
|
|
|
|
|
2016-02-10 12:12:19 -08:00
|
|
|
import "types.proto";
|
|
|
|
|
2016-03-25 13:46:12 -07:00
|
|
|
// Scheduler
|
|
|
|
|
|
|
|
// RPCs served by the scheduler. Per the comments on the individual methods,
// callers are workers and object stores.
//
// NOTE(review): IncrementCount/DecrementCount and
// IncrementRefCount/DecrementRefCount look like overlapping APIs -- confirm
// whether both pairs are still needed.
service Scheduler {
  // Registers a new worker with the scheduler.
  rpc RegisterWorker(RegisterWorkerRequest) returns (RegisterWorkerReply);

  // Registers an object store with the scheduler.
  rpc RegisterObjStore(RegisterObjStoreRequest) returns (RegisterObjStoreReply);

  // Tells the scheduler that a worker can execute a certain function.
  rpc RegisterFunction(RegisterFunctionRequest) returns (AckReply);

  // Asks the scheduler to execute a task; returns immediately with the
  // object references of the results.
  rpc SubmitTask(SubmitTaskRequest) returns (SubmitTaskReply);

  // Increments the count of the object reference.
  rpc IncrementCount(ChangeCountRequest) returns (AckReply);

  // Decrements the count of the object reference.
  rpc DecrementCount(ChangeCountRequest) returns (AckReply);

  // Requests an object reference for an object that will be pushed to an
  // object store.
  rpc PushObj(PushObjRequest) returns (PushObjReply);

  // Requests delivery of an object from an object store that holds it to the
  // local object store.
  rpc RequestObj(RequestObjRequest) returns (AckReply);

  // Used by the worker to tell the scheduler that two objrefs should refer to
  // the same object.
  rpc AliasObjRefs(AliasObjRefsRequest) returns (AckReply);

  // Used by an object store to tell the scheduler that an object is ready
  // (i.e. has been finalized and can be shared).
  rpc ObjReady(ObjReadyRequest) returns (AckReply);

  // Increments the reference count of the given object references.
  rpc IncrementRefCount(IncrementRefCountRequest) returns (AckReply);

  // Decrements the reference count of the given object references.
  rpc DecrementRefCount(DecrementRefCountRequest) returns (AckReply);

  // Used by the worker to notify the scheduler about which objrefs a
  // particular object contains.
  rpc AddContainedObjRefs(AddContainedObjRefsRequest) returns (AckReply);

  // Used by the worker to ask for work. The request (not the reply) also
  // carries the status of the worker's previous task, if there was one; the
  // reply is a plain acknowledgement.
  rpc ReadyForNewTask(ReadyForNewTaskRequest) returns (AckReply);

  // Gets information about the scheduler state.
  rpc SchedulerInfo(SchedulerInfoRequest) returns (SchedulerInfoReply);

  // Gets information about tasks.
  rpc TaskInfo(TaskInfoRequest) returns (TaskInfoReply);
}
|
|
|
|
|
2016-02-22 13:55:06 -08:00
|
|
|
// Empty acknowledgement reply; it carries no fields.
message AckReply {}
|
|
|
|
|
2016-02-07 15:50:02 -08:00
|
|
|
// Request to register a worker.
message RegisterWorkerRequest {
  // IP address of the worker being registered.
  string worker_address = 1;

  // IP address of the object store the worker is connected to.
  string objstore_address = 2;
}
|
|
|
|
|
|
|
|
// Reply to a worker registration.
message RegisterWorkerReply {
  // Worker ID assigned by the scheduler.
  uint64 workerid = 1;

  // Object store ID of the worker's local object store.
  uint64 objstoreid = 2;
}
|
|
|
|
|
2016-02-22 13:55:06 -08:00
|
|
|
// Request to register an object store.
message RegisterObjStoreRequest {
  // IP address of the object store being registered.
  string objstore_address = 1;
}
|
|
|
|
|
|
|
|
// Reply to an object store registration.
message RegisterObjStoreReply {
  // Object store ID assigned by the scheduler.
  uint64 objstoreid = 1;
}
|
|
|
|
|
|
|
|
// Announces that a worker is able to execute a named function.
message RegisterFunctionRequest {
  // Worker that can execute the function.
  uint64 workerid = 1;

  // Name of the function that is registered.
  string fnname = 2;

  // Number of return values of the function.
  uint64 num_return_vals = 3;
}
|
|
|
|
|
2016-05-26 16:33:30 -07:00
|
|
|
// Request to execute a task.
message SubmitTaskRequest {
  // Contains the name of the function to be executed and its arguments.
  // (Task is not defined in this file; presumably it comes from
  // types.proto -- confirm.)
  Task task = 1;
}
|
|
|
|
|
2016-05-26 16:33:30 -07:00
|
|
|
// Reply to a task submission.
message SubmitTaskReply {
  // Object references of the function's return values.
  repeated uint64 result = 1;

  // True if the function was registered; false otherwise.
  bool function_registered = 2;
}
|
|
|
|
|
2016-04-06 22:06:53 -07:00
|
|
|
// Request for delivery of an object.
message RequestObjRequest {
  // Worker that tries to request the object.
  uint64 workerid = 1;

  // Object reference of the object being requested.
  uint64 objref = 2;
}
|
|
|
|
|
2016-02-22 13:55:06 -08:00
|
|
|
// Request for an object reference for an object that is about to be pushed.
message PushObjRequest {
  // Worker that tries to push an object.
  uint64 workerid = 1;
}
|
|
|
|
|
|
|
|
// Reply to a push request.
message PushObjReply {
  // Object reference assigned by the scheduler to the object.
  uint64 objref = 1;
}
|
|
|
|
|
2016-04-08 12:58:08 -07:00
|
|
|
// Asks for one objref to be made an alias of another.
message AliasObjRefsRequest {
  // ObjRef which will be aliased.
  uint64 alias_objref = 1;

  // The target ObjRef.
  uint64 target_objref = 2;
}
|
|
|
|
|
2016-03-10 16:55:56 -08:00
|
|
|
// Notification that an object has been finalized.
message ObjReadyRequest {
  // Object reference of the object that has been finalized.
  uint64 objref = 1;

  // ID of the object store the object lives on.
  uint64 objstoreid = 2;
}
|
|
|
|
|
2016-04-18 13:05:36 -07:00
|
|
|
// Request to increment reference counts.
message IncrementRefCountRequest {
  // Object references whose reference count should be incremented.
  // Duplicates will be incremented multiple times.
  repeated uint64 objref = 1;
}
|
|
|
|
|
|
|
|
// Records which object references a given object contains.
message AddContainedObjRefsRequest {
  // The objref of the object in question.
  uint64 objref = 1;

  // Object references contained in the object.
  repeated uint64 contained_objref = 2;
}
|
|
|
|
|
|
|
|
// Request to decrement reference counts.
message DecrementRefCountRequest {
  // Object references whose reference count should be decremented.
  // Duplicates will be decremented multiple times.
  repeated uint64 objref = 1;
}
|
|
|
|
|
2016-06-16 16:04:52 -07:00
|
|
|
// Sent by a worker that is ready for a new task; optionally reports the
// outcome of the task it ran before.
message ReadyForNewTaskRequest {
  // Outcome of a previously executed task.
  message PreviousTaskInfo {
    // True if the task succeeded, false if it threw an exception.
    bool task_succeeded = 1;

    // The contents of the exception, if the task threw an exception.
    string error_message = 2;
  }

  // ID of the worker which executed the task.
  uint64 workerid = 1;

  // Information about the previous task; only present if there was a
  // previous task.
  PreviousTaskInfo previous_task_info = 2;
}
|
|
|
|
|
2016-02-22 13:55:06 -08:00
|
|
|
// Request shared by the IncrementCount and DecrementCount RPCs.
message ChangeCountRequest {
  // Object reference of the object whose reference count is increased or
  // decreased.
  uint64 objref = 1;
}
|
|
|
|
|
2016-04-18 13:05:36 -07:00
|
|
|
// The following messages are used to get information about the scheduler state
|
2016-03-25 13:46:12 -07:00
|
|
|
|
2016-04-18 13:05:36 -07:00
|
|
|
// Request for information about the scheduler state; it has no fields.
message SchedulerInfoRequest {}
|
|
|
|
|
|
|
|
// One function's entry in the scheduler's function table.
message FnTableEntry {
  // IDs of the workers that can execute the function.
  repeated uint64 workerid = 1;

  // Number of return values of the function.
  uint64 num_return_vals = 2;
}
|
|
|
|
|
2016-04-18 13:05:36 -07:00
|
|
|
// Snapshot of the scheduler state.
// Fields are declared in their historical order, so tag 3 precedes tag 2;
// the field numbers must not be changed.
message SchedulerInfoReply {
  // OperationIds of the tasks on the task queue.
  repeated uint64 operationid = 1;

  // List of workers waiting to get a task assigned.
  repeated uint64 avail_worker = 3;

  // Table of all available remote functions, keyed by a string
  // (presumably the function name -- confirm).
  map<string, FnTableEntry> function_table = 2;

  // The target_objrefs_ data structure.
  repeated uint64 target_objref = 4;

  // The reference_counts_ data structure.
  repeated uint64 reference_count = 5;
}
|
|
|
|
|
2016-03-25 13:46:12 -07:00
|
|
|
// Object stores
|
|
|
|
|
|
|
|
// RPCs served by an object store.
service ObjStore {
  // Tells the object store to begin pulling an object from another object
  // store. Called by the scheduler.
  rpc StartDelivery(StartDeliveryRequest) returns (AckReply);

  // Returns the requested object to the caller as a stream of object chunks
  // (server-streaming: the callee is the sender). Presumably invoked by the
  // pulling object store after StartDelivery -- confirm.
  rpc StreamObjTo(StreamObjToRequest) returns (stream ObjChunk);

  // Notifies the object store about objref aliasing. Called by the scheduler.
  rpc NotifyAlias(NotifyAliasRequest) returns (AckReply);

  // Tells the object store to deallocate an object it holds. Called by the
  // scheduler.
  rpc DeallocateObject(DeallocateObjectRequest) returns (AckReply);

  // Gets information about the object store state.
  rpc ObjStoreInfo(ObjStoreInfoRequest) returns (ObjStoreInfoReply);
}
|
|
|
|
|
2016-04-24 19:06:14 -07:00
|
|
|
// Instructs an object store to pull an object from another store.
message StartDeliveryRequest {
  // Object store to pull the object from.
  string objstore_address = 1;

  // Reference of the object that gets delivered.
  uint64 objref = 2;
}
|
|
|
|
|
|
|
|
// Request to register an object.
// NOTE(review): no RPC in this file uses this message -- confirm it is
// still needed.
message RegisterObjRequest {
  // Reference of the object that gets registered.
  uint64 objref = 1;
}
|
|
|
|
|
|
|
|
// Reply to an object registration.
// NOTE(review): no RPC in this file uses this message -- confirm it is
// still needed.
message RegisterObjReply {
  // Handle to the memory segment where the object is stored.
  uint64 handle = 1;
}
|
|
|
|
|
2016-04-24 19:06:14 -07:00
|
|
|
// Identifies the object to stream.
message StreamObjToRequest {
  // Object reference of the object being streamed.
  uint64 objref = 1;
}
|
|
|
|
|
|
|
|
// One chunk of an object being streamed.
message ObjChunk {
  // Total size of the object (presumably in bytes -- confirm).
  uint64 total_size = 1;

  // Offset of the arrow metadata.
  uint64 metadata_offset = 2;

  // Data for this chunk of the object.
  bytes data = 3;
}
|
|
|
|
|
2016-04-08 12:58:08 -07:00
|
|
|
// Tells an object store that one objref aliases another.
message NotifyAliasRequest {
  // The objref being aliased.
  uint64 alias_objref = 1;

  // The canonical objref that points to the actual object.
  uint64 canonical_objref = 2;
}
|
|
|
|
|
2016-04-18 13:05:36 -07:00
|
|
|
// Request to deallocate an object.
message DeallocateObjectRequest {
  // The canonical objref of the object to deallocate.
  uint64 canonical_objref = 1;
}
|
|
|
|
|
2016-02-22 13:55:06 -08:00
|
|
|
// Request for an object.
// NOTE(review): no RPC in this file uses this message -- confirm it is
// still needed.
message GetObjRequest {
  // Object reference of the object being requested by the worker.
  uint64 objref = 1;
}
|
|
|
|
|
2016-06-18 01:01:48 +03:00
|
|
|
// Request for information about tasks; it has no fields.
message TaskInfoRequest {}
|
|
|
|
|
|
|
|
// Reply to a task information request.
message TaskInfoReply {
  // Status entries for failed tasks (going by the field name -- confirm).
  repeated TaskStatus failed_task = 1;

  // TODO(mehrdadn): We'll want to return information from computation_graph
  // since it's important for visualizing tasks that have been completed etc.
}
|
|
|
|
|
|
|
|
// Status record for a single task.
// NOTE(review): the fields were undocumented; the descriptions below are
// inferred from the field names and should be confirmed.
message TaskStatus {
  // Presumably the operation ID of the task.
  uint64 operationid = 1;

  // Presumably the address of the worker that ran the task.
  string worker_address = 2;

  // Presumably the error message produced by the task.
  string error_message = 3;
}
|
|
|
|
|
2016-04-18 13:05:36 -07:00
|
|
|
// These messages are for getting information about the object store state
|
2016-03-25 13:46:12 -07:00
|
|
|
|
2016-04-18 13:05:36 -07:00
|
|
|
// Request for information about the object store state.
message ObjStoreInfoRequest {
  // Object references we want to retrieve from the store for inspection.
  repeated uint64 objref = 1;
}
|
2016-02-22 13:55:06 -08:00
|
|
|
|
2016-04-18 13:05:36 -07:00
|
|
|
// Reply describing the object store state.
message ObjStoreInfoReply {
  // List of object references in the store.
  repeated uint64 objref = 1;

  // Protocol buffer objects that were requested.
  // (Obj is not defined in this file; presumably it comes from
  // types.proto -- confirm.)
  repeated Obj obj = 2;
}
|
|
|
|
|
2016-03-25 13:46:12 -07:00
|
|
|
// Workers
|
|
|
|
|
|
|
|
// RPCs served by a worker.
service WorkerService {
  // Called by the scheduler to run a function on the worker.
  rpc ExecuteTask(ExecuteTaskRequest) returns (ExecuteTaskReply);
}
|
|
|
|
|
2016-05-26 16:33:30 -07:00
|
|
|
// Request to execute a task on a worker.
message ExecuteTaskRequest {
  // Contains the name of the function to be executed and its arguments.
  // (Task is not defined in this file; presumably it comes from
  // types.proto -- confirm.)
  Task task = 1;
}
|
|
|
|
|
2016-05-26 16:33:30 -07:00
|
|
|
// Reply to ExecuteTaskRequest; it has no fields.
message ExecuteTaskReply {}
|