mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
Add task reconstruction function to task info handler (#6711)
This commit is contained in:
parent
3673835f30
commit
ca454c5c1b
6 changed files with 73 additions and 0 deletions
|
@ -94,5 +94,34 @@ void DefaultTaskInfoHandler::HandleAddTaskLease(const AddTaskLeaseRequest &reque
|
||||||
<< ", node id = " << node_id;
|
<< ", node id = " << node_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DefaultTaskInfoHandler::HandleAttemptTaskReconstruction(
|
||||||
|
const AttemptTaskReconstructionRequest &request,
|
||||||
|
AttemptTaskReconstructionReply *reply, SendReplyCallback send_reply_callback) {
|
||||||
|
ClientID node_id =
|
||||||
|
ClientID::FromBinary(request.task_reconstruction().node_manager_id());
|
||||||
|
RAY_LOG(DEBUG) << "Reconstructing task, reconstructions num = "
|
||||||
|
<< request.task_reconstruction().num_reconstructions()
|
||||||
|
<< ", node id = " << node_id;
|
||||||
|
auto task_reconstruction_data = std::make_shared<TaskReconstructionData>();
|
||||||
|
task_reconstruction_data->CopyFrom(request.task_reconstruction());
|
||||||
|
auto on_done = [node_id, request, send_reply_callback](Status status) {
|
||||||
|
if (!status.ok()) {
|
||||||
|
RAY_LOG(ERROR) << "Failed to reconstruct task, reconstructions num = "
|
||||||
|
<< request.task_reconstruction().num_reconstructions()
|
||||||
|
<< ", node id = " << node_id;
|
||||||
|
}
|
||||||
|
send_reply_callback(status, nullptr, nullptr);
|
||||||
|
};
|
||||||
|
|
||||||
|
Status status =
|
||||||
|
gcs_client_.Tasks().AttemptTaskReconstruction(task_reconstruction_data, on_done);
|
||||||
|
if (!status.ok()) {
|
||||||
|
on_done(status);
|
||||||
|
}
|
||||||
|
RAY_LOG(DEBUG) << "Finished reconstructing task, reconstructions num = "
|
||||||
|
<< request.task_reconstruction().num_reconstructions()
|
||||||
|
<< ", node id = " << node_id;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace rpc
|
} // namespace rpc
|
||||||
} // namespace ray
|
} // namespace ray
|
||||||
|
|
|
@ -25,6 +25,10 @@ class DefaultTaskInfoHandler : public rpc::TaskInfoHandler {
|
||||||
void HandleAddTaskLease(const AddTaskLeaseRequest &request, AddTaskLeaseReply *reply,
|
void HandleAddTaskLease(const AddTaskLeaseRequest &request, AddTaskLeaseReply *reply,
|
||||||
SendReplyCallback send_reply_callback) override;
|
SendReplyCallback send_reply_callback) override;
|
||||||
|
|
||||||
|
void HandleAttemptTaskReconstruction(const AttemptTaskReconstructionRequest &request,
|
||||||
|
AttemptTaskReconstructionReply *reply,
|
||||||
|
SendReplyCallback send_reply_callback) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
gcs::RedisGcsClient &gcs_client_;
|
gcs::RedisGcsClient &gcs_client_;
|
||||||
};
|
};
|
||||||
|
|
|
@ -334,6 +334,17 @@ class GcsServerTest : public RedisServiceManagerForTest {
|
||||||
return WaitReady(promise.get_future(), timeout_ms_);
|
return WaitReady(promise.get_future(), timeout_ms_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool AttemptTaskReconstruction(const rpc::AttemptTaskReconstructionRequest &request) {
|
||||||
|
std::promise<bool> promise;
|
||||||
|
client_->AttemptTaskReconstruction(
|
||||||
|
request, [&promise](const Status &status,
|
||||||
|
const rpc::AttemptTaskReconstructionReply &reply) {
|
||||||
|
RAY_CHECK_OK(status);
|
||||||
|
promise.set_value(true);
|
||||||
|
});
|
||||||
|
return WaitReady(promise.get_future(), timeout_ms_);
|
||||||
|
}
|
||||||
|
|
||||||
bool WaitReady(const std::future<bool> &future, uint64_t timeout_ms) {
|
bool WaitReady(const std::future<bool> &future, uint64_t timeout_ms) {
|
||||||
auto status = future.wait_for(std::chrono::milliseconds(timeout_ms));
|
auto status = future.wait_for(std::chrono::milliseconds(timeout_ms));
|
||||||
return status == std::future_status::ready;
|
return status == std::future_status::ready;
|
||||||
|
@ -572,6 +583,16 @@ TEST_F(GcsServerTest, TestTaskInfo) {
|
||||||
rpc::AddTaskLeaseRequest add_task_lease_request;
|
rpc::AddTaskLeaseRequest add_task_lease_request;
|
||||||
add_task_lease_request.mutable_task_lease_data()->CopyFrom(task_lease_data);
|
add_task_lease_request.mutable_task_lease_data()->CopyFrom(task_lease_data);
|
||||||
ASSERT_TRUE(AddTaskLease(add_task_lease_request));
|
ASSERT_TRUE(AddTaskLease(add_task_lease_request));
|
||||||
|
|
||||||
|
// Attempt task reconstruction
|
||||||
|
rpc::AttemptTaskReconstructionRequest attempt_task_reconstruction_request;
|
||||||
|
rpc::TaskReconstructionData task_reconstruction_data;
|
||||||
|
task_reconstruction_data.set_task_id(task_id.Binary());
|
||||||
|
task_reconstruction_data.set_node_manager_id(node_id.Binary());
|
||||||
|
task_reconstruction_data.set_num_reconstructions(0);
|
||||||
|
attempt_task_reconstruction_request.mutable_task_reconstruction()->CopyFrom(
|
||||||
|
task_reconstruction_data);
|
||||||
|
ASSERT_TRUE(AttemptTaskReconstruction(attempt_task_reconstruction_request));
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ray
|
} // namespace ray
|
||||||
|
|
|
@ -248,6 +248,13 @@ message AddTaskLeaseRequest {
|
||||||
message AddTaskLeaseReply {
|
message AddTaskLeaseReply {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message AttemptTaskReconstructionRequest {
|
||||||
|
TaskReconstructionData task_reconstruction = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message AttemptTaskReconstructionReply {
|
||||||
|
}
|
||||||
|
|
||||||
// Service for task info access.
|
// Service for task info access.
|
||||||
service TaskInfoGcsService {
|
service TaskInfoGcsService {
|
||||||
// Add a task to GCS Service.
|
// Add a task to GCS Service.
|
||||||
|
@ -258,4 +265,7 @@ service TaskInfoGcsService {
|
||||||
rpc DeleteTasks(DeleteTasksRequest) returns (DeleteTasksReply);
|
rpc DeleteTasks(DeleteTasksRequest) returns (DeleteTasksReply);
|
||||||
// Add a task lease to GCS Service.
|
// Add a task lease to GCS Service.
|
||||||
rpc AddTaskLease(AddTaskLeaseRequest) returns (AddTaskLeaseReply);
|
rpc AddTaskLease(AddTaskLeaseRequest) returns (AddTaskLeaseReply);
|
||||||
|
// Attempt task reconstruction to GCS Service.
|
||||||
|
rpc AttemptTaskReconstruction(AttemptTaskReconstructionRequest)
|
||||||
|
returns (AttemptTaskReconstructionReply);
|
||||||
}
|
}
|
||||||
|
|
|
@ -105,6 +105,10 @@ class GcsRpcClient {
|
||||||
/// Add a task lease to GCS Service.
|
/// Add a task lease to GCS Service.
|
||||||
VOID_RPC_CLIENT_METHOD(TaskInfoGcsService, AddTaskLease, task_info_grpc_client_, )
|
VOID_RPC_CLIENT_METHOD(TaskInfoGcsService, AddTaskLease, task_info_grpc_client_, )
|
||||||
|
|
||||||
|
/// Attempt task reconstruction to GCS Service.
|
||||||
|
VOID_RPC_CLIENT_METHOD(TaskInfoGcsService, AttemptTaskReconstruction,
|
||||||
|
task_info_grpc_client_, )
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// The gRPC-generated stub.
|
/// The gRPC-generated stub.
|
||||||
std::unique_ptr<GrpcClient<JobInfoGcsService>> job_info_grpc_client_;
|
std::unique_ptr<GrpcClient<JobInfoGcsService>> job_info_grpc_client_;
|
||||||
|
|
|
@ -259,6 +259,10 @@ class TaskInfoGcsServiceHandler {
|
||||||
virtual void HandleAddTaskLease(const AddTaskLeaseRequest &request,
|
virtual void HandleAddTaskLease(const AddTaskLeaseRequest &request,
|
||||||
AddTaskLeaseReply *reply,
|
AddTaskLeaseReply *reply,
|
||||||
SendReplyCallback send_reply_callback) = 0;
|
SendReplyCallback send_reply_callback) = 0;
|
||||||
|
|
||||||
|
virtual void HandleAttemptTaskReconstruction(
|
||||||
|
const AttemptTaskReconstructionRequest &request,
|
||||||
|
AttemptTaskReconstructionReply *reply, SendReplyCallback send_reply_callback) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// The `GrpcService` for `TaskInfoGcsService`.
|
/// The `GrpcService` for `TaskInfoGcsService`.
|
||||||
|
@ -282,6 +286,7 @@ class TaskInfoGrpcService : public GrpcService {
|
||||||
TASK_INFO_SERVICE_RPC_HANDLER(GetTask, 1);
|
TASK_INFO_SERVICE_RPC_HANDLER(GetTask, 1);
|
||||||
TASK_INFO_SERVICE_RPC_HANDLER(DeleteTasks, 1);
|
TASK_INFO_SERVICE_RPC_HANDLER(DeleteTasks, 1);
|
||||||
TASK_INFO_SERVICE_RPC_HANDLER(AddTaskLease, 1);
|
TASK_INFO_SERVICE_RPC_HANDLER(AddTaskLease, 1);
|
||||||
|
TASK_INFO_SERVICE_RPC_HANDLER(AttemptTaskReconstruction, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
Loading…
Add table
Reference in a new issue