raylet memory corruption fixes (#2591)

* raylet memory corruption fixes

* add util function to translate boost error to ray status

* tcp client connection now using ray status utility function

* lint
This commit is contained in:
Alexey Tumanov 2018-08-08 19:50:43 -07:00 committed by Philipp Moritz
parent 6ab01a2cad
commit df7ee7ff1e
5 changed files with 22 additions and 19 deletions

View file

@ -4,6 +4,7 @@
#include "common.h" #include "common.h"
#include "ray/raylet/format/node_manager_generated.h" #include "ray/raylet/format/node_manager_generated.h"
#include "ray/util/util.h"
namespace ray { namespace ray {
@ -14,11 +15,7 @@ ray::Status TcpConnect(boost::asio::ip::tcp::socket &socket,
boost::asio::ip::tcp::endpoint endpoint(ip_address, port); boost::asio::ip::tcp::endpoint endpoint(ip_address, port);
boost::system::error_code error; boost::system::error_code error;
socket.connect(endpoint, error); socket.connect(endpoint, error);
if (error) { return boost_to_ray_status(error);
return ray::Status::IOError(error.message());
} else {
return ray::Status::OK();
}
} }
template <class T> template <class T>
@ -83,11 +80,7 @@ ray::Status ServerConnection<T>::WriteMessage(int64_t type, int64_t length,
// TODO(swang): Does this need to be an async write? // TODO(swang): Does this need to be an async write?
boost::system::error_code error; boost::system::error_code error;
WriteBuffer(message_buffers, error); WriteBuffer(message_buffers, error);
if (error) { return boost_to_ray_status(error);
return ray::Status::IOError(error.message());
} else {
return ray::Status::OK();
}
} }
template <class T> template <class T>

View file

@ -1,4 +1,5 @@
#include "ray/object_manager/object_manager.h" #include "ray/object_manager/object_manager.h"
#include "ray/util/util.h"
namespace asio = boost::asio; namespace asio = boost::asio;
@ -332,11 +333,10 @@ ray::Status ObjectManager::SendObjectData(const ObjectID &object_id,
buffer.push_back(asio::buffer(chunk_info.data, chunk_info.buffer_length)); buffer.push_back(asio::buffer(chunk_info.data, chunk_info.buffer_length));
conn->WriteBuffer(buffer, ec); conn->WriteBuffer(buffer, ec);
ray::Status status = ray::Status::OK(); ray::Status status = boost_to_ray_status(ec);
if (ec.value() != 0) { if (ec.value() != boost::system::errc::success) {
// Push failed. Deal with partial objects on the receiving end. // Push failed. Deal with partial objects on the receiving end.
// TODO(hme): Try to invoke disconnect on sender connection, then remove it. // TODO(hme): Try to invoke disconnect on sender connection, then remove it.
status = ray::Status::IOError(ec.message());
} }
// Do this regardless of whether it failed or succeeded. // Do this regardless of whether it failed or succeeded.
@ -626,7 +626,7 @@ void ObjectManager::ExecuteReceiveObject(const ClientID &client_id,
buffer.push_back(asio::buffer(chunk_info.data, chunk_info.buffer_length)); buffer.push_back(asio::buffer(chunk_info.data, chunk_info.buffer_length));
boost::system::error_code ec; boost::system::error_code ec;
conn.ReadBuffer(buffer, ec); conn.ReadBuffer(buffer, ec);
if (ec.value() == 0) { if (ec.value() == boost::system::errc::success) {
buffer_pool_.SealChunk(object_id, chunk_index); buffer_pool_.SealChunk(object_id, chunk_index);
} else { } else {
buffer_pool_.AbortCreateChunk(object_id, chunk_index); buffer_pool_.AbortCreateChunk(object_id, chunk_index);
@ -643,8 +643,8 @@ void ObjectManager::ExecuteReceiveObject(const ClientID &client_id,
buffer.push_back(asio::buffer(mutable_vec, buffer_length)); buffer.push_back(asio::buffer(mutable_vec, buffer_length));
boost::system::error_code ec; boost::system::error_code ec;
conn.ReadBuffer(buffer, ec); conn.ReadBuffer(buffer, ec);
if (ec.value() != 0) { if (ec.value() != boost::system::errc::success) {
RAY_LOG(ERROR) << ec.message(); RAY_LOG(ERROR) << boost_to_ray_status(ec).ToString();
} }
// TODO(hme): If the object isn't local, create a pull request for this chunk. // TODO(hme): If the object isn't local, create a pull request for this chunk.
} }

View file

@ -9,6 +9,7 @@
#include "common/common_protocol.h" #include "common/common_protocol.h"
#include "ray/object_manager/object_store_notification_manager.h" #include "ray/object_manager/object_store_notification_manager.h"
#include "ray/util/util.h"
namespace ray { namespace ray {
@ -46,8 +47,8 @@ void ObjectStoreNotificationManager::ProcessStoreLength(
void ObjectStoreNotificationManager::ProcessStoreNotification( void ObjectStoreNotificationManager::ProcessStoreNotification(
const boost::system::error_code &error) { const boost::system::error_code &error) {
if (error) { if (error.value() != boost::system::errc::success) {
RAY_LOG(FATAL) << error.message(); RAY_LOG(FATAL) << boost_to_ray_status(error).ToString();
} }
const auto &object_info = flatbuffers::GetRoot<ObjectInfo>(notification_.data()); const auto &object_info = flatbuffers::GetRoot<ObjectInfo>(notification_.data());

View file

@ -160,12 +160,12 @@ void ReconstructionPolicy::Cancel(const ObjectID &object_id) {
// If there are no more needed objects created by this task, stop listening // If there are no more needed objects created by this task, stop listening
// for notifications. // for notifications.
if (it->second.created_objects.empty()) { if (it->second.created_objects.empty()) {
listening_tasks_.erase(it);
// Cancel notifications for the task lease if we were subscribed to them. // Cancel notifications for the task lease if we were subscribed to them.
if (it->second.subscribed) { if (it->second.subscribed) {
RAY_CHECK_OK( RAY_CHECK_OK(
task_lease_pubsub_.CancelNotifications(JobID::nil(), task_id, client_id_)); task_lease_pubsub_.CancelNotifications(JobID::nil(), task_id, client_id_));
} }
listening_tasks_.erase(it);
} }
} }

View file

@ -26,4 +26,13 @@ inline int64_t current_sys_time_ms() {
return ms_since_epoch.count(); return ms_since_epoch.count();
} }
/// Translate a boost error code into a ray::Status.
///
/// \param error The boost error code to translate.
/// \return ray::Status::OK() when the code's value equals
/// boost::system::errc::success; otherwise ray::Status::IOError carrying the
/// message reported by the error code itself.
inline ray::Status boost_to_ray_status(const boost::system::error_code &error) {
  if (error.value() == boost::system::errc::success) {
    return ray::Status::OK();
  }
  // Use the error code's own message rather than strerror(): the numeric
  // value is only meaningful within the code's category, so handing it to
  // strerror() can mislabel errors that do not come from the system category
  // with an unrelated errno string.
  return ray::Status::IOError(error.message());
}
#endif // RAY_UTIL_UTIL_H #endif // RAY_UTIL_UTIL_H