mirror of
https://github.com/vale981/ray
synced 2025-03-08 19:41:38 -05:00
[Core] Put raylet ip's in resource usage report (#13871)
* . * done? Co-authored-by: Alex Wu <alex@anyscale.com>
This commit is contained in:
parent
79310452e7
commit
f14171ced9
3 changed files with 29 additions and 0 deletions
|
@ -7,6 +7,7 @@ import time
|
||||||
|
|
||||||
import ray
|
import ray
|
||||||
import ray.ray_constants
|
import ray.ray_constants
|
||||||
|
import ray.services
|
||||||
import ray.test_utils
|
import ray.test_utils
|
||||||
|
|
||||||
from ray._raylet import GlobalStateAccessor
|
from ray._raylet import GlobalStateAccessor
|
||||||
|
@ -332,6 +333,31 @@ def test_backlog_report(shutdown_only):
|
||||||
global_state_accessor.disconnect()
|
global_state_accessor.disconnect()
|
||||||
|
|
||||||
|
|
||||||
|
def test_heartbeat_ip(shutdown_only):
|
||||||
|
cluster = ray.init(
|
||||||
|
num_cpus=1, _system_config={
|
||||||
|
"report_worker_backlog": True,
|
||||||
|
})
|
||||||
|
global_state_accessor = GlobalStateAccessor(
|
||||||
|
cluster["redis_address"], ray.ray_constants.REDIS_DEFAULT_PASSWORD)
|
||||||
|
global_state_accessor.connect()
|
||||||
|
|
||||||
|
self_ip = ray.services.get_node_ip_address()
|
||||||
|
|
||||||
|
def self_ip_is_set():
|
||||||
|
message = global_state_accessor.get_all_resource_usage()
|
||||||
|
if message is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
resource_usage = ray.gcs_utils.ResourceUsageBatchData.FromString(
|
||||||
|
message)
|
||||||
|
resources_data = resource_usage.batch[0]
|
||||||
|
return resources_data.node_manager_address == self_ip
|
||||||
|
|
||||||
|
ray.test_utils.wait_for_condition(self_ip_is_set, timeout=2)
|
||||||
|
global_state_accessor.disconnect()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import pytest
|
import pytest
|
||||||
import sys
|
import sys
|
||||||
|
|
|
@ -327,6 +327,8 @@ message ResourcesData {
|
||||||
ResourceLoad resource_load_by_shape = 7;
|
ResourceLoad resource_load_by_shape = 7;
|
||||||
// Whether this node manager is requesting global GC.
|
// Whether this node manager is requesting global GC.
|
||||||
bool should_global_gc = 8;
|
bool should_global_gc = 8;
|
||||||
|
// IP address of the node.
|
||||||
|
string node_manager_address = 9;
|
||||||
}
|
}
|
||||||
|
|
||||||
message ResourceUsageBatchData {
|
message ResourceUsageBatchData {
|
||||||
|
|
|
@ -456,6 +456,7 @@ void NodeManager::Heartbeat() {
|
||||||
void NodeManager::ReportResourceUsage() {
|
void NodeManager::ReportResourceUsage() {
|
||||||
auto resources_data = std::make_shared<rpc::ResourcesData>();
|
auto resources_data = std::make_shared<rpc::ResourcesData>();
|
||||||
resources_data->set_node_id(self_node_id_.Binary());
|
resources_data->set_node_id(self_node_id_.Binary());
|
||||||
|
resources_data->set_node_manager_address(initial_config_.node_manager_address);
|
||||||
// Update local chche from gcs remote cache, this is needed when gcs restart.
|
// Update local chche from gcs remote cache, this is needed when gcs restart.
|
||||||
// We should always keep the cache view consistent.
|
// We should always keep the cache view consistent.
|
||||||
cluster_resource_scheduler_->UpdateLastResourceUsage(
|
cluster_resource_scheduler_->UpdateLastResourceUsage(
|
||||||
|
|
Loading…
Add table
Reference in a new issue