diff --git a/python/ray/autoscaler/autoscaler.py b/python/ray/autoscaler/autoscaler.py index 8eba80aaf..b30040e73 100644 --- a/python/ray/autoscaler/autoscaler.py +++ b/python/ray/autoscaler/autoscaler.py @@ -158,7 +158,16 @@ class LoadMetrics(object): def update(self, ip, static_resources, dynamic_resources): self.static_resources_by_ip[ip] = static_resources - self.dynamic_resources_by_ip[ip] = dynamic_resources + # We are not guaranteed to have a corresponding dynamic resource for + # every static resource because dynamic resources are based on the + # available resources in the heartbeat, which does not exist if it is + # zero. Thus, we have to update dynamic resources here. + dynamic_resources_update = dynamic_resources.copy() + for resource_name, capacity in static_resources.items(): + if resource_name not in dynamic_resources_update: + dynamic_resources_update[resource_name] = 0.0 + self.dynamic_resources_by_ip[ip] = dynamic_resources_update + now = time.time() if ip not in self.last_used_time_by_ip or \ static_resources != dynamic_resources: diff --git a/python/ray/monitor.py b/python/ray/monitor.py index 49b337c2d..ded86611e 100644 --- a/python/ray/monitor.py +++ b/python/ray/monitor.py @@ -112,7 +112,7 @@ class Monitor(object): for j in range(message.BatchLength()): heartbeat_message = message.Batch(j) - num_resources = heartbeat_message.ResourcesAvailableLabelLength() + num_resources = heartbeat_message.ResourcesTotalLabelLength() static_resources = {} dynamic_resources = {} for i in range(num_resources):