Fix resource bookkeeping bug with acquiring unknown resource. (#4945)

This commit is contained in:
Robert Nishihara 2019-06-07 21:07:27 -07:00
parent a218a14c92
commit f6520cb5b1
3 changed files with 26 additions and 9 deletions

View file

@ -1754,7 +1754,7 @@ def test_multi_resource_constraints(shutdown_only):
def g(n):
time.sleep(n)
time_buffer = 0.5
time_buffer = 2
start_time = time.time()
ray.get([f.remote(0.5), g.remote(0.5)])
@ -1878,13 +1878,23 @@ def test_gpu_ids(shutdown_only):
def test_zero_cpus(shutdown_only):
    """Check that zero-CPU tasks and actors run on a node started with 0 CPUs."""
    ray.init(num_cpus=0)

    # We should be able to execute a task that requires 0 CPU resources.
    @ray.remote(num_cpus=0)
    def f():
        return 1

    # The task should be able to execute and hand back its result.
    assert ray.get(f.remote()) == 1

    # We should be able to create an actor that requires 0 CPU resources.
    @ray.remote(num_cpus=0)
    class Actor(object):
        def method(self):
            pass

    # Calling a method and fetching its result shows the actor was placed
    # and is able to run.
    a = Actor.remote()
    x = a.method.remote()
    ray.get(x)
def test_zero_cpus_actor(ray_start_cluster):
cluster = ray_start_cluster

View file

@ -1814,9 +1814,9 @@ bool NodeManager::AssignTask(const Task &task) {
cluster_resource_map_[my_client_id].Acquire(spec.GetRequiredResources());
if (spec.IsActorCreationTask()) {
// Check that we are not placing an actor creation task on a node with 0 CPUs.
RAY_CHECK(cluster_resource_map_[my_client_id].GetTotalResources().GetResourceMap().at(
kCPU_ResourceLabel) != 0);
// Check that the actor's placement resource requirements are satisfied.
RAY_CHECK(spec.GetRequiredPlacementResources().IsSubset(
cluster_resource_map_[my_client_id].GetTotalResources()));
worker->SetLifetimeResourceIds(acquired_resources);
} else {
worker->SetTaskResourceIds(acquired_resources);

View file

@ -76,7 +76,11 @@ ResourceSet::ResourceSet() {}
/// Construct a ResourceSet from a map of resource labels to fractional
/// quantities.
///
/// Every quantity in the map must be strictly positive; any other value is
/// rejected by a RAY_CHECK that names the offending resource.
///
/// \param resource_map Mapping from resource label to its quantity.
ResourceSet::ResourceSet(
    const std::unordered_map<std::string, FractionalResourceQuantity> &resource_map)
    : resource_capacity_(resource_map) {
  for (auto const &resource_pair : resource_map) {
    // Report which resource is invalid, mirroring the message style used when
    // subtracting an unknown resource.
    RAY_CHECK(resource_pair.second > 0)
        << "Resource " << resource_pair.first << " has non-positive capacity "
        << resource_pair.second.ToDouble();
  }
}
ResourceSet::ResourceSet(const std::unordered_map<std::string, double> &resource_map) {
for (auto const &resource_pair : resource_map) {
@ -169,7 +173,8 @@ void ResourceSet::SubtractResourcesStrict(const ResourceSet &other) {
const std::string &resource_label = resource_pair.first;
const FractionalResourceQuantity &resource_capacity = resource_pair.second;
RAY_CHECK(resource_capacity_.count(resource_label) == 1)
<< "Attempt to acquire unknown resource: " << resource_label;
<< "Attempt to acquire unknown resource: " << resource_label << " capacity "
<< resource_capacity.ToDouble();
resource_capacity_[resource_label] -= resource_capacity;
// Ensure that quantity is positive. Note, we have to have the check before
@ -233,8 +238,10 @@ FractionalResourceQuantity ResourceSet::GetResource(
/// Return a ResourceSet that holds only this set's CPU quantity.
///
/// The CPU entry is added only when its quantity is greater than zero, so the
/// returned set has no CPU entry at all (rather than a zero-valued one) when
/// this set has no CPU capacity.
///
/// \return A ResourceSet with at most one entry, keyed by kCPU_ResourceLabel.
const ResourceSet ResourceSet::GetNumCpus() const {
  ResourceSet cpu_resource_set;
  const FractionalResourceQuantity cpu_quantity = GetResource(kCPU_ResourceLabel);
  // Do not store a zero quantity: an unconditional assignment here would
  // create the entry regardless and make this guard ineffective.
  if (cpu_quantity > 0) {
    cpu_resource_set.resource_capacity_[kCPU_ResourceLabel] = cpu_quantity;
  }
  return cpu_resource_set;
}