[Test] Fix broken test_base_trainer (#27855)
The test was written incorrectly. The root cause was that the trainer and the worker each require 1 CPU, meaning the pg requires {CPU: 1} * 2 resources. And when the max fraction is 0.001, we only allow up to 1 CPU for the pg, so we cannot schedule the requested pgs in any case.
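For illustration, a minimal sketch of the resource math described above (not part of the commit). It assumes Ray ~2.0, where ScalingConfig is importable from ray.air.config and accepts the experimental _max_cpu_fraction_per_node and trainer_resources arguments used by this test; the 4-CPU node comes from the ray_start_4_cpus fixture.

# Sketch only: shows why the old config could not be scheduled and why the fix works.
from ray.air.config import ScalingConfig

# Broken setup: the trainer and the worker each default to 1 CPU, so the
# placement group asks for {"CPU": 1} * 2. Per the commit description, with a
# 4-CPU node and a small _max_cpu_fraction_per_node only about 1 CPU per node
# may go to the placement group, so the 2-CPU request can never be scheduled.
broken = ScalingConfig(num_workers=1, _max_cpu_fraction_per_node=0.01)

# Fixed setup: the trainer explicitly requests 0 CPUs, so the placement group
# only needs {"CPU": 1} for the worker and fits under the per-node cap.
fixed = ScalingConfig(
    num_workers=1, _max_cpu_fraction_per_node=0.01, trainer_resources={"CPU": 0}
)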
parent a2c168cd6d
commit d654636bfc
1 changed file with 7 additions and 3 deletions
@@ -183,9 +183,13 @@ def test_reserved_cpus(ray_start_4_cpus):
     )
     tune.run(trainer.as_trainable(), num_samples=4)
 
-    # TODO(ekl/sang) this currently fails.
-    # Check we don't deadlock with too low of a fraction either.
-    scale_config = ScalingConfig(num_workers=1, _max_cpu_fraction_per_node=0.01)
+    # Needs to request 0 CPU for the trainer otherwise the pg
+    # will require {CPU: 1} * 2 resources, which means
+    # _max_cpu_fraction_per_node == 0.01 cannot schedule it
+    # (because this only allows to have 1 CPU for pg per node).
+    scale_config = ScalingConfig(
+        num_workers=1, _max_cpu_fraction_per_node=0.01, trainer_resources={"CPU": 0}
+    )
     trainer = DummyTrainer(
         train_loop,
         scaling_config=scale_config,