mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00
[AIR] Split train dataset in HuggingFaceTrainer
(#28170)
https://github.com/ray-project/ray/pull/25428 inadvertently turned off train dataset splitting for the `HuggingFaceTrainer`, which meant it wasn't actually running in a data-parallel fashion. This PR fixes that. Signed-off-by: Antoni Baum <antoni.baum@protonmail.com>
This commit is contained in:
parent
f74f155af4
commit
d7f712d202
1 changed file with 5 additions and 1 deletion
|
@@ -266,7 +266,11 @@ class HuggingFaceTrainer(TorchTrainer):
|
||||||
_checkpoint_manager_cls = _DataParallelSyncingCheckpointManager
|
_checkpoint_manager_cls = _DataParallelSyncingCheckpointManager
|
||||||
|
|
||||||
_dataset_config = {
|
_dataset_config = {
|
||||||
"train": DatasetConfig(fit=True, split=False, required=True),
|
# training dataset should be split by us
|
||||||
|
"train": DatasetConfig(fit=True, split=True, required=True),
|
||||||
|
# do not split eval dataset, as HF has a system to parallelize
|
||||||
|
# evaluation across workers, and it requires each worker
|
||||||
|
# to have the full eval dataset
|
||||||
"evaluation": DatasetConfig(split=False),
|
"evaluation": DatasetConfig(split=False),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue