diff --git a/fastNLP/core/callbacks/topk_saver.py b/fastNLP/core/callbacks/topk_saver.py
index e2b0eb29..1ceea82e 100644
--- a/fastNLP/core/callbacks/topk_saver.py
+++ b/fastNLP/core/callbacks/topk_saver.py
@@ -31,7 +31,7 @@ class Saver:
             folder = Path.cwd()
         folder = Path(folder)
         if not folder.exists():
-            raise NotADirectoryError(f"Path '{folder.absolute()}' is not existed!")
+            folder.mkdir(parents=True, exist_ok=True)
         elif folder.is_file():
             raise ValueError("Parameter `folder` should be a directory instead of a file.")
 
diff --git a/fastNLP/core/controllers/loops/train_batch_loop.py b/fastNLP/core/controllers/loops/train_batch_loop.py
index 7dbe9775..cfb54111 100644
--- a/fastNLP/core/controllers/loops/train_batch_loop.py
+++ b/fastNLP/core/controllers/loops/train_batch_loop.py
@@ -36,7 +36,8 @@ class TrainBatchLoop(Loop):
                 raise e
 
             trainer.on_train_batch_begin(batch, indices)
-            self.batch_step_fn(trainer, batch)
+            with trainer.get_no_sync_context():  # 在多卡的时候可能需要关闭 sync
+                self.batch_step_fn(trainer, batch)
             trainer.global_forward_batches += 1
             trainer.batch_idx_in_epoch += 1
 
diff --git a/fastNLP/core/controllers/trainer.py b/fastNLP/core/controllers/trainer.py
index 779d3d83..3f346b56 100644
--- a/fastNLP/core/controllers/trainer.py
+++ b/fastNLP/core/controllers/trainer.py
@@ -696,8 +696,9 @@ class Trainer(TrainerEventTrigger):
         self.on_before_backward(outputs)
         loss = self.extract_loss_from_outputs(outputs)
         loss = loss / self.accumulation_steps
-        with self.get_no_sync_context():
-            self.driver.backward(loss)
+        # with self.get_no_sync_context():
+        #     self.driver.backward(loss)
+        self.driver.backward(loss)
         self.on_after_backward()
 
     def zero_grad(self):