!1962 修复llama2-7b训练16步左右oom

Merge pull request !1962 from wucong/fix_oom
This commit is contained in:
wucong 2024-11-28 14:16:08 +00:00 committed by i-robot
parent 7e4c4831e3
commit 3f52981fb0

View File

@@ -2,6 +2,7 @@
 export CUDA_DEVICE_MAX_CONNECTIONS=1
 export NPU_ASD_ENABLE=0
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
 GPUS_PER_NODE=8
 MASTER_ADDR=localhost