Mirror of https://gitee.com/ascend/ModelLink.git (synced 2024-12-01 19:39:02 +08:00)

!1622 Optimize the location of the weight-conversion code & fix the reverse dependency
Merge pull request !1622 from sunjunjie/ckpt_position

This commit is contained in:
parent de65e81113
commit 58e8133311
@@ -57,7 +57,7 @@

 ## Public Interface Declaration

-ModelLink has not yet released a wheel package, so there is no formal public API; all functionality is invoked through shell scripts. The five entry scripts are [pretrain_gpt.py](https://gitee.com/ascend/ModelLink/blob/master/pretrain_gpt.py), [inference.py](https://gitee.com/ascend/ModelLink/blob/master/inference.py), [evaluation.py](https://gitee.com/ascend/ModelLink/blob/master/evaluation.py), [preprocess_data.py](https://gitee.com/ascend/ModelLink/blob/master/preprocess_data.py) and [convert_ckpt.py](https://gitee.com/ascend/ModelLink/blob/master/tools/checkpoint/convert_ckpt.py).
+ModelLink has not yet released a wheel package, so there is no formal public API; all functionality is invoked through shell scripts. The five entry scripts are [pretrain_gpt.py](https://gitee.com/ascend/ModelLink/blob/master/pretrain_gpt.py), [inference.py](https://gitee.com/ascend/ModelLink/blob/master/inference.py), [evaluation.py](https://gitee.com/ascend/ModelLink/blob/master/evaluation.py), [preprocess_data.py](https://gitee.com/ascend/ModelLink/blob/master/preprocess_data.py) and [convert_ckpt.py](https://gitee.com/ascend/ModelLink/blob/master/convert_ckpt.py).

 ## Communication Security Hardening
@@ -21,14 +21,17 @@ import sys
 from functools import wraps
 import torch.multiprocessing as mp
+import modellink
+from pretrain_gpt import model_provider
+
+MODULE_ROOT = "modellink.tasks.checkpoint"


 def load_plugin(plugin_type, name):
-    module_name = f"{plugin_type}_{name}"
+    module_name = f"{MODULE_ROOT}.{plugin_type}_{name}"
     try:
         plugin = importlib.import_module(module_name)
     except ModuleNotFoundError:
-        module_name = name
+        module_name = f"{MODULE_ROOT}.{name}"
         try:
             plugin = importlib.import_module(module_name)
         except ModuleNotFoundError:
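With the loader/saver plugins now living inside the package, `load_plugin` resolves names under `modellink.tasks.checkpoint`: first the `<type>_<name>` form, then the bare `<name>` form as a fallback. A minimal sketch of that resolution order (the plugin name in the comment is only an example):

```python
import importlib

MODULE_ROOT = "modellink.tasks.checkpoint"

def load_plugin(plugin_type, name):
    # Preferred form, e.g. modellink.tasks.checkpoint.loader_llama2_hf
    try:
        return importlib.import_module(f"{MODULE_ROOT}.{plugin_type}_{name}")
    except ModuleNotFoundError:
        # Fallback form, e.g. modellink.tasks.checkpoint.llama2_hf
        return importlib.import_module(f"{MODULE_ROOT}.{name}")

# loader = load_plugin("loader", "llama2_hf")  # example call from main()
```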
@@ -76,11 +79,11 @@ def main():
     queue = mp.Queue(maxsize=args.max_queue_size)

     print("Starting saver...")
-    saver_proc = mp.Process(target=saver.save_model_checkpoint, args=(queue, args))
+    saver_proc = mp.Process(target=saver.save_model_checkpoint, args=(model_provider, queue, args))
     saver_proc.start()

     print("Starting loader...")
-    loader.load_checkpoint(queue, args)
+    loader.load_checkpoint(model_provider, queue, args)

     print("Waiting for saver to complete...")
     saver_proc.join()
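`main()` keeps its two-process pipeline: the saver runs as a child process that drains the queue while the loader fills it from the parent process; the only change is that `model_provider` is now passed explicitly to both ends instead of being imported by them. A stripped-down sketch of the pattern, with stand-in loader/saver bodies and an illustrative queue bound:

```python
import torch.multiprocessing as mp

def load_checkpoint(model_provider, queue, args):
    # Producer stand-in: push converted weights, then signal completion.
    queue.put("done")

def save_model_checkpoint(model_provider, queue, args):
    # Consumer stand-in: drain the queue until the loader says "done".
    while queue.get() != "done":
        pass

def convert(model_provider, args=None):
    queue = mp.Queue(maxsize=50)  # illustrative bound for max_queue_size
    saver_proc = mp.Process(target=save_model_checkpoint,
                            args=(model_provider, queue, args))
    saver_proc.start()
    load_checkpoint(model_provider, queue, args)  # runs in this process
    saver_proc.join()
```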
@@ -111,7 +111,7 @@ cd ../../
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -164,7 +164,7 @@ bash examples/llama2/ckpt_convert_llama2_hf2legacy.sh
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -202,7 +202,7 @@ bash examples/llama2/ckpt_convert_llama2_legacy2hf.sh
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -227,7 +227,7 @@ bash examples/llama2/ckpt_convert_llama2_legacy2legacy_lora.sh
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Choose the parallel strategy you need
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --load-dir ./model_from_hf/Aquila-hf/ \
     --save-dir ./model_weights/Aquila-legacy/ \
@@ -1,6 +1,6 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py --model-type GPT \
+python convert_ckpt.py --model-type GPT \
     --loader megatron \
     --saver megatron \
     --save-model-type save_huggingface_llama \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Choose the parallel strategy you need; use --params-dtype bf16 as required
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --load-dir ./model_from_hf/Aquila2-hf/ \
     --save-dir ./model_weights/Aquila2-legacy/ \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Use --params-dtype bf16 as required
-python tools/checkpoint/convert_ckpt.py --model-type GPT \
+python convert_ckpt.py --model-type GPT \
     --loader megatron \
     --saver megatron \
     --save-model-type save_huggingface_llama \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Choose the parallel strategy you need; use --params-dtype bf16 \ as required
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh


-python tools/checkpoint/convert_ckpt.py --model-type GPT \
+python convert_ckpt.py --model-type GPT \
     --loader megatron \
     --saver megatron \
     --save-model-type save_huggingface_llama \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Set the parallel strategy you need
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh


-python tools/checkpoint/convert_ckpt.py --model-type GPT \
+python convert_ckpt.py --model-type GPT \
     --loader megatron \
     --saver megatron \
     --save-model-type save_huggingface_llama \
@@ -2,12 +2,12 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Set the parallel strategy you need; use --params-dtype bf16 as required
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader hf_mcore \
     --saver mg_mcore \
     --target-tensor-parallel-size 8 \
     --target-pipeline-parallel-size 1 \
     --load-dir ./model_from_hf/Bloom-hf/ \
     --save-dir ./model_weights/Bloom-legacy/ \
     --tokenizer-model None \
@@ -1,17 +1,16 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader mg_mcore \
     --saver mg_mcore \
     --save-model-type huggingface \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
-    --save-dir ./model_from_hf/Bloom-hf/  # <-- Fill in the original HF model path; the new weights are saved under ./model_from_hf/Bloom-hf/mg2hg/
-    --load-dir ./model_weights/Bloom-legacy/
     --model-type-hf bloom \
     --add-qkv-bias \
-    --add-dense-bias
+    --add-dense-bias \
+    --load-dir ./model_weights/Bloom-legacy/ \
+    --save-dir ./model_from_hf/Bloom-hf/  # <-- Fill in the original HF model path; the new weights are saved under ./model_from_hf/Bloom-hf/mg2hg/
+    # Use --params-dtype bf16 \ as required
@@ -101,7 +101,7 @@ Hardware configuration for ChatGLM3-6B training:
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader chatglm3_hf \
     --saver megatron \
@@ -121,7 +121,7 @@ Hardware configuration for ChatGLM3-6B training:
 ```shell
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -101,7 +101,7 @@ Here's a hardware summary of pre-training ChatGLM3-6B:
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # convert to ptd weights
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader chatglm3_hf \
     --saver megatron \
@@ -120,7 +120,7 @@ Here's a hardware summary of pre-training ChatGLM3-6B:
 ```shell
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader hf_mcore \
     --saver mg_mcore \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader mg_mcore \
     --saver mg_mcore \
@@ -1,7 +1,7 @@
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py --model-type GPT \
+python convert_ckpt.py --model-type GPT \
     --loader megatron \
     --saver megatron \
     --save-model-type save_huggingface_llama \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Set the parallel strategy
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type-hf gemma \
     --model-type GPT \
     --loader hf_mcore \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type-hf gemma \
     --model-type GPT \
     --loader mg_mcore \
@@ -1,6 +1,6 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format; set the parallel configuration you need; use --num-layers-per-virtual-pipeline-stage 5 and --params-dtype bf16 as required
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Use --num-layers-per-virtual-pipeline-stage 5 \ as required
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader hf_mcore \
     --saver mg_mcore \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --model-type GPT \
     --model-type-hf llama2 \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --model-type-hf llama2 \
     --save-model-type huggingface \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format; set the parallel strategy you need
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --moe-grouped-gemm \
     --model-type-hf deepseek2 \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --moe-grouped-gemm \
     --model-type-hf deepseek2 \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Set the parallel strategy
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --model-type-hf gemma \
     --model-type GPT \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Set the parallel strategy
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --model-type-hf gemma \
     --model-type GPT \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --model-type-hf gemma2 \
     --model-type GPT \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --model-type-hf internlm2 \
     --model-type GPT \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format; set the parallel configuration you need; use --num-layers-per-virtual-pipeline-stage 5 and --params-dtype bf16 as required
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader hf_mcore \
     --saver mg_mcore \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --model-type-hf llama2 \
     --model-type GPT \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Set the parallel parameters you need
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader hf_mcore \
     --saver mg_mcore \
@@ -4,7 +4,7 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Set the parallel configuration you need
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader hf_mcore \
     --saver mg_mcore \
@@ -1,7 +1,7 @@
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader hf_mcore \
     --saver mg_mcore \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Set the weight-conversion parameters you need
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --model-type GPT \
     --loader hf_mcore \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --model-type-hf llama2 \
     --model-type GPT \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --use-mcore-models \
     --model-type-hf llama2 \
     --save-model-type huggingface \
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Set the parallel parameters you need
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -1,7 +1,7 @@
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
     --model-type GPT \
@@ -6,7 +6,7 @@
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader hf_mcore \
     --saver mg_mcore \
@@ -1,4 +1,4 @@
-# Modify line 39 of the modelling_qwen.py file, changing:
+# Modify line 39 of the modellink_qwen.py file, changing:
 # SUPPORT_FP16 = SUPPORT_CUDA and torch.cuda.get_device_capability(0)[0] >= 7
 # to:
 # SUPPORT_FP16 = True
@@ -6,14 +6,14 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader mg_mcore \
     --saver mg_mcore \
     --save-model-type huggingface \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
-    --save-dir ./model_from_hf/qwen-hf/ \ # Fill in the original HF model path; the new weights are saved under ./model_from_hf/qwen-hf/mg2hg/
-    --load-dir ./model_weights/qwen-legacy/ \
     --model-type-hf qwen \
-    --add-qkv-bias
+    --add-qkv-bias \
+    --load-dir ./model_weights/qwen-legacy/ \
+    --save-dir ./model_from_hf/qwen-hf/  # Fill in the original HF model path; the new weights are saved under ./model_from_hf/qwen-hf/mg2hg/
@@ -2,7 +2,7 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Convert the weight format
-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader megatron \
     --saver megatron \
@@ -1,7 +1,7 @@
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py \
+python convert_ckpt.py \
     --model-type GPT \
     --loader llama2_hf \
     --saver megatron \
@@ -1,7 +1,7 @@
 # Modify the set_env.sh path according to your actual environment
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-python tools/checkpoint/convert_ckpt.py --model-type GPT \
+python convert_ckpt.py --model-type GPT \
     --loader megatron \
     --saver megatron \
     --save-model-type save_huggingface_llama \
14  modellink/tasks/checkpoint/__init__.py  Normal file
@@ -0,0 +1,14 @@
+# coding=utf-8
+# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
@@ -19,8 +19,8 @@ import types
 import logging as logger
 import torch
 import transformers
-from models import get_megatron_model
-from models import get_huggingface_model
+from .models import get_megatron_model
+from .models import get_huggingface_model

 logger.basicConfig(format="")
 logger.getLogger().setLevel(logger.INFO)
@@ -300,7 +300,7 @@ def get_message_output_layer(model, md):
     return message


-def _load_checkpoint(queue, args):
+def _load_checkpoint(model_provider, queue, args):
     # Llama-2 requires HF transformers >=4.31.0.
     verify_transformers_version()

@@ -316,7 +316,7 @@ def _load_checkpoint(queue, args):
     args_hf = model_hf.get_args()
     args_hf.moe_grouped_gemm = args.moe_grouped_gemm

-    model_mg = get_megatron_model(args_cmd=args)
+    model_mg = get_megatron_model(model_provider, args_cmd=args)
     model_mg.initialize_megatron_args(args_hf, queue)

     model_mg.set_tensor_model_parallel_world_size(model_mg.args.tensor_model_parallel_size)
@@ -366,9 +366,9 @@
     queue.put("done")


-def load_checkpoint(queue, args):
+def load_checkpoint(model_provider, queue, args):
     try:
-        _load_checkpoint(queue, args)
+        _load_checkpoint(model_provider, queue, args)
     except:
         queue.put("exit")
         raise
@@ -178,10 +178,9 @@ def set_layer_state(args, model, hf_model, layer_idx):
     layer.post_attention_norm.weight.data.copy_(hf_layer.post_attention_layernorm.weight)


-def load_checkpoint_to_model(args):
+def load_checkpoint_to_model(model_provider, args):
     '''Set model params.'''

-    from pretrain_gpt import model_provider
     from transformers import AutoModelForCausalLM

     # Load Huggingface model.
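This hunk is the core of the reverse-dependency fix: the loader previously imported `model_provider` from the top-level `pretrain_gpt` entry script, so library code depended on a script above it. After the change the provider is injected as an argument, a plain dependency-injection move. A minimal sketch (the stub provider is illustrative, not the real Megatron one):

```python
def load_checkpoint_to_model(model_provider, args):
    """The loader now receives the provider instead of importing it."""
    return model_provider(pre_process=True, post_process=True)

def stub_model_provider(pre_process=True, post_process=True):
    # Illustrative stand-in for pretrain_gpt.model_provider.
    return {"pre_process": pre_process, "post_process": post_process}

model = load_checkpoint_to_model(stub_model_provider, args=None)
```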
@@ -199,7 +198,7 @@ def load_checkpoint_to_model(args):
     return model


-def _load_checkpoint(queue, args):
+def _load_checkpoint(model_provider, queue, args):
     # Llama-2 requires HF transformers >=4.31.0.
     verify_transformers_version()

@@ -329,7 +328,7 @@ def _load_checkpoint(queue, args):
     # Get first pipe stage.
     mpu.set_tensor_model_parallel_rank(0)
     mpu.set_pipeline_model_parallel_rank(0)
-    model = load_checkpoint_to_model(margs)
+    model = load_checkpoint_to_model(model_provider, margs)

     queue.put(md)

@@ -433,9 +432,9 @@ def _load_checkpoint(queue, args):
     queue.put("done")


-def load_checkpoint(queue, args):
+def load_checkpoint(model_provider, queue, args):
     try:
-        _load_checkpoint(queue, args)
+        _load_checkpoint(model_provider, queue, args)
     except:
         queue.put("exit")
         raise
@@ -133,10 +133,8 @@ def _load_checkpoint(queue, args):

     # Determine how to make our models
     if args.model_type == 'GPT':
-        from pretrain_gpt import model_provider
         margs.model_type = ModelType.encoder_or_decoder
     elif args.model_type == 'BERT':
-        from pretrain_bert import model_provider
         margs.model_type = ModelType.encoder_or_decoder
     else:
         raise Exception(f'unrecognized model type: {args.model_type}')
@@ -402,9 +400,9 @@ def _load_checkpoint(queue, args):
     queue.put("done")


-def load_checkpoint(queue, args):
+def load_checkpoint(model_provider, queue, args):
    try:
-        _load_checkpoint(queue, args)
+        _load_checkpoint(model_provider, queue, args)
     except:
         queue.put("exit")
         raise
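With the per-branch imports deleted, the `GPT`/`BERT` branches only set `margs.model_type`; the provider arrives from the caller, inverting the dependency direction. A compact, runnable sketch of the idea (the lambda provider and function name are stand-ins, not code from this commit):

```python
# Previously each branch did "from pretrain_gpt import model_provider"
# (or pretrain_bert), making library code depend on entry scripts.
# Now the caller injects the provider:
def make_model(model_provider, model_type):
    if model_type not in ('GPT', 'BERT'):
        raise ValueError(f'unrecognized model type: {model_type}')
    return model_provider(pre_process=True, post_process=True)

# Example wiring from the entry-script side (stand-in provider):
gpt_provider = lambda pre_process=True, post_process=True: "gpt-model"
print(make_model(gpt_provider, 'GPT'))
```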
@@ -7,7 +7,7 @@ import sys
 import types
 import logging as logger
 import torch
-from models import get_megatron_model
+from .models import get_megatron_model

 logger.basicConfig(format="")
 logger.getLogger().setLevel(logger.INFO)
@@ -315,7 +315,7 @@ def to_detach(message):
             message[key] = value.detach()


-def _load_checkpoint(queue, args):
+def _load_checkpoint(model_provider, queue, args):

     # Search in directory above this
     sys.path.append(os.path.abspath(
@@ -324,7 +324,7 @@ def _load_checkpoint(queue, args):
     if args.megatron_path is not None:
         sys.path.insert(0, args.megatron_path)

-    model_mg = get_megatron_model(args_cmd=args)
+    model_mg = get_megatron_model(model_provider, args_cmd=args)
     model_mg.initialize_megatron_args(queue=queue, loader_megatron=True)

     model_mg.set_tensor_model_parallel_world_size(model_mg.args.tensor_model_parallel_size)
@@ -384,9 +384,9 @@ def _load_checkpoint(queue, args):
     queue.put("done")


-def load_checkpoint(queue, args):
+def load_checkpoint(model_provider, queue, args):
     try:
-        _load_checkpoint(queue, args)
+        _load_checkpoint(model_provider, queue, args)
     except:
         queue.put("exit")
         raise
@@ -18,7 +18,6 @@ from megatron.training.checkpointing import load_args_from_checkpoint
 from megatron.training.global_vars import set_args
 from megatron.training.checkpointing import load_checkpoint
 from megatron.core import tensor_parallel
-from pretrain_gpt import model_provider
 from modellink.utils import parse_args
 from modellink.training import model_provider_func_wrapper
 from modellink.checkpointing import load_checkpoint_wrapper
@@ -26,7 +25,6 @@ from modellink.checkpointing import load_checkpoint_wrapper
 logger.basicConfig(format="")
 logger.getLogger().setLevel(logger.INFO)

-model_provider = model_provider_func_wrapper(model_provider)
 load_checkpoint = load_checkpoint_wrapper(load_checkpoint)


@@ -381,7 +379,7 @@ class HuggingfaceModel(ModelBase):
         else:
             load_dir = self.args_cmd.load_dir
         self.module = [AutoModelForCausalLM.from_pretrained(load_dir, device_map=device_map, trust_remote_code=trust_remote_code)]
-        if self.args.torch_dtype in ["float16", "bfloat16"]:
+        if hasattr(self.args, "torch_dtype") and self.args.torch_dtype in ["float16", "bfloat16"]:
             self.module[0] = self.module[0].to(eval(f'torch.{self.args.torch_dtype}'))

     def get_module_mapping(self):
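The added `hasattr` guard makes the dtype cast tolerate argument namespaces that simply lack `torch_dtype`, instead of raising `AttributeError`. A tiny illustration of the difference (the namespace contents are made up):

```python
from types import SimpleNamespace

args = SimpleNamespace()  # no torch_dtype attribute at all

# The old condition would raise AttributeError here;
# the guarded form just evaluates to False:
wants_cast = hasattr(args, "torch_dtype") and args.torch_dtype in ["float16", "bfloat16"]
print(wants_cast)  # False
```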
@@ -639,8 +637,9 @@ class HuggingfaceModel(ModelBase):


 class MegatronModel(ModelBase):
-    def __init__(self, args_cmd, md=None):
+    def __init__(self, model_provider, args_cmd, md=None):
         super(MegatronModel, self).__init__(args_cmd)
+        self.model_provider = model_provider_func_wrapper(model_provider)
         self.md = md
         self.pp_stage_cache = []

@@ -846,7 +845,7 @@ class MegatronModel(ModelBase):
             pre_process = mpu.is_pipeline_first_stage()
             post_process = mpu.is_pipeline_last_stage()
             expert_parallel_size = mpu.get_expert_model_parallel_world_size()
-            this_model = model_provider(
+            this_model = self.model_provider(
                 pre_process=pre_process,
                 post_process=post_process
             ).to(self.args.params_dtype)
@@ -854,7 +853,7 @@ class MegatronModel(ModelBase):
         else:
             pre_process = mpu.is_pipeline_first_stage()
             post_process = mpu.is_pipeline_last_stage()
-            model_ = [model_provider(pre_process, post_process).to(self.args.params_dtype)]
+            model_ = [self.model_provider(pre_process, post_process).to(self.args.params_dtype)]
             self.args.consumed_train_samples = 0
             self.args.consumed_valid_samples = 0
             if from_pretrained:
|
||||
|
||||
|
||||
class MegatronLegacyModel(MegatronModel):
|
||||
def __init__(self, args_cmd, md=None):
|
||||
super(MegatronLegacyModel, self).__init__(args_cmd, md)
|
||||
def __init__(self, model_provider, args_cmd, md=None):
|
||||
super(MegatronLegacyModel, self).__init__(model_provider, args_cmd, md)
|
||||
|
||||
def get_module_mapping(self):
|
||||
module_layer = "language_model.encoder.layers[layer_idx]."
|
||||
@ -1042,8 +1041,8 @@ class MegatronLegacyModel(MegatronModel):
|
||||
|
||||
|
||||
class MegatronMCoreModel(MegatronModel):
|
||||
def __init__(self, args_cmd, md=None):
|
||||
super(MegatronMCoreModel, self).__init__(args_cmd, md)
|
||||
def __init__(self, model_provider, args_cmd, md=None):
|
||||
super(MegatronMCoreModel, self).__init__(model_provider, args_cmd, md)
|
||||
|
||||
def get_module_mapping(self):
|
||||
module_layer = "decoder.layers[layer_idx]."
|
||||
@ -1098,11 +1097,11 @@ class MegatronMCoreModel(MegatronModel):
|
||||
"layers_mlp_experts_weight2"] = module_layer + "mlp.experts.weight2"
|
||||
|
||||
|
||||
def get_megatron_model(args_cmd, md=None):
|
||||
def get_megatron_model(model_provider, args_cmd, md=None):
|
||||
if args_cmd.use_mcore_models:
|
||||
return MegatronMCoreModel(args_cmd=args_cmd, md=md)
|
||||
return MegatronMCoreModel(model_provider, args_cmd=args_cmd, md=md)
|
||||
else:
|
||||
return MegatronLegacyModel(args_cmd=args_cmd, md=md)
|
||||
return MegatronLegacyModel(model_provider, args_cmd=args_cmd, md=md)
|
||||
|
||||
|
||||
def get_huggingface_model(args_cmd):
|
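Both subclasses and the `get_megatron_model` factory now take the provider as their first positional argument and forward it to `MegatronModel`, which wraps it once with `model_provider_func_wrapper`. A condensed, self-contained sketch of this shape (the wrapper and classes below are stubs, not the real implementations):

```python
def model_provider_func_wrapper(fn):
    # Stub for modellink.training.model_provider_func_wrapper.
    return fn

class MegatronModel:
    def __init__(self, model_provider, args_cmd, md=None):
        # Wrapped once here; per-stage construction later calls
        # self.model_provider(pre_process=..., post_process=...).
        self.model_provider = model_provider_func_wrapper(model_provider)
        self.md = md

class MegatronMCoreModel(MegatronModel):
    pass

class MegatronLegacyModel(MegatronModel):
    pass

def get_megatron_model(model_provider, args_cmd, md=None):
    cls = MegatronMCoreModel if args_cmd.use_mcore_models else MegatronLegacyModel
    return cls(model_provider, args_cmd=args_cmd, md=md)

# Usage mirrors the diff: model_mg = get_megatron_model(model_provider, args_cmd=args)
```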
@@ -344,7 +344,7 @@ def vocab_padding(md, margs, orig_tensor, _vocab_size_with_padding):
     return full_word_embed


-def save_model_checkpoint(queue, args):
+def save_model_checkpoint(model_provider, queue, args):

     # Search in directory above this
     sys.path.append(os.path.abspath(
@@ -503,10 +503,8 @@ def save_model_checkpoint(queue, args):

     # Determine how to make our models
     if md.model_type == 'GPT':
-        from pretrain_gpt import model_provider
         margs.model_type = ModelType.encoder_or_decoder
     elif md.model_type == 'BERT':
-        from pretrain_bert import model_provider
         margs.model_type = ModelType.encoder_or_decoder
     else:
         raise Exception(f'unrecognized model type: {args.model_type}')
@@ -20,7 +20,7 @@ import logging as logger
 import torch
 from megatron.training.checkpointing import save_checkpoint
 from megatron.core import mpu
-from models import get_megatron_model
+from .models import get_megatron_model

 logger.basicConfig(format="")
 logger.getLogger().setLevel(logger.INFO)
@@ -416,7 +416,7 @@ def save_model(model_mg, md, **kwargs):

 def save_huggingface(args, model):
     '''Set model params.'''
-    from models import get_huggingface_model
+    from .models import get_huggingface_model
     model_hf = get_huggingface_model(args)
     model_hf.get_modules_from_pretrained()
     args_cmd = model_hf.get_args_cmd()
@@ -428,7 +428,7 @@ def save_huggingface(args, model):
     model_hf.get_model_item().save_pretrained(save_dir)


-def save_model_checkpoint(queue, args):
+def save_model_checkpoint(model_provider, queue, args):
     # Search in directory above this
     sys.path.append(os.path.abspath(
         os.path.join(os.path.dirname(__file__),
@@ -470,7 +470,7 @@ def save_model_checkpoint(queue, args):
     os.environ["WORLD_SIZE"] = f'{args.target_tensor_parallel_size * args.target_pipeline_parallel_size}'

     # We want all arguments to come from us
-    model_mg = get_megatron_model(args_cmd=args, md=md)
+    model_mg = get_megatron_model(model_provider=model_provider, args_cmd=args, md=md)
     model_mg.initialize_megatron_args(queue=queue, saver_megatron=True)

     # Make models for first pipeline stage and fill in embeddings
@@ -30,7 +30,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(sys.argv[6], "iter_0000001")
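Across the test files two equivalent idioms climb from the test file to the repository root before joining the relocated script name: chained `.parent` and `Path.parents` indexing. They are interchangeable, as this small runnable check shows (the path is illustrative):

```python
from pathlib import Path

p = Path("/repo/tests/ut/ckpt/test_convert.py")  # illustrative location
# Four chained .parent hops...
a = p.parent.parent.parent.parent
# ...equal parents[3], since parents[0] is the immediate parent.
b = p.parents[3]
assert a == b == Path("/repo")
```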
@@ -30,7 +30,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(sys.argv[12], "iter_0000001")
@@ -30,7 +30,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(sys.argv[12], "iter_0000001")
@@ -23,7 +23,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(sys.argv[10], "iter_0000001")
@@ -37,7 +37,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         """
         # run convert weight
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)

@@ -31,7 +31,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(self.config.convert_ckpt_param[9], "iter_0000001")
@@ -30,7 +30,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(self.config.convert_ckpt_param[11], "iter_0000001")
@@ -31,7 +31,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(self.config.convert_ckpt_param[11], "iter_0000001")
@@ -30,7 +30,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(self.config.convert_ckpt_param[11], "iter_0000001")
@@ -31,7 +31,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(self.config.convert_ckpt_param[9], "iter_0000001")
@@ -30,7 +30,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(self.config.convert_ckpt_param[11], "iter_0000001")
@@ -37,7 +37,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(self.config.convert_ckpt_param[9], "iter_0000001")
@@ -31,7 +31,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(self.config.convert_ckpt_param[9], "iter_0000001")
@@ -117,7 +117,7 @@ class TestConvertCkptFromHuggingface:
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parents[3]
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = [
             "--model-type", args.model_type,
             "--loader", args.loader,
@@ -143,7 +143,7 @@ class TestConvertCkptFromHuggingface:
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parents[3]
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = [
             "--model-type", args.model_type,
             "--loader", args.loader,
@@ -168,7 +168,7 @@ class TestConvertCkptFromHuggingface:
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parents[3]
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = [
             "--model-type", args.model_type,
             "--loader", args.loader,
@@ -191,7 +191,7 @@ class TestConvertCkptFromHuggingface:
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parents[3]
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = [
             "--model-type", args.model_type,
             "--loader", args.loader,
@@ -216,7 +216,7 @@ class TestConvertCkptFromHuggingface:
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = [
             "--model-type", args.model_type,
             "--loader", args.loader,
@@ -255,7 +255,7 @@ class TestConvertCkptFromHuggingface:
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = [
             "--model-type", args.model_type,
             "--loader", args.loader,
@@ -290,7 +290,7 @@ class TestConvertCkptFromHuggingface:
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parents[3]
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = [
             "--model-type", args.model_type,
             "--loader", args.loader,
@@ -24,7 +24,7 @@ class TestConvertCkptFromMegatron:
         args = CovertCkptFromMegatronArgs()

         base_dir = Path(__file__).absolute().parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = [
             "--model-type", args.model_type,
             "--loader", args.loader,
@@ -35,7 +35,7 @@ class TestConvertCkptFromHuggingface:
         dk = 128

         base_dir = Path(__file__).absolute().parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
+        file_path = os.path.join(base_dir, "convert_ckpt.py")
         arguments = [
             "--model-type", args.model_type,
             "--loader", args.loader,