!1680 Switch the aquila, baichuan and baichuan2 weight conversion scripts to the new framework

Merge pull request !1680 from 曲玥泽/master
Authored by 曲玥泽 on 2024-09-19 06:52:00 +00:00, committed by i-robot
parent cd014cfde1
commit 86b7a099ec
8 changed files with 31 additions and 19 deletions

@@ -72,7 +72,7 @@ def main():
                         help='Do not perform checking on the name and ordering of weights',
                         dest='checking')
     parser.add_argument('--model-type-hf', type=str, default="llama2",
-                        choices=['baichuan2', 'llama2', 'mixtral', 'chatglm3', 'gemma', 'gemma2', 'bloom', 'qwen', 'internlm2', 'deepseek2', 'minicpm', 'minicpm-moe'],
+                        choices=['baichuan', 'baichuan2', 'llama2', 'mixtral', 'chatglm3', 'gemma', 'gemma2', 'bloom', 'qwen', 'internlm2', 'deepseek2', 'minicpm', 'minicpm-moe'],
                         help='model type of huggingface')
     known_args, _ = parser.parse_known_args()
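A note on the front-end parsing shown above: parse_known_args() lets convert_ckpt.py validate --model-type-hf while passing loader- and saver-specific flags (such as --w-pack below) through to a later parsing pass. A minimal, self-contained illustration of that behavior; the argument mirrors the diff, everything else here is illustrative:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model-type-hf', type=str, default="llama2",
                    choices=['baichuan', 'baichuan2', 'llama2'])  # list trimmed

# parse_known_args() tolerates flags this parser does not define and
# returns them untouched, so a loader-specific parser can consume them later.
known_args, remaining = parser.parse_known_args(
    ['--model-type-hf', 'baichuan', '--w-pack', 'True'])
print(known_args.model_type_hf)  # baichuan
print(remaining)                 # ['--w-pack', 'True']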

@@ -6,7 +6,8 @@ python convert_ckpt.py \
     --model-type GPT \
     --load-dir ./model_from_hf/Aquila-hf/ \
     --save-dir ./model_weights/Aquila-legacy/ \
-    --loader llama2_hf \
-    --saver megatron \
+    --load-model-type hf \
+    --save-model-type mg \
     --target-tensor-parallel-size 8 \
-    --tokenizer-model ./model_from_hf/Aquila-hf/tokenizer.json
+    --tokenizer-model ./model_from_hf/Aquila-hf/tokenizer.json \
+    --model-type-hf llama2

@@ -1,9 +1,9 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 python convert_ckpt.py --model-type GPT \
-    --loader megatron \
-    --saver megatron \
-    --save-model-type save_huggingface_llama \
+    --load-model-type mg \
+    --save-model-type hf \
+    --model-type-hf llama2 \
     --load-dir ./model_weights/Aquila-legacy/ \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \

@@ -4,11 +4,12 @@
 # Choose the parallel strategy you need; add --params-dtype bf16 \ as required
 python convert_ckpt.py \
     --model-type GPT \
-    --loader llama2_hf \
-    --saver megatron \
+    --load-model-type hf \
+    --save-model-type mg \
     --target-tensor-parallel-size 8 \
     --target-pipeline-parallel-size 1 \
     --load-dir ./model_from_hf/Baichuan-hf/ \
     --save-dir ./model_weights/Baichuan-legacy/ \
     --tokenizer-model ./model_from_hf/Baichuan-hf/tokenizer.model \
-    --w-pack True
+    --w-pack True \
+    --model-type-hf baichuan
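Why --w-pack is needed at all: Baichuan's HuggingFace checkpoints fuse the Q, K and V projections of each layer into a single W_pack linear (the key the new config entry maps at the end of this change), so the converter has to split that tensor back into three. A minimal sketch of that split, assuming the standard [3 * hidden, hidden] layout of Baichuan-7B; the helper name and shapes are illustrative, not the repo's actual code:

import torch

def split_w_pack(w_pack: torch.Tensor, hidden_size: int):
    # W_pack.weight concatenates the Q, K and V projection weights
    # along dim 0, in that order; split it back into three equal parts.
    q, k, v = torch.split(w_pack, hidden_size, dim=0)
    return q, k, v

# Example with Baichuan-7B sizes (hidden_size = 4096):
w_pack = torch.randn(3 * 4096, 4096)
q, k, v = split_w_pack(w_pack, 4096)
assert q.shape == k.shape == v.shape == (4096, 4096)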

@@ -3,9 +3,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
 python convert_ckpt.py --model-type GPT \
-    --loader megatron \
-    --saver megatron \
-    --save-model-type save_huggingface_llama \
+    --load-model-type mg \
+    --save-model-type hf \
+    --model-type-hf baichuan \
     --load-dir ./model_weights/Baichuan-legacy/ \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \

@@ -4,11 +4,12 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
 # Set the parallel strategy you need
 python convert_ckpt.py \
     --model-type GPT \
-    --loader llama2_hf \
-    --saver megatron \
+    --load-model-type hf \
+    --save-model-type mg \
     --target-tensor-parallel-size 8 \
     --load-dir ./model_from_hf/Baichuan2-hf/ \
     --save-dir ./model_weights/Baichuan2-legacy/ \
     --tokenizer-model ./model_from_hf/Baichuan2-hf/tokenizer.model \
     --params-dtype bf16 \
-    --w-pack True
+    --w-pack True \
+    --model-type-hf baichuan2

@@ -3,9 +3,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
 python convert_ckpt.py --model-type GPT \
-    --loader megatron \
-    --saver megatron \
-    --save-model-type save_huggingface_llama \
+    --load-model-type mg \
+    --save-model-type hf \
+    --model-type-hf baichuan2 \
     --load-dir ./model_weights/Baichuan2-legacy/ \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \

@@ -49,6 +49,15 @@
     "llama2": {
         "__base__": "base"
     },
+    "baichuan": {
+        "__base__": "base",
+        "config_set_value": {
+            "qkv_type": "pack_gqa"
+        },
+        "model_hf_key_mapping": {
+            "layers_self_attention_linear_qkv_pack": "model.layers[layer_idx].self_attn.W_pack"
+        }
+    },
     "chatglm3": {
         "__base__": "base",
         "config_set_value": {