Mirror of https://gitee.com/ascend/ModelLink.git (synced 2024-11-29 18:38:39 +08:00)
!1680 Switch the aquila, baichuan, and baichuan2 weight conversion scripts to the new framework
Merge pull request !1680 from 曲玥泽/master
This commit is contained in: parent cd014cfde1, commit 86b7a099ec
@@ -72,7 +72,7 @@ def main():
                         help='Do not perform checking on the name and ordering of weights',
                         dest='checking')
     parser.add_argument('--model-type-hf', type=str, default="llama2",
-                        choices=['baichuan2', 'llama2', 'mixtral', 'chatglm3', 'gemma', 'gemma2', 'bloom', 'qwen', 'internlm2', 'deepseek2', 'minicpm', 'minicpm-moe'],
+                        choices=['baichuan', 'baichuan2', 'llama2', 'mixtral', 'chatglm3', 'gemma', 'gemma2', 'bloom', 'qwen', 'internlm2', 'deepseek2', 'minicpm', 'minicpm-moe'],
                         help='model type of huggingface')
     known_args, _ = parser.parse_known_args()
 
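All six script hunks below apply one and the same flag substitution, summarized here for reference (flag values taken from the hunks themselves; <type> is a placeholder):

    # Old framework:
    #     --loader llama2_hf --saver megatron                  (HF -> Megatron)
    #     --loader megatron --saver megatron \
    #     --save-model-type save_huggingface_llama             (Megatron -> HF)
    # New framework:
    #     --load-model-type hf --save-model-type mg --model-type-hf <type>   (HF -> Megatron)
    #     --load-model-type mg --save-model-type hf --model-type-hf <type>   (Megatron -> HF)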
@@ -6,7 +6,8 @@ python convert_ckpt.py \
     --model-type GPT \
     --load-dir ./model_from_hf/Aquila-hf/ \
     --save-dir ./model_weights/Aquila-legacy/ \
-    --loader llama2_hf \
-    --saver megatron \
+    --load-model-type hf \
+    --save-model-type mg \
     --target-tensor-parallel-size 8 \
-    --tokenizer-model ./model_from_hf/Aquila-hf/tokenizer.json
+    --tokenizer-model ./model_from_hf/Aquila-hf/tokenizer.json \
+    --model-type-hf llama2
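For convenience, the Aquila HF-to-Megatron command as it reads after this hunk, assembled from the post-image above (lines 1-5 of the script fall outside the hunk and are not shown; they likely contain environment setup):

    python convert_ckpt.py \
        --model-type GPT \
        --load-dir ./model_from_hf/Aquila-hf/ \
        --save-dir ./model_weights/Aquila-legacy/ \
        --load-model-type hf \
        --save-model-type mg \
        --target-tensor-parallel-size 8 \
        --tokenizer-model ./model_from_hf/Aquila-hf/tokenizer.json \
        --model-type-hf llama2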
@@ -1,9 +1,9 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 
 python convert_ckpt.py --model-type GPT \
-    --loader megatron \
-    --saver megatron \
-    --save-model-type save_huggingface_llama \
+    --load-model-type mg \
+    --save-model-type hf \
+    --model-type-hf llama2 \
     --load-dir ./model_weights/Aquila-legacy/ \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
@@ -4,11 +4,12 @@
 # Choose the parallel strategy you need; combine with --params-dtype bf16 \ as required
 python convert_ckpt.py \
     --model-type GPT \
-    --loader llama2_hf \
-    --saver megatron \
+    --load-model-type hf \
+    --save-model-type mg \
     --target-tensor-parallel-size 8 \
     --target-pipeline-parallel-size 1 \
     --load-dir ./model_from_hf/Baichuan-hf/ \
     --save-dir ./model_weights/Baichuan-legacy/ \
     --tokenizer-model ./model_from_hf/Baichuan-hf/tokenizer.model \
-    --w-pack True
+    --w-pack True \
+    --model-type-hf baichuan
@@ -3,9 +3,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
 
 
 python convert_ckpt.py --model-type GPT \
-    --loader megatron \
-    --saver megatron \
-    --save-model-type save_huggingface_llama \
+    --load-model-type mg \
+    --save-model-type hf \
+    --model-type-hf baichuan \
     --load-dir ./model_weights/Baichuan-legacy/ \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
@@ -4,11 +4,12 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
 # Set the parallel strategy you need
 python convert_ckpt.py \
     --model-type GPT \
-    --loader llama2_hf \
-    --saver megatron \
+    --load-model-type hf \
+    --save-model-type mg \
     --target-tensor-parallel-size 8 \
     --load-dir ./model_from_hf/Baichuan2-hf/ \
     --save-dir ./model_weights/Baichuan2-legacy/ \
     --tokenizer-model ./model_from_hf/Baichuan2-hf/tokenizer.model \
     --params-dtype bf16 \
-    --w-pack True
+    --w-pack True \
+    --model-type-hf baichuan2
@@ -3,9 +3,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
 
 
 python convert_ckpt.py --model-type GPT \
-    --loader megatron \
-    --saver megatron \
-    --save-model-type save_huggingface_llama \
+    --load-model-type mg \
+    --save-model-type hf \
+    --model-type-hf baichuan2 \
     --load-dir ./model_weights/Baichuan2-legacy/ \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
@@ -49,6 +49,15 @@
     "llama2": {
         "__base__": "base"
     },
+    "baichuan": {
+        "__base__": "base",
+        "config_set_value": {
+            "qkv_type": "pack_gqa"
+        },
+        "model_hf_key_mapping": {
+            "layers_self_attention_linear_qkv_pack": "model.layers[layer_idx].self_attn.W_pack"
+        }
+    },
     "chatglm3": {
         "__base__": "base",
         "config_set_value": {
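The new "baichuan" template tells the converter that Baichuan checkpoints pack Q, K, and V into a single HF tensor, model.layers[layer_idx].self_attn.W_pack, rather than separate per-projection tensors. A quick, hypothetical sanity check before converting (the index filename and shard layout are assumptions, not part of this commit):

    # List a few of the packed QKV keys that the "baichuan" mapping refers to.
    grep -o '"model\.layers\.[0-9]*\.self_attn\.W_pack\.weight"' \
        ./model_from_hf/Baichuan-hf/pytorch_model.bin.index.json | head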