Mirror of https://gitee.com/ascend/ModelLink.git (synced 2024-11-29 18:38:39 +08:00)
!1680 Switch the aquila, baichuan, and baichuan2 weight conversion scripts to the new framework
Merge pull request !1680 from 曲玥泽/master
This commit is contained in: parent cd014cfde1, commit 86b7a099ec
@@ -72,7 +72,7 @@ def main():
                         help='Do not perform checking on the name and ordering of weights',
                         dest='checking')
     parser.add_argument('--model-type-hf', type=str, default="llama2",
-                        choices=['baichuan2', 'llama2', 'mixtral', 'chatglm3', 'gemma', 'gemma2', 'bloom', 'qwen', 'internlm2', 'deepseek2', 'minicpm', 'minicpm-moe'],
+                        choices=['baichuan', 'baichuan2', 'llama2', 'mixtral', 'chatglm3', 'gemma', 'gemma2', 'bloom', 'qwen', 'internlm2', 'deepseek2', 'minicpm', 'minicpm-moe'],
                         help='model type of huggingface')
     known_args, _ = parser.parse_known_args()
 
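All six script hunks below apply one and the same flag substitution, summarized here for reference (flag values taken from the hunks themselves; <type> is a placeholder):

    # Old framework:
    #     --loader llama2_hf --saver megatron                  (HF -> Megatron)
    #     --loader megatron --saver megatron \
    #     --save-model-type save_huggingface_llama             (Megatron -> HF)
    # New framework:
    #     --load-model-type hf --save-model-type mg --model-type-hf <type>   (HF -> Megatron)
    #     --load-model-type mg --save-model-type hf --model-type-hf <type>   (Megatron -> HF)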
@@ -6,7 +6,8 @@ python convert_ckpt.py \
     --model-type GPT \
     --load-dir ./model_from_hf/Aquila-hf/ \
     --save-dir ./model_weights/Aquila-legacy/ \
-    --loader llama2_hf \
-    --saver megatron \
+    --load-model-type hf \
+    --save-model-type mg \
     --target-tensor-parallel-size 8 \
-    --tokenizer-model ./model_from_hf/Aquila-hf/tokenizer.json
+    --tokenizer-model ./model_from_hf/Aquila-hf/tokenizer.json \
+    --model-type-hf llama2
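For convenience, the Aquila HF-to-Megatron command as it reads after this hunk, assembled from the post-image above (lines 1-5 of the script fall outside the hunk and are not shown; they likely contain environment setup):

    python convert_ckpt.py \
        --model-type GPT \
        --load-dir ./model_from_hf/Aquila-hf/ \
        --save-dir ./model_weights/Aquila-legacy/ \
        --load-model-type hf \
        --save-model-type mg \
        --target-tensor-parallel-size 8 \
        --tokenizer-model ./model_from_hf/Aquila-hf/tokenizer.json \
        --model-type-hf llama2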
@@ -1,9 +1,9 @@
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 
 python convert_ckpt.py --model-type GPT \
-    --loader megatron \
-    --saver megatron \
-    --save-model-type save_huggingface_llama \
+    --load-model-type mg \
+    --save-model-type hf \
+    --model-type-hf llama2 \
     --load-dir ./model_weights/Aquila-legacy/ \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
@@ -4,11 +4,12 @@
 # Choose the parallel strategy you need; combine with --params-dtype bf16 \ as required
 python convert_ckpt.py \
     --model-type GPT \
-    --loader llama2_hf \
-    --saver megatron \
+    --load-model-type hf \
+    --save-model-type mg \
     --target-tensor-parallel-size 8 \
     --target-pipeline-parallel-size 1 \
     --load-dir ./model_from_hf/Baichuan-hf/ \
     --save-dir ./model_weights/Baichuan-legacy/ \
     --tokenizer-model ./model_from_hf/Baichuan-hf/tokenizer.model \
-    --w-pack True
+    --w-pack True \
+    --model-type-hf baichuan
@@ -3,9 +3,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
 
 
 python convert_ckpt.py --model-type GPT \
-    --loader megatron \
-    --saver megatron \
-    --save-model-type save_huggingface_llama \
+    --load-model-type mg \
+    --save-model-type hf \
+    --model-type-hf baichuan \
     --load-dir ./model_weights/Baichuan-legacy/ \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
@@ -4,11 +4,12 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
 # Set the parallel strategy you need
 python convert_ckpt.py \
     --model-type GPT \
-    --loader llama2_hf \
-    --saver megatron \
+    --load-model-type hf \
+    --save-model-type mg \
     --target-tensor-parallel-size 8 \
     --load-dir ./model_from_hf/Baichuan2-hf/ \
     --save-dir ./model_weights/Baichuan2-legacy/ \
     --tokenizer-model ./model_from_hf/Baichuan2-hf/tokenizer.model \
     --params-dtype bf16 \
-    --w-pack True
+    --w-pack True \
+    --model-type-hf baichuan2
@@ -3,9 +3,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
 
 
 python convert_ckpt.py --model-type GPT \
-    --loader megatron \
-    --saver megatron \
-    --save-model-type save_huggingface_llama \
+    --load-model-type mg \
+    --save-model-type hf \
+    --model-type-hf baichuan2 \
     --load-dir ./model_weights/Baichuan2-legacy/ \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
@@ -49,6 +49,15 @@
     "llama2": {
         "__base__": "base"
     },
+    "baichuan": {
+        "__base__": "base",
+        "config_set_value": {
+            "qkv_type": "pack_gqa"
+        },
+        "model_hf_key_mapping": {
+            "layers_self_attention_linear_qkv_pack": "model.layers[layer_idx].self_attn.W_pack"
+        }
+    },
     "chatglm3": {
         "__base__": "base",
         "config_set_value": {
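The new "baichuan" template tells the converter that Baichuan checkpoints pack Q, K, and V into a single HF tensor, model.layers[layer_idx].self_attn.W_pack, rather than separate per-projection tensors. A quick, hypothetical sanity check before converting (the index filename and shard layout are assumptions, not part of this commit):

    # List a few of the packed QKV keys that the "baichuan" mapping refers to.
    grep -o '"model\.layers\.[0-9]*\.self_attn\.W_pack\.weight"' \
        ./model_from_hf/Baichuan-hf/pytorch_model.bin.index.json | head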