Mirror of https://gitee.com/ascend/ModelLink.git
Synced 2024-11-29 10:29:11 +08:00
Parent: aa3013f9b7
Commit: f80514ad86

OWNERS: 3 changed lines
@@ -7,6 +7,7 @@ reviewers:
   - guhangsong
   - leizhenzhen
   - liuyanghan
+  - jzh
   - guo-xinjie-1
   - wucong0509
   - dhrhank

@@ -15,7 +16,7 @@ reviewers:
   - sunjunjie1587
   - dingzicha
   - zhangjianxiang4
-  - guozhihua
+  - guozhihua9814
   - huangyiming123
   - zhangshengdong
   - xiong-liangcheng_admin
@@ -2,7 +2,6 @@

 # See README, please remember to source the set_env.sh file in CLI, or here
 # source /path/to/your/ascend-toolkit/set_env.sh
 export TOKENIZERS_PARALLELISM=false
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 CKPT_LOAD_DIR="your checkpoint load dir"
 TOKENIZER_PATH="your tokenizer path"
@@ -3,7 +3,6 @@
 # See README, please remember to source the set_env.sh file in CLI, or here
 # source /path/to/your/ascend-toolkit/set_env.sh
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 CKPT_SAVE_DIR="your checkpoint save dir"
 DATA_PATH="your training data dir"
@@ -2,7 +2,6 @@

 # See README, please remember to source the set_env.sh file in CLI, or here
 # source /path/to/your/ascend-toolkit/set_env.sh
 export TOKENIZERS_PARALLELISM=false
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 # please fill these path configurations

@@ -2,7 +2,6 @@

 # See README, please remember to source the set_env.sh file in CLI, or here
 # source /path/to/your/ascend-toolkit/set_env.sh
 export TOKENIZERS_PARALLELISM=false
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 # please fill these path configurations
@@ -3,7 +3,6 @@
 # See README, please remember to source the set_env.sh file in CLI, or here
 # source /path/to/your/ascend-toolkit/set_env.sh
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 # please fill these path configurations
 CKPT_SAVE_DIR="your checkpoint save dir"

@@ -3,7 +3,6 @@
 # See README, please remember to source the set_env.sh file in CLI, or here
 # source /path/to/your/ascend-toolkit/set_env.sh
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 # please fill these path configurations
 CKPT_SAVE_DIR="your checkpoint save dir"
@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost
@@ -2,7 +2,6 @@

 # The number of parameters is not aligned
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0
 export TOKENIZERS_PARALLELISM=false

 export HCCL_CONNECT_TIMEOUT=1200

@@ -2,7 +2,6 @@

 # The number of parameters is not aligned
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0
 export TOKENIZERS_PARALLELISM=false

 export HCCL_CONNECT_TIMEOUT=1200

@@ -2,7 +2,6 @@

 # The number of parameters is not aligned
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0
 export TOKENIZERS_PARALLELISM=false

 export HCCL_CONNECT_TIMEOUT=1200
@@ -1,7 +1,6 @@
 #!/bin/bash
 export CUDA_DEVICE_MAX_CONNECTIONS=1
 export HCCL_CONNECT_TIMEOUT=1200
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash
 export CUDA_DEVICE_MAX_CONNECTIONS=1
 export HCCL_CONNECT_TIMEOUT=1200
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,6 +1,5 @@
 #!/bin/bash
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,6 +1,5 @@
 #!/bin/bash
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost
@@ -5,7 +5,6 @@ export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib:/root/miniconda3/lib:$LD_LIBRARY_PATH
 export HCCL_CONNECT_TIMEOUT=1200
 export COMBINED_ENABLE=1
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 # please fill these path configurations
 CHECKPOINT="your model path"
@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -5,7 +5,6 @@ export COMBINED_ENABLE=1
 export AZUREML_EXPERIMENT_ID=0

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,6 +1,5 @@
 #!/bin/bash
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,5 +1,4 @@
 #!/bin/bash
-export NPU_ASD_ENABLE=0
 export CUDA_DEVICE_MAX_CONNECTIONS=1

 GPUS_PER_NODE=8
@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -5,7 +5,6 @@ export COMBINED_ENABLE=1
 export AZUREML_EXPERIMENT_ID=0

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -5,7 +5,6 @@ export COMBINED_ENABLE=1
 export AZUREML_EXPERIMENT_ID=0

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,5 +1,4 @@
 #!/bin/bash
-export NPU_ASD_ENABLE=0
 export CUDA_DEVICE_MAX_CONNECTIONS=1

 GPUS_PER_NODE=8

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR=localhost
@@ -4,7 +4,6 @@
 export HCCL_CONNECT_TIMEOUT=1200
 export COMBINED_ENABLE=1
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0
 export TOKENIZERS_PARALLELISM=false

 # please fill these path configurations
 CHECKPOINT="your model ckpt path"
@@ -1,6 +1,5 @@
 #!/bin/bash
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 GPUS_PER_NODE=8
 MASTER_ADDR="your master node IP"
@@ -98,7 +98,7 @@
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-# HF to tp1-pp8-ep2
+# HF to tp8-pp4-ep1
 python tools/checkpoint/convert_ckpt.py \
     --model-type GPT \
     --loader mixtral_hf \

@@ -137,7 +137,7 @@
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-# tp1-pp8-ep2 to HF
+# tp8-pp4-ep1 to HF
 python tools/checkpoint/convert_ckpt.py \
     --model-type GPT \
     --loader mixtral_mg \

@@ -261,7 +261,7 @@ Mixtral-8x7B on four nodes / 32 cards (tp8 pp4): **Ascend chips** vs. **reference chips**
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

 # Modify the model weights path and the tokenizer path
-CHECKPOINT="./model_weights/Mixtral-8x7B-v0.1-tp1-pp8-ep1/"
+CHECKPOINT="./model_weights/Mixtral-8x7B-v0.1-tp8-pp1-ep1/"
 TOKENIZER_MODEL="./model_from_hf/Mixtral-8x7B/"

 # Modify the parallel configuration according to the model weights actually loaded
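The comment above asks you to match the parallel configuration to the weights actually loaded. As a hedged sketch only (the launcher, the script name, and the flag spellings follow Megatron-LM conventions and are assumptions, not part of this commit), a single-node TP8 launch consistent with the tp8-pp1-ep1 weights could look like:

```bash
# Hypothetical sketch: the launch flags must mirror the checkpoint's slicing (tp8-pp1-ep1).
# "inference.py" is a placeholder; flag names assume Megatron-LM conventions.
torchrun --nproc_per_node 8 inference.py \
    --tensor-model-parallel-size 8 \
    --pipeline-model-parallel-size 1 \
    --expert-model-parallel-size 1 \
    --load "${CHECKPOINT}" \
    --tokenizer-model "${TOKENIZER_MODEL}"
```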
@@ -99,7 +99,7 @@ Recommended hardware configuration for inference:
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-# HF to tp1-pp8-ep2
+# HF to tp8-pp4-ep1
 python tools/checkpoint/convert_ckpt.py \
     --model-type GPT \
     --loader mixtral_hf \
@@ -113,13 +113,13 @@ Recommended hardware configuration for inference:
 ```

 Any Megatron weights with parallel slicing strategy --> Any Megatron weights with parallel slicing strategy
-***(This scenario is generally used to reconfigure the sliced model weights, such as training on a dual-node 16-card EP2-PP8 strategy, and then wanting to infer on a single-node 8-card TP8)***
+***(This scenario is generally used to reconfigure the sliced model weights, such as training on a four-node 32-card TP8-PP4 strategy, and then wanting to infer on a single-node 8-card TP8)***

 ```bash
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-# tp1-pp8-ep2 to tp1-pp8-ep1
+# tp8-pp4-ep1 to tp8-pp1-ep1
 python tools/checkpoint/convert_ckpt.py \
     --model-type GPT \
     --loader mixtral_mg \
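The conversion command in this hunk is truncated after the loader flag. A hedged sketch of the full re-slice described above (tp8-pp4-ep1 training weights to tp8-pp1-ep1 for single-node TP8 inference) follows; --model-type, --loader, --saver, and --load-dir appear in this commit's convert_ckpt.py, while --save-dir and the --target-*-parallel-size flags are assumed Megatron-style spellings, not taken from this diff:

```bash
# Hypothetical completion: re-slice tp8-pp4-ep1 -> tp8-pp1-ep1.
# --save-dir, the "megatron" saver name, and --target-*-parallel-size are assumptions.
python tools/checkpoint/convert_ckpt.py \
    --model-type GPT \
    --loader mixtral_mg \
    --saver megatron \
    --load-dir ./model_weights/Mixtral-8x7B-v0.1-tp8-pp4-ep1/ \
    --save-dir ./model_weights/Mixtral-8x7B-v0.1-tp8-pp1-ep1/ \
    --target-tensor-parallel-size 8 \
    --target-pipeline-parallel-size 1 \
    --target-expert-parallel-size 1
```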
@@ -138,7 +138,7 @@ Recommended hardware configuration for inference:
 # Modify the ascend-toolkit path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-# tp1-pp8-ep2 to HF
+# tp8-pp4-ep1 to HF
 python tools/checkpoint/convert_ckpt.py \
     --model-type GPT \
     --loader mixtral_mg \
@@ -4,7 +4,6 @@
 export HCCL_CONNECT_TIMEOUT=1200
 export COMBINED_ENABLE=1
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0
 export TOKENIZERS_PARALLELISM=false

 MASTER_ADDR=localhost
 MASTER_PORT=6000

@@ -4,7 +4,6 @@
 export HCCL_CONNECT_TIMEOUT=1200
 export COMBINED_ENABLE=1
 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0
 export TOKENIZERS_PARALLELISM=false

 # please fill these path configurations
 CHECKPOINT="your model ckpt path"
@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -2,7 +2,6 @@

 export CUDA_DEVICE_MAX_CONNECTIONS=1
 export HCCL_CONNECT_TIMEOUT=1800
-export NPU_ASD_ENABLE=0

 MASTER_ADDR=localhost
 NPUS_PER_NODE=8
@@ -1,7 +1,7 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0
+

 NPUS_PER_NODE=8
 # Change for multinode config
@@ -2,7 +2,6 @@

 export CUDA_DEVICE_MAX_CONNECTIONS=1
 export HCCL_CONNECT_TIMEOUT=1800
-export NPU_ASD_ENABLE=0

 MASTER_ADDR=localhost
 NPUS_PER_NODE=8

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost

@@ -2,7 +2,6 @@

 export CUDA_DEVICE_MAX_CONNECTIONS=1
 export HCCL_CONNECT_TIMEOUT=1800
-export NPU_ASD_ENABLE=0

 MASTER_ADDR=localhost
 NPUS_PER_NODE=8

@@ -2,7 +2,6 @@

 export CUDA_DEVICE_MAX_CONNECTIONS=1
 export HCCL_CONNECT_TIMEOUT=1800
-export NPU_ASD_ENABLE=0

 MASTER_ADDR=localhost
 NPUS_PER_NODE=8

@@ -1,7 +1,6 @@
 #!/bin/bash

 export CUDA_DEVICE_MAX_CONNECTIONS=1
-export NPU_ASD_ENABLE=0

 NPUS_PER_NODE=8
 MASTER_ADDR=localhost
setup.py: 2 changed lines

@@ -21,7 +21,7 @@ import setuptools
 if sys.version_info < (3,):
     raise Exception("Python 2 is not supported by ModelLink.")

-__description__ = 'AscendSpeed for LLMs of Ascend'
+__description__ = 'ModelLink for LLMs of Ascend'
 __version__ = '0.0.1'
 __author__ = 'Ascend'
 __long_description__ = 'ModelLink for LLMs of Ascend'
@@ -1,5 +1,4 @@
 #!/bin/bash
-export NPU_ASD_ENABLE=0
 export TOKENIZERS_PARALLELISM=false
 export CUDA_DEVICE_MAX_CONNECTIONS=1

 MASTER_ADDR=localhost
@@ -60,7 +60,7 @@ def main():
     parser.add_argument('--loader', type=str, default='megatron',
                         help='Module name to load checkpoint, should be on python path')
     parser.add_argument('--saver', type=str, default='megatron',
-                        help='Module name to save checkpoint, shdoul be on python path')
+                        help='Module name to save checkpoint, should be on python path')
     parser.add_argument('--load-dir', type=str, required=True,
                         help='Directory to load model checkpoint from')
     parser.add_argument('--lora-dir', type=str,
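These flags compose into converter invocations like the Mixtral conversions earlier in this commit. A minimal hedged sketch using the 'megatron' defaults shown above; the paths are placeholders, and --save-dir is assumed to be defined further down in this parser:

```bash
# Hypothetical minimal use of the converter CLI defined above.
# ./ckpt_in and ./ckpt_out are placeholder directories.
python tools/checkpoint/convert_ckpt.py \
    --model-type GPT \
    --loader megatron \
    --saver megatron \
    --load-dir ./ckpt_in \
    --save-dir ./ckpt_out
```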
@@ -1,81 +0,0 @@
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# This source code is licensed under the Apache license found in the
# LICENSE file in the root directory of this source tree.

import os
import types
import torch

from megatron import get_retro_args
from megatron.tokenizer.tokenizer import (
    _BertWordPieceTokenizer,
    _GPT2BPETokenizer,
    _GPTSentencePieceTokenizer,
)


def get_args_path(workdir):
    '''Argument copy stored within retro workdir.'''
    return os.path.join(workdir, "args.json")


def get_num_chunks_per_sample():
    '''Compute seq_length // chunk_length.'''
    args = get_retro_args()
    sample_length = args.retro_gpt_seq_length
    chunk_length = args.retro_gpt_chunk_length
    if sample_length % chunk_length != 0:
        raise ValueError('sample_length should be divisible by chunk_length.')
    return sample_length // chunk_length


def get_gpt_tokenizer():
    '''GPT (BPE) tokenizer.'''
    args = get_retro_args()
    tokenizer_type = args.retro_gpt_tokenizer_type
    if tokenizer_type == "GPT2BPETokenizer":
        if not args.retro_gpt_vocab_file or not args.retro_gpt_merge_file:
            raise ValueError('retro_gpt_vocab_file and retro_gpt_merge_file should not be None.')
        return _GPT2BPETokenizer(
            vocab_file=args.retro_gpt_vocab_file,
            merge_file=args.retro_gpt_merge_file,
        )
    elif tokenizer_type == 'GPTSentencePieceTokenizer':
        if args.retro_gpt_tokenizer_model is None:
            raise ValueError('retro_gpt_tokenizer_model is None.')
        return _GPTSentencePieceTokenizer(args.retro_gpt_tokenizer_model)
    else:
        raise Exception("unrecognized gpt tokenizer, '%s'." % tokenizer_type)


def get_bert_tokenizer():
    '''Bert (WordPiece) tokenizer.'''
    args = get_retro_args()
    lower_case = {
        "BertWordPieceLowerCase": True,
        "BertWordPieceCase": False,
    }[args.retro_bert_tokenizer_type]
    return _BertWordPieceTokenizer(
        vocab_file=args.retro_bert_vocab_file,
        lower_case=lower_case,
    )


class GPTToTextDataset(torch.utils.data.Dataset):
    '''Dataset to convert GPT tokens to text.'''

    def __init__(self, gpt_dataset):
        super().__init__()
        self.gpt_dataset = gpt_dataset
        self.gpt_tokenizer = get_gpt_tokenizer()

    def __len__(self):
        return len(self.gpt_dataset)

    def __getitem__(self, idx):
        gpt_token_ids = self.gpt_dataset[idx]["text"].tolist()
        text = self.gpt_tokenizer.detokenize(gpt_token_ids)
        return {"text": text}