!1373 Fix typos and issues

Merge pull request !1373 from fengliangjun/master
fengliangjun 2024-06-24 13:00:04 +00:00 committed by i-robot
parent aa3013f9b7
commit f80514ad86
61 changed files with 12 additions and 146 deletions

OWNERS

@@ -7,6 +7,7 @@ reviewers:
- guhangsong
- leizhenzhen
- liuyanghan
- jzh
- guo-xinjie-1
- wucong0509
- dhrhank
@@ -15,7 +16,7 @@ reviewers:
- sunjunjie1587
- dingzicha
- zhangjianxiang4
- guozhihua
- guozhihua9814
- huangyiming123
- zhangshengdong
- xiong-liangcheng_admin

@@ -2,7 +2,6 @@
# See README, please remember to source the set_env.sh file in CLI, or here
# source /path/to/your/ascend-toolkit/set_env.sh
export TOKENIZERS_PARALLELISM=false
export CUDA_DEVICE_MAX_CONNECTIONS=1
CKPT_LOAD_DIR="your checkpoint load dir"
TOKENIZER_PATH="your tokenizer path"

@@ -3,7 +3,6 @@
# See README, please remember to source the set_env.sh file in CLI, or here
# source /path/to/your/ascend-toolkit/set_env.sh
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
CKPT_SAVE_DIR="your checkpoint save dir"
DATA_PATH="your training data dir"

@@ -2,7 +2,6 @@
# See README, please remember to source the set_env.sh file in CLI, or here
# source /path/to/your/ascend-toolkit/set_env.sh
export TOKENIZERS_PARALLELISM=false
export CUDA_DEVICE_MAX_CONNECTIONS=1
# please fill these path configurations

@@ -2,7 +2,6 @@
# See README, please remember to source the set_env.sh file in CLI, or here
# source /path/to/your/ascend-toolkit/set_env.sh
export TOKENIZERS_PARALLELISM=false
export CUDA_DEVICE_MAX_CONNECTIONS=1
# please fill these path configurations

@@ -3,7 +3,6 @@
# See README, please remember to source the set_env.sh file in CLI, or here
# source /path/to/your/ascend-toolkit/set_env.sh
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
# please fill these path configurations
CKPT_SAVE_DIR="your checkpoint save dir"

@@ -3,7 +3,6 @@
# See README, please remember to source the set_env.sh file in CLI, or here
# source /path/to/your/ascend-toolkit/set_env.sh
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
# please fill these path configurations
CKPT_SAVE_DIR="your checkpoint save dir"

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -2,7 +2,6 @@
# The number of parameters is not aligned
export CUDA_DEVICE_MAX_CONNECTIONS=1
export TOKENIZERS_PARALLELISM=false
export HCCL_CONNECT_TIMEOUT=1200

@@ -2,7 +2,6 @@
# The number of parameters is not aligned
export CUDA_DEVICE_MAX_CONNECTIONS=1
export TOKENIZERS_PARALLELISM=false
export HCCL_CONNECT_TIMEOUT=1200

@@ -2,7 +2,6 @@
# The number of parameters is not aligned
export CUDA_DEVICE_MAX_CONNECTIONS=1
export TOKENIZERS_PARALLELISM=false
export HCCL_CONNECT_TIMEOUT=1200

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export HCCL_CONNECT_TIMEOUT=1200
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export HCCL_CONNECT_TIMEOUT=1200
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,6 +1,5 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,6 +1,5 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -5,7 +5,6 @@ export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib:/root/miniconda3/lib:$LD_LI
export HCCL_CONNECT_TIMEOUT=1200
export COMBINED_ENABLE=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
# please fill these path configurations
CHECKPOINT="your model path"

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -5,7 +5,6 @@ export COMBINED_ENABLE=1
export AZUREML_EXPERIMENT_ID=0
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,6 +1,5 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,5 +1,4 @@
#!/bin/bash
export NPU_ASD_ENABLE=0
export CUDA_DEVICE_MAX_CONNECTIONS=1
GPUS_PER_NODE=8

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -5,7 +5,6 @@ export COMBINED_ENABLE=1
export AZUREML_EXPERIMENT_ID=0
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -5,7 +5,6 @@ export COMBINED_ENABLE=1
export AZUREML_EXPERIMENT_ID=0
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,5 +1,4 @@
#!/bin/bash
export NPU_ASD_ENABLE=0
export CUDA_DEVICE_MAX_CONNECTIONS=1
GPUS_PER_NODE=8

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -4,7 +4,6 @@
export HCCL_CONNECT_TIMEOUT=1200
export COMBINED_ENABLE=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export TOKENIZERS_PARALLELISM=false
# please fill these path configurations
CHECKPOINT="your model ckpt path"

@@ -1,6 +1,5 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
GPUS_PER_NODE=8
MASTER_ADDR="your master node IP"

@@ -98,7 +98,7 @@
# Modify the ascend-toolkit path
source /usr/local/Ascend/ascend-toolkit/set_env.sh
# HF to tp1-pp8-ep2
# HF to tp8-pp4-ep1
python tools/checkpoint/convert_ckpt.py \
--model-type GPT \
--loader mixtral_hf \
@@ -137,7 +137,7 @@
# Modify the ascend-toolkit path
source /usr/local/Ascend/ascend-toolkit/set_env.sh
# tp1-pp8-ep2 to HF
# tp8-pp4-ep1 to HF
python tools/checkpoint/convert_ckpt.py \
--model-type GPT \
--loader mixtral_mg \
@@ -261,7 +261,7 @@ Mixtral-8x7B on four nodes with 32 cards (tp8 pp4): **Ascend chips** and **reference chips**
source /usr/local/Ascend/ascend-toolkit/set_env.sh
# Modify the model weight path and tokenizer path
CHECKPOINT="./model_weights/Mixtral-8x7B-v0.1-tp1-pp8-ep1/"
CHECKPOINT="./model_weights/Mixtral-8x7B-v0.1-tp8-pp1-ep1/"
TOKENIZER_MODEL="./model_from_hf/Mixtral-8x7B/"
# Modify the parallel configuration according to the model weights actually loaded
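
The conversion commands in the hunks above are cut off by the hunk boundaries right after `--loader`. Written out in full, the HF → tp8-pp4-ep1 call would look roughly like the sketch below; the `--saver` name, the `--target-*-parallel-size` flags, and the exact paths are assumptions inferred from the surrounding README, not taken from this diff.

```bash
# Rough sketch only: HF weights -> Megatron weights sliced as tp8-pp4-ep1.
# Flags beyond --model-type/--loader are assumed; verify against the ModelLink docs.
python tools/checkpoint/convert_ckpt.py \
    --model-type GPT \
    --loader mixtral_hf \
    --saver mixtral \
    --target-tensor-parallel-size 8 \
    --target-pipeline-parallel-size 4 \
    --target-expert-parallel-size 1 \
    --load-dir ./model_from_hf/Mixtral-8x7B/ \
    --save-dir ./model_weights/Mixtral-8x7B-v0.1-tp8-pp4-ep1/ \
    --tokenizer-model ./model_from_hf/Mixtral-8x7B/tokenizer.model
```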

@@ -99,7 +99,7 @@ Recommended hardware configuration for inference:
# Modify the ascend-toolkit path
source /usr/local/Ascend/ascend-toolkit/set_env.sh
# HF to tp1-pp8-ep2
# HF to tp8-pp4-ep1
python tools/checkpoint/convert_ckpt.py \
--model-type GPT \
--loader mixtral_hf \
@@ -113,13 +113,13 @@ Recommended hardware configuration for inference:
```
Any Megatron weights with parallel slicing strategy --> Any Megatron weights with parallel slicing strategy
***(This scenario is generally used to reconfigure the sliced model weights, such as training on a dual-node 16-card EP2-PP8 strategy, and then wanting to infer on a single-node 8-card TP8)***
***(This scenario is generally used to reconfigure the sliced model weights, such as training on a four-node 32-card TP8-PP4 strategy, and then wanting to infer on a single-node 8-card TP8)***
```bash
# Modify the ascend-toolkit path
source /usr/local/Ascend/ascend-toolkit/set_env.sh
# tp1-pp8-ep2 to tp1-pp8-ep1
# tp8-pp4-ep1 to tp8-pp1-ep1
python tools/checkpoint/convert_ckpt.py \
--model-type GPT \
--loader mixtral_mg \
@@ -138,7 +138,7 @@ Recommended hardware configuration for inference:
# Modify the ascend-toolkit path
source /usr/local/Ascend/ascend-toolkit/set_env.sh
# tp1-pp8-ep2 to HF
# tp8-pp4-ep1 to HF
python tools/checkpoint/convert_ckpt.py \
--model-type GPT \
--loader mixtral_mg \
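
The re-slicing scenario described a few hunks above (train as tp8-pp4-ep1 on four nodes and 32 cards, then infer as tp8-pp1-ep1 on a single node) uses the same converter with the Megatron loader. A rough sketch, with the saver name and target-parallel flags assumed rather than copied from this diff:

```bash
# Rough sketch only: re-slice Megatron weights from tp8-pp4-ep1 to tp8-pp1-ep1.
python tools/checkpoint/convert_ckpt.py \
    --model-type GPT \
    --loader mixtral_mg \
    --saver mixtral \
    --target-tensor-parallel-size 8 \
    --target-pipeline-parallel-size 1 \
    --target-expert-parallel-size 1 \
    --load-dir ./model_weights/Mixtral-8x7B-v0.1-tp8-pp4-ep1/ \
    --save-dir ./model_weights/Mixtral-8x7B-v0.1-tp8-pp1-ep1/
```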

@@ -4,7 +4,6 @@
export HCCL_CONNECT_TIMEOUT=1200
export COMBINED_ENABLE=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export TOKENIZERS_PARALLELISM=false
MASTER_ADDR=localhost
MASTER_PORT=6000

@@ -4,7 +4,6 @@
export HCCL_CONNECT_TIMEOUT=1200
export COMBINED_ENABLE=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export TOKENIZERS_PARALLELISM=false
# please fill these path configurations
CHECKPOINT="your model ckpt path"

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -2,7 +2,6 @@
export CUDA_DEVICE_MAX_CONNECTIONS=1
export HCCL_CONNECT_TIMEOUT=1800
export NPU_ASD_ENABLE=0
MASTER_ADDR=localhost
NPUS_PER_NODE=8

@@ -1,7 +1,7 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
# Change for multinode config

@@ -2,7 +2,6 @@
export CUDA_DEVICE_MAX_CONNECTIONS=1
export HCCL_CONNECT_TIMEOUT=1800
export NPU_ASD_ENABLE=0
MASTER_ADDR=localhost
NPUS_PER_NODE=8

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -2,7 +2,6 @@
export CUDA_DEVICE_MAX_CONNECTIONS=1
export HCCL_CONNECT_TIMEOUT=1800
export NPU_ASD_ENABLE=0
MASTER_ADDR=localhost
NPUS_PER_NODE=8

@@ -2,7 +2,6 @@
export CUDA_DEVICE_MAX_CONNECTIONS=1
export HCCL_CONNECT_TIMEOUT=1800
export NPU_ASD_ENABLE=0
MASTER_ADDR=localhost
NPUS_PER_NODE=8

@@ -1,7 +1,6 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NPU_ASD_ENABLE=0
NPUS_PER_NODE=8
MASTER_ADDR=localhost

@@ -21,7 +21,7 @@ import setuptools
if sys.version_info < (3,):
    raise Exception("Python 2 is not supported by ModelLink.")
__description__ = 'AscendSpeed for LLMs of Ascend'
__description__ = 'ModelLink for LLMs of Ascend'
__version__ = '0.0.1'
__author__ = 'Ascend'
__long_description__ = 'ModelLink for LLMs of Ascend'

@@ -1,5 +1,4 @@
#!/bin/bash
export TOKENIZERS_PARALLELISM=false
export CUDA_DEVICE_MAX_CONNECTIONS=1
MASTER_ADDR=localhost

@@ -60,7 +60,7 @@ def main():
    parser.add_argument('--loader', type=str, default='megatron',
                        help='Module name to load checkpoint, should be on python path')
    parser.add_argument('--saver', type=str, default='megatron',
                        help='Module name to save checkpoint, shdoul be on python path')
                        help='Module name to save checkpoint, should be on python path')
    parser.add_argument('--load-dir', type=str, required=True,
                        help='Directory to load model checkpoint from')
    parser.add_argument('--lora-dir', type=str,

@@ -1,81 +0,0 @@
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# This source code is licensed under the Apache license found in the
# LICENSE file in the root directory of this source tree.

import os
import types

import torch

from megatron import get_retro_args
from megatron.tokenizer.tokenizer import (
    _BertWordPieceTokenizer,
    _GPT2BPETokenizer,
    _GPTSentencePieceTokenizer,
)


def get_args_path(workdir):
    '''Argument copy stored within retro workdir.'''
    return os.path.join(workdir, "args.json")


def get_num_chunks_per_sample():
    '''Compute seq_length // chunk_length.'''
    args = get_retro_args()
    sample_length = args.retro_gpt_seq_length
    chunk_length = args.retro_gpt_chunk_length
    if sample_length % chunk_length != 0:
        raise ValueError('chunk_length should be divisible by sample_length.')
    return sample_length // chunk_length


def get_gpt_tokenizer():
    '''GPT (BPE) tokenizer.'''
    args = get_retro_args()
    tokenizer_type = args.retro_gpt_tokenizer_type
    if tokenizer_type == "GPT2BPETokenizer":
        if not args.retro_gpt_vocab_file or not args.retro_gpt_merge_file:
            raise ValueError('retro_gpt_vocab_file and retro_gpt_merge_file should not be none.')
        return _GPT2BPETokenizer(
            vocab_file=args.retro_gpt_vocab_file,
            merge_file=args.retro_gpt_merge_file,
        )
    elif tokenizer_type == 'GPTSentencePieceTokenizer':
        if args.retro_gpt_tokenizer_model is None:
            raise ValueError('retro_gpt_tokenizer_model is None.')
        return _GPTSentencePieceTokenizer(args.retro_gpt_tokenizer_model)
    else:
        raise Exception("unrecognized gpt tokenizer, '%s'." % tokenizer_type)


def get_bert_tokenizer():
    '''Bert (Wordpiece) tokenizer.'''
    args = get_retro_args()
    lower_case = {
        "BertWordPieceLowerCase" : True,
        "BertWordPieceCase" : False,
    }[args.retro_bert_tokenizer_type]
    return _BertWordPieceTokenizer(
        vocab_file=args.retro_bert_vocab_file,
        lower_case=lower_case,
    )


class GPTToTextDataset(torch.utils.data.Dataset):
    '''Dataset to convert GPT tokens to text.'''

    def __init__(self, gpt_dataset):
        super().__init__()
        self.gpt_dataset = gpt_dataset
        self.gpt_tokenizer = get_gpt_tokenizer()

    def __len__(self):
        return len(self.gpt_dataset)

    def __getitem__(self, idx):
        gpt_token_ids = self.gpt_dataset[idx]["text"].tolist()
        text = self.gpt_tokenizer.detokenize(gpt_token_ids)
        return {"text": text}