mirror of
https://gitee.com/fastnlp/fastNLP.git
synced 2024-12-01 03:37:55 +08:00
Resolve the GPT2Tokenizer dependency problem
This commit is contained in:
parent
ab047a2aa3
commit
2dee67129a
@ -71,24 +71,6 @@ VOCAB_FILES_NAMES = {
|
||||
}
|
||||
|
||||
|
||||
PRETRAINED_VOCAB_FILES_MAP = {
|
||||
"vocab_file": {
|
||||
"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json",
|
||||
"gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-vocab.json",
|
||||
"gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-vocab.json",
|
||||
"gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-vocab.json",
|
||||
"distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-vocab.json",
|
||||
},
|
||||
"merges_file": {
|
||||
"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt",
|
||||
"gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-merges.txt",
|
||||
"gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-merges.txt",
|
||||
"gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-merges.txt",
|
||||
"distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-merges.txt",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||
"en-small": 1024,
|
||||
'en': 1024,
|
||||
@ -128,9 +110,6 @@ class GPT2Tokenizer:
|
||||
the spaces at the beginning of a string: `tokenizer.decode(tokenizer.encode(" Hello")) = "Hello"`
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
|
||||
SPECIAL_TOKENS_ATTRIBUTES = [
|
||||
"bos_token",
|
||||
"eos_token",
|
||||
|
@ -5,4 +5,5 @@ nltk>=3.4.1
|
||||
prettytable>=0.7.2
|
||||
requests
|
||||
spacy
|
||||
prettytable>=0.7.2
|
||||
prettytable>=0.7.2
|
||||
regex!=2019.12.17
|
Loading…
Reference in New Issue
Block a user