Solve the GPTTokenizer dependency problem

yh_cc 2020-04-12 21:13:33 +08:00
parent ab047a2aa3
commit 2dee67129a
2 changed files with 2 additions and 22 deletions


@@ -71,24 +71,6 @@ VOCAB_FILES_NAMES = {
 }
-PRETRAINED_VOCAB_FILES_MAP = {
-    "vocab_file": {
-        "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json",
-        "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-vocab.json",
-        "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-vocab.json",
-        "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-vocab.json",
-        "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-vocab.json",
-    },
-    "merges_file": {
-        "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt",
-        "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-merges.txt",
-        "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-merges.txt",
-        "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-merges.txt",
-        "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-merges.txt",
-    },
-}
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
     "en-small": 1024,
     'en': 1024,
@@ -128,9 +110,6 @@ class GPT2Tokenizer:
     the spaces at the beginning of a string: `tokenizer.decode(tokenizer.encode(" Hello")) = "Hello"`
     """
     vocab_files_names = VOCAB_FILES_NAMES
-    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     SPECIAL_TOKENS_ATTRIBUTES = [
         "bos_token",
         "eos_token",


@@ -6,3 +6,4 @@ prettytable>=0.7.2
 requests
 spacy
 prettytable>=0.7.2
+regex!=2019.12.17
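
The requirements hunk adds the third-party regex package (with the 2019.12.17 release excluded, matching the pin used upstream in transformers). The GPT-2 byte-level BPE tokenizer needs it for its Unicode-aware pre-tokenization pattern: the standard-library re module does not support the \p{L} / \p{N} property classes that pattern relies on. A minimal sketch using the well-known GPT-2 splitting pattern (taken from the original GPT-2 code, not from this diff):

import regex as re   # third-party "regex" package; stdlib "re" cannot compile this pattern

# GPT-2 splits text into word-like chunks before applying BPE merges.
# \p{L} (any letter) and \p{N} (any number) are Unicode property classes
# that only the regex package understands, hence the new requirement.
pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")

print(re.findall(pat, "Hello world, it's 2020!"))
# -> ['Hello', ' world', ',', ' it', "'s", ' 2020', '!']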