Merge branch 'dev' of https://gitee.com/fastnlp/fastNLP into dev

2024-12-04 13:17:51 +08:00 · 2020-12-14 15:34:50 +08:00 · 2020-12-14 15:34:50 +08:00 · bf9d834821
commit bf9d834821
parent 9fdcafff6a 057fa63d7e
18 changed files with 328 additions and 630 deletions
--- a/.Jenkinsfile
+++ b/.Jenkinsfile
@ -27,7 +27,6 @@ pipeline {
                }
                stage('Package Testing') {
                    steps {
-                        sh 'python -m spacy download en'
                        sh 'pip install fitlog'
                        sh 'pytest ./tests --html=test_results.html --self-contained-html'
                    }
--- a/.travis.yml
+++ b/.travis.yml
@ -13,7 +13,7 @@ install:
  - pip install pytest-cov
 # command to run tests
 script:
-  - python -m spacy download en
+#  - python -m spacy download en
  - pytest --cov=fastNLP tests/

 after_success:
--- a/docs/source/_static/notebooks/文本分类.ipynb
+++ b/docs/source/_static/notebooks/文本分类.ipynb
@ -46,10 +46,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": null,
+   "metadata": {},
   "outputs": [],
   "source": [
    "from fastNLP.io import ChnSentiCorpLoader\n",
@ -68,22 +66,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "In total 3 datasets:\n",
-      "\tdev has 1200 instances.\n",
-      "\ttrain has 9600 instances.\n",
-      "\ttest has 1200 instances.\n",
-      "In total 0 vocabs:\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "print(data_bundle)"
   ]
@ -97,20 +82,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "DataSet({'raw_chars': 选择珠江花园的原因就是方便，有电动扶梯直接到达海边，周围餐馆、食廊、商场、超市、摊位一应俱全。酒店装修一般，但还算整洁。 泳池在大堂的屋顶，因此很小，不过女儿倒是喜欢。 包的早餐是西式的，还算丰富。 服务吗，一般 type=str,\n",
-      "'target': 1 type=str},\n",
-      "{'raw_chars': 15.4寸笔记本的键盘确实爽，基本跟台式机差不多了，蛮喜欢数字小键盘，输数字特方便，样子也很美观，做工也相当不错 type=str,\n",
-      "'target': 1 type=str})\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "print(data_bundle.get_dataset('train')[:2])  # 查看Train集前两个sample"
   ]
@ -127,10 +101,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": null,
+   "metadata": {},
   "outputs": [],
   "source": [
    "from fastNLP.io import ChnSentiCorpPipe\n",
@ -141,24 +113,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "In total 3 datasets:\n",
-      "\tdev has 1200 instances.\n",
-      "\ttrain has 9600 instances.\n",
-      "\ttest has 1200 instances.\n",
-      "In total 2 vocabs:\n",
-      "\tchars has 4409 entries.\n",
-      "\ttarget has 2 entries.\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "print(data_bundle)  # 打印data_bundle，查看其变化"
   ]
@ -172,24 +129,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "DataSet({'raw_chars': 选择珠江花园的原因就是方便，有电动扶梯直接到达海边，周围餐馆、食廊、商场、超市、摊位一应俱全。酒店装修一般，但还算整洁。 泳池在大堂的屋顶，因此很小，不过女儿倒是喜欢。 包的早餐是西式的，还算丰富。 服务吗，一般 type=str,\n",
-      "'target': 1 type=int,\n",
-      "'chars': [338, 464, 1400, 784, 468, 739, 3, 289, 151, 21, 5, 88, 143, 2, 9, 81, 134, 2573, 766, 233, 196, 23, 536, 342, 297, 2, 405, 698, 132, 281, 74, 744, 1048, 74, 420, 387, 74, 412, 433, 74, 2021, 180, 8, 219, 1929, 213, 4, 34, 31, 96, 363, 8, 230, 2, 66, 18, 229, 331, 768, 4, 11, 1094, 479, 17, 35, 593, 3, 1126, 967, 2, 151, 245, 12, 44, 2, 6, 52, 260, 263, 635, 5, 152, 162, 4, 11, 336, 3, 154, 132, 5, 236, 443, 3, 2, 18, 229, 761, 700, 4, 11, 48, 59, 653, 2, 8, 230] type=list,\n",
-      "'seq_len': 106 type=int},\n",
-      "{'raw_chars': 15.4寸笔记本的键盘确实爽，基本跟台式机差不多了，蛮喜欢数字小键盘，输数字特方便，样子也很美观，做工也相当不错 type=str,\n",
-      "'target': 1 type=int,\n",
-      "'chars': [50, 133, 20, 135, 945, 520, 343, 24, 3, 301, 176, 350, 86, 785, 2, 456, 24, 461, 163, 443, 128, 109, 6, 47, 7, 2, 916, 152, 162, 524, 296, 44, 301, 176, 2, 1384, 524, 296, 259, 88, 143, 2, 92, 67, 26, 12, 277, 269, 2, 188, 223, 26, 228, 83, 6, 63] type=list,\n",
-      "'seq_len': 56 type=int})\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "print(data_bundle.get_dataset('train')[:2])"
   ]
@ -203,17 +145,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Vocabulary(['选', '择', '珠', '江', '花']...)\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "char_vocab = data_bundle.get_vocab('chars')\n",
    "print(char_vocab)"
@ -228,18 +162,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "'选'的index是338\n",
-      "index:338对应的汉字是选\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "index = char_vocab.to_index('选')\n",
    "print(\"'选'的index是{}\".format(index))  # 这个值与上面打印出来的第一个instance的chars的第一个index是一致的\n",
@ -256,17 +181,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Found 4321 out of 4409 words in the pre-training embedding.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from fastNLP.embeddings import StaticEmbedding\n",
    "\n",
@ -283,10 +200,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": null,
+   "metadata": {},
   "outputs": [],
   "source": [
    "from torch import nn\n",
@ -329,288 +244,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "input fields after batch(if batch size is 2):\n",
-      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
-      "\tchars: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 106]) \n",
-      "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
-      "target fields after batch(if batch size is 2):\n",
-      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
-      "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
-      "\n",
-      "Evaluate data in 0.01 seconds!\n",
-      "training epochs started 2019-09-03-23-57-10\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=3000), HTML(value='')), layout=Layout(display…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=38), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.43 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 1/10. Step:300/3000: \n",
-      "\r",
-      "AccuracyMetric: acc=0.81\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=38), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.44 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 2/10. Step:600/3000: \n",
-      "\r",
-      "AccuracyMetric: acc=0.8675\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=38), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.44 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 3/10. Step:900/3000: \n",
-      "\r",
-      "AccuracyMetric: acc=0.878333\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=38), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.43 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 4/10. Step:1200/3000: \n",
-      "\r",
-      "AccuracyMetric: acc=0.873333\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=38), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.44 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 5/10. Step:1500/3000: \n",
-      "\r",
-      "AccuracyMetric: acc=0.878333\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=38), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.42 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 6/10. Step:1800/3000: \n",
-      "\r",
-      "AccuracyMetric: acc=0.895833\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=38), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.44 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 7/10. Step:2100/3000: \n",
-      "\r",
-      "AccuracyMetric: acc=0.8975\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=38), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.43 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 8/10. Step:2400/3000: \n",
-      "\r",
-      "AccuracyMetric: acc=0.894167\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=38), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.48 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 9/10. Step:2700/3000: \n",
-      "\r",
-      "AccuracyMetric: acc=0.8875\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=38), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.43 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 10/10. Step:3000/3000: \n",
-      "\r",
-      "AccuracyMetric: acc=0.895833\n",
-      "\n",
-      "\r\n",
-      "In Epoch:7/Step:2100, got best dev performance:\n",
-      "AccuracyMetric: acc=0.8975\n",
-      "Reloaded the best model.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=19), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 0.34 seconds!\n",
-      "[tester] \n",
-      "AccuracyMetric: acc=0.8975\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'AccuracyMetric': {'acc': 0.8975}}"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
   "source": [
    "from fastNLP import Trainer\n",
    "from fastNLP import CrossEntropyLoss\n",
@ -643,139 +279,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "loading vocabulary file /home/yh/.fastNLP/embedding/bert-chinese-wwm/vocab.txt\n",
-      "Load pre-trained BERT parameters from file /home/yh/.fastNLP/embedding/bert-chinese-wwm/chinese_wwm_pytorch.bin.\n",
-      "Start to generating word pieces for word.\n",
-      "Found(Or segment into word pieces) 4286 words out of 4409.\n",
-      "input fields after batch(if batch size is 2):\n",
-      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
-      "\tchars: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 106]) \n",
-      "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
-      "target fields after batch(if batch size is 2):\n",
-      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
-      "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
-      "\n",
-      "Evaluate data in 0.05 seconds!\n",
-      "training epochs started 2019-09-04-00-02-37\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=3600), HTML(value='')), layout=Layout(display…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=150), HTML(value='')), layout=Layout(display=…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 15.89 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 1/3. Step:1200/3600: \n",
-      "\r",
-      "AccuracyMetric: acc=0.9\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=150), HTML(value='')), layout=Layout(display=…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 15.92 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 2/3. Step:2400/3600: \n",
-      "\r",
-      "AccuracyMetric: acc=0.904167\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=150), HTML(value='')), layout=Layout(display=…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 15.91 seconds!\n",
-      "\r",
-      "Evaluation on dev at Epoch 3/3. Step:3600/3600: \n",
-      "\r",
-      "AccuracyMetric: acc=0.918333\n",
-      "\n",
-      "\r\n",
-      "In Epoch:3/Step:3600, got best dev performance:\n",
-      "AccuracyMetric: acc=0.918333\n",
-      "Reloaded the best model.\n",
-      "Performance on test is:\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=19), HTML(value='')), layout=Layout(display='…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Evaluate data in 29.24 seconds!\n",
-      "[tester] \n",
-      "AccuracyMetric: acc=0.919167\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'AccuracyMetric': {'acc': 0.919167}}"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
   "source": [
    "# 只需要切换一下Embedding即可\n",
    "from fastNLP.embeddings import BertEmbedding\n",
@ -840,9 +346,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "from fastNLP.io import ChnSentiCorpLoader\n",
@ -861,9 +365,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
@ -912,15 +414,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "from fastHan import FastHan\n",
    "from fastNLP import Vocabulary\n",
    "\n",
    "model=FastHan()\n",
+    "# model.set_device('cuda')\n",
    "\n",
    "# 定义分词处理操作\n",
    "def word_seg(ins):\n",
@ -933,6 +434,8 @@
    "    # apply函数将对内部的instance依次执行word_seg操作，并把其返回值放入到raw_words这个field\n",
    "    ds.apply(word_seg, new_field_name='raw_words')\n",
    "    # 除了apply函数，fastNLP还支持apply_field, apply_more(可同时创建多个field)等操作\n",
+    "    # 同时我们增加一个seq_len的field\n",
+    "    ds.add_seq_len('raw_words')\n",
    "\n",
    "vocab = Vocabulary()\n",
    "\n",
@ -961,11 +464,14 @@
    "# 我们把words和target分别设置为input和target，这样它们才会在训练循环中被取出并自动padding, 有关这部分更多的内容参考\n",
    "#  http://www.fastnlp.top/docs/fastNLP/tutorials/tutorial_6_datasetiter.html\n",
    "data_bundle.set_target('target')\n",
-    "data_bundle.set_input('words')  # DataSet也有这两个接口\n",
+    "data_bundle.set_input('words', 'seq_len')  # DataSet也有这两个接口\n",
    "# 如果某些field，您希望它被设置为target或者input，但是不希望fastNLP自动padding或需要使用特定的padding方式，请参考\n",
    "#  http://www.fastnlp.top/docs/fastNLP/fastNLP.core.dataset.html\n",
    "\n",
-    "print(data_bundle.get_dataset('train')[:2])  # 我们可以看一下当前dataset的内容"
+    "print(data_bundle.get_dataset('train')[:2])  # 我们可以看一下当前dataset的内容\n",
+    "\n",
+    "# 由于之后需要使用之前定义的BiLSTMMaxPoolCls模型，所以需要将words这个field修改为chars(因为该模型的forward接受chars参数)\n",
+    "data_bundle.rename_field('words', 'chars')"
   ]
  },
  {
@ -985,9 +491,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "from fastNLP.embeddings import StaticEmbedding\n",
@ -999,11 +503,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
+    "from fastNLP import Trainer\n",
+    "from fastNLP import CrossEntropyLoss\n",
+    "from torch.optim import Adam\n",
+    "from fastNLP import AccuracyMetric\n",
+    "\n",
    "# 初始化模型\n",
    "model = BiLSTMMaxPoolCls(word2vec_embed, len(data_bundle.get_vocab('target')))\n",
    "\n",
@ -1024,6 +531,13 @@
    "tester = Tester(data=data_bundle.get_dataset('test'), model=model, metrics=metric, batch_size=64, device=device)\n",
    "tester.test()"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
@ -1042,7 +556,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.10"
+   "version": "3.6.8"
  }
 },
 "nbformat": 4,
--- a/docs/source/tutorials/文本分类.rst
+++ b/docs/source/tutorials/文本分类.rst
@ -447,6 +447,7 @@ PS: 基于词进行文本分类
    from fastNLP import Vocabulary

    model=FastHan()
+    # model.set_device('cuda')  # 可以取消这一行的注释以增加速度

    # 定义分词处理操作
    def word_seg(ins):
@ -459,6 +460,8 @@ PS: 基于词进行文本分类
        # apply函数将对内部的instance依次执行word_seg操作，并把其返回值放入到raw_words这个field
        ds.apply(word_seg, new_field_name='raw_words')
        # 除了apply函数，fastNLP还支持apply_field, apply_more(可同时创建多个field)等操作
+        # 同时我们增加一个seq_len的field
+        ds.add_seq_len('raw_words')

    vocab = Vocabulary()

@ -500,11 +503,14 @@ PS: 基于词进行文本分类
    # | 0      | 15.4寸笔记本的键盘... | ['15.4', '寸', '笔... | [71, 72, 73, 74, ... |
    # +--------+-----------------------+-----------------------+----------------------+

+    # 由于之后需要使用之前定义的BiLSTMMaxPoolCls模型，所以需要将words这个field修改为chars
+    data_bundle.rename_field('words', 'chars')
+
 我们可以打印一下vocab看一下当前的词表内容

 .. code-block:: python

-    print(data_bundle.get_vocab('words'))
+    print(data_bundle.get_vocab('chars'))
    # Vocabulary([选择, 珠江, 花园, 的, 原因]...)

 (3) 选择预训练词向量
@ -520,7 +526,7 @@ PS: 基于词进行文本分类

        from fastNLP.embeddings import StaticEmbedding

-        word2vec_embed = StaticEmbedding(data_bundle.get_vocab('words'), model_dir_or_name='/path/to/Tencent_AILab_ChineseEmbedding.txt')
+        word2vec_embed = StaticEmbedding(data_bundle.get_vocab('chars'), model_dir_or_name='/path/to/Tencent_AILab_ChineseEmbedding.txt')

 再之后的模型定义与训练过程与上面是一致的，这里就不再赘述了。

--- a/fastNLP/core/dataset.py
+++ b/fastNLP/core/dataset.py
@ -531,11 +531,11 @@ class DataSet(object):
            |  pad_value  |   0   |       |
            +-------------+-------+-------+

-        :param field_names: DataSet中field的名称
-        :param is_input: field是否为input
-        :param is_target: field是否为target
-        :param ignore_type: 是否忽略该field的type, 一般仅在该field至少为input或target时才有意义
-        :param pad_value: 该field的pad的值，仅在该field为input或target时有意义
+        str field_names: DataSet中field的名称
+        bool is_input: field是否为input
+        bool is_target: field是否为target
+        bool ignore_type: 是否忽略该field的type, 一般仅在该field至少为input或target时才有意义
+        int pad_value: 该field的pad的值，仅在该field为input或target时有意义
        :return:
        """
        if len(self.field_arrays)>0:
@ -1146,3 +1146,40 @@ class DataSet(object):

    def _collate_batch(self, ins_list):
        return self.collater.collate_batch(ins_list)
+
+    def concat(self, dataset, inplace=True, field_mapping=None):
+        """
+        将当前dataset与输入的dataset结合成一个更大的dataset，需要保证两个dataset都包含了相同的field。结合后的dataset的input,target
+            以及collate_fn以当前dataset为准。当dataset中包含的field多于当前的dataset，则多余的field会被忽略；若dataset中未包含所有
+            当前dataset含有的field，则会报错。
+
+        :param DataSet, dataset: 需要和当前dataset concat的dataset
+        :param bool, inplace: 是否直接将dataset组合到当前dataset中
+        :param dict, field_mapping: 当dataset中的field名称和当前dataset不一致时，需要通过field_mapping把输入的dataset中的field
+            名称映射到当前field. field_mapping为dict类型，key为dataset中的field名称，value是需要映射成的名称
+
+        :return: DataSet
+        """
+        assert isinstance(dataset, DataSet), "Can only concat two datasets."
+
+        fns_in_this_dataset = set(self.get_field_names())
+        fns_in_other_dataset = dataset.get_field_names()
+        reverse_field_mapping = {}
+        if field_mapping is not None:
+            fns_in_other_dataset = [field_mapping.get(fn, fn) for fn in fns_in_other_dataset]
+            reverse_field_mapping = {v:k for k, v in field_mapping.items()}
+        fns_in_other_dataset = set(fns_in_other_dataset)
+        fn_not_seen = list(fns_in_this_dataset - fns_in_other_dataset)
+
+        if fn_not_seen:
+            raise RuntimeError(f"The following fields are not provided in the dataset:{fn_not_seen}")
+
+        if inplace:
+            ds = self
+        else:
+            ds = deepcopy(self)
+
+        for fn in fns_in_this_dataset:
+            ds.get_field(fn).content.extend(deepcopy(dataset.get_field(reverse_field_mapping.get(fn, fn)).content))
+
+        return ds
--- a/fastNLP/embeddings/stack_embedding.py
+++ b/fastNLP/embeddings/stack_embedding.py
@ -13,6 +13,7 @@ import torch
 from torch import nn as nn

 from .embedding import TokenEmbedding
+from .utils import _check_vocab_has_same_index


 class StackEmbedding(TokenEmbedding):
@ -44,8 +45,9 @@ class StackEmbedding(TokenEmbedding):
                vocabs.append(embed.get_word_vocab())
        _vocab = vocabs[0]
        for vocab in vocabs[1:]:
-            assert vocab == _vocab, "All embeddings in StackEmbedding should use the same word vocabulary."
-        
+            if _vocab!=vocab:
+                _check_vocab_has_same_index(_vocab, vocab)
+
        super(StackEmbedding, self).__init__(_vocab, word_dropout=word_dropout, dropout=dropout)
        assert isinstance(embeds, list)
        for embed in embeds:
@ -60,6 +62,7 @@ class StackEmbedding(TokenEmbedding):
        :return:
        """
        assert isinstance(embed, TokenEmbedding)
+        _check_vocab_has_same_index(self.get_word_vocab(), embed.get_word_vocab())
        self._embed_size += embed.embed_size
        self.embeds.append(embed)
        return self
--- a/fastNLP/embeddings/static_embedding.py
+++ b/fastNLP/embeddings/static_embedding.py
@ -81,7 +81,7 @@ class StaticEmbedding(TokenEmbedding):
                 init_method=None, lower=False, dropout=0, word_dropout=0, normalize=False, min_freq=1, **kwargs):
        r"""
        
-        :param vocab: Vocabulary. 若该项为None则会读取所有的embedding。
+        :param Vocabulary vocab: 词表. StaticEmbedding只会加载包含在词表中的词的词向量，在预训练向量中没找到的使用随机初始化
        :param model_dir_or_name: 可以有两种方式调用预训练好的static embedding：第一种是传入embedding文件夹(文件夹下应该只有一个
            以.txt作为后缀的文件)或文件路径；第二种是传入embedding的名称，第二种情况将自动查看缓存中是否存在该模型，没有的话将自动下载。
            如果输入为None则使用embedding_dim的维度随机初始化一个embedding。
--- a/fastNLP/embeddings/utils.py
+++ b/fastNLP/embeddings/utils.py
@ -89,3 +89,16 @@ def get_sinusoid_encoding_table(n_position, d_hid, padding_idx=None):

    return torch.FloatTensor(sinusoid_table)

+
+def _check_vocab_has_same_index(vocab, other_vocab):
+    """
+    检查两个vocabulary是否含有相同的word idx
+
+    :param Vocabulary vocab:
+    :param Vocabulary other_vocab:
+    :return:
+    """
+    if other_vocab != vocab:
+        for word, word_ix in vocab:
+            other_word_idx = other_vocab.to_index(word)
+            assert other_word_idx == word_ix, f"Word {word} has different index in vocabs, {word_ix} Vs. {other_word_idx}."
--- a/fastNLP/models/base_model.py
+++ b/fastNLP/models/base_model.py
@ -34,56 +34,3 @@ class NaiveClassifier(BaseModel):
    
    def predict(self, x):
        return {"predict": torch.sigmoid(self.mlp(x)) > 0.5}
-
-
-class NaiveClassifier2(BaseModel):
-    r"""
-    一个简单的分类器例子，可用于各种测试
-    """
-
-    def __init__(self, in_feature_dim, out_feature_dim):
-        super(NaiveClassifier2, self).__init__()
-        self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])
-
-    def forward(self, x):
-        return {"predict": self.mlp(x)}
-
-    def predict(self, x):
-        return {"predict": torch.sigmoid(self.mlp(x)) > 0.5}
-
-
-class NaiveClassifier3(BaseModel):
-    r"""
-    一个简单的分类器例子，可用于各种测试
-    """
-
-    def __init__(self, in_feature_dim, out_feature_dim):
-        super(NaiveClassifier3, self).__init__()
-        self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])
-
-    @torch.cuda.amp.autocast()
-    def forward(self, x):
-        return {"predict": self.mlp(x)}
-
-    @torch.cuda.amp.autocast()
-    def predict(self, x):
-        return {"predict": torch.sigmoid(self.mlp(x)) > 0.5}
-
-
-class NaiveClassifier4(BaseModel):
-    r"""
-    一个简单的分类器例子，可用于各种测试
-    """
-
-    def __init__(self, in_feature_dim, out_feature_dim):
-        super(NaiveClassifier4, self).__init__()
-        self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])
-
-    def forward(self, x):
-        with torch.cuda.amp.autocast():
-            return {"predict": self.mlp(x)}
-
-
-    def predict(self, x):
-        with torch.cuda.amp.autocast():
-            return {"predict": torch.sigmoid(self.mlp(x)) > 0.5}
--- a/fastNLP/modules/encoder/bert.py
+++ b/fastNLP/modules/encoder/bert.py
@ -464,6 +464,24 @@ class BertModel(nn.Module):
            logger.info('DistilBert has NOT pooler, will use hidden states of [CLS] token as pooled output.')
        self.apply(self.init_bert_weights)

+    @property
+    def dtype(self):
+        """
+        :obj:`torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype).
+        """
+        try:
+            return next(self.parameters()).dtype
+        except StopIteration:
+            # For nn.DataParallel compatibility in PyTorch 1.5
+
+            def find_tensor_attributes(module: nn.Module):
+                tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
+                return tuples
+
+            gen = self._named_members(get_members_fn=find_tensor_attributes)
+            first_tuple = next(gen)
+            return first_tuple[1].dtype
+
    def init_bert_weights(self, module):
        r""" Initialize the weights.
        """
@ -477,7 +495,8 @@ class BertModel(nn.Module):
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()

-    def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True):
+    def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True,
+                position_ids=None):
        """

        :param torch.LongTensor input_ids: bsz x max_len的输入id
@ -485,6 +504,7 @@ class BertModel(nn.Module):
        :param attention_mask: 需要attend的为1，不需要为0
        :param bool output_all_encoded_layers: 是否输出所有层，默认输出token embedding(包含bpe, position以及type embedding)
            及每一层的hidden states。如果为False，只输出最后一层的结果
+        :param torch.LongTensor position_ids: bsz x max_len, position的id
        :return: encode_layers: 如果output_all_encoded_layers为True，返回list(共num_layers+1个元素)，每个元素为
            bsz x max_len x hidden_size否则返回bsz x max_len x hidden_size的tensor;
            pooled_output: bsz x hidden_size为cls的表示，可以用于句子的分类
@ -506,10 +526,12 @@ class BertModel(nn.Module):
        # positions we want to attend and -10000.0 for masked positions.
        # Since we are adding it to the raw scores before the softmax, this is
        # effectively the same as removing these entirely.
-        extended_attention_mask = extended_attention_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility
+        # this will cause an issue under DataParallel: https://github.com/pytorch/pytorch/issues/40457#issuecomment-648396469
+        # extended_attention_mask = extended_attention_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility
+        extended_attention_mask = extended_attention_mask.to(self.dtype)
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

-        embedding_output = self.embeddings(input_ids, token_type_ids)
+        embedding_output = self.embeddings(input_ids, token_type_ids=token_type_ids, position_ids=position_ids)
        encoded_layers = self.encoder(embedding_output,
                                      extended_attention_mask,
                                      output_all_encoded_layers=output_all_encoded_layers)
--- a/fastNLP/modules/encoder/gpt2.py
+++ b/fastNLP/modules/encoder/gpt2.py
@ -787,6 +787,24 @@ class GPT2Model(GPT2PreTrainedModel):
        for layer, heads in heads_to_prune.items():
            self.h[layer].attn.prune_heads(heads)

+    @property
+    def dtype(self):
+        """
+        :obj:`torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype).
+        """
+        try:
+            return next(self.parameters()).dtype
+        except StopIteration:
+            # For nn.DataParallel compatibility in PyTorch 1.5
+
+            def find_tensor_attributes(module: nn.Module):
+                tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
+                return tuples
+
+            gen = self._named_members(get_members_fn=find_tensor_attributes)
+            first_tuple = next(gen)
+            return first_tuple[1].dtype
+
    def forward(self, input_ids, state=None, attention_mask=None, token_type_ids=None, position_ids=None,
                head_mask=None, output_attentions=True):
        """
@ -834,7 +852,9 @@ class GPT2Model(GPT2PreTrainedModel):
            # positions we want to attend and -10000.0 for masked positions.
            # Since we are adding it to the raw scores before the softmax, this is
            # effectively the same as removing these entirely.
-            attention_mask = attention_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility
+            # this will cause an issue under DataParallel: https://github.com/pytorch/pytorch/issues/40457#issuecomment-648396469
+            # attention_mask = attention_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility
+            attention_mask = attention_mask.to(self.dtype)
            attention_mask = (1.0 - attention_mask) * -10000.0
            # attention_mask = attention_mask.masked_fill(attention_mask.eq(0), -10000.0)

--- a/fastNLP/modules/encoder/roberta.py
+++ b/fastNLP/modules/encoder/roberta.py
@ -39,7 +39,7 @@ class RobertaEmbeddings(BertEmbeddings):
            config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
        )

-    def forward(self, input_ids, token_type_ids, words_embeddings=None):
+    def forward(self, input_ids, token_type_ids, words_embeddings=None, **kwargs):
        position_ids = self.create_position_ids_from_input_ids(input_ids)

        return super().forward(
--- a/requirements.txt
+++ b/requirements.txt
@ -3,6 +3,5 @@ torch>=1.0.0
 tqdm>=4.28.1
 prettytable>=0.7.2
 requests
-spacy
 prettytable>=0.7.2
 regex!=2019.12.17
--- a/tests/core/test_dataset.py
+++ b/tests/core/test_dataset.py
@ -268,6 +268,57 @@ class TestDataSetMethods(unittest.TestCase):
        with self.assertRaises(RuntimeError) as RE:
            ds.add_field('test', [])

+    def test_concat(self):
+        """
+        测试两个dataset能否正确concat
+
+        """
+        ds1 = DataSet({"x": [[1, 2, 3, 4] for i in range(10)], "y": [[5, 6] for i in range(10)]})
+        ds2 = DataSet({"x": [[4,3,2,1] for i in range(10)], "y": [[6,5] for i in range(10)]})
+        ds3 = ds1.concat(ds2)
+
+        self.assertEqual(len(ds3), 20)
+
+        self.assertListEqual(ds1[9]['x'], [1, 2, 3, 4])
+        self.assertListEqual(ds1[10]['x'], [4,3,2,1])
+
+        ds2[0]['x'][0] = 100
+        self.assertEqual(ds3[10]['x'][0], 4)  # 不改变copy后的field了
+
+        ds3[10]['x'][0] = -100
+        self.assertEqual(ds2[0]['x'][0], 100)  # 不改变copy前的field了
+
+        # 测试inplace
+        ds1 = DataSet({"x": [[1, 2, 3, 4] for i in range(10)], "y": [[5, 6] for i in range(10)]})
+        ds2 = DataSet({"x": [[4, 3, 2, 1] for i in range(10)], "y": [[6, 5] for i in range(10)]})
+        ds3 = ds1.concat(ds2, inplace=True)
+
+        ds2[0]['x'][0] = 100
+        self.assertEqual(ds3[10]['x'][0], 4)  # 不改变copy后的field了
+
+        ds3[10]['x'][0] = -100
+        self.assertEqual(ds2[0]['x'][0], 100)  # 不改变copy前的field了
+
+        ds3[0]['x'][0] = 100
+        self.assertEqual(ds1[0]['x'][0], 100)  # 改变copy前的field了
+
+        # 测试mapping
+        ds1 = DataSet({"x": [[1, 2, 3, 4] for i in range(10)], "y": [[5, 6] for i in range(10)]})
+        ds2 = DataSet({"X": [[4, 3, 2, 1] for i in range(10)], "Y": [[6, 5] for i in range(10)]})
+        ds3 = ds1.concat(ds2, field_mapping={'X':'x', 'Y':'y'})
+        self.assertEqual(len(ds3), 20)
+
+        # 测试忽略掉多余的
+        ds1 = DataSet({"x": [[1, 2, 3, 4] for i in range(10)], "y": [[5, 6] for i in range(10)]})
+        ds2 = DataSet({"X": [[4, 3, 2, 1] for i in range(10)], "Y": [[6, 5] for i in range(10)], 'Z':[0]*10})
+        ds3 = ds1.concat(ds2, field_mapping={'X':'x', 'Y':'y'})
+
+        # 测试报错
+        ds1 = DataSet({"x": [[1, 2, 3, 4] for i in range(10)], "y": [[5, 6] for i in range(10)]})
+        ds2 = DataSet({"X": [[4, 3, 2, 1] for i in range(10)]})
+        with self.assertRaises(RuntimeError):
+            ds3 = ds1.concat(ds2, field_mapping={'X':'x'})
+

 class TestDataSetIter(unittest.TestCase):
    def test__repr__(self):
--- a/tests/core/test_trainer.py
+++ b/tests/core/test_trainer.py
@ -14,8 +14,12 @@ from fastNLP import CrossEntropyLoss
 from fastNLP import AccuracyMetric
 from fastNLP import SGD
 from fastNLP import Trainer
-from fastNLP.models.base_model import NaiveClassifier, NaiveClassifier2, NaiveClassifier3, NaiveClassifier4
+from fastNLP.models.base_model import NaiveClassifier
 from fastNLP import TorchLoaderIter
+from fastNLP.models import BaseModel
+from fastNLP.modules import MLP
+from pkg_resources import parse_version
+


 def prepare_fake_dataset():
@ -577,6 +581,22 @@ class TrainerTestGround(unittest.TestCase):
    """


+class NaiveClassifier2(BaseModel):
+    r"""
+    一个简单的分类器例子，可用于各种测试
+    """
+
+    def __init__(self, in_feature_dim, out_feature_dim):
+        super(NaiveClassifier2, self).__init__()
+        self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])
+
+    def forward(self, x):
+        return {"predict": self.mlp(x)}
+
+    def predict(self, x):
+        return {"predict": torch.sigmoid(self.mlp(x)) > 0.5}
+
+
 class Fp16TrainerTest(unittest.TestCase):
    def test_raise_error(self):
        data_set = prepare_fake_dataset()
@ -605,7 +625,7 @@ class Fp16TrainerTest(unittest.TestCase):
                              metrics=AccuracyMetric(pred="predict", target="y"), validate_every=-1, save_path=None,
                              use_tqdm=True, check_code_level=2, fp16=True, device=torch.device('cpu'))

-    @unittest.skipIf(torch.cuda.is_available()==False, "Skip when no cuda device detch")
+    @unittest.skipIf(torch.cuda.is_available()==False or parse_version(torch.__version__) < parse_version('1.6'), "Skip when no cuda device detch")
    def test_run_fp16(self):
        data_set = prepare_fake_dataset()
        data_set.set_input("x", flag=True)
@ -627,7 +647,7 @@ class Fp16TrainerTest(unittest.TestCase):
                          use_tqdm=True, check_code_level=2, fp16=True, device=0, test_use_fp16=False)
        trainer.train(load_best_model=False)

-    @unittest.skipIf(torch.cuda.device_count()<2, "Skip when lower than 1 gpus.")
+    @unittest.skipIf(torch.cuda.device_count()<2 or parse_version(torch.__version__) < parse_version('1.6'), "Skip when lower than 1 gpus.")
    def test_run_data_parallel(self):
        data_set = prepare_fake_dataset()
        data_set.set_input("x", flag=True)
@ -635,6 +655,21 @@ class Fp16TrainerTest(unittest.TestCase):

        train_set, dev_set = data_set.split(0.3)

+        class NaiveClassifier2(BaseModel):
+            r"""
+            一个简单的分类器例子，可用于各种测试
+            """
+
+            def __init__(self, in_feature_dim, out_feature_dim):
+                super(NaiveClassifier2, self).__init__()
+                self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])
+
+            def forward(self, x):
+                return {"predict": self.mlp(x)}
+
+            def predict(self, x):
+                return {"predict": torch.sigmoid(self.mlp(x)) > 0.5}
+
        model = NaiveClassifier2(2, 1)
        with self.assertRaises(RuntimeError):
            trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
@ -643,12 +678,46 @@ class Fp16TrainerTest(unittest.TestCase):
                              use_tqdm=True, check_code_level=2, fp16=True, device=[0, 1])

        with self.assertRaises(RuntimeError):
+            class NaiveClassifier3(BaseModel):
+                r"""
+                一个简单的分类器例子，可用于各种测试
+                """
+
+                def __init__(self, in_feature_dim, out_feature_dim):
+                    super(NaiveClassifier3, self).__init__()
+                    self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])
+
+                @torch.cuda.amp.autocast()
+                def forward(self, x):
+                    return {"predict": self.mlp(x)}
+
+                @torch.cuda.amp.autocast()
+                def predict(self, x):
+                    return {"predict": torch.sigmoid(self.mlp(x)) > 0.5}
+
            model = NaiveClassifier3(2, 1)
            trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
                              batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
                              metrics=AccuracyMetric(pred="predict", target="y"), validate_every=-1, save_path=None,
                              use_tqdm=True, check_code_level=2, fp16=True, device=[0, 1], test_use_fp16=True)

+        class NaiveClassifier4(BaseModel):
+            r"""
+            一个简单的分类器例子，可用于各种测试
+            """
+
+            def __init__(self, in_feature_dim, out_feature_dim):
+                super(NaiveClassifier4, self).__init__()
+                self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])
+
+            def forward(self, x):
+                with torch.cuda.amp.autocast():
+                    return {"predict": self.mlp(x)}
+
+            def predict(self, x):
+                with torch.cuda.amp.autocast():
+                    return {"predict": torch.sigmoid(self.mlp(x)) > 0.5}
+
        model = NaiveClassifier4(2, 1)
        trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
                          batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
--- a/tests/embeddings/test_bert_embedding.py
+++ b/tests/embeddings/test_bert_embedding.py
@ -31,29 +31,33 @@ class TestDownload(unittest.TestCase):

 class TestBertEmbedding(unittest.TestCase):
    def test_bert_embedding_1(self):
-        vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInBERT".split())
-        embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1)
-        requires_grad = embed.requires_grad
-        embed.requires_grad = not requires_grad
-        embed.train()
-        words = torch.LongTensor([[2, 3, 4, 0]])
-        result = embed(words)
-        self.assertEqual(result.size(), (1, 4, 16))
+        for pool_method in ['first', 'last', 'max', 'avg']:
+            with self.subTest(pool_method=pool_method):
+                vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInBERT".split())
+                embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1,
+                                      pool_method=pool_method)
+                requires_grad = embed.requires_grad
+                embed.requires_grad = not requires_grad
+                embed.train()
+                words = torch.LongTensor([[2, 3, 4, 0]])
+                result = embed(words)
+                self.assertEqual(result.size(), (1, 4, 16))

-        embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1)
-        embed.eval()
-        words = torch.LongTensor([[2, 3, 4, 0]])
-        result = embed(words)
-        self.assertEqual(result.size(), (1, 4, 16))
+                embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1,
+                                      pool_method=pool_method)
+                embed.eval()
+                words = torch.LongTensor([[2, 3, 4, 0]])
+                result = embed(words)
+                self.assertEqual(result.size(), (1, 4, 16))

-        # 自动截断而不报错
-        embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1,
-                              auto_truncate=True)
+                # 自动截断而不报错
+                embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1,
+                                      auto_truncate=True, pool_method=pool_method)

-        words = torch.LongTensor([[2, 3, 4, 1]*10,
-                                  [2, 3]+[0]*38])
-        result = embed(words)
-        self.assertEqual(result.size(), (2, 40, 16))
+                words = torch.LongTensor([[2, 3, 4, 1]*10,
+                                          [2, 3]+[0]*38])
+                result = embed(words)
+                self.assertEqual(result.size(), (2, 40, 16))

    def test_save_load(self):
        bert_save_test = 'bert_save_test'
--- a/tests/embeddings/test_stack_embeddings.py
+++ b/tests/embeddings/test_stack_embeddings.py
@ -18,3 +18,16 @@ class TestCharEmbed(unittest.TestCase):
        y = embed(x)
        self.assertEqual(tuple(y.size()), (2, 3, 130))

+    def test_case_2(self):
+        # 测试只需要拥有一样的index就可以concat
+        ds = DataSet([Instance(words=['hello', 'world']), Instance(words=['hello', 'Jack'])])
+        vocab1 = Vocabulary().from_dataset(ds, field_name='words')
+        vocab2 = Vocabulary().from_dataset(ds, field_name='words')
+        self.assertEqual(len(vocab1), 5)
+        cnn_embed = CNNCharEmbedding(vocab1, embed_size=60)
+        lstm_embed = LSTMCharEmbedding(vocab2, embed_size=70)
+        embed = StackEmbedding([cnn_embed, lstm_embed])
+        x = torch.LongTensor([[2, 1, 0], [4, 3, 4]])
+        y = embed(x)
+        self.assertEqual(tuple(y.size()), (2, 3, 130))
+
--- a/tests/io/pipe/test_matching.py
+++ b/tests/io/pipe/test_matching.py
@ -74,6 +74,7 @@ class TestRunMatchingPipe(unittest.TestCase):
                name, vocabs = y
                self.assertEqual(x + 1 if name == 'words' else x, len(vocabs))

+    @unittest.skipIf('TRAVIS' in os.environ, "Skip in travis")
    def test_spacy(self):
        data_set_dict = {
            'Quora': ('tests/data_for_tests/io/Quora', QuoraPipe, QuoraBertPipe, (2, 2, 2), (93, 2)),