fastNLP/tutorials/tutorial_5_loss_optimizer.ipynb

604 lines
16 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 使用Trainer和Tester快速训练和测试"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 数据读入和处理"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/remote-home/ynzheng/anaconda3/envs/now/lib/python3.8/site-packages/FastNLP-0.5.0-py3.8.egg/fastNLP/io/loader/classification.py:340: UserWarning: SST2's test file has no target.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"In total 3 datasets:\n",
"\ttest has 1821 instances.\n",
"\ttrain has 67349 instances.\n",
"\tdev has 872 instances.\n",
"In total 2 vocabs:\n",
"\twords has 16292 entries.\n",
"\ttarget has 2 entries.\n",
"\n",
"+-----------------------------------+--------+-----------------------------------+---------+\n",
"| raw_words | target | words | seq_len |\n",
"+-----------------------------------+--------+-----------------------------------+---------+\n",
"| hide new secretions from the p... | 1 | [4110, 97, 12009, 39, 2, 6843,... | 7 |\n",
"+-----------------------------------+--------+-----------------------------------+---------+\n",
"Vocabulary(['hide', 'new', 'secretions', 'from', 'the']...)\n"
]
}
],
"source": [
"from fastNLP.io import SST2Pipe\n",
"\n",
"# Build the SST-2 pipe and let it load and preprocess the data;\n",
"# process_from_file() is called without an explicit path here.\n",
"pipe = SST2Pipe()\n",
"databundle = pipe.process_from_file()\n",
"\n",
"# Keep a handle on the word vocabulary: its size is needed later to\n",
"# define the model's embedding.\n",
"vocab = databundle.get_vocab('words')\n",
"\n",
"# Overview of the bundle, the first training instance, and the vocabulary.\n",
"print(databundle)\n",
"print(databundle.get_dataset('train')[0])\n",
"print(vocab)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4925 872 75\n"
]
}
],
"source": [
"# Work on the first 5000 training instances only.\n",
"train_subset = databundle.get_dataset('train')[:5000]\n",
"\n",
"# Hold out 1.5% of that subset as a labelled test set (presumably because\n",
"# SST2's own test file has no target, as the loader warning says).\n",
"train_data, test_data = train_subset.split(0.015)\n",
"\n",
"# The official dev set is used for validation during training.\n",
"dev_data = databundle.get_dataset('dev')\n",
"\n",
"print(len(train_data), len(dev_data), len(test_data))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------------+-----------+--------+-------+---------+\n",
"| field_names | raw_words | target | words | seq_len |\n",
"+-------------+-----------+--------+-------+---------+\n",
"| is_input | False | False | True | True |\n",
"| is_target | False | True | False | False |\n",
"| ignore_type | | False | False | False |\n",
"| pad_value | | 0 | 0 | 0 |\n",
"+-------------+-----------+--------+-------+---------+\n"
]
},
{
"data": {
"text/plain": [
"<prettytable.PrettyTable at 0x7f49ec540160>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List every field with its is_input / is_target / ignore_type / pad_value settings.\n",
"train_data.print_field_meta()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 使用内置模型训练"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from fastNLP.models import CNNText\n",
"\n",
"# Dimensionality of the word embeddings.\n",
"EMBED_DIM = 100\n",
"\n",
"# CNNText takes a tuple as its first argument and uses it to define the\n",
"# model's embedding; here it is (vocabulary size, embedding dimension).\n",
"embed_shape = (len(vocab), EMBED_DIM)\n",
"\n",
"# Custom kernel_nums, kernel_sizes, padding and dropout values can be passed too\n",
"# (check the installed fastNLP version for the exact keyword list).\n",
"model_cnn = CNNText(embed_shape, num_classes=2, dropout=0.1)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from fastNLP import AccuracyMetric, Const\n",
"\n",
"# Name explicitly which value is the prediction and which is the target.\n",
"# In this example a bare `metrics = AccuracyMetric()` is equivalent.\n",
"metrics = AccuracyMetric(\n",
"    pred=Const.OUTPUT,\n",
"    target=Const.TARGET,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from fastNLP import CrossEntropyLoss\n",
"\n",
"# Name explicitly which value is the prediction and which is the target.\n",
"# In this example a bare `loss = CrossEntropyLoss()` is equivalent.\n",
"loss = CrossEntropyLoss(\n",
"    pred=Const.OUTPUT,\n",
"    target=Const.TARGET,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"from fastNLP import LossFunc\n",
"\n",
"# LossFunc builds a loss object whose value is computed by `func`. Each keyword\n",
"# argument maps a parameter of `func` to the name of a value that is looked up\n",
"# in the model's return value or in the DataSet fields marked is_target=True:\n",
"#   input=Const.OUTPUT   -> the value named Const.OUTPUT is passed to func as `input`\n",
"#   target=Const.TARGET  -> the value named Const.TARGET is passed to func as `target`\n",
"# The cross entropy built by hand here has the same effect as fastNLP's own\n",
"# CrossEntropyLoss, which is the loss actually used for training below.\n",
"func = torch.nn.functional.cross_entropy\n",
"loss_func = LossFunc(func, input=Const.OUTPUT, target=Const.TARGET)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import torch.optim as optim\n",
"\n",
"# Define the optimizer with torch.optim; every hyper-parameter is spelled out\n",
"# so that it is easy to tweak.\n",
"optimizer = optim.RMSprop(\n",
"    model_cnn.parameters(),\n",
"    lr=0.01,\n",
"    alpha=0.99,\n",
"    eps=1e-08,\n",
"    weight_decay=0,\n",
"    momentum=0,\n",
"    centered=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"input fields after batch(if batch size is 2):\n",
"\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 4]) \n",
"\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
"target fields after batch(if batch size is 2):\n",
"\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
"\n",
"training epochs started 2020-02-27-11-31-25\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=3080.0), HTML(value='')), layout=Layout(d…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=55.0), HTML(value='')), layout=Layout(dis…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 0.75 seconds!\n",
"\r",
"Evaluation on dev at Epoch 1/10. Step:308/3080: \n",
"\r",
"AccuracyMetric: acc=0.751147\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=55.0), HTML(value='')), layout=Layout(dis…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 0.83 seconds!\n",
"\r",
"Evaluation on dev at Epoch 2/10. Step:616/3080: \n",
"\r",
"AccuracyMetric: acc=0.755734\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=55.0), HTML(value='')), layout=Layout(dis…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 1.32 seconds!\n",
"\r",
"Evaluation on dev at Epoch 3/10. Step:924/3080: \n",
"\r",
"AccuracyMetric: acc=0.758028\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=55.0), HTML(value='')), layout=Layout(dis…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 0.88 seconds!\n",
"\r",
"Evaluation on dev at Epoch 4/10. Step:1232/3080: \n",
"\r",
"AccuracyMetric: acc=0.741972\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=55.0), HTML(value='')), layout=Layout(dis…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 0.96 seconds!\n",
"\r",
"Evaluation on dev at Epoch 5/10. Step:1540/3080: \n",
"\r",
"AccuracyMetric: acc=0.728211\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=55.0), HTML(value='')), layout=Layout(dis…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 0.87 seconds!\n",
"\r",
"Evaluation on dev at Epoch 6/10. Step:1848/3080: \n",
"\r",
"AccuracyMetric: acc=0.755734\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=55.0), HTML(value='')), layout=Layout(dis…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 1.04 seconds!\n",
"\r",
"Evaluation on dev at Epoch 7/10. Step:2156/3080: \n",
"\r",
"AccuracyMetric: acc=0.732798\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=55.0), HTML(value='')), layout=Layout(dis…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 0.57 seconds!\n",
"\r",
"Evaluation on dev at Epoch 8/10. Step:2464/3080: \n",
"\r",
"AccuracyMetric: acc=0.747706\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=55.0), HTML(value='')), layout=Layout(dis…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 0.48 seconds!\n",
"\r",
"Evaluation on dev at Epoch 9/10. Step:2772/3080: \n",
"\r",
"AccuracyMetric: acc=0.732798\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=55.0), HTML(value='')), layout=Layout(dis…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 0.48 seconds!\n",
"\r",
"Evaluation on dev at Epoch 10/10. Step:3080/3080: \n",
"\r",
"AccuracyMetric: acc=0.740826\n",
"\n",
"\r\n",
"In Epoch:3/Step:924, got best dev performance:\n",
"AccuracyMetric: acc=0.758028\n",
"Reloaded the best model.\n"
]
},
{
"data": {
"text/plain": [
"{'best_eval': {'AccuracyMetric': {'acc': 0.758028}},\n",
" 'best_epoch': 3,\n",
" 'best_step': 924,\n",
" 'seconds': 160.58}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from fastNLP import Trainer\n",
"\n",
"# Number of training epochs and the batch size.\n",
"N_EPOCHS = 10\n",
"BATCH_SIZE = 16\n",
"\n",
"# If no optimizer is passed when the Trainer is defined, it defaults to\n",
"# torch.optim.Adam with a learning rate of lr=4e-3.\n",
"# Only `loss` is used as the loss function here; if you are curious, try another\n",
"# one instead, e.g. the custom `loss_func` defined earlier.\n",
"trainer = Trainer(\n",
"    model=model_cnn,\n",
"    train_data=train_data,\n",
"    dev_data=dev_data,\n",
"    loss=loss,\n",
"    metrics=metrics,\n",
"    optimizer=optimizer,\n",
"    n_epochs=N_EPOCHS,\n",
"    batch_size=BATCH_SIZE,\n",
")\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=5.0), HTML(value='')), layout=Layout(disp…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Evaluate data in 0.43 seconds!\n",
"[tester] \n",
"AccuracyMetric: acc=0.773333\n"
]
},
{
"data": {
"text/plain": [
"{'AccuracyMetric': {'acc': 0.773333}}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from fastNLP import Tester\n",
"\n",
"# Evaluate the trained model on the held-out test split, using a fresh\n",
"# accuracy metric instance.\n",
"test_metric = AccuracyMetric()\n",
"tester = Tester(test_data, model_cnn, metrics=test_metric)\n",
"tester.test()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python Now",
"language": "python",
"name": "now"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}