mirror of https://gitee.com/fastnlp/fastNLP.git
synced 2024-12-02 04:07:35 +08:00
commit 7ff29877cd
@@ -1,87 +1,154 @@
from collections import namedtuple
import _pickle

import numpy as np
import torch

from fastNLP.action.action import Action
from fastNLP.action.action import RandomSampler, Batchifier
from fastNLP.modules.utils import seq_mask


class Tester(Action):
class BaseTester(Action):
    """docstring for Tester"""

    TestConfig = namedtuple("config", ["validate_in_training", "save_dev_input", "save_output",
                                       "save_loss", "batch_size"])

    def __init__(self, test_args):
        """
        :param test_args: named tuple
        """
        super(Tester, self).__init__()
        self.validate_in_training = test_args.validate_in_training
        self.save_dev_input = test_args.save_dev_input
        self.valid_x = None
        self.valid_y = None
        self.save_output = test_args.save_output
        super(BaseTester, self).__init__()
        self.validate_in_training = test_args["validate_in_training"]
        self.save_dev_data = None
        self.save_output = test_args["save_output"]
        self.output = None
        self.save_loss = test_args.save_loss
        self.save_loss = test_args["save_loss"]
        self.mean_loss = None
        self.batch_size = test_args.batch_size
        self.batch_size = test_args["batch_size"]
        self.pickle_path = test_args["pickle_path"]
        self.iterator = None

    def test(self, network, data):
        print("testing")
        network.mode(test=True)  # turn on the testing mode
        if self.save_dev_input:
            if self.valid_x is None:
                valid_x, valid_y = network.prepare_input(data)
                self.valid_x = valid_x
                self.valid_y = valid_y
            else:
                valid_x = self.valid_x
                valid_y = self.valid_y
        else:
            valid_x, valid_y = network.prepare_input(data)
        self.model = None
        self.eval_history = []

        # split into batches by self.batch_size
        iterations, test_batch_generator = self.batchify(self.batch_size, valid_x, valid_y)
    def test(self, network):
        # print("--------------testing----------------")
        self.model = network

        # turn on the testing mode; clean up the history
        self.mode(network, test=True)

        dev_data = self.prepare_input(self.pickle_path)

        self.iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))

        batch_output = list()
        loss_history = list()
        # turn on the testing mode of the network
        network.mode(test=True)
        num_iter = len(dev_data) // self.batch_size

        for step in range(iterations):
            batch_x, batch_y = test_batch_generator.__next__()
        for step in range(num_iter):
            batch_x, batch_y = self.batchify(dev_data)

            # forward pass from test input to predicted output
            prediction = network.data_forward(batch_x)

            loss = network.get_loss(prediction, batch_y)
            prediction = self.data_forward(network, batch_x)
            eval_results = self.evaluate(prediction, batch_y)

            if self.save_output:
                batch_output.append(prediction.data)
                batch_output.append(prediction)
            if self.save_loss:
                loss_history.append(loss)
                self.log(self.make_log(step, loss))
                self.eval_history.append(eval_results)

        if self.save_loss:
            self.mean_loss = np.mean(np.array(loss_history))
        if self.save_output:
            self.output = self.make_output(batch_output)

    def prepare_input(self, data_path):
        """
        Save the dev data once it is loaded. Can return it directly next time.
        :param data_path: str, the path to the pickle data for dev
        :return save_dev_data: list. Each entry is a sample, which is also a list of features and label(s).
        """
        if self.save_dev_data is None:
            data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
            self.save_dev_data = data_dev
        return self.save_dev_data

    @property
    def loss(self):
        return self.mean_loss

    @property
    def result(self):
        return self.output
    def batchify(self, data):
        """
        1. Perform batching from data and produce a batch of training data.
        2. Add padding.
        :param data: list. Each entry is a sample, which is also a list of features and label(s).
            E.g.
                [
                    [[word_11, word_12, word_13], [label_11, label_12]],  # sample 1
                    [[word_21, word_22, word_23], [label_21, label_22]],  # sample 2
                    ...
                ]
        :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
                batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
        """
        indices = next(self.iterator)
        batch = [data[idx] for idx in indices]
        batch_x = [sample[0] for sample in batch]
        batch_y = [sample[1] for sample in batch]
        batch_x = self.pad(batch_x)
        return batch_x, batch_y

    @staticmethod
    def make_output(batch_outputs):
        # construct full prediction with batch outputs
        return np.concatenate(batch_outputs, axis=0)

    @staticmethod
    def pad(batch, fill=0):
        """
        Pad a batch of samples to maximum length.
        :param batch: list of list
        :param fill: word index to pad, default 0.
        :return: a padded batch
        """
        max_length = max([len(x) for x in batch])
        for idx, sample in enumerate(batch):
            if len(sample) < max_length:
                batch[idx] = sample + [fill] * (max_length - len(sample))
        return batch

    def load_config(self, args):
    def data_forward(self, network, data):
        raise NotImplementedError

    def load_dataset(self, args):
    def evaluate(self, predict, truth):
        raise NotImplementedError

    @property
    def matrices(self):
        raise NotImplementedError

    def mode(self, model, test=True):
        """To do: combine this function with Trainer ?? """
        if test:
            model.eval()
        else:
            model.train()
        self.eval_history.clear()


class POSTester(BaseTester):
    """
    Tester for sequence labeling.
    """

    def __init__(self, test_args):
        super(POSTester, self).__init__(test_args)
        self.max_len = None
        self.mask = None
        self.batch_result = None

    def data_forward(self, network, x):
        """To Do: combine with Trainer

        :param network: the PyTorch model
        :param x: list of list, [batch_size, max_len]
        :return y: [batch_size, num_classes]
        """
        seq_len = [len(seq) for seq in x]
        x = torch.Tensor(x).long()
        self.batch_size = x.size(0)
        self.max_len = x.size(1)
        self.mask = seq_mask(seq_len, self.max_len)
        y = network(x)
        return y

    def evaluate(self, predict, truth):
        truth = torch.Tensor(truth)
        loss, prediction = self.model.loss(predict, truth, self.mask, self.batch_size, self.max_len)
        return loss.data

    def matrices(self):
        return np.mean(self.eval_history)
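For reference, a minimal sketch of the padding step above on a made-up toy batch (illustration only, not part of the commit): each sample is a list of word indices and shorter samples are extended with fill=0, as in pad() above.

# toy batch of word-index lists, padded with fill=0 as in BaseTester.pad()
batch_x = [[4, 2, 7], [5, 1], [9]]
max_length = max(len(x) for x in batch_x)                     # 3
padded = [x + [0] * (max_length - len(x)) for x in batch_x]
print(padded)                                                 # [[4, 2, 7], [5, 1, 0], [9, 0, 0]]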
@@ -1,12 +1,12 @@
import _pickle
from collections import namedtuple

import numpy as np
import torch

from fastNLP.action.action import Action
from fastNLP.action.action import RandomSampler, Batchifier
from fastNLP.action.tester import Tester
from fastNLP.action.tester import POSTester
from fastNLP.modules.utils import seq_mask


class BaseTrainer(Action):
@@ -21,23 +21,29 @@ class BaseTrainer(Action):
    - grad_backward
    - get_loss
    """
    TrainConfig = namedtuple("config", ["epochs", "validate", "batch_size", "pickle_path"])

    def __init__(self, train_args):
        """
        training parameters
        :param train_args: dict of (key, value)

        The base trainer requires the following keys:
        - epochs: int, the number of epochs in training
        - validate: bool, whether or not to validate on dev set
        - batch_size: int
        - pickle_path: str, the path to pickle files for pre-processing
        """
        super(BaseTrainer, self).__init__()
        self.n_epochs = train_args.epochs
        self.validate = train_args.validate
        self.batch_size = train_args.batch_size
        self.pickle_path = train_args.pickle_path
        self.n_epochs = train_args["epochs"]
        self.validate = train_args["validate"]
        self.batch_size = train_args["batch_size"]
        self.pickle_path = train_args["pickle_path"]
        self.model = None
        self.iterator = None
        self.loss_func = None
        self.optimizer = None

    def train(self, network):
        """General training loop.
        """General Training Steps
        :param network: a model

        The method is framework independent.
@@ -51,22 +57,27 @@ class BaseTrainer(Action):
        - update
        Subclasses must implement these methods with a specific framework.
        """
        # prepare model and data
        self.model = network
        data_train, data_dev, data_test, embedding = self.prepare_input(self.pickle_path)

        test_args = Tester.TestConfig(save_output=True, validate_in_training=True,
                                      save_dev_input=True, save_loss=True, batch_size=self.batch_size)
        evaluator = Tester(test_args)
        # define tester over dev data
        valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True,
                      "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path}
        validator = POSTester(valid_args)

        best_loss = 1e10
        # main training epochs
        iterations = len(data_train) // self.batch_size

        for epoch in range(self.n_epochs):
            self.mode(test=False)

            # turn on network training mode; define optimizer; prepare batch iterator
            self.mode(test=False)
            self.define_optimizer()
            self.iterator = iter(Batchifier(RandomSampler(data_train), self.batch_size, drop_last=True))

            # training iterations in one epoch
            for step in range(iterations):
                batch_x, batch_y = self.batchify(self.batch_size, data_train)
                batch_x, batch_y = self.batchify(data_train)

                prediction = self.data_forward(network, batch_x)

@@ -77,9 +88,8 @@ class BaseTrainer(Action):
            if self.validate:
                if data_dev is None:
                    raise RuntimeError("No validation data provided.")
                evaluator.test(network, data_dev)
                if evaluator.loss < best_loss:
                    best_loss = evaluator.loss
                validator.test(network)
                print("[epoch {}] dev loss={:.2f}".format(epoch, validator.matrices()))

        # finish training

@@ -155,23 +165,20 @@ class BaseTrainer(Action):
        """
        raise NotImplementedError

    def batchify(self, batch_size, data):
    def batchify(self, data):
        """
        1. Perform batching from data and produce a batch of training data.
        2. Add padding.
        :param batch_size: int, the size of a batch
        :param data: list. Each entry is a sample, which is also a list of features and label(s).
            E.g.
                [
                    [[feature_1, feature_2, feature_3], [label_1, label_2]],  # sample 1
                    [[feature_1, feature_2, feature_3], [label_1, label_2]],  # sample 2
                    [[word_11, word_12, word_13], [label_11, label_12]],  # sample 1
                    [[word_21, word_22, word_23], [label_21, label_22]],  # sample 2
                    ...
                ]
        :return batch_x: list. Each entry is a list of features of a sample.
                batch_y: list. Each entry is a list of labels of a sample.
        :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
                batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
        """
        if self.iterator is None:
            self.iterator = iter(Batchifier(RandomSampler(data), batch_size, drop_last=True))
        indices = next(self.iterator)
        batch = [data[idx] for idx in indices]
        batch_x = [sample[0] for sample in batch]
@@ -195,7 +202,9 @@ class BaseTrainer(Action):


class ToyTrainer(BaseTrainer):
    """A simple trainer for a PyTorch model."""
    """
    deprecated
    """

    def __init__(self, train_args):
        super(ToyTrainer, self).__init__(train_args)
@@ -230,7 +239,7 @@ class ToyTrainer(BaseTrainer):

class WordSegTrainer(BaseTrainer):
    """
    reserve for changes
    deprecated
    """

    def __init__(self, train_args):
@@ -301,6 +310,7 @@ class WordSegTrainer(BaseTrainer):
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.85)

    def get_loss(self, predict, truth):
        truth = torch.Tensor(truth)
        self._loss = torch.nn.CrossEntropyLoss()(predict, truth)
        return self._loss

@@ -313,8 +323,76 @@ class WordSegTrainer(BaseTrainer):
        self.optimizer.step()


class POSTrainer(BaseTrainer):
    """
    Trainer for Sequence Modeling

    """
    def __init__(self, train_args):
        super(POSTrainer, self).__init__(train_args)
        self.vocab_size = train_args["vocab_size"]
        self.num_classes = train_args["num_classes"]
        self.max_len = None
        self.mask = None

    def prepare_input(self, data_path):
        """
        To do: Load pkl files of train/dev/test and embedding
        """
        data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
        data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
        return data_train, data_dev, 0, 1

    def data_forward(self, network, x):
        """
        :param network: the PyTorch model
        :param x: list of list, [batch_size, max_len]
        :return y: [batch_size, num_classes]
        """
        seq_len = [len(seq) for seq in x]
        x = torch.Tensor(x).long()
        self.batch_size = x.size(0)
        self.max_len = x.size(1)
        self.mask = seq_mask(seq_len, self.max_len)
        y = network(x)
        return y

    def mode(self, test=False):
        if test:
            self.model.eval()
        else:
            self.model.train()

    def define_optimizer(self):
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)

    def grad_backward(self, loss):
        self.model.zero_grad()
        loss.backward()

    def update(self):
        self.optimizer.step()

    def get_loss(self, predict, truth):
        """
        Compute loss given prediction and ground truth.
        :param predict: prediction label vector, [batch_size, num_classes]
        :param truth: ground truth label vector, [batch_size, max_len]
        :return: a scalar
        """
        truth = torch.Tensor(truth)
        if self.loss_func is None:
            if hasattr(self.model, "loss"):
                self.loss_func = self.model.loss
            else:
                self.define_loss()
        loss, prediction = self.loss_func(predict, truth, self.mask, self.batch_size, self.max_len)
        # print("loss={:.2f}".format(loss.data))
        return loss


if __name__ == "__main__":
    train_args = BaseTrainer.TrainConfig(epochs=1, validate=False, batch_size=3, pickle_path="./")
    train_args = {"epochs": 1, "validate": False, "batch_size": 3, "pickle_path": "./"}
    trainer = BaseTrainer(train_args)
    data_train = [[[1, 2, 3, 4], [0]] * 10] + [[[1, 3, 5, 2], [1]] * 10]
    trainer.batchify(batch_size=3, data=data_train)
    trainer.batchify(data=data_train)
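For reference, a minimal sketch of what one batchify() step above returns, with made-up samples and hard-coded indices standing in for RandomSampler/Batchifier (illustration only, not part of the commit):

# toy samples: [word indices, labels]; indices pretend to come from the sampler
data_train = [[[1, 2, 3, 4], [0]], [[1, 3, 5, 2], [1]], [[7, 8], [0]]]
indices = [2, 0]
batch = [data_train[i] for i in indices]
batch_x = [sample[0] for sample in batch]   # [[7, 8], [1, 2, 3, 4]]
batch_y = [sample[1] for sample in batch]   # [[0], [0]]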
@@ -15,7 +15,6 @@ class POSDatasetLoader(DatasetLoader):

    def __init__(self, data_name, data_path):
        super(POSDatasetLoader, self).__init__(data_name, data_path)
        #self.data_set = self.load()

    def load(self):
        assert os.path.exists(self.data_path)
@@ -24,7 +23,7 @@ class POSDatasetLoader(DatasetLoader):
        return line

    def load_lines(self):
        assert os.path.exists(self.data_path)
        assert (os.path.exists(self.data_path))
        with open(self.data_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
            return lines
@@ -46,19 +46,17 @@ class BasePreprocess(object):


class POSPreprocess(BasePreprocess):

    """
    This class is used to preprocess the POS datasets.
    In these datasets, each line is divided by '\t'.
    The first column is the vocabulary.
    The second column is the labels.
    In these datasets, each line is divided by '\t',
    where the first column is the vocabulary and the second
    column is the label.
    Different sentences are divided by an empty line.
    e.g:
    Tom label1
    and label2
    Jerry   label1
    .   label3

    Hello   label4
    world   label5
    !   label3
@@ -71,11 +69,13 @@ class POSPreprocess(BasePreprocess):
        super(POSPreprocess, self).__init__(data, pickle_path)
        self.word_dict = None
        self.label_dict = None
        self.data = data
        self.pickle_path = pickle_path
        self.build_dict()
        self.word2id()
        self.id2word()
        self.vocab_size = self.id2word()
        self.class2id()
        self.id2class()
        self.num_classes = self.id2class()
        self.embedding()
        self.data_train()
        self.data_dev()
@@ -87,7 +87,8 @@ class POSPreprocess(BasePreprocess):
                              DEFAULT_RESERVED_LABEL[2]: 4}
        self.label_dict = {}
        for w in self.data:
            if len(w) == 0:
            w = w.strip()
            if len(w) <= 1:
                continue
            word = w.split('\t')

@@ -95,10 +96,11 @@ class POSPreprocess(BasePreprocess):
                index = len(self.word_dict)
                self.word_dict[word[0]] = index

            for label in word[1: ]:
                if label not in self.label_dict:
                    index = len(self.label_dict)
                    self.label_dict[label] = index
            # for label in word[1: ]:
            label = word[1]
            if label not in self.label_dict:
                index = len(self.label_dict)
                self.label_dict[label] = index

    def pickle_exist(self, pickle_name):
        """
@@ -107,7 +109,7 @@ class POSPreprocess(BasePreprocess):
        """
        if not os.path.exists(self.pickle_path):
            os.makedirs(self.pickle_path)
        file_name = self.pickle_path + pickle_name
        file_name = os.path.join(self.pickle_path, pickle_name)
        if os.path.exists(file_name):
            return True
        else:
@@ -118,42 +120,48 @@ class POSPreprocess(BasePreprocess):
            return
        # nothing will be done if word2id.pkl exists

        file_name = self.pickle_path + "word2id.pkl"
        with open(file_name, "wb", encoding='utf-8') as f:
        file_name = os.path.join(self.pickle_path, "word2id.pkl")
        with open(file_name, "wb") as f:
            _pickle.dump(self.word_dict, f)

    def id2word(self):
        if self.pickle_exist("id2word.pkl"):
            return
            file_name = os.path.join(self.pickle_path, "id2word.pkl")
            id2word_dict = _pickle.load(open(file_name, "rb"))
            return len(id2word_dict)
        # nothing will be done if id2word.pkl exists

        id2word_dict = {}
        for word in self.word_dict:
            id2word_dict[self.word_dict[word]] = word
        file_name = self.pickle_path + "id2word.pkl"
        with open(file_name, "wb", encoding='utf-8') as f:
        file_name = os.path.join(self.pickle_path, "id2word.pkl")
        with open(file_name, "wb") as f:
            _pickle.dump(id2word_dict, f)
        return len(id2word_dict)

    def class2id(self):
        if self.pickle_exist("class2id.pkl"):
            return
        # nothing will be done if class2id.pkl exists

        file_name = self.pickle_path + "class2id.pkl"
        with open(file_name, "wb", encoding='utf-8') as f:
        file_name = os.path.join(self.pickle_path, "class2id.pkl")
        with open(file_name, "wb") as f:
            _pickle.dump(self.label_dict, f)

    def id2class(self):
        if self.pickle_exist("id2class.pkl"):
            return
            file_name = os.path.join(self.pickle_path, "id2class.pkl")
            id2class_dict = _pickle.load(open(file_name, "rb"))
            return len(id2class_dict)
        # nothing will be done if id2class.pkl exists

        id2class_dict = {}
        for label in self.label_dict:
            id2class_dict[self.label_dict[label]] = label
        file_name = self.pickle_path + "id2class.pkl"
        with open(file_name, "wb", encoding='utf-8') as f:
        file_name = os.path.join(self.pickle_path, "id2class.pkl")
        with open(file_name, "wb") as f:
            _pickle.dump(id2class_dict, f)
        return len(id2class_dict)

    def embedding(self):
        if self.pickle_exist("embedding.pkl"):
@@ -168,22 +176,26 @@ class POSPreprocess(BasePreprocess):
        data_train = []
        sentence = []
        for w in self.data:
            if len(w) == 0:
            w = w.strip()
            if len(w) <= 1:
                wid = []
                lid = []
                for i in range(len(sentence)):
                    # if sentence[i][0]=="":
                    #     print("")
                    wid.append(self.word_dict[sentence[i][0]])
                    lid.append(self.label_dict[sentence[i][1]])
                data_train.append((wid, lid))
                sentence = []
                continue
            sentence.append(w.split('\t'))

        file_name = self.pickle_path + "data_train.pkl"
        with open(file_name, "wb", encoding='utf-8') as f:
        file_name = os.path.join(self.pickle_path, "data_train.pkl")
        with open(file_name, "wb") as f:
            _pickle.dump(data_train, f)

    def data_dev(self):
        pass

    def data_test(self):
        pass
        pass
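For reference, a small sketch of reading the tab-separated, blank-line-delimited format that POSPreprocess expects into (words, labels) sentence pairs. The helper name read_pos_file and its exact behaviour are illustrative assumptions, not part of the commit; it assumes exactly one word and one label per non-empty line.

# hypothetical helper: parse a people.txt-style file into (words, labels) pairs
def read_pos_file(path):
    sentences, words, labels = [], [], []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:                       # a blank line ends the current sentence
                if words:
                    sentences.append((words, labels))
                    words, labels = [], []
                continue
            w, tag = line.split("\t")          # one word and one label per line
            words.append(w)
            labels.append(tag)
    if words:
        sentences.append((words, labels))
    return sentences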
@@ -3,32 +3,12 @@ import torch

class BaseModel(torch.nn.Module):
    """Base PyTorch model for all models.
    Three network modules presented:
    - embedding module
    - aggregation module
    - output module
    Subclasses must implement these three modules with "components".
    To do: add some useful common features
    """

    def __init__(self):
        super(BaseModel, self).__init__()

    def forward(self, *inputs):
        x = self.encode(*inputs)
        x = self.aggregation(x)
        x = self.output(x)
        return x

    def encode(self, x):
        raise NotImplementedError

    def aggregation(self, x):
        raise NotImplementedError

    def output(self, x):
        raise NotImplementedError



class Vocabulary(object):
    """A look-up table that allows you to access `Lexeme` objects. The `Vocab`
@@ -93,3 +73,4 @@ class Token(object):
        self.doc = doc
        self.token = doc[offset]
        self.i = offset

fastNLP/models/sequence_modeling.py (new file, 97 lines)
@@ -0,0 +1,97 @@
import torch
import torch.nn as nn
from torch.nn import functional as F

from fastNLP.models.base_model import BaseModel
from fastNLP.modules.CRF import ContionalRandomField


class SeqLabeling(BaseModel):
    """
    PyTorch Network for sequence labeling
    """

    def __init__(self, hidden_dim,
                 rnn_num_layer,
                 num_classes,
                 vocab_size,
                 word_emb_dim=100,
                 init_emb=None,
                 rnn_mode="gru",
                 bi_direction=False,
                 dropout=0.5,
                 use_crf=True):
        super(SeqLabeling, self).__init__()

        self.Emb = nn.Embedding(vocab_size, word_emb_dim)
        if init_emb:
            self.Emb.weight = nn.Parameter(init_emb)

        self.num_classes = num_classes
        self.input_dim = word_emb_dim
        self.layers = rnn_num_layer
        self.hidden_dim = hidden_dim
        self.bi_direction = bi_direction
        self.dropout = dropout
        self.mode = rnn_mode

        if self.mode == "lstm":
            self.rnn = nn.LSTM(self.input_dim, self.hidden_dim, self.layers, batch_first=True,
                               bidirectional=self.bi_direction, dropout=self.dropout)
        elif self.mode == "gru":
            self.rnn = nn.GRU(self.input_dim, self.hidden_dim, self.layers, batch_first=True,
                              bidirectional=self.bi_direction, dropout=self.dropout)
        elif self.mode == "rnn":
            self.rnn = nn.RNN(self.input_dim, self.hidden_dim, self.layers, batch_first=True,
                              bidirectional=self.bi_direction, dropout=self.dropout)
        else:
            raise Exception
        if bi_direction:
            self.linear = nn.Linear(self.hidden_dim * 2, self.num_classes)
        else:
            self.linear = nn.Linear(self.hidden_dim, self.num_classes)
        self.use_crf = use_crf
        if self.use_crf:
            self.crf = ContionalRandomField(num_classes)

    def forward(self, x):
        """
        :param x: LongTensor, [batch_size, max_len]
        :return y: FloatTensor, [batch_size, max_len, num_classes]
        """
        x = self.Emb(x)
        # [batch_size, max_len, word_emb_dim]
        x, hidden = self.rnn(x)
        # [batch_size, max_len, hidden_size * direction]
        y = self.linear(x)
        # [batch_size, max_len, num_classes]
        return y

    def loss(self, x, y, mask, batch_size, max_len):
        """
        Negative log likelihood loss.
        :param x: FloatTensor, [batch_size, max_len, num_classes]
        :param y: LongTensor, [batch_size, max_len]
        :param mask: ByteTensor, [batch_size, max_len]
        :param batch_size: int
        :param max_len: int
        :return loss:
                prediction:
        """
        x = x.float()
        y = y.long()
        mask = mask.byte()
        # print(x.shape, y.shape, mask.shape)

        if self.use_crf:
            total_loss = self.crf(x, y, mask)
            tag_seq = self.crf.viterbi_decode(x, mask)
        else:
            # error
            loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
            x = x.view(batch_size * max_len, -1)
            score = F.log_softmax(x)
            total_loss = loss_function(score, y.view(batch_size * max_len))
            _, tag_seq = torch.max(score)
            tag_seq = tag_seq.view(batch_size, max_len)
        return torch.mean(total_loss), tag_seq
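For reference, a minimal usage sketch of the SeqLabeling network above with made-up toy sizes (illustration only, not part of the commit); the constructor arguments follow the signature shown above and the output shape follows the code's own comments.

import torch
from fastNLP.models.sequence_modeling import SeqLabeling

# toy sizes chosen only for illustration
model = SeqLabeling(hidden_dim=100, rnn_num_layer=1, num_classes=5,
                    vocab_size=20, bi_direction=True, use_crf=False)
x = torch.randint(0, 20, (2, 7)).long()   # [batch_size=2, max_len=7] word indices
y = model(x)                              # shape [2, 7, 5] = [batch_size, max_len, num_classes]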
@@ -82,7 +82,7 @@ class ContionalRandomField(nn.Module):
    def _glod_score(self, feats, tags, masks):
        """
        Compute the score for the gold path.
        :param feats: FloatTensor, batch_size x tag_size x tag_size
        :param feats: FloatTensor, batch_size x max_len x tag_size
        :param tags: LongTensor, batch_size x max_len
        :param masks: ByteTensor, batch_size x max_len
        :return:FloatTensor, batch_size
@@ -118,7 +118,7 @@ class ContionalRandomField(nn.Module):
    def forward(self, feats, tags, masks):
        """
        Calculate the neg log likelihood
        :param feats:FloatTensor, batch_size x tag_size x tag_size
        :param feats:FloatTensor, batch_size x max_len x tag_size
        :param tags:LongTensor, batch_size x max_len
        :param masks:ByteTensor batch_size x max_len
        :return:FloatTensor, batch_size

@@ -1,12 +1,13 @@
import time

import aggregation
import dataloader
import embedding
import encoder
import predict
import torch
import torch.nn as nn
import encoder
import aggregation
import embedding
import predict
import torch.optim as optim
import time
import dataloader

WORD_NUM = 357361
WORD_SIZE = 100
@@ -16,6 +17,30 @@ R = 10
MLP_HIDDEN = 2000
CLASSES_NUM = 5

from fastNLP.models.base_model import BaseModel
from fastNLP.action.trainer import BaseTrainer


class MyNet(BaseModel):
    def __init__(self):
        super(MyNet, self).__init__()
        self.embedding = embedding.Lookuptable(WORD_NUM, WORD_SIZE)
        self.encoder = encoder.Lstm(WORD_SIZE, HIDDEN_SIZE, 1, 0.5, True)
        self.aggregation = aggregation.Selfattention(2 * HIDDEN_SIZE, D_A, R)
        self.predict = predict.MLP(R * HIDDEN_SIZE * 2, MLP_HIDDEN, CLASSES_NUM)
        self.penalty = None

    def encode(self, x):
        return self.encoder(self.embedding(x))

    def aggregate(self, x):
        x, self.penalty = self.aggregation(x)
        return x

    def decode(self, x):
        return [self.predict(x), self.penalty]


class Net(nn.Module):
    """
    A model for sentiment analysis using LSTM and self-attention
@@ -34,6 +59,19 @@ class Net(nn.Module):
        x = self.predict(x)
        return x, penalty


class MyTrainer(BaseTrainer):
    def __init__(self, args):
        super(MyTrainer, self).__init__(args)
        self.optimizer = None

    def define_optimizer(self):
        self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)

    def define_loss(self):
        self.loss_func = nn.CrossEntropyLoss()


def train(model_dict=None, using_cuda=True, learning_rate=0.06,
          momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10):
    """

@@ -7,3 +7,9 @@ def mask_softmax(matrix, mask):
    else:
        raise NotImplementedError
    return result


def seq_mask(seq_len, max_len):
    mask = [torch.ge(torch.LongTensor(seq_len), i + 1) for i in range(max_len)]
    mask = torch.stack(mask, 1)
    return mask
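For reference, a small check of what seq_mask above returns for a toy batch (illustration only, not part of the commit):

from fastNLP.modules.utils import seq_mask

mask = seq_mask([3, 1, 2], 4)   # true lengths 3, 1, 2; max_len 4
# mask has shape [3, 4]; row i is 1 for the first seq_len[i] positions and 0 afterwards:
# [[1, 1, 1, 0],
#  [1, 0, 0, 0],
#  [1, 1, 0, 0]]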
@@ -1,3 +1,3 @@
numpy==1.14.2
numpy>=1.14.2
torch==0.4.0
torchvision==0.1.8
torchvision>=0.1.8
test/data_for_tests/people.txt (new file, 67 lines)
@@ -0,0 +1,67 @@
迈 B-v
向 E-v
充 B-v
满 E-v
希 B-n
望 E-n
的 S-u
新 S-a
世 B-n
纪 E-n
— B-w
— E-w
一 B-t
九 M-t
九 M-t
八 M-t
年 E-t
新 B-t
年 E-t
讲 B-n
话 E-n
( S-w
附 S-v
图 B-n
片 E-n
1 S-m
张 S-q
) S-w

中 B-nt
共 M-nt
中 M-nt
央 E-nt
总 B-n
书 M-n
记 E-n
、 S-w
国 B-n
家 E-n
主 B-n
席 E-n
江 B-nr
泽 M-nr
民 E-nr

( S-w
一 B-t
九 M-t
九 M-t
七 M-t
年 E-t
十 B-t
二 M-t
月 E-t
三 B-t
十 M-t
一 M-t
日 E-t
) S-w

1 B-t
2 M-t
月 E-t
3 B-t
1 M-t
日 E-t
, S-w
test/test_POS_pipeline.py (new file, 35 lines)
@@ -0,0 +1,35 @@
import sys

sys.path.append("..")

from fastNLP.action.trainer import POSTrainer
from fastNLP.loader.dataset_loader import POSDatasetLoader
from fastNLP.loader.preprocess import POSPreprocess
from fastNLP.models.sequence_modeling import SeqLabeling

data_name = "people.txt"
data_path = "data_for_tests/people.txt"
pickle_path = "data_for_tests"

if __name__ == "__main__":
    # Data Loader
    pos = POSDatasetLoader(data_name, data_path)
    train_data = pos.load_lines()

    # Preprocessor
    p = POSPreprocess(train_data, pickle_path)
    vocab_size = p.vocab_size
    num_classes = p.num_classes

    # Trainer
    train_args = {"epochs": 20, "batch_size": 1, "num_classes": num_classes,
                  "vocab_size": vocab_size, "pickle_path": pickle_path, "validate": True}
    trainer = POSTrainer(train_args)

    # Model
    model = SeqLabeling(100, 1, num_classes, vocab_size, bi_direction=True)

    # Start training
    trainer.train(model)

    print("Training finished!")