Mirror of https://gitee.com/fastnlp/fastNLP.git (synced 2024-12-02 04:07:35 +08:00)
Commit c80ae39fb3

README.md (24 lines changed)
@ -2,6 +2,9 @@
|
||||
|
||||
[![Build Status](https://travis-ci.org/fastnlp/fastNLP.svg?branch=master)](https://travis-ci.org/fastnlp/fastNLP)
|
||||
[![codecov](https://codecov.io/gh/fastnlp/fastNLP/branch/master/graph/badge.svg)](https://codecov.io/gh/fastnlp/fastNLP)
|
||||
[![PyPI version](https://badge.fury.io/py/fastNLP.svg)](https://badge.fury.io/py/fastNLP)
|
||||
![Hex.pm](https://img.shields.io/hexpm/l/plug.svg)
|
||||
[![Documentation Status](https://readthedocs.org/projects/fastnlp/badge/?version=latest)](http://fastnlp.readthedocs.io/?badge=latest)
|
||||
|
||||
fastNLP is a modular Natural Language Processing system based on PyTorch, for fast development of NLP tools. It decomposes deep-learning NLP models into modules that fall into 4 categories: encoder, interaction, aggregation and decoder, and each category contains several implemented modules. Encoder modules encode the input into an abstract representation, interaction modules let the information in that representation interact with itself, aggregation modules aggregate and reduce information, and decoder modules decode the representation into the output. Most current NLP models can be built from these modules, which greatly simplifies the process of developing NLP models. The architecture of fastNLP is shown in the figure below:
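For intuition, the encode → aggregate → decode flow can be sketched with plain PyTorch modules standing in for fastNLP's module categories (a schematic only, with illustrative sizes; the fastNLP example below shows the library's own module API):

```python
import torch
import torch.nn as nn

embed = nn.Embedding(1000, 300)          # encoder: token ids -> representations
pool = nn.AdaptiveMaxPool1d(1)           # aggregation: reduce over the sequence
classify = nn.Linear(300, 5)             # decoder: representation -> class scores

x = torch.randint(0, 1000, (2, 16))      # [N, L]
h = embed(x)                             # [N, L, 300]
h = pool(h.transpose(1, 2)).squeeze(-1)  # [N, 300]
y = classify(h)                          # [N, 5]
```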
|
||||
|
||||
@ -30,6 +33,7 @@ A typical fastNLP routine is composed of four phases: loading dataset, pre-proce
|
||||
from fastNLP.models.base_model import BaseModel
|
||||
from fastNLP.modules import encoder
|
||||
from fastNLP.modules import aggregation
|
||||
from fastNLP.modules import decoder
|
||||
|
||||
from fastNLP.loader.dataset_loader import ClassDatasetLoader
|
||||
from fastNLP.loader.preprocess import ClassPreprocess
|
||||
@ -42,20 +46,20 @@ class ClassificationModel(BaseModel):
|
||||
Simple text classification model based on CNN.
|
||||
"""
|
||||
|
||||
def __init__(self, class_num, vocab_size):
|
||||
def __init__(self, num_classes, vocab_size):
|
||||
super(ClassificationModel, self).__init__()
|
||||
|
||||
self.embed = encoder.Embedding(nums=vocab_size, dims=300)
|
||||
self.conv = encoder.Conv(
|
||||
self.emb = encoder.Embedding(nums=vocab_size, dims=300)
|
||||
self.enc = encoder.Conv(
|
||||
in_channels=300, out_channels=100, kernel_size=3)
|
||||
self.pool = aggregation.MaxPool()
|
||||
self.output = encoder.Linear(input_size=100, output_size=class_num)
|
||||
self.agg = aggregation.MaxPool()
|
||||
self.dec = decoder.MLP(100, num_classes=num_classes)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.embed(x) # [N,L] -> [N,L,C]
|
||||
x = self.conv(x) # [N,L,C_in] -> [N,L,C_out]
|
||||
x = self.pool(x) # [N,L,C] -> [N,C]
|
||||
x = self.output(x) # [N,C] -> [N, N_class]
|
||||
x = self.emb(x) # [N,L] -> [N,L,C]
|
||||
x = self.enc(x) # [N,L,C_in] -> [N,L,C_out]
|
||||
x = self.agg(x) # [N,L,C] -> [N,C]
|
||||
x = self.dec(x) # [N,C] -> [N, N_class]
|
||||
return x
|
||||
|
||||
|
||||
@ -75,7 +79,7 @@ model_args = {
|
||||
'num_classes': n_classes,
|
||||
'vocab_size': vocab_size
|
||||
}
|
||||
model = ClassificationModel(class_num=n_classes, vocab_size=vocab_size)
|
||||
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)
|
||||
|
||||
# train model
|
||||
train_args = {
|
||||
|
fastNLP/core/loss.py (new file, 27 lines)
@ -0,0 +1,27 @@
|
||||
import torch
|
||||
|
||||
|
||||
class Loss(object):
|
||||
"""Loss function of the algorithm,
|
||||
either the wrapper of a loss function from framework, or a user-defined loss (need pytorch auto_grad support)
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, args):
|
||||
if args is None:
|
||||
# a None loss is useful when the model defines its own loss (see Trainer.define_loss)
|
||||
self._loss = None
|
||||
elif isinstance(args, str):
|
||||
self._loss = self._borrow_from_pytorch(args)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
def get(self):
|
||||
return self._loss
|
||||
|
||||
@staticmethod
|
||||
def _borrow_from_pytorch(loss_name):
|
||||
if loss_name == "cross_entropy":
|
||||
return torch.nn.CrossEntropyLoss()
|
||||
else:
|
||||
raise NotImplementedError
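A brief usage sketch of the Loss wrapper above (values are illustrative): the string name is resolved to a PyTorch loss, and get() returns the callable.

```python
import torch

from fastNLP.core.loss import Loss

loss_wrapper = Loss("cross_entropy")      # borrows torch.nn.CrossEntropyLoss()
loss_fn = loss_wrapper.get()

logits = torch.randn(4, 3)                # [batch_size, num_classes]
targets = torch.tensor([0, 2, 1, 0])      # gold class indices
print(loss_fn(logits, targets))           # scalar cross-entropy loss
```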
|
@ -1,3 +1,54 @@
|
||||
"""
|
||||
use optimizer from Pytorch
|
||||
"""
|
||||
import torch
|
||||
|
||||
|
||||
class Optimizer(object):
|
||||
"""Wrapper of optimizer from framework
|
||||
|
||||
names: arguments (type)
|
||||
1. Adam: lr (float), weight_decay (float)
|
||||
2. AdaGrad
|
||||
3. RMSProp
|
||||
4. SGD: lr (float), momentum (float)
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, optimizer_name, **kwargs):
|
||||
"""
|
||||
:param optimizer_name: str, the name of the optimizer
|
||||
:param kwargs: the arguments
|
||||
"""
|
||||
self.optim_name = optimizer_name
|
||||
self.kwargs = kwargs
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self.optim_name
|
||||
|
||||
@property
|
||||
def params(self):
|
||||
return self.kwargs
|
||||
|
||||
def construct_from_pytorch(self, model_params):
|
||||
"""construct a optimizer from framework over given model parameters"""
|
||||
|
||||
if self.optim_name in ["SGD", "sgd"]:
|
||||
if "lr" in self.kwargs:
|
||||
if "momentum" not in self.kwargs:
|
||||
self.kwargs["momentum"] = 0
|
||||
optimizer = torch.optim.SGD(model_params, lr=self.kwargs["lr"], momentum=self.kwargs["momentum"])
|
||||
else:
|
||||
raise ValueError("requires learning rate for SGD optimizer")
|
||||
|
||||
elif self.optim_name in ["adam", "Adam"]:
|
||||
if "lr" in self.kwargs:
|
||||
if "weight_decay" not in self.kwargs:
|
||||
self.kwargs["weight_decay"] = 0
|
||||
optimizer = torch.optim.Adam(model_params, lr=self.kwargs["lr"],
|
||||
weight_decay=self.kwargs["weight_decay"])
|
||||
else:
|
||||
raise ValueError("requires learning rate for Adam optimizer")
|
||||
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
return optimizer
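The Optimizer wrapper stores the optimizer name and its keyword arguments, and only builds the real PyTorch optimizer once model parameters are available. A minimal sketch (the model is a placeholder):

```python
import torch.nn as nn

from fastNLP.core.optimizer import Optimizer

model = nn.Linear(10, 2)                               # placeholder model
optim_proto = Optimizer("SGD", lr=0.01, momentum=0.9)  # nothing is constructed yet
optimizer = optim_proto.construct_from_pytorch(model.parameters())
optimizer.step()                                       # behaves like a normal torch optimizer
```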
|
||||
|
@ -19,13 +19,13 @@ DEFAULT_WORD_TO_INDEX = {DEFAULT_PADDING_LABEL: 0, DEFAULT_UNKNOWN_LABEL: 1,
|
||||
def save_pickle(obj, pickle_path, file_name):
|
||||
with open(os.path.join(pickle_path, file_name), "wb") as f:
|
||||
_pickle.dump(obj, f)
|
||||
print("{} saved. ".format(file_name))
|
||||
print("{} saved in {}".format(file_name, pickle_path))
|
||||
|
||||
|
||||
def load_pickle(pickle_path, file_name):
|
||||
with open(os.path.join(pickle_path, file_name), "rb") as f:
|
||||
obj = _pickle.load(f)
|
||||
print("{} loaded. ".format(file_name))
|
||||
print("{} loaded from {}".format(file_name, pickle_path))
|
||||
return obj
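These helpers read and write intermediate artifacts under a common pickle directory; a sketch of how they are used elsewhere in this commit (assuming both live in fastNLP.core.preprocess, as the imports later in this diff suggest; the directory must already exist):

```python
from fastNLP.core.preprocess import save_pickle, load_pickle

vocab = {"<pad>": 0, "<unk>": 1, "hello": 2}          # illustrative object
save_pickle(vocab, "./save/", "word2id.pkl")          # prints "word2id.pkl saved in ./save/"
vocab_back = load_pickle("./save/", "word2id.pkl")    # prints "word2id.pkl loaded from ./save/"
```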
|
||||
|
||||
|
||||
@ -59,7 +59,6 @@ class BasePreprocess(object):
|
||||
|
||||
def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10):
|
||||
"""Main preprocessing pipeline.
|
||||
|
||||
:param train_dev_data: three-level list, with either single label or multiple labels in a sample.
|
||||
:param test_data: three-level list, with either single label or multiple labels in a sample. (optional)
|
||||
:param pickle_path: str, the path to save the pickle files.
|
||||
@ -98,6 +97,8 @@ class BasePreprocess(object):
|
||||
save_pickle(data_train, pickle_path, "data_train.pkl")
|
||||
else:
|
||||
data_train = load_pickle(pickle_path, "data_train.pkl")
|
||||
if pickle_exist(pickle_path, "data_dev.pkl"):
|
||||
data_dev = load_pickle(pickle_path, "data_dev.pkl")
|
||||
else:
|
||||
# cross_val is True
|
||||
if not pickle_exist(pickle_path, "data_train_0.pkl"):
|
||||
|
@ -1,5 +1,3 @@
|
||||
import _pickle
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
@ -14,43 +12,78 @@ logger = create_logger(__name__, "./train_test.log")
|
||||
class BaseTester(object):
|
||||
"""An collection of model inference and evaluation of performance, used over validation/dev set and test set. """
|
||||
|
||||
def __init__(self, test_args):
|
||||
def __init__(self, **kwargs):
|
||||
"""
|
||||
:param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]"
|
||||
:param kwargs: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]"
|
||||
"""
|
||||
super(BaseTester, self).__init__()
|
||||
self.validate_in_training = test_args["validate_in_training"]
|
||||
self.save_dev_data = None
|
||||
self.save_output = test_args["save_output"]
|
||||
self.output = None
|
||||
self.save_loss = test_args["save_loss"]
|
||||
self.mean_loss = None
|
||||
self.batch_size = test_args["batch_size"]
|
||||
self.pickle_path = test_args["pickle_path"]
|
||||
self.iterator = None
|
||||
self.use_cuda = test_args["use_cuda"]
|
||||
"""
|
||||
"default_args" provides default value for important settings.
|
||||
The initialization arguments "kwargs" with the same key (name) will override the default value.
|
||||
"kwargs" must have the same type as "default_args" on corresponding keys.
|
||||
Otherwise, error will raise.
|
||||
"""
|
||||
default_args = {"save_output": False, # collect outputs of validation set
|
||||
"save_loss": False, # collect losses in validation
|
||||
"save_best_dev": False, # save best model during validation
|
||||
"batch_size": 8,
|
||||
"use_cuda": True,
|
||||
"pickle_path": "./save/",
|
||||
"model_name": "dev_best_model.pkl",
|
||||
"print_every_step": 1,
|
||||
}
|
||||
"""
|
||||
"required_args" is the collection of arguments that users must pass to Trainer explicitly.
|
||||
This is used to warn users of essential settings in the training.
|
||||
Obviously, "required_args" is the subset of "default_args".
|
||||
The value in "default_args" to the keys in "required_args" is simply for type check.
|
||||
"""
|
||||
# TODO: required arguments
|
||||
required_args = {}
|
||||
|
||||
self.model = None
|
||||
for req_key in required_args:
|
||||
if req_key not in kwargs:
|
||||
logger.error("Tester lacks argument {}".format(req_key))
|
||||
raise ValueError("Tester lacks argument {}".format(req_key))
|
||||
|
||||
for key in default_args:
|
||||
if key in kwargs:
|
||||
if isinstance(kwargs[key], type(default_args[key])):
|
||||
default_args[key] = kwargs[key]
|
||||
else:
|
||||
msg = "Argument %s type mismatch: expected %s while get %s" % (
|
||||
key, type(default_args[key]), type(kwargs[key]))
|
||||
logger.error(msg)
|
||||
raise ValueError(msg)
|
||||
else:
|
||||
# BaseTester doesn't care about extra arguments
|
||||
pass
|
||||
print(default_args)
|
||||
|
||||
self.save_output = default_args["save_output"]
|
||||
self.save_best_dev = default_args["save_best_dev"]
|
||||
self.save_loss = default_args["save_loss"]
|
||||
self.batch_size = default_args["batch_size"]
|
||||
self.pickle_path = default_args["pickle_path"]
|
||||
self.use_cuda = default_args["use_cuda"]
|
||||
self.print_every_step = default_args["print_every_step"]
|
||||
|
||||
self._model = None
|
||||
self.eval_history = []
|
||||
self.batch_output = []
|
||||
|
||||
def test(self, network, dev_data):
|
||||
if torch.cuda.is_available() and self.use_cuda:
|
||||
self.model = network.cuda()
|
||||
self._model = network.cuda()
|
||||
else:
|
||||
self.model = network
|
||||
self._model = network
|
||||
|
||||
# turn on the testing mode; clean up the history
|
||||
self.mode(network, test=True)
|
||||
self.eval_history.clear()
|
||||
self.batch_output.clear()
|
||||
|
||||
# dev_data = self.prepare_input(self.pickle_path)
|
||||
# logger.info("validation data loaded")
|
||||
|
||||
iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
|
||||
n_batches = len(dev_data) // self.batch_size
|
||||
print_every_step = 1
|
||||
step = 0
|
||||
|
||||
for batch_x, batch_y in self.make_batch(iterator, dev_data):
|
||||
@ -65,21 +98,10 @@ class BaseTester(object):
|
||||
|
||||
print_output = "[test step {}] {}".format(step, eval_results)
|
||||
logger.info(print_output)
|
||||
if step % print_every_step == 0:
|
||||
if self.print_every_step > 0 and step % self.print_every_step == 0:
|
||||
print(print_output)
|
||||
step += 1
|
||||
|
||||
def prepare_input(self, data_path):
|
||||
"""Save the dev data once it is loaded. Can return directly next time.
|
||||
|
||||
:param data_path: str, the path to the pickle data for dev
|
||||
:return save_dev_data: list. Each entry is a sample, which is also a list of features and label(s).
|
||||
"""
|
||||
if self.save_dev_data is None:
|
||||
data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb"))
|
||||
self.save_dev_data = data_dev
|
||||
return self.save_dev_data
|
||||
|
||||
def mode(self, model, test):
|
||||
"""Train mode or Test mode. This is for PyTorch currently.
|
||||
|
||||
@ -117,15 +139,14 @@ class SeqLabelTester(BaseTester):
|
||||
Tester for sequence labeling.
|
||||
"""
|
||||
|
||||
def __init__(self, test_args):
|
||||
def __init__(self, **test_args):
|
||||
"""
|
||||
:param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]"
|
||||
"""
|
||||
super(SeqLabelTester, self).__init__(test_args)
|
||||
super(SeqLabelTester, self).__init__(**test_args)
|
||||
self.max_len = None
|
||||
self.mask = None
|
||||
self.seq_len = None
|
||||
self.batch_result = None
|
||||
|
||||
def data_forward(self, network, inputs):
|
||||
"""This is only for sequence labeling with CRF decoder.
|
||||
@ -159,14 +180,14 @@ class SeqLabelTester(BaseTester):
|
||||
:return:
|
||||
"""
|
||||
batch_size, max_len = predict.size(0), predict.size(1)
|
||||
loss = self.model.loss(predict, truth, self.mask) / batch_size
|
||||
loss = self._model.loss(predict, truth, self.mask) / batch_size
|
||||
|
||||
prediction = self.model.prediction(predict, self.mask)
|
||||
results = torch.Tensor(prediction).view(-1,)
|
||||
prediction = self._model.prediction(predict, self.mask)
|
||||
results = torch.Tensor(prediction).view(-1, )
|
||||
# make sure "results" is in the same device as "truth"
|
||||
results = results.to(truth)
|
||||
accuracy = torch.sum(results == truth.view((-1,))).to(torch.float) / results.shape[0]
|
||||
return [loss.data, accuracy.data]
|
||||
return [float(loss), float(accuracy)]
|
||||
|
||||
def metrics(self):
|
||||
batch_loss = np.mean([x[0] for x in self.eval_history])
|
||||
@ -184,21 +205,16 @@ class SeqLabelTester(BaseTester):
|
||||
def make_batch(self, iterator, data):
|
||||
return Action.make_batch(iterator, use_cuda=self.use_cuda, output_length=True)
|
||||
|
||||
|
||||
class ClassificationTester(BaseTester):
|
||||
"""Tester for classification."""
|
||||
|
||||
def __init__(self, test_args):
|
||||
def __init__(self, **test_args):
|
||||
"""
|
||||
:param test_args: a dict-like object that has __getitem__ method, \
|
||||
can be accessed by "test_args["key_str"]"
|
||||
"""
|
||||
super(ClassificationTester, self).__init__(test_args)
|
||||
self.pickle_path = test_args["pickle_path"]
|
||||
|
||||
self.save_dev_data = None
|
||||
self.output = None
|
||||
self.mean_loss = None
|
||||
self.iterator = None
|
||||
super(ClassificationTester, self).__init__(**test_args)
|
||||
|
||||
def make_batch(self, iterator, data, max_len=None):
|
||||
return Action.make_batch(iterator, use_cuda=self.use_cuda, max_len=max_len)
|
||||
@ -221,4 +237,3 @@ class ClassificationTester(BaseTester):
|
||||
y_true = torch.cat(y_true, dim=0)
|
||||
acc = float(torch.sum(y_pred == y_true)) / len(y_true)
|
||||
return y_true.cpu().numpy(), y_prob.cpu().numpy(), acc
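With the keyword-argument constructors introduced in this commit, a tester is configured by overriding only the defaults that matter; unspecified keys keep their values from default_args, and a type mismatch raises ValueError. A hedged sketch (model and dev_data are placeholders prepared elsewhere):

```python
from fastNLP.core.tester import SeqLabelTester

tester = SeqLabelTester(batch_size=16, use_cuda=False,
                        save_output=True, pickle_path="./save/")
tester.test(network=model, dev_data=dev_data)   # model, dev_data: placeholders
print(tester.show_matrices())                   # evaluation summary, as used in the CWS script below
```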
|
||||
|
||||
|
@ -4,12 +4,12 @@ import os
|
||||
import time
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from fastNLP.core.action import Action
|
||||
from fastNLP.core.action import RandomSampler, Batchifier
|
||||
from fastNLP.core.loss import Loss
|
||||
from fastNLP.core.optimizer import Optimizer
|
||||
from fastNLP.core.tester import SeqLabelTester, ClassificationTester
|
||||
from fastNLP.modules import utils
|
||||
from fastNLP.saver.logger import create_logger
|
||||
@ -23,14 +23,13 @@ class BaseTrainer(object):
|
||||
"""Operations to train a model, including data loading, SGD, and validation.
|
||||
|
||||
Subclasses must implement the following abstract methods:
|
||||
- define_optimizer
|
||||
- grad_backward
|
||||
- get_loss
|
||||
"""
|
||||
|
||||
def __init__(self, train_args):
|
||||
def __init__(self, **kwargs):
|
||||
"""
|
||||
:param train_args: dict of (key, value), or dict-like object. key is str.
|
||||
:param kwargs: dict of (key, value), or dict-like object. key is str.
|
||||
|
||||
The base trainer requires the following keys:
|
||||
- epochs: int, the number of epochs in training
|
||||
@ -39,64 +38,90 @@ class BaseTrainer(object):
|
||||
- pickle_path: str, the path to pickle files for pre-processing
|
||||
"""
|
||||
super(BaseTrainer, self).__init__()
|
||||
self.n_epochs = train_args["epochs"]
|
||||
self.batch_size = train_args["batch_size"]
|
||||
self.pickle_path = train_args["pickle_path"]
|
||||
|
||||
self.validate = train_args["validate"]
|
||||
self.save_best_dev = train_args["save_best_dev"]
|
||||
self.model_saved_path = train_args["model_saved_path"]
|
||||
self.use_cuda = train_args["use_cuda"]
|
||||
"""
|
||||
"default_args" provides default value for important settings.
|
||||
The initialization arguments "kwargs" with the same key (name) will override the default value.
|
||||
"kwargs" must have the same type as "default_args" on corresponding keys.
|
||||
Otherwise, error will raise.
|
||||
"""
|
||||
default_args = {"epochs": 3, "batch_size": 8, "validate": True, "use_cuda": True, "pickle_path": "./save/",
|
||||
"save_best_dev": True, "model_name": "default_model_name.pkl", "print_every_step": 1,
|
||||
"loss": Loss(None),
|
||||
"optimizer": Optimizer("Adam", lr=0.001, weight_decay=0)
|
||||
}
|
||||
"""
|
||||
"required_args" is the collection of arguments that users must pass to Trainer explicitly.
|
||||
This is used to warn users of essential settings in the training.
|
||||
Obviously, "required_args" is the subset of "default_args".
|
||||
The value in "default_args" to the keys in "required_args" is simply for type check.
|
||||
"""
|
||||
# TODO: required arguments
|
||||
required_args = {}
|
||||
|
||||
self.model = None
|
||||
self.iterator = None
|
||||
self.loss_func = None
|
||||
self.optimizer = None
|
||||
for req_key in required_args:
|
||||
if req_key not in kwargs:
|
||||
logger.error("Trainer lacks argument {}".format(req_key))
|
||||
raise ValueError("Trainer lacks argument {}".format(req_key))
|
||||
|
||||
for key in default_args:
|
||||
if key in kwargs:
|
||||
if isinstance(kwargs[key], type(default_args[key])):
|
||||
default_args[key] = kwargs[key]
|
||||
else:
|
||||
msg = "Argument %s type mismatch: expected %s while get %s" % (
|
||||
key, type(default_args[key]), type(kwargs[key]))
|
||||
logger.error(msg)
|
||||
raise ValueError(msg)
|
||||
else:
|
||||
# BaseTrainer doesn't care about extra arguments
|
||||
pass
|
||||
print(default_args)
|
||||
|
||||
self.n_epochs = default_args["epochs"]
|
||||
self.batch_size = default_args["batch_size"]
|
||||
self.pickle_path = default_args["pickle_path"]
|
||||
self.validate = default_args["validate"]
|
||||
self.save_best_dev = default_args["save_best_dev"]
|
||||
self.use_cuda = default_args["use_cuda"]
|
||||
self.model_name = default_args["model_name"]
|
||||
self.print_every_step = default_args["print_every_step"]
|
||||
|
||||
self._model = None
|
||||
self._loss_func = default_args["loss"].get() # return a pytorch loss function or None
|
||||
self._optimizer = None
|
||||
self._optimizer_proto = default_args["optimizer"]
|
||||
|
||||
def train(self, network, train_data, dev_data=None):
|
||||
"""General Training Steps
|
||||
"""General Training Procedure
|
||||
:param network: a model
|
||||
:param train_data: three-level list, the training set.
|
||||
:param dev_data: three-level list, the validation data (optional)
|
||||
|
||||
The method is framework independent.
|
||||
Work by calling the following methods:
|
||||
- prepare_input
|
||||
- mode
|
||||
- define_optimizer
|
||||
- data_forward
|
||||
- get_loss
|
||||
- grad_backward
|
||||
- update
|
||||
Subclasses must implement these methods with a specific framework.
|
||||
"""
|
||||
# prepare model and data, transfer model to gpu if available
|
||||
# transfer model to gpu if available
|
||||
if torch.cuda.is_available() and self.use_cuda:
|
||||
self.model = network.cuda()
|
||||
self._model = network.cuda()
|
||||
# self._model is used to access model-specific loss
|
||||
else:
|
||||
self.model = network
|
||||
self._model = network
|
||||
|
||||
# train_data = self.load_train_data(self.pickle_path)
|
||||
# logger.info("training data loaded")
|
||||
|
||||
# define tester over dev data
|
||||
# define Tester over dev data
|
||||
if self.validate:
|
||||
default_valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True,
|
||||
"save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path,
|
||||
"use_cuda": self.use_cuda}
|
||||
"use_cuda": self.use_cuda, "print_every_step": 0}
|
||||
validator = self._create_validator(default_valid_args)
|
||||
logger.info("validator defined as {}".format(str(validator)))
|
||||
|
||||
# optimizer and loss
|
||||
self.define_optimizer()
|
||||
logger.info("optimizer defined as {}".format(str(self.optimizer)))
|
||||
logger.info("optimizer defined as {}".format(str(self._optimizer)))
|
||||
self.define_loss()
|
||||
logger.info("loss function defined as {}".format(str(self._loss_func)))
|
||||
|
||||
# main training epochs
|
||||
n_samples = len(train_data)
|
||||
n_batches = n_samples // self.batch_size
|
||||
n_print = 1
|
||||
# main training procedure
|
||||
start = time.time()
|
||||
logger.info("training epochs started")
|
||||
|
||||
for epoch in range(1, self.n_epochs + 1):
|
||||
logger.info("training epoch {}".format(epoch))
|
||||
|
||||
@ -106,23 +131,30 @@ class BaseTrainer(object):
|
||||
data_iterator = iter(Batchifier(RandomSampler(train_data), self.batch_size, drop_last=False))
|
||||
logger.info("prepared data iterator")
|
||||
|
||||
self._train_step(data_iterator, network, start=start, n_print=n_print, epoch=epoch)
|
||||
# one forward and backward pass
|
||||
self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch)
|
||||
|
||||
# validation
|
||||
if self.validate:
|
||||
logger.info("validation started")
|
||||
validator.test(network, dev_data)
|
||||
|
||||
if self.save_best_dev and self.best_eval_result(validator):
|
||||
self.save_model(network)
|
||||
print("saved better model selected by dev")
|
||||
logger.info("saved better model selected by dev")
|
||||
self.save_model(network, self.model_name)
|
||||
print("Saved better model selected by validation.")
|
||||
logger.info("Saved better model selected by validation.")
|
||||
|
||||
valid_results = validator.show_matrices()
|
||||
print("[epoch {}] {}".format(epoch, valid_results))
|
||||
logger.info("[epoch {}] {}".format(epoch, valid_results))
|
||||
|
||||
def _train_step(self, data_iterator, network, **kwargs):
|
||||
"""Training process in one epoch."""
|
||||
"""Training process in one epoch.
|
||||
kwargs should contain:
|
||||
- n_print: int, print training information every n steps.
|
||||
- start: time.time(), the starting time of this step.
|
||||
- epoch: int,
|
||||
"""
|
||||
step = 0
|
||||
for batch_x, batch_y in self.make_batch(data_iterator):
|
||||
|
||||
@ -132,7 +164,7 @@ class BaseTrainer(object):
|
||||
self.grad_backward(loss)
|
||||
self.update()
|
||||
|
||||
if step % kwargs["n_print"] == 0:
|
||||
if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0:
|
||||
end = time.time()
|
||||
diff = timedelta(seconds=round(end - kwargs["start"]))
|
||||
print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format(
|
||||
@ -153,6 +185,11 @@ class BaseTrainer(object):
|
||||
logger.error("the number of folds in train and dev data unequals {}!={}".format(len(train_data_cv),
|
||||
len(dev_data_cv)))
|
||||
raise RuntimeError("the number of folds in train and dev data unequals")
|
||||
if self.validate is False:
|
||||
logger.warn("Cross validation requires self.validate to be True. Please turn it on. ")
|
||||
print("[warning] Cross validation requires self.validate to be True. Please turn it on. ")
|
||||
self.validate = True
|
||||
|
||||
n_fold = len(train_data_cv)
|
||||
logger.info("perform {} folds cross validation.".format(n_fold))
|
||||
for i in range(n_fold):
|
||||
@ -186,7 +223,7 @@ class BaseTrainer(object):
|
||||
"""
|
||||
Define framework-specific optimizer specified by the models.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
self._optimizer = self._optimizer_proto.construct_from_pytorch(self._model.parameters())
|
||||
|
||||
def update(self):
|
||||
"""
|
||||
@ -194,7 +231,7 @@ class BaseTrainer(object):
|
||||
|
||||
For PyTorch, just call optimizer to update.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
self._optimizer.step()
|
||||
|
||||
def data_forward(self, network, x):
|
||||
raise NotImplementedError
|
||||
@ -206,7 +243,8 @@ class BaseTrainer(object):
|
||||
|
||||
For PyTorch, just do "loss.backward()"
|
||||
"""
|
||||
raise NotImplementedError
|
||||
self._model.zero_grad()
|
||||
loss.backward()
|
||||
|
||||
def get_loss(self, predict, truth):
|
||||
"""
|
||||
@ -215,21 +253,25 @@ class BaseTrainer(object):
|
||||
:param truth: ground truth label vector
|
||||
:return: a scalar
|
||||
"""
|
||||
if self.loss_func is None:
|
||||
if hasattr(self.model, "loss"):
|
||||
self.loss_func = self.model.loss
|
||||
logger.info("The model has a loss function, use it.")
|
||||
else:
|
||||
logger.info("The model didn't define loss, use Trainer's loss.")
|
||||
self.define_loss()
|
||||
return self.loss_func(predict, truth)
|
||||
return self._loss_func(predict, truth)
|
||||
|
||||
def define_loss(self):
|
||||
"""
|
||||
Assign an instance of loss function to self.loss_func
|
||||
E.g. self.loss_func = nn.CrossEntropyLoss()
|
||||
If the model defines a loss, use the model's loss.
Otherwise, the Trainer must have a loss argument; use it as the loss.
These two losses cannot be defined at the same time.
The Trainer does not handle loss definition or choose default losses.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
if hasattr(self._model, "loss") and self._loss_func is not None:
|
||||
raise ValueError("Both the model and Trainer define loss. Please take out your loss.")
|
||||
|
||||
if hasattr(self._model, "loss"):
|
||||
self._loss_func = self._model.loss
|
||||
logger.info("The model has a loss function, use it.")
|
||||
else:
|
||||
if self._loss_func is None:
|
||||
raise ValueError("Please specify a loss function.")
|
||||
logger.info("The model didn't define loss, use Trainer's loss.")
|
||||
|
||||
def best_eval_result(self, validator):
|
||||
"""
|
||||
@ -238,71 +280,35 @@ class BaseTrainer(object):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def save_model(self, network):
|
||||
"""
|
||||
def save_model(self, network, model_name):
|
||||
"""Save this model with such a name.
|
||||
This method may be called multiple times by Trainer to overwritten a better model.
|
||||
|
||||
:param network: the PyTorch model
|
||||
model_best_dev.pkl may be overwritten by a better model in future epochs.
|
||||
:param model_name: str
|
||||
"""
|
||||
ModelSaver(self.model_saved_path + "model_best_dev.pkl").save_pytorch(network)
|
||||
if model_name[-4:] != ".pkl":
|
||||
model_name += ".pkl"
|
||||
ModelSaver(self.pickle_path + model_name).save_pytorch(network)
|
||||
|
||||
def _create_validator(self, valid_args):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class ToyTrainer(BaseTrainer):
|
||||
"""
|
||||
An example to show the definition of Trainer.
|
||||
"""
|
||||
|
||||
def __init__(self, training_args):
|
||||
super(ToyTrainer, self).__init__(training_args)
|
||||
|
||||
def load_train_data(self, data_path):
|
||||
data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
|
||||
data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
|
||||
return data_train, data_dev, 0, 1
|
||||
|
||||
def data_forward(self, network, x):
|
||||
return network(x)
|
||||
|
||||
def grad_backward(self, loss):
|
||||
self.model.zero_grad()
|
||||
loss.backward()
|
||||
|
||||
def get_loss(self, pred, truth):
|
||||
return np.mean(np.square(pred - truth))
|
||||
|
||||
def define_optimizer(self):
|
||||
self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01)
|
||||
|
||||
def update(self):
|
||||
self.optimizer.step()
|
||||
|
||||
|
||||
class SeqLabelTrainer(BaseTrainer):
|
||||
"""
|
||||
Trainer for Sequence Modeling
|
||||
Trainer for Sequence Labeling
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, train_args):
|
||||
super(SeqLabelTrainer, self).__init__(train_args)
|
||||
self.vocab_size = train_args["vocab_size"]
|
||||
self.num_classes = train_args["num_classes"]
|
||||
def __init__(self, **kwargs):
|
||||
super(SeqLabelTrainer, self).__init__(**kwargs)
|
||||
# self.vocab_size = kwargs["vocab_size"]
|
||||
# self.num_classes = kwargs["num_classes"]
|
||||
self.max_len = None
|
||||
self.mask = None
|
||||
self.best_accuracy = 0.0
|
||||
|
||||
def define_optimizer(self):
|
||||
self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)
|
||||
|
||||
def grad_backward(self, loss):
|
||||
self.model.zero_grad()
|
||||
loss.backward()
|
||||
|
||||
def update(self):
|
||||
self.optimizer.step()
|
||||
|
||||
def data_forward(self, network, inputs):
|
||||
if not isinstance(inputs, tuple):
|
||||
raise RuntimeError("output_length must be true for sequence modeling. Receive {}".format(type(inputs[0])))
|
||||
@ -330,7 +336,7 @@ class SeqLabelTrainer(BaseTrainer):
|
||||
batch_size, max_len = predict.size(0), predict.size(1)
|
||||
assert truth.shape == (batch_size, max_len)
|
||||
|
||||
loss = self.model.loss(predict, truth, self.mask)
|
||||
loss = self._model.loss(predict, truth, self.mask)
|
||||
return loss
|
||||
|
||||
def best_eval_result(self, validator):
|
||||
@ -345,48 +351,25 @@ class SeqLabelTrainer(BaseTrainer):
|
||||
return Action.make_batch(iterator, output_length=True, use_cuda=self.use_cuda)
|
||||
|
||||
def _create_validator(self, valid_args):
|
||||
return SeqLabelTester(valid_args)
|
||||
return SeqLabelTester(**valid_args)
|
||||
|
||||
|
||||
class ClassificationTrainer(BaseTrainer):
|
||||
"""Trainer for classification."""
|
||||
"""Trainer for text classification."""
|
||||
|
||||
def __init__(self, train_args):
|
||||
super(ClassificationTrainer, self).__init__(train_args)
|
||||
self.learn_rate = train_args["learn_rate"]
|
||||
self.momentum = train_args["momentum"]
|
||||
def __init__(self, **train_args):
|
||||
super(ClassificationTrainer, self).__init__(**train_args)
|
||||
|
||||
self.iterator = None
|
||||
self.loss_func = None
|
||||
self.optimizer = None
|
||||
self.best_accuracy = 0
|
||||
|
||||
def define_loss(self):
|
||||
self.loss_func = nn.CrossEntropyLoss()
|
||||
|
||||
def define_optimizer(self):
|
||||
"""
|
||||
Define framework-specific optimizer specified by the models.
|
||||
"""
|
||||
self.optimizer = torch.optim.SGD(
|
||||
self.model.parameters(),
|
||||
lr=self.learn_rate,
|
||||
momentum=self.momentum)
|
||||
|
||||
def data_forward(self, network, x):
|
||||
"""Forward through network."""
|
||||
logits = network(x)
|
||||
return logits
|
||||
|
||||
def grad_backward(self, loss):
|
||||
"""Compute gradient backward."""
|
||||
self.model.zero_grad()
|
||||
loss.backward()
|
||||
|
||||
def update(self):
|
||||
"""Apply gradient."""
|
||||
self.optimizer.step()
|
||||
|
||||
def make_batch(self, iterator):
|
||||
return Action.make_batch(iterator, output_length=False, use_cuda=self.use_cuda)
|
||||
|
||||
@ -404,4 +387,4 @@ class ClassificationTrainer(BaseTrainer):
|
||||
return False
|
||||
|
||||
def _create_validator(self, valid_args):
|
||||
return ClassificationTester(valid_args)
|
||||
return ClassificationTester(**valid_args)
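Putting the pieces together: after this commit a trainer is built from keyword arguments, with Loss and Optimizer passed as prototypes, and the data is handed to train() explicitly. A minimal sketch (model, train_set and dev_set are placeholders; the hyper-parameters are illustrative):

```python
from fastNLP.core.loss import Loss
from fastNLP.core.optimizer import Optimizer
from fastNLP.core.trainer import SeqLabelTrainer

trainer = SeqLabelTrainer(epochs=3, batch_size=8, validate=True,
                          pickle_path="./save/",
                          loss=Loss(None),   # None: fall back to the model's own loss
                          optimizer=Optimizer("Adam", lr=0.001, weight_decay=0))
trainer.train(network=model, train_data=train_set, dev_data=dev_set)  # placeholders
```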
|
||||
|
@ -1,4 +1,5 @@
|
||||
from fastNLP.core.predictor import SeqLabelInfer, ClassificationInfer
|
||||
from fastNLP.core.preprocess import load_pickle
|
||||
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
|
||||
from fastNLP.loader.model_loader import ModelLoader
|
||||
|
||||
@ -7,14 +8,13 @@ mapping from model name to [URL, file_name.class_name, model_pickle_name]
|
||||
Notice that the class of the model should be in "models" directory.
|
||||
|
||||
Example:
|
||||
"zh_pos_tag_model": ["www.fudan.edu.cn", "sequence_modeling.SeqLabeling", "saved_model.pkl"]
|
||||
"""
|
||||
FastNLP_MODEL_COLLECTION = {
|
||||
"seq_label_model": {
|
||||
"url": "www.fudan.edu.cn",
|
||||
"class": "sequence_modeling.SeqLabeling",
|
||||
"class": "sequence_modeling.SeqLabeling", # file_name.class_name in models/
|
||||
"pickle": "seq_label_model.pkl",
|
||||
"type": "seq_label"
|
||||
"type": "seq_label",
|
||||
"config_file_name": "config", # the name of the config file which stores model initialization parameters
|
||||
"config_section_name": "text_class_model" # the name of the section in the config file which stores model init params
|
||||
},
|
||||
"text_class_model": {
|
||||
"url": "www.fudan.edu.cn",
|
||||
@ -22,11 +22,18 @@ FastNLP_MODEL_COLLECTION = {
|
||||
"pickle": "text_class_model.pkl",
|
||||
"type": "text_class"
|
||||
}
|
||||
"""
|
||||
FastNLP_MODEL_COLLECTION = {
|
||||
"cws_basic_model": {
|
||||
"url": "",
|
||||
"class": "sequence_modeling.AdvSeqLabel",
|
||||
"pickle": "cws_basic_model_v_0.pkl",
|
||||
"type": "seq_label",
|
||||
"config_file_name": "config",
|
||||
"config_section_name": "text_class_model"
|
||||
}
|
||||
}
|
||||
|
||||
CONFIG_FILE_NAME = "config"
|
||||
SECTION_NAME = "text_class_model"
|
||||
|
||||
|
||||
class FastNLP(object):
|
||||
"""
|
||||
@ -51,10 +58,13 @@ class FastNLP(object):
|
||||
self.model = None
|
||||
self.infer_type = None # "seq_label"/"text_class"
|
||||
|
||||
def load(self, model_name):
|
||||
def load(self, model_name, config_file="config", section_name="model"):
|
||||
"""
|
||||
Load a pre-trained FastNLP model together with additional data.
|
||||
:param model_name: str, the name of a FastNLP model.
|
||||
:param config_file: str, the name of the config file which stores the initialization information of the model.
|
||||
(default: "config")
|
||||
:param section_name: str, the name of the corresponding section in the config file. (default: model)
|
||||
"""
|
||||
assert type(model_name) is str
|
||||
if model_name not in FastNLP_MODEL_COLLECTION:
|
||||
@ -64,37 +74,47 @@ class FastNLP(object):
|
||||
self._download(model_name, FastNLP_MODEL_COLLECTION[model_name]["url"])
|
||||
|
||||
model_class = self._get_model_class(FastNLP_MODEL_COLLECTION[model_name]["class"])
|
||||
print("Restore model class {}".format(str(model_class)))
|
||||
|
||||
model_args = ConfigSection()
|
||||
ConfigLoader.load_config(self.model_dir + CONFIG_FILE_NAME, {SECTION_NAME: model_args})
|
||||
ConfigLoader.load_config(self.model_dir + config_file, {section_name: model_args})
|
||||
print("Restore model hyper-parameters {}".format(str(model_args.data)))
|
||||
|
||||
# fetch dictionary size and number of labels from pickle files
|
||||
word2index = load_pickle(self.model_dir, "word2id.pkl")
|
||||
model_args["vocab_size"] = len(word2index)
|
||||
index2label = load_pickle(self.model_dir, "id2class.pkl")
|
||||
model_args["num_classes"] = len(index2label)
|
||||
|
||||
# Construct the model
|
||||
model = model_class(model_args)
|
||||
print("Model constructed.")
|
||||
|
||||
# To do: framework independent
|
||||
ModelLoader.load_pytorch(model, self.model_dir + FastNLP_MODEL_COLLECTION[model_name]["pickle"])
|
||||
print("Model weights loaded.")
|
||||
|
||||
self.model = model
|
||||
self.infer_type = FastNLP_MODEL_COLLECTION[model_name]["type"]
|
||||
|
||||
print("Model loaded. ")
|
||||
print("Inference ready.")
|
||||
|
||||
def run(self, raw_input):
|
||||
"""
|
||||
Perform inference over given input using the loaded model.
|
||||
:param raw_input: str, raw text
|
||||
:param raw_input: list of string. Each list is an input query.
|
||||
:return results:
|
||||
"""
|
||||
|
||||
infer = self._create_inference(self.model_dir)
|
||||
|
||||
# string ---> 2-D list of string
|
||||
infer_input = self.string_to_list(raw_input)
|
||||
# tokenize: list of string ---> 2-D list of string
|
||||
infer_input = self.tokenize(raw_input, language="zh")
|
||||
|
||||
# 2-D list of string ---> list of strings
|
||||
# 2-D list of string ---> 2-D list of tags
|
||||
results = infer.predict(self.model, infer_input)
|
||||
|
||||
# list of strings ---> final answers
|
||||
# 2-D list of tags ---> list of final answers
|
||||
outputs = self._make_output(results, infer_input)
|
||||
return outputs
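End to end, the high-level API now loads a registered model and runs inference over a list of raw strings. A hedged sketch (the constructor argument name and the model directory path are assumptions; the model entry comes from FastNLP_MODEL_COLLECTION above, and the class is assumed to live in fastNLP/fastnlp.py):

```python
from fastNLP.fastnlp import FastNLP

nlp = FastNLP(model_dir="./model_dir/")            # assumed constructor argument
nlp.load("cws_basic_model", config_file="config",
         section_name="text_class_model")          # names from the collection above
results = nlp.run(["我爱自然语言处理"])             # list of raw queries in, predictions out
print(results)
```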
|
||||
|
||||
@ -142,81 +162,100 @@ class FastNLP(object):
|
||||
"""
|
||||
return True
|
||||
|
||||
def string_to_list(self, text, delimiter="\n"):
|
||||
"""
|
||||
This function is used to transform raw input to lists, which is done by DatasetLoader in training.
|
||||
Split text string into three-level lists.
|
||||
[
|
||||
[word_11, word_12, ...],
|
||||
[word_21, word_22, ...],
|
||||
...
|
||||
]
|
||||
:param text: string
|
||||
:param delimiter: str, character used to split text into sentences.
|
||||
:return data: two-level lists
|
||||
def tokenize(self, text, language):
|
||||
"""Extract tokens from strings.
|
||||
For English, extract words separated by space.
|
||||
For Chinese, extract characters.
|
||||
TODO: more complex tokenization methods
|
||||
|
||||
:param text: list of string
|
||||
:param language: str, one of ('zh', 'en'), Chinese or English.
|
||||
:return data: list of list of string, each string is a token.
|
||||
"""
|
||||
assert language in ("zh", "en")
|
||||
data = []
|
||||
sents = text.strip().split(delimiter)
|
||||
for sent in sents:
|
||||
characters = []
|
||||
for ch in sent:
|
||||
characters.append(ch)
|
||||
data.append(characters)
|
||||
for sent in text:
|
||||
if language == "en":
|
||||
tokens = sent.strip().split()
|
||||
elif language == "zh":
|
||||
tokens = [char for char in sent]
|
||||
else:
|
||||
raise RuntimeError("Unknown language {}".format(language))
|
||||
data.append(tokens)
|
||||
return data
|
||||
|
||||
def _make_output(self, results, infer_input):
|
||||
"""Transform the infer output into user-friendly output.
|
||||
|
||||
:param results: 1 or 2-D list of strings.
|
||||
If self.infer_type == "seq_label", it is of shape [num_examples, tag_seq_length]
|
||||
If self.infer_type == "text_class", it is of shape [num_examples]
|
||||
:param infer_input: 2-D list of string, the input query before inference.
|
||||
:return outputs: list. Each entry is a prediction.
|
||||
"""
|
||||
if self.infer_type == "seq_label":
|
||||
outputs = make_seq_label_output(results, infer_input)
|
||||
elif self.infer_type == "text_class":
|
||||
outputs = make_class_output(results, infer_input)
|
||||
else:
|
||||
raise ValueError("fail to make outputs with infer type {}".format(self.infer_type))
|
||||
raise RuntimeError("fail to make outputs with infer type {}".format(self.infer_type))
|
||||
return outputs
|
||||
|
||||
|
||||
def make_seq_label_output(result, infer_input):
|
||||
"""
|
||||
Transform model output into user-friendly contents.
|
||||
:param result: 1-D list of strings. (model output)
|
||||
:param infer_input: 2-D list of string (model input)
|
||||
:return outputs:
|
||||
"""
|
||||
return result
|
||||
"""Transform model output into user-friendly contents.
|
||||
|
||||
:param result: 2-D list of strings. (model output)
|
||||
:param infer_input: 2-D list of string (model input)
|
||||
:return ret: list of list of tuples
|
||||
[
|
||||
[(word_11, label_11), (word_12, label_12), ...],
|
||||
[(word_21, label_21), (word_22, label_22), ...],
|
||||
...
|
||||
]
|
||||
"""
|
||||
ret = []
|
||||
for example_x, example_y in zip(infer_input, result):
|
||||
ret.append([(x, y) for x, y in zip(example_x, example_y)])
|
||||
return ret
|
||||
|
||||
def make_class_output(result, infer_input):
|
||||
"""Transform model output into user-friendly contents.
|
||||
|
||||
:param result: 2-D list of strings. (model output)
|
||||
:param infer_input: 1-D list of string (model input)
|
||||
:return ret: the same as result, [label_1, label_2, ...]
|
||||
"""
|
||||
return result
|
||||
|
||||
|
||||
def interpret_word_seg_results(infer_input, results):
|
||||
"""
|
||||
Transform model output into user-friendly contents.
|
||||
def interpret_word_seg_results(char_seq, label_seq):
|
||||
"""Transform model output into user-friendly contents.
|
||||
|
||||
Example: In CWS, convert <BMES> labeling into segmented text.
|
||||
:param results: list of strings. (model output)
|
||||
:param infer_input: 2-D list of string (model input)
|
||||
:return output: list of strings
|
||||
:param char_seq: list of string,
|
||||
:param label_seq: list of string, the same length as char_seq
|
||||
Each entry is one of ('B', 'M', 'E', 'S').
|
||||
:return output: list of words
|
||||
"""
|
||||
outputs = []
|
||||
for sent_char, sent_label in zip(infer_input, results):
|
||||
words = []
|
||||
word = ""
|
||||
for char, label in zip(sent_char, sent_label):
|
||||
if label[0] == "B":
|
||||
if word != "":
|
||||
words.append(word)
|
||||
word = char
|
||||
elif label[0] == "M":
|
||||
word += char
|
||||
elif label[0] == "E":
|
||||
word += char
|
||||
words = []
|
||||
word = ""
|
||||
for char, label in zip(char_seq, label_seq):
|
||||
if label[0] == "B":
|
||||
if word != "":
|
||||
words.append(word)
|
||||
word = ""
|
||||
elif label[0] == "S":
|
||||
if word != "":
|
||||
words.append(word)
|
||||
word = ""
|
||||
words.append(char)
|
||||
else:
|
||||
raise ValueError("invalid label")
|
||||
outputs.append(" ".join(words))
|
||||
return outputs
|
||||
word = char
|
||||
elif label[0] == "M":
|
||||
word += char
|
||||
elif label[0] == "E":
|
||||
word += char
|
||||
words.append(word)
|
||||
word = ""
|
||||
elif label[0] == "S":
|
||||
if word != "":
|
||||
words.append(word)
|
||||
word = ""
|
||||
words.append(char)
|
||||
else:
|
||||
raise ValueError("invalid label {}".format(label[0]))
|
||||
return words
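A quick illustration of the BMES decoding performed by interpret_word_seg_results above (characters and labels are illustrative; the function is assumed to be imported from the module shown in this diff):

```python
chars = ["北", "京", "欢", "迎", "你"]
labels = ["B", "E", "B", "E", "S"]
print(interpret_word_seg_results(chars, labels))   # -> ['北京', '欢迎', '你']
```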
|
||||
|
@ -94,6 +94,10 @@ class ConfigSection(object):
|
||||
def __contains__(self, item):
|
||||
return item in self.__dict__.keys()
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
return self.__dict__
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
config = ConfigLoader('configLoader', 'there is no data')
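The new data property exposes a ConfigSection as a plain dict, which is what lets the updated scripts unpack a config section straight into keyword arguments. A sketch (the config path and section name are illustrative):

```python
from fastNLP.core.trainer import SeqLabelTrainer
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection

train_args = ConfigSection()
ConfigLoader("config", "").load_config("./cws.cfg", {"train": train_args})  # illustrative path/section
trainer = SeqLabelTrainer(**train_args.data)   # .data turns the section into **kwargs
```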
|
||||
|
@ -142,6 +142,8 @@ class CharLM(BaseModel):
|
||||
"char_dict": char_dict,
|
||||
"reverse_word_dict": reverse_word_dict,
|
||||
}
|
||||
if not os.path.exists("cache"):
|
||||
os.mkdir("cache")
|
||||
torch.save(objects, "cache/prep.pt")
|
||||
print("Preprocess done.")
|
||||
|
||||
|
fastNLP/modules/decoder/MLP.py (new file, 56 lines)
@ -0,0 +1,56 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
class MLP(nn.Module):
|
||||
def __init__(self, size_layer, num_class=2, activation='relu'):
|
||||
"""Multilayer Perceptrons as a decoder
|
||||
|
||||
Args:
|
||||
size_layer: list of int, defines the sizes of the MLP layers
num_class: int, number of output classes; must be 2 or equal to the last layer's size
|
||||
activation: str or function, the activation function for hidden layers
|
||||
"""
|
||||
super(MLP, self).__init__()
|
||||
self.hiddens = nn.ModuleList()
|
||||
self.output = None
|
||||
for i in range(1, len(size_layer)):
|
||||
if i + 1 == len(size_layer):
|
||||
self.output = nn.Linear(size_layer[i-1], size_layer[i])
|
||||
else:
|
||||
self.hiddens.append(nn.Linear(size_layer[i-1], size_layer[i]))
|
||||
|
||||
if num_class == 2:
|
||||
self.out_active = nn.LogSigmoid()
|
||||
elif num_class == size_layer[-1]:
|
||||
self.out_active = nn.LogSoftmax(dim=1)
|
||||
else:
|
||||
raise ValueError("should set output num_class correctly: {}".format(num_class))
|
||||
|
||||
actives = {
|
||||
'relu': nn.ReLU(),
|
||||
'tanh': nn.Tanh()
|
||||
}
|
||||
if activation in actives:
|
||||
self.hidden_active = actives[activation]
|
||||
elif callable(activation):  # a user-supplied activation function
|
||||
self.hidden_active = activation
|
||||
else:
|
||||
raise ValueError("should set activation correctly: {}".format(activation))
|
||||
|
||||
def forward(self, x):
|
||||
for layer in self.hiddens:
|
||||
x = self.hidden_active(layer(x))
|
||||
x = self.out_active(self.output(x))
|
||||
return x
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
net1 = MLP([5,10,5])
|
||||
net2 = MLP([5,10,5], 5)
|
||||
for net in [net1, net2]:
|
||||
x = torch.randn(5, 5)
|
||||
y = net(x)
|
||||
print(x)
|
||||
print(y)
|
||||
|
@ -15,7 +15,7 @@ class Embedding(nn.Module):
|
||||
def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0):
|
||||
super(Embedding, self).__init__()
|
||||
self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse)
|
||||
if init_emb:
|
||||
if init_emb is not None:
|
||||
self.embed.weight = nn.Parameter(init_emb)
|
||||
self.dropout = nn.Dropout(dropout)
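The switch from `if init_emb:` to `if init_emb is not None:` matters because truth-testing a multi-element tensor is ambiguous in PyTorch and raises an error; a small sketch:

```python
import torch

emb = torch.randn(10, 300)          # a pretrained embedding matrix
# bool(emb) raises: "Boolean value of Tensor with more than one element is ambiguous"
use_pretrained = emb is not None    # the safe check adopted in this commit
```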
|
||||
|
||||
|
@ -273,7 +273,7 @@ class MaskedRNNBase(nn.Module):
|
||||
hx = (hx, hx)
|
||||
|
||||
func = AutogradMaskedStep(num_layers=self.num_layers,
|
||||
dropout=self.dropout,
|
||||
dropout=self.step_dropout,
|
||||
train=self.training,
|
||||
lstm=lstm)
|
||||
|
||||
|
(Four binary files changed, not shown — one image previously 35 KiB; two large file diffs suppressed.)
@ -18,7 +18,6 @@ MLP_HIDDEN = 2000
|
||||
CLASSES_NUM = 5
|
||||
|
||||
from fastNLP.models.base_model import BaseModel
|
||||
from fastNLP.core.trainer import BaseTrainer
|
||||
|
||||
|
||||
class MyNet(BaseModel):
|
||||
@ -60,18 +59,6 @@ class Net(nn.Module):
|
||||
return x, penalty
|
||||
|
||||
|
||||
class MyTrainer(BaseTrainer):
|
||||
def __init__(self, args):
|
||||
super(MyTrainer, self).__init__(args)
|
||||
self.optimizer = None
|
||||
|
||||
def define_optimizer(self):
|
||||
self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)
|
||||
|
||||
def define_loss(self):
|
||||
self.loss_func = nn.CrossEntropyLoss()
|
||||
|
||||
|
||||
def train(model_dict=None, using_cuda=True, learning_rate=0.06,\
|
||||
momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10):
|
||||
"""
|
||||
|
@ -1,26 +1,26 @@
|
||||
import sys, os
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
|
||||
|
||||
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
|
||||
from fastNLP.core.trainer import SeqLabelTrainer
|
||||
from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader
|
||||
from fastNLP.loader.preprocess import POSPreprocess, load_pickle
|
||||
from fastNLP.core.preprocess import SeqLabelPreprocess, load_pickle
|
||||
from fastNLP.saver.model_saver import ModelSaver
|
||||
from fastNLP.loader.model_loader import ModelLoader
|
||||
from fastNLP.core.tester import SeqLabelTester
|
||||
from fastNLP.models.sequence_modeling import AdvSeqLabel
|
||||
from fastNLP.core.inference import SeqLabelInfer
|
||||
from fastNLP.core.optimizer import SGD
|
||||
from fastNLP.core.predictor import SeqLabelInfer
|
||||
|
||||
# change into this file's directory if the script is run from elsewhere
|
||||
if len(os.path.dirname(__file__)) != 0:
|
||||
os.chdir(os.path.dirname(__file__))
|
||||
datadir = 'icwb2-data'
|
||||
cfgfile = 'cws.cfg'
|
||||
datadir = "/home/zyfeng/data/"
|
||||
cfgfile = './cws.cfg'
|
||||
data_name = "pku_training.utf8"
|
||||
|
||||
cws_data_path = os.path.join(datadir, "training/pku_training.utf8")
|
||||
cws_data_path = os.path.join(datadir, "pku_training.utf8")
|
||||
pickle_path = "save"
|
||||
data_infer_path = os.path.join(datadir, "infer.utf8")
|
||||
|
||||
@ -70,12 +70,13 @@ def train():
|
||||
train_data = loader.load_pku()
|
||||
|
||||
# Preprocessor
|
||||
p = POSPreprocess(train_data, pickle_path, train_dev_split=0.3)
|
||||
train_args["vocab_size"] = p.vocab_size
|
||||
train_args["num_classes"] = p.num_classes
|
||||
preprocessor = SeqLabelPreprocess()
|
||||
data_train, data_dev = preprocessor.run(train_data, pickle_path=pickle_path, train_dev_split=0.3)
|
||||
train_args["vocab_size"] = preprocessor.vocab_size
|
||||
train_args["num_classes"] = preprocessor.num_classes
|
||||
|
||||
# Trainer
|
||||
trainer = SeqLabelTrainer(train_args)
|
||||
trainer = SeqLabelTrainer(**train_args.data)
|
||||
|
||||
# Model
|
||||
model = AdvSeqLabel(train_args)
|
||||
@ -83,10 +84,11 @@ def train():
|
||||
ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
|
||||
print('model parameter loaded!')
|
||||
except Exception as e:
|
||||
print("No saved model. Continue.")
|
||||
pass
|
||||
|
||||
# Start training
|
||||
trainer.train(model)
|
||||
trainer.train(model, data_train, data_dev)
|
||||
print("Training finished!")
|
||||
|
||||
# Saver
|
||||
@ -106,6 +108,9 @@ def test():
|
||||
index2label = load_pickle(pickle_path, "id2class.pkl")
|
||||
test_args["num_classes"] = len(index2label)
|
||||
|
||||
# load dev data
|
||||
dev_data = load_pickle(pickle_path, "data_dev.pkl")
|
||||
|
||||
# Define the same model
|
||||
model = AdvSeqLabel(test_args)
|
||||
|
||||
@ -114,10 +119,10 @@ def test():
|
||||
print("model loaded!")
|
||||
|
||||
# Tester
|
||||
tester = SeqLabelTester(test_args)
|
||||
tester = SeqLabelTester(**test_args.data)
|
||||
|
||||
# Start testing
|
||||
tester.test(model)
|
||||
tester.test(model, dev_data)
|
||||
|
||||
# print test results
|
||||
print(tester.show_matrices())
|
||||
|
test/core/test_action.py (new file, 18 lines)
@ -0,0 +1,18 @@
|
||||
import os
|
||||
|
||||
import unittest
|
||||
|
||||
from fastNLP.core.action import Action, Batchifier, SequentialSampler
|
||||
|
||||
class TestAction(unittest.TestCase):
|
||||
def test_case_1(self):
|
||||
x = [1, 2, 3, 4, 5, 6, 7, 8]
|
||||
y = [1, 1, 1, 1, 2, 2, 2, 2]
|
||||
data = []
|
||||
for i in range(len(x)):
|
||||
data.append([[x[i]], [y[i]]])
|
||||
data = Batchifier(SequentialSampler(data), batch_size=2, drop_last=False)
|
||||
action = Action()
|
||||
for batch_x in action.make_batch(data, use_cuda=False, output_length=True, max_len=None):
|
||||
print(batch_x)
|
||||
|
test/core/test_preprocess.py (new file, 43 lines)
@ -0,0 +1,43 @@
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from fastNLP.core.preprocess import SeqLabelPreprocess
|
||||
|
||||
|
||||
class TestSeqLabelPreprocess(unittest.TestCase):
|
||||
def test_case_1(self):
|
||||
data = [
|
||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
|
||||
[['Hello', 'world', '!'], ['a', 'n', '.']],
|
||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
|
||||
[['Hello', 'world', '!'], ['a', 'n', '.']],
|
||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
|
||||
[['Hello', 'world', '!'], ['a', 'n', '.']],
|
||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
|
||||
[['Hello', 'world', '!'], ['a', 'n', '.']],
|
||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
|
||||
[['Hello', 'world', '!'], ['a', 'n', '.']],
|
||||
]
|
||||
|
||||
if os.path.exists("./save"):
|
||||
for root, dirs, files in os.walk("./save", topdown=False):
|
||||
for name in files:
|
||||
os.remove(os.path.join(root, name))
|
||||
for name in dirs:
|
||||
os.rmdir(os.path.join(root, name))
|
||||
result = SeqLabelPreprocess().run(train_dev_data=data, train_dev_split=0.4,
|
||||
pickle_path="./save")
|
||||
result = SeqLabelPreprocess().run(train_dev_data=data, train_dev_split=0.4,
|
||||
pickle_path="./save")
|
||||
if os.path.exists("./save"):
|
||||
for root, dirs, files in os.walk("./save", topdown=False):
|
||||
for name in files:
|
||||
os.remove(os.path.join(root, name))
|
||||
for name in dirs:
|
||||
os.rmdir(os.path.join(root, name))
|
||||
result = SeqLabelPreprocess().run(test_data=data, train_dev_data=data,
|
||||
pickle_path="./save", train_dev_split=0.4,
|
||||
cross_val=True)
|
||||
result = SeqLabelPreprocess().run(test_data=data, train_dev_data=data,
|
||||
pickle_path="./save", train_dev_split=0.4,
|
||||
cross_val=True)
|
test/core/test_trainer.py (new file, 33 lines)
@ -0,0 +1,33 @@
|
||||
import os
|
||||
|
||||
import torch.nn as nn
|
||||
import unittest
|
||||
|
||||
from fastNLP.core.trainer import SeqLabelTrainer
|
||||
from fastNLP.core.loss import Loss
|
||||
from fastNLP.core.optimizer import Optimizer
|
||||
from fastNLP.models.sequence_modeling import SeqLabeling
|
||||
|
||||
class TestTrainer(unittest.TestCase):
|
||||
def test_case_1(self):
|
||||
args = {"epochs": 3, "batch_size": 8, "validate": True, "use_cuda": True, "pickle_path": "./save/",
|
||||
"save_best_dev": True, "model_name": "default_model_name.pkl",
|
||||
"loss": Loss(None),
|
||||
"optimizer": Optimizer("Adam", lr=0.001, weight_decay=0),
|
||||
"vocab_size": 20,
|
||||
"word_emb_dim": 100,
|
||||
"rnn_hidden_units": 100,
|
||||
"num_classes": 3
|
||||
}
|
||||
trainer = SeqLabelTrainer()
|
||||
train_data = [
|
||||
[[1, 2, 3, 4, 5, 6], [1, 0, 1, 0, 1, 2]],
|
||||
[[2, 3, 4, 5, 1, 6], [0, 1, 0, 1, 0, 2]],
|
||||
[[1, 4, 1, 4, 1, 6], [1, 0, 1, 0, 1, 2]],
|
||||
[[1, 2, 3, 4, 5, 6], [1, 0, 1, 0, 1, 2]],
|
||||
[[2, 3, 4, 5, 1, 6], [0, 1, 0, 1, 0, 2]],
|
||||
[[1, 4, 1, 4, 1, 6], [1, 0, 1, 0, 1, 2]],
|
||||
]
|
||||
dev_data = train_data
|
||||
model = SeqLabeling(args)
|
||||
trainer.train(network=model, train_data=train_data, dev_data=dev_data)
|
@ -1,65 +1,11 @@
|
||||
[General]
|
||||
revision = "first"
|
||||
datapath = "./data/smallset/imdb/"
|
||||
embed_path = "./data/smallset/imdb/embedding.txt"
|
||||
optimizer = "adam"
|
||||
attn_mode = "rout"
|
||||
seq_encoder = "bilstm"
|
||||
out_caps_num = 5
|
||||
rout_iter = 3
|
||||
max_snt_num = 40
|
||||
max_wd_num = 40
|
||||
max_epochs = 50
|
||||
pre_trained = true
|
||||
batch_sz = 32
|
||||
batch_sz_min = 32
|
||||
bucket_sz = 5000
|
||||
partial_update_until_epoch = 2
|
||||
embed_size = 300
|
||||
hidden_size = 200
|
||||
dense_hidden = [300, 10]
|
||||
lr = 0.0002
|
||||
decay_steps = 1000
|
||||
decay_rate = 0.9
|
||||
dropout = 0.2
|
||||
early_stopping = 7
|
||||
reg = 1e-06
|
||||
|
||||
[My]
|
||||
datapath = "./data/smallset/imdb/"
|
||||
embed_path = "./data/smallset/imdb/embedding.txt"
|
||||
optimizer = "adam"
|
||||
attn_mode = "rout"
|
||||
seq_encoder = "bilstm"
|
||||
out_caps_num = 5
|
||||
rout_iter = 3
|
||||
max_snt_num = 40
|
||||
max_wd_num = 40
|
||||
max_epochs = 50
|
||||
pre_trained = true
|
||||
batch_sz = 32
|
||||
batch_sz_min = 32
|
||||
bucket_sz = 5000
|
||||
partial_update_until_epoch = 2
|
||||
embed_size = 300
|
||||
hidden_size = 200
|
||||
dense_hidden = [300, 10]
|
||||
lr = 0.0002
|
||||
decay_steps = 1000
|
||||
decay_rate = 0.9
|
||||
dropout = 0.2
|
||||
early_stopping = 70
|
||||
reg = 1e-05
|
||||
test = 5
|
||||
new_attr = 40
|
||||
|
||||
[POS]
|
||||
[test_seq_label_trainer]
|
||||
epochs = 1
|
||||
batch_size = 32
|
||||
pickle_path = "./data_for_tests/"
|
||||
validate = true
|
||||
save_best_dev = true
|
||||
model_saved_path = "./"
|
||||
use_cuda = true
|
||||
|
||||
[test_seq_label_model]
|
||||
rnn_hidden_units = 100
|
||||
rnn_layers = 1
|
||||
rnn_bi_direction = true
|
||||
@ -68,13 +14,12 @@ dropout = 0.5
|
||||
use_crf = true
|
||||
use_cuda = true
|
||||
|
||||
[POS_test]
|
||||
[test_seq_label_tester]
|
||||
save_output = true
|
||||
validate_in_training = true
|
||||
save_dev_input = false
|
||||
save_loss = true
|
||||
batch_size = 1
|
||||
pickle_path = "./data_for_tests/"
|
||||
rnn_hidden_units = 100
|
||||
rnn_layers = 1
|
||||
rnn_bi_direction = true
|
||||
@ -84,7 +29,6 @@ use_crf = true
|
||||
use_cuda = true
|
||||
|
||||
[POS_infer]
|
||||
pickle_path = "./data_for_tests/"
|
||||
rnn_hidden_units = 100
|
||||
rnn_layers = 1
|
||||
rnn_bi_direction = true
|
||||
@ -95,14 +39,9 @@ num_classes = 27
|
||||
[text_class]
|
||||
epochs = 1
|
||||
batch_size = 10
|
||||
pickle_path = "./save_path/"
|
||||
validate = false
|
||||
save_best_dev = false
|
||||
model_saved_path = "./save_path/"
|
||||
use_cuda = true
|
||||
learn_rate = 1e-3
|
||||
momentum = 0.9
|
||||
|
||||
[text_class_model]
|
||||
vocab_size = 867
|
||||
num_classes = 18
|
||||
model_name = "class_model.pkl"
|
||||
|
test/loader/config (new file, 7 lines)
@ -0,0 +1,7 @@
[test]
x = 1
y = 2
z = 3
input = [1,2,3]
text = "this is text"
doubles = 0.5
75
test/loader/test_loader.py
Normal file
@ -0,0 +1,75 @@
import os
import configparser

import json
import unittest


from fastNLP.loader.config_loader import ConfigSection, ConfigLoader
from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, POSDatasetLoader, LMDatasetLoader

class TestConfigLoader(unittest.TestCase):
    def test_case_ConfigLoader(self):

        def read_section_from_config(config_path, section_name):
            dict = {}
            if not os.path.exists(config_path):
                raise FileNotFoundError("config file {} NOT found.".format(config_path))
            cfg = configparser.ConfigParser()
            cfg.read(config_path)
            if section_name not in cfg:
                raise AttributeError("config file {} do NOT have section {}".format(
                    config_path, section_name
                ))
            gen_sec = cfg[section_name]
            for s in gen_sec.keys():
                try:
                    val = json.loads(gen_sec[s])
                    dict[s] = val
                except Exception as e:
                    raise AttributeError("json can NOT load {} in section {}, config file {}".format(
                        s, section_name, config_path
                    ))
            return dict

        test_arg = ConfigSection()
        ConfigLoader("config", "").load_config(os.path.join("./test/loader", "config"), {"test": test_arg})
        # ConfigLoader("config", "").load_config("/home/ygxu/github/fastNLP_testing/fastNLP/test/loader/config",
        #                                        {"test": test_arg})

        # dict = read_section_from_config("/home/ygxu/github/fastNLP_testing/fastNLP/test/loader/config", "test")
        dict = read_section_from_config(os.path.join("./test/loader", "config"), "test")

        for sec in dict:
            if (sec not in test_arg) or (dict[sec] != test_arg[sec]):
                raise AttributeError("ERROR")

        for sec in test_arg.__dict__.keys():
            if (sec not in dict) or (dict[sec] != test_arg[sec]):
                raise AttributeError("ERROR")

        try:
            not_exist = test_arg["NOT EXIST"]
        except Exception as e:
            pass

        print("pass config test!")


class TestDatasetLoader(unittest.TestCase):
    def test_case_TokenizeDatasetLoader(self):
        loader = TokenizeDatasetLoader("cws_pku_utf_8", "./test/data_for_tests/cws_pku_utf_8")
        data = loader.load_pku(max_seq_len=32)
        print("pass TokenizeDatasetLoader test!")

    def test_case_POSDatasetLoader(self):
        loader = POSDatasetLoader("people", "./test/data_for_tests/people.txt")
        data = loader.load()
        datas = loader.load_lines()
        print("pass POSDatasetLoader test!")

    def test_case_LMDatasetLoader(self):
        loader = LMDatasetLoader("cws_pku_utf_8", "./test/data_for_tests/cws_pku_utf_8")
        data = loader.load()
        datas = loader.load_lines()
        print("pass LMDatasetLoader test!")
27
test/modules/test_masked_rnn.py
Normal file
@ -0,0 +1,27 @@

import torch
import unittest

from fastNLP.modules.encoder.masked_rnn import MaskedRNN

class TestMaskedRnn(unittest.TestCase):
    def test_case_1(self):
        masked_rnn = MaskedRNN(input_size=1, hidden_size=1, bidirectional=True, batch_first=True)
        x = torch.tensor([[[1.0], [2.0]]])
        print(x.size())
        y = masked_rnn(x)
        mask = torch.tensor([[[1], [1]]])
        y = masked_rnn(x, mask=mask)
        mask = torch.tensor([[[1], [0]]])
        y = masked_rnn(x, mask=mask)

    def test_case_2(self):
        masked_rnn = MaskedRNN(input_size=1, hidden_size=1, bidirectional=False, batch_first=True)
        x = torch.tensor([[[1.0], [2.0]]])
        print(x.size())
        y = masked_rnn(x)
        mask = torch.tensor([[[1], [1]]])
        y = masked_rnn(x, mask=mask)
        xx = torch.tensor([[[1.0]]])
        y = masked_rnn.step(xx)
        y = masked_rnn.step(xx, mask=mask)
30
test/modules/test_other_modules.py
Normal file
@ -0,0 +1,30 @@


import torch
import unittest

from fastNLP.modules.other_modules import GroupNorm, LayerNormalization, BiLinear


class TestGroupNorm(unittest.TestCase):
    def test_case_1(self):
        gn = GroupNorm(num_features=1, num_groups=10, eps=1.5e-5)
        x = torch.randn((20, 50, 10))
        y = gn(x)


class TestLayerNormalization(unittest.TestCase):
    def test_case_1(self):
        ln = LayerNormalization(d_hid=5, eps=2e-3)
        x = torch.randn((20, 50, 5))
        y = ln(x)


class TestBiLinear(unittest.TestCase):
    def test_case_1(self):
        bl = BiLinear(n_left=5, n_right=5, n_out=10, bias=True)
        x_left = torch.randn((7, 10, 20, 5))
        x_right = torch.randn((7, 10, 20, 5))
        y = bl(x_left, x_right)
        print(bl)
        bl2 = BiLinear(n_left=15, n_right=15, n_out=10, bias=True)
18
test/modules/test_utils.py
Normal file
@ -0,0 +1,18 @@

import torch
import numpy as np
import unittest

import fastNLP.modules.utils as utils

class TestUtils(unittest.TestCase):
    def test_case_1(self):
        a = torch.tensor([
            [1, 2, 3, 4, 5], [2, 3, 4, 5, 6]
        ])
        utils.orthogonal(a)

    def test_case_2(self):
        a = np.random.rand(100, 100)
        utils.mst(a)
28
test/modules/test_variational_rnn.py
Normal file
@ -0,0 +1,28 @@

import torch
import unittest

from fastNLP.modules.encoder.variational_rnn import VarMaskedFastLSTM

class TestMaskedRnn(unittest.TestCase):
    def test_case_1(self):
        masked_rnn = VarMaskedFastLSTM(input_size=1, hidden_size=1, bidirectional=True, batch_first=True)
        x = torch.tensor([[[1.0], [2.0]]])
        print(x.size())
        y = masked_rnn(x)
        mask = torch.tensor([[[1], [1]]])
        y = masked_rnn(x, mask=mask)
        mask = torch.tensor([[[1], [0]]])
        y = masked_rnn(x, mask=mask)

    def test_case_2(self):
        masked_rnn = VarMaskedFastLSTM(input_size=1, hidden_size=1, bidirectional=False, batch_first=True)
        x = torch.tensor([[[1.0], [2.0]]])
        print(x.size())
        y = masked_rnn(x)
        mask = torch.tensor([[[1], [1]]])
        y = masked_rnn(x, mask=mask)
        xx = torch.tensor([[[1.0]]])
        #y, hidden = masked_rnn.step(xx)
        #step() still has a bug
        #y, hidden = masked_rnn.step(xx, mask=mask)
@ -20,7 +20,7 @@ class MyNERTrainer(SeqLabelTrainer):
        override
        :return:
        """
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
        self.optimizer = torch.optim.Adam(self._model.parameters(), lr=0.001)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=3000, gamma=0.5)

    def update(self):
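The hunk above only swaps `self.model` for `self._model` when building the Adam optimizer; the StepLR schedule (halve the learning rate every 3000 steps) is unchanged. Below is a stand-alone sketch of that optimizer/scheduler pairing in plain PyTorch; the tiny linear model and training loop are illustrative only and not part of this commit:

```python
import torch
import torch.nn as nn

# Hypothetical tiny model standing in for the NER network.
model = nn.Linear(10, 3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Multiply the learning rate by gamma every step_size calls to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3000, gamma=0.5)

for step in range(6000):
    x, y = torch.randn(4, 10), torch.randint(0, 3, (4,))
    loss = nn.functional.cross_entropy(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()  # lr is 0.0005 after 3000 steps, 0.00025 after 6000
```
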
@ -13,6 +13,7 @@ from fastNLP.loader.dataset_loader import ClassDatasetLoader
from fastNLP.models.base_model import BaseModel
from fastNLP.modules import aggregation
from fastNLP.modules import encoder
from fastNLP.modules import decoder


class ClassificationModel(BaseModel):
@ -20,20 +21,20 @@ class ClassificationModel(BaseModel):
    Simple text classification model based on CNN.
    """

    def __init__(self, class_num, vocab_size):
    def __init__(self, num_classes, vocab_size):
        super(ClassificationModel, self).__init__()

        self.embed = encoder.Embedding(nums=vocab_size, dims=300)
        self.conv = encoder.Conv(
        self.emb = encoder.Embedding(nums=vocab_size, dims=300)
        self.enc = encoder.Conv(
            in_channels=300, out_channels=100, kernel_size=3)
        self.pool = aggregation.MaxPool()
        self.output = encoder.Linear(input_size=100, output_size=class_num)
        self.agg = aggregation.MaxPool()
        self.dec = decoder.MLP(100, num_classes=num_classes)

    def forward(self, x):
        x = self.embed(x)  # [N,L] -> [N,L,C]
        x = self.conv(x)  # [N,L,C_in] -> [N,L,C_out]
        x = self.pool(x)  # [N,L,C] -> [N,C]
        x = self.output(x)  # [N,C] -> [N, N_class]
        x = self.emb(x)  # [N,L] -> [N,L,C]
        x = self.enc(x)  # [N,L,C_in] -> [N,L,C_out]
        x = self.agg(x)  # [N,L,C] -> [N,C]
        x = self.dec(x)  # [N,C] -> [N, N_class]
        return x


@ -55,7 +56,7 @@ model_args = {
    'num_classes': n_classes,
    'vocab_size': vocab_size
}
model = ClassificationModel(class_num=n_classes, vocab_size=vocab_size)
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)

# train model
train_args = {
@ -1,7 +1,7 @@
import os
import sys

sys.path.append("..")

import argparse
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
from fastNLP.core.trainer import SeqLabelTrainer
from fastNLP.loader.dataset_loader import POSDatasetLoader, BaseLoader
@ -11,17 +11,29 @@ from fastNLP.loader.model_loader import ModelLoader
from fastNLP.core.tester import SeqLabelTester
from fastNLP.models.sequence_modeling import SeqLabeling
from fastNLP.core.predictor import SeqLabelInfer
from fastNLP.core.optimizer import Optimizer

data_name = "people.txt"
data_path = "data_for_tests/people.txt"
pickle_path = "seq_label/"
data_infer_path = "data_for_tests/people_infer.txt"
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--save", type=str, default="./seq_label/", help="path to save pickle files")
parser.add_argument("-t", "--train", type=str, default="./data_for_tests/people.txt",
                    help="path to the training data")
parser.add_argument("-c", "--config", type=str, default="./data_for_tests/config", help="path to the config file")
parser.add_argument("-m", "--model_name", type=str, default="seq_label_model.pkl", help="the name of the model")
parser.add_argument("-i", "--infer", type=str, default="data_for_tests/people_infer.txt",
                    help="data used for inference")

args = parser.parse_args()
pickle_path = args.save
model_name = args.model_name
config_dir = args.config
data_path = args.train
data_infer_path = args.infer


def infer():
    # Load infer configuration, the same as test
    test_args = ConfigSection()
    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})
    ConfigLoader("config.cfg", "").load_config(config_dir, {"POS_infer": test_args})

    # fetch dictionary size and number of labels from pickle files
    word2index = load_pickle(pickle_path, "word2id.pkl")
@ -33,11 +45,11 @@ def infer():
    model = SeqLabeling(test_args)

    # Dump trained parameters into the model
    ModelLoader.load_pytorch(model, pickle_path + "saved_model.pkl")
    ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name))
    print("model loaded!")

    # Data Loader
    raw_data_loader = BaseLoader(data_name, data_infer_path)
    raw_data_loader = BaseLoader("xxx", data_infer_path)
    infer_data = raw_data_loader.load_lines()

    # Inference interface
@ -51,49 +63,72 @@ def infer():

def train_and_test():
    # Config Loader
    train_args = ConfigSection()
    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args})
    trainer_args = ConfigSection()
    model_args = ConfigSection()
    ConfigLoader("config.cfg", "").load_config(config_dir, {
        "test_seq_label_trainer": trainer_args, "test_seq_label_model": model_args})

    # Data Loader
    pos_loader = POSDatasetLoader(data_name, data_path)
    pos_loader = POSDatasetLoader("xxx", data_path)
    train_data = pos_loader.load_lines()

    # Preprocessor
    p = SeqLabelPreprocess()
    data_train, data_dev = p.run(train_data, pickle_path=pickle_path, train_dev_split=0.5)
    train_args["vocab_size"] = p.vocab_size
    train_args["num_classes"] = p.num_classes
    model_args["vocab_size"] = p.vocab_size
    model_args["num_classes"] = p.num_classes

    # Trainer
    trainer = SeqLabelTrainer(train_args)
    # Trainer: two definition styles
    # 1
    # trainer = SeqLabelTrainer(trainer_args.data)

    # 2
    trainer = SeqLabelTrainer(
        epochs=trainer_args["epochs"],
        batch_size=trainer_args["batch_size"],
        validate=trainer_args["validate"],
        use_cuda=trainer_args["use_cuda"],
        pickle_path=pickle_path,
        save_best_dev=trainer_args["save_best_dev"],
        model_name=model_name,
        optimizer=Optimizer("SGD", lr=0.01, momentum=0.9),
    )

    # Model
    model = SeqLabeling(train_args)
    model = SeqLabeling(model_args)

    # Start training
    trainer.train(model, data_train, data_dev)
    print("Training finished!")

    # Saver
    saver = ModelSaver(pickle_path + "saved_model.pkl")
    saver = ModelSaver(os.path.join(pickle_path, model_name))
    saver.save_pytorch(model)
    print("Model saved!")

    del model, trainer, pos_loader

    # Define the same model
    model = SeqLabeling(train_args)
    model = SeqLabeling(model_args)

    # Dump trained parameters into the model
    ModelLoader.load_pytorch(model, pickle_path + "saved_model.pkl")
    ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name))
    print("model loaded!")

    # Load test configuration
    test_args = ConfigSection()
    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})
    tester_args = ConfigSection()
    ConfigLoader("config.cfg", "").load_config(config_dir, {"test_seq_label_tester": tester_args})

    # Tester
    tester = SeqLabelTester(test_args)
    tester = SeqLabelTester(save_output=False,
                            save_loss=False,
                            save_best_dev=False,
                            batch_size=4,
                            use_cuda=False,
                            pickle_path=pickle_path,
                            model_name="seq_label_in_test.pkl",
                            print_every_step=1
                            )

    # Start testing with validation data
    tester.test(model, data_dev)
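With the hard-coded paths replaced by argparse flags, the same script can be pointed at other data, config, and pickle locations from the command line, e.g. `python <script>.py -s ./seq_label/ -t ./data_for_tests/people.txt -c ./data_for_tests/config -m seq_label_model.pkl -i data_for_tests/people_infer.txt` (the script's file name is not shown in this hunk; the flag values here are just the defaults defined above).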
@ -1,13 +1,24 @@
import sys
sys.path.append("..")
from fastNLP.fastnlp import FastNLP
from fastNLP.fastnlp import interpret_word_seg_results

PATH_TO_CWS_PICKLE_FILES = "/home/zyfeng/fastNLP/reproduction/chinese_word_segment/save/"

def word_seg():
    nlp = FastNLP("./data_for_tests/")
    nlp.load("seq_label_model")
    text = "这是最好的基于深度学习的中文分词系统。"
    result = nlp.run(text)
    print(result)
    print("FastNLP finished!")
    nlp = FastNLP(model_dir=PATH_TO_CWS_PICKLE_FILES)
    nlp.load("cws_basic_model", config_file="cws.cfg", section_name="POS_test")
    text = ["这是最好的基于深度学习的中文分词系统。",
            "大王叫我来巡山。",
            "我党多年来致力于改善人民生活水平。"]
    results = nlp.run(text)
    print(results)
    for example in results:
        words, labels = [], []
        for res in example:
            words.append(res[0])
            labels.append(res[1])
        print(interpret_word_seg_results(words, labels))


def text_class():
@ -19,5 +30,14 @@ def text_class():
    print("FastNLP finished!")


def test_word_seg_interpret():
    foo = [[('这', 'S'), ('是', 'S'), ('最', 'S'), ('好', 'S'), ('的', 'S'), ('基', 'B'), ('于', 'E'), ('深', 'B'), ('度', 'E'),
            ('学', 'B'), ('习', 'E'), ('的', 'S'), ('中', 'B'), ('文', 'E'), ('分', 'B'), ('词', 'E'), ('系', 'B'), ('统', 'E'),
            ('。', 'S')]]
    chars = [x[0] for x in foo[0]]
    labels = [x[1] for x in foo[0]]
    print(interpret_word_seg_results(chars, labels))


if __name__ == "__main__":
    text_class()
    word_seg()
@ -1,6 +1,7 @@
# Python: 3.5
# encoding: utf-8

import argparse
import os
import sys

@ -13,75 +14,105 @@ from fastNLP.loader.model_loader import ModelLoader
from fastNLP.core.preprocess import ClassPreprocess
from fastNLP.models.cnn_text_classification import CNNText
from fastNLP.saver.model_saver import ModelSaver
from fastNLP.core.optimizer import Optimizer
from fastNLP.core.loss import Loss

save_path = "./test_classification/"
data_dir = "./data_for_tests/"
train_file = 'text_classify.txt'
model_name = "model_class.pkl"
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--save", type=str, default="./test_classification/", help="path to save pickle files")
parser.add_argument("-t", "--train", type=str, default="./data_for_tests/text_classify.txt",
                    help="path to the training data")
parser.add_argument("-c", "--config", type=str, default="./data_for_tests/config", help="path to the config file")
parser.add_argument("-m", "--model_name", type=str, default="classify_model.pkl", help="the name of the model")

args = parser.parse_args()
save_dir = args.save
train_data_dir = args.train
model_name = args.model_name
config_dir = args.config


def infer():
    # load dataset
    print("Loading data...")
    ds_loader = ClassDatasetLoader("train", os.path.join(data_dir, train_file))
    ds_loader = ClassDatasetLoader("train", train_data_dir)
    data = ds_loader.load()
    unlabeled_data = [x[0] for x in data]

    # pre-process data
    pre = ClassPreprocess()
    vocab_size, n_classes = pre.run(data, pickle_path=save_path)
    print("vocabulary size:", vocab_size)
    print("number of classes:", n_classes)
    data = pre.run(data, pickle_path=save_dir)
    print("vocabulary size:", pre.vocab_size)
    print("number of classes:", pre.num_classes)

    model_args = ConfigSection()
    ConfigLoader.load_config("data_for_tests/config", {"text_class_model": model_args})
    # TODO: load from config file
    model_args["vocab_size"] = pre.vocab_size
    model_args["num_classes"] = pre.num_classes
    # ConfigLoader.load_config(config_dir, {"text_class_model": model_args})

    # construct model
    print("Building model...")
    cnn = CNNText(model_args)

    # Dump trained parameters into the model
    ModelLoader.load_pytorch(cnn, "./data_for_tests/saved_model.pkl")
    ModelLoader.load_pytorch(cnn, os.path.join(save_dir, model_name))
    print("model loaded!")

    infer = ClassificationInfer(data_dir)
    infer = ClassificationInfer(pickle_path=save_dir)
    results = infer.predict(cnn, unlabeled_data)
    print(results)


def train():
    train_args, model_args = ConfigSection(), ConfigSection()
    ConfigLoader.load_config("data_for_tests/config", {"text_class": train_args, "text_class_model": model_args})
    ConfigLoader.load_config(config_dir, {"text_class": train_args})

    # load dataset
    print("Loading data...")
    ds_loader = ClassDatasetLoader("train", os.path.join(data_dir, train_file))
    ds_loader = ClassDatasetLoader("train", train_data_dir)
    data = ds_loader.load()
    print(data[0])

    # pre-process data
    pre = ClassPreprocess()
    data_train = pre.run(data, pickle_path=save_path)
    data_train = pre.run(data, pickle_path=save_dir)
    print("vocabulary size:", pre.vocab_size)
    print("number of classes:", pre.num_classes)

    model_args["num_classes"] = pre.num_classes
    model_args["vocab_size"] = pre.vocab_size

    # construct model
    print("Building model...")
    model = CNNText(model_args)

    # ConfigSaver().save_config(config_dir, {"text_class_model": model_args})

    # train
    print("Training...")

    trainer = ClassificationTrainer(train_args)
    # 1
    # trainer = ClassificationTrainer(train_args)

    # 2
    trainer = ClassificationTrainer(epochs=train_args["epochs"],
                                    batch_size=train_args["batch_size"],
                                    validate=train_args["validate"],
                                    use_cuda=train_args["use_cuda"],
                                    pickle_path=save_dir,
                                    save_best_dev=train_args["save_best_dev"],
                                    model_name=model_name,
                                    loss=Loss("cross_entropy"),
                                    optimizer=Optimizer("SGD", lr=0.001, momentum=0.9))
    trainer.train(model, data_train)

    print("Training finished!")

    saver = ModelSaver("./data_for_tests/saved_model.pkl")
    saver = ModelSaver(os.path.join(save_dir, model_name))
    saver.save_pytorch(model)
    print("Model saved!")


if __name__ == "__main__":
    train()
    # infer()
    infer()