Mirror of https://gitee.com/fastnlp/fastNLP.git (synced 2024-12-02 04:07:35 +08:00)
[fix] Drop the unused "data" argument from Tester.make_batch; correct the spelling of "show_metrics" (was "show_matrices").
[add] PeopleDailyCorpusLoader, to parse the People's Daily corpus (Chinese word segmentation, POS tags, NER).
[update] Add a CWS + POS-tag interface to FastNLP; see the example in test_fastNLP.py.
[update] Bring README.md and readme_example.py up to date.
This commit is contained in:
parent
0430067faf
commit
32a036e8e6
54 README.md
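The headline feature is the new CWS + POS-tag interface. A minimal sketch, distilled from test_pos_tag() in the test_fastNLP.py hunk below; the model_dir path is a placeholder:

```python
from fastNLP.fastnlp import FastNLP, interpret_cws_pos_results

# Placeholder path: point this at a directory holding the pos_tag pickles.
nlp = FastNLP(model_dir="/path/to/pos_tag_pickle_files/")
nlp.load("pos_tag_model", config_file="pos_tag.config", section_name="pos_tag_model")

# nlp.run returns, per sentence, a list of (character, "cws-pos") pairs that
# interpret_cws_pos_results folds into (word, pos_tag) tuples.
for example in nlp.run(["这是最好的基于深度学习的中文分词系统。"]):
    chars = [res[0] for res in example]
    labels = [res[1] for res in example]
    print(interpret_cws_pos_results(chars, labels))
```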
@@ -27,15 +27,16 @@ fastNLP is a modular Natural Language Processing system based on PyTorch, for fa
 
 A typical fastNLP routine is composed of four phases: loading dataset, pre-processing data, constructing model and training model.
 ```python
-from fastNLP.models.base_model import BaseModel
-from fastNLP.modules import encoder
-from fastNLP.modules import aggregation
-from fastNLP.modules import decoder
-
-from fastNLP.loader.dataset_loader import ClassDatasetLoader
-from fastNLP.loader.preprocess import ClassPreprocess
+from fastNLP.core.preprocess import ClassPreprocess
+from fastNLP.core.predictor import ClassificationInfer
 from fastNLP.core.trainer import ClassificationTrainer
-from fastNLP.core.inference import ClassificationInfer
+from fastNLP.loader.dataset_loader import ClassDatasetLoader
+from fastNLP.models.base_model import BaseModel
+from fastNLP.modules import aggregation
+from fastNLP.modules import encoder
+from fastNLP.modules import decoder
+from fastNLP.core.loss import Loss
+from fastNLP.core.optimizer import Optimizer
 
 
 class ClassificationModel(BaseModel):
@@ -50,7 +51,7 @@ class ClassificationModel(BaseModel):
         self.enc = encoder.Conv(
             in_channels=300, out_channels=100, kernel_size=3)
         self.agg = aggregation.MaxPool()
-        self.dec = decoder.MLP(100, num_classes=num_classes)
+        self.dec = decoder.MLP(size_layer=[100, num_classes])
 
     def forward(self, x):
         x = self.emb(x)  # [N,L] -> [N,L,C]
@@ -60,16 +61,17 @@ class ClassificationModel(BaseModel):
         return x
 
 
-data_dir = 'data'  # directory to save data and model
-train_path = 'test/data_for_tests/text_classify.txt'  # training set file
+data_dir = 'save/'  # directory to save data and model
+train_path = './data_for_tests/text_classify.txt'  # training set file
 
 # load dataset
 ds_loader = ClassDatasetLoader("train", train_path)
 data = ds_loader.load()
 
 # pre-process dataset
-pre = ClassPreprocess(data_dir)
-vocab_size, n_classes = pre.process(data, "data_train.pkl")
+pre = ClassPreprocess()
+train_set, dev_set = pre.run(data, train_dev_split=0.3, pickle_path=data_dir)
+n_classes, vocab_size = pre.num_classes, pre.vocab_size
 
 # construct model
 model_args = {
@@ -78,28 +80,36 @@ model_args = {
 }
 model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)
 
-# train model
+# construct trainer
 train_args = {
-    "epochs": 20,
-    "batch_size": 50,
+    "epochs": 3,
+    "batch_size": 16,
     "pickle_path": data_dir,
     "validate": False,
     "save_best_dev": False,
     "model_saved_path": None,
     "use_cuda": True,
-    "learn_rate": 1e-3,
-    "momentum": 0.9}
-trainer = ClassificationTrainer(train_args)
-trainer.train(model)
+    "loss": Loss("cross_entropy"),
+    "optimizer": Optimizer("Adam", lr=0.001)
+}
+trainer = ClassificationTrainer(**train_args)
+
+# start training
+trainer.train(model, train_data=train_set, dev_data=dev_set)
 
 # predict using model
-seqs = [x[0] for x in data]
+data_infer = [x[0] for x in data]
 infer = ClassificationInfer(data_dir)
-labels_pred = infer.predict(model, seqs)
+labels_pred = infer.predict(model.cpu(), data_infer)
 print(labels_pred)
 ```
 
 
 ## Installation
 Run the following commands to install fastNLP package.
 ```shell
 pip install fastNLP
 ```
 
 ### Cloning From GitHub
@@ -86,7 +86,7 @@ class BaseTester(object):
         iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
         step = 0
 
-        for batch_x, batch_y in self.make_batch(iterator, dev_data):
+        for batch_x, batch_y in self.make_batch(iterator):
             with torch.no_grad():
                 prediction = self.data_forward(network, batch_x)
             eval_results = self.evaluate(prediction, batch_y)
@@ -123,14 +123,14 @@ class BaseTester(object):
         """Return a list of metrics. """
         raise NotImplementedError
 
-    def show_matrices(self):
+    def show_metrics(self):
         """This is called by Trainer to print evaluation results on dev set during training.
 
         :return print_str: str
         """
         raise NotImplementedError
 
-    def make_batch(self, iterator, data):
+    def make_batch(self, iterator):
         raise NotImplementedError
 
 
@@ -194,7 +194,7 @@ class SeqLabelTester(BaseTester):
         batch_accuracy = np.mean([x[1] for x in self.eval_history])
         return batch_loss, batch_accuracy
 
-    def show_matrices(self):
+    def show_metrics(self):
         """
         This is called by Trainer to print evaluation on dev set.
         :return print_str: str
@@ -202,7 +202,7 @@ class SeqLabelTester(BaseTester):
         loss, accuracy = self.metrics()
         return "dev loss={:.2f}, accuracy={:.2f}".format(loss, accuracy)
 
-    def make_batch(self, iterator, data):
+    def make_batch(self, iterator):
         return Action.make_batch(iterator, use_cuda=self.use_cuda, output_length=True)
 
 
@@ -216,7 +216,7 @@ class ClassificationTester(BaseTester):
         """
         super(ClassificationTester, self).__init__(**test_args)
 
-    def make_batch(self, iterator, data, max_len=None):
+    def make_batch(self, iterator, max_len=None):
         return Action.make_batch(iterator, use_cuda=self.use_cuda, max_len=max_len)
 
     def data_forward(self, network, x):
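The net effect on tester subclasses: make_batch now receives only the batch iterator, since the iterator already wraps the dev data. A hedged sketch of the new contract (MyTester is hypothetical; Action.make_batch is the helper used in the hunks above):

```python
class MyTester(BaseTester):
    def make_batch(self, iterator):
        # The iterator already wraps dev_data via Batchifier(RandomSampler(...)),
        # so the separate `data` argument became redundant and was dropped.
        return Action.make_batch(iterator, use_cuda=self.use_cuda)
```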
@@ -144,7 +144,7 @@ class BaseTrainer(object):
                 print("Saved better model selected by validation.")
                 logger.info("Saved better model selected by validation.")
 
-                valid_results = validator.show_matrices()
+                valid_results = validator.show_metrics()
                 print("[epoch {}] {}".format(epoch, valid_results))
                 logger.info("[epoch {}] {}".format(epoch, valid_results))
 
@@ -31,7 +31,16 @@ FastNLP_MODEL_COLLECTION = {
         "type": "seq_label",
         "config_file_name": "config",
         "config_section_name": "text_class_model"
     },
+    "pos_tag_model": {
+        "url": "",
+        "class": "sequence_modeling.AdvSeqLabel",
+        "pickle": "pos_tag_model_v_0.pkl",
+        "type": "seq_label",
+        "config_file_name": "pos_tag.config",
+        "config_section_name": "pos_tag_model"
+    }
 
 }
@@ -259,3 +268,38 @@ def interpret_word_seg_results(char_seq, label_seq):
     else:
         raise ValueError("invalid label {}".format(label[0]))
     return words
+
+
+def interpret_cws_pos_results(char_seq, label_seq):
+    """Transform model output into user-friendly contents.
+
+    :param char_seq: list of string
+    :param label_seq: list of string, the same length as char_seq.
+    :return outputs: list of tuple (words, pos_tag):
+    """
+
+    def pos_tag_check(seq):
+        """check whether all entries are the same """
+        return len(set(seq)) <= 1
+
+    word = []
+    word_pos = []
+    outputs = []
+    for char, label in zip(char_seq, label_seq):
+        tmp = label.split("-")
+        cws_label, pos_tag = tmp[0], tmp[1]
+
+        if cws_label == "B" or cws_label == "M":
+            word.append(char)
+            word_pos.append(pos_tag)
+        elif cws_label == "E":
+            word.append(char)
+            word_pos.append(pos_tag)
+            if not pos_tag_check(word_pos):
+                raise RuntimeError("character-wise pos tags inconsistent. ")
+            outputs.append(("".join(word), word_pos[0]))
+            word.clear()
+            word_pos.clear()
+        elif cws_label == "S":
+            outputs.append((char, pos_tag))
+    return outputs
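A small worked example of the new helper, assuming the "B/M/E/S-pos" label scheme shown in test_fastNLP.py below and that interpret_cws_pos_results is importable:

```python
chars = ['大', '王', '叫', '我']
labels = ['B-n', 'E-n', 'S-v', 'S-r']
# Characters tagged B..E are merged into one word; S marks a single-char word.
print(interpret_cws_pos_results(chars, labels))
# expected: [('大王', 'n'), ('叫', 'v'), ('我', 'r')]
```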
@@ -220,13 +220,57 @@ class LMDatasetLoader(DatasetLoader):
         return text.strip().split()
 
 
-if __name__ == "__main__":
-    """
-    data = POSDatasetLoader("xxx", "../../test/data_for_tests/people.txt").load_lines()
-    for example in data:
-        for w, l in zip(example[0], example[1]):
-            print(w, l)
-    """
-
-    ans = TokenizeDatasetLoader("xxx", "/home/zyfeng/Desktop/data/icwb2-data/training/test").load_pku()
-    print(ans)
+class PeopleDailyCorpusLoader(DatasetLoader):
+    """
+    People Daily Corpus: Chinese word segmentation, POS tag, NER
+    """
+
+    def __init__(self, data_path):
+        super(PeopleDailyCorpusLoader, self).__init__("people_daily_corpus", data_path)
+
+    def load(self):
+        with open(self.data_path, "r", encoding="utf-8") as f:
+            sents = f.readlines()
+
+        pos_tag_examples = []
+        ner_examples = []
+        for sent in sents:
+            inside_ne = False
+            sent_pos_tag = []
+            sent_words = []
+            sent_ner = []
+            words = sent.strip().split()[1:]
+            for word in words:
+                if "[" in word and "]" in word:
+                    ner_tag = "U"
+                    print(word)
+                elif "[" in word:
+                    inside_ne = True
+                    ner_tag = "B"
+                    word = word[1:]
+                elif "]" in word:
+                    ner_tag = "L"
+                    word = word[:word.index("]")]
+                    if inside_ne is True:
+                        inside_ne = False
+                    else:
+                        raise RuntimeError("only ] appears!")
+                else:
+                    if inside_ne is True:
+                        ner_tag = "I"
+                    else:
+                        ner_tag = "O"
+                tmp = word.split("/")
+                token, pos = tmp[0], tmp[1]
+                sent_ner.append(ner_tag)
+                sent_pos_tag.append(pos)
+                sent_words.append(token)
+            pos_tag_examples.append([sent_words, sent_pos_tag])
+            ner_examples.append([sent_words, sent_ner])
+        return pos_tag_examples, ner_examples
+
+
+if __name__ == "__main__":
+    loader = PeopleDailyCorpusLoader("/home/zyfeng/data/CWS_POS_TAG_NER_people_daily.txt")
+    pos, ner = loader.load()
+    print(pos[:10])
+    print(ner[:10])
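For reference, a sketch of the input PeopleDailyCorpusLoader expects: one sentence per line, a leading document id (dropped by split()[1:]), word/pos tokens, and bracketed named-entity chunks. The sample line below is hypothetical but follows that convention, and assumes the class above is importable:

```python
import tempfile

sample = "19980101-01-001-001/m [中共中央/nt 总书记/n]nt 发表/v 讲话/n\n"
with tempfile.NamedTemporaryFile("w", suffix=".txt", encoding="utf-8",
                                 delete=False) as f:
    f.write(sample)

loader = PeopleDailyCorpusLoader(f.name)
pos, ner = loader.load()
print(pos[0])  # [['中共中央', '总书记', '发表', '讲话'], ['nt', 'n', 'v', 'n']]
print(ner[0])  # [['中共中央', '总书记', '发表', '讲话'], ['B', 'L', 'O', 'O']]
```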
@@ -1,3 +1,4 @@
 from .CRF import ConditionalRandomField
+from .MLP import MLP
 
-__all__ = ["ConditionalRandomField"]
+__all__ = ["ConditionalRandomField", "MLP"]
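The newly exported MLP is what the updated README example plugs in as its decoder. A minimal sketch of the call shape used in this commit; only the size_layer argument (layer widths, input first) is shown in the diff, so any further options are unknown:

```python
from fastNLP.modules import decoder

# 100-dim aggregated features in, 5 output classes.
mlp = decoder.MLP(size_layer=[100, 5])
```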
@@ -1,114 +0,0 @@
-import sys
-
-sys.path.append("..")
-
-from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
-from fastNLP.core.trainer import SeqLabelTrainer
-from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader
-from fastNLP.core.preprocess import SeqLabelPreprocess, load_pickle
-from fastNLP.saver.model_saver import ModelSaver
-from fastNLP.loader.model_loader import ModelLoader
-from fastNLP.core.tester import SeqLabelTester
-from fastNLP.models.sequence_modeling import SeqLabeling
-from fastNLP.core.predictor import Predictor
-
-data_name = "pku_training.utf8"
-cws_data_path = "/home/zyfeng/data/pku_training.utf8"
-pickle_path = "./save/"
-data_infer_path = "/home/zyfeng/data/pku_test.utf8"
-
-
-def infer():
-    # Load infer configuration, the same as test
-    test_args = ConfigSection()
-    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})
-
-    # fetch dictionary size and number of labels from pickle files
-    word2index = load_pickle(pickle_path, "word2id.pkl")
-    test_args["vocab_size"] = len(word2index)
-    index2label = load_pickle(pickle_path, "id2class.pkl")
-    test_args["num_classes"] = len(index2label)
-
-    # Define the same model
-    model = SeqLabeling(test_args)
-
-    # Dump trained parameters into the model
-    ModelLoader.load_pytorch(model, "./data_for_tests/saved_model.pkl")
-    print("model loaded!")
-
-    # Data Loader
-    raw_data_loader = BaseLoader(data_name, data_infer_path)
-    infer_data = raw_data_loader.load_lines()
-
-    # Inference interface
-    infer = Predictor(pickle_path)
-    results = infer.predict(model, infer_data)
-
-    print(results)
-    print("Inference finished!")
-
-
-def train_test():
-    # Config Loader
-    train_args = ConfigSection()
-    test_args = ConfigSection()
-    ConfigLoader("good_name", "good_path").load_config("./cws.cfg", {"train": train_args, "test": test_args})
-
-    # Data Loader
-    loader = TokenizeDatasetLoader(data_name, cws_data_path)
-    train_data = loader.load_pku()
-
-    # Preprocessor
-    preprocess = SeqLabelPreprocess()
-    data_train, data_dev = preprocess.run(train_data, pickle_path=pickle_path, train_dev_split=0.3)
-    train_args["vocab_size"] = preprocess.vocab_size
-    train_args["num_classes"] = preprocess.num_classes
-
-    # Trainer
-    trainer = SeqLabelTrainer(train_args)
-
-    # Model
-    model = SeqLabeling(train_args)
-
-    # Start training
-    trainer.train(model, data_train, data_dev)
-    print("Training finished!")
-
-    # Saver
-    saver = ModelSaver("./save/saved_model.pkl")
-    saver.save_pytorch(model)
-    print("Model saved!")
-
-    # testing with validation set
-    test(data_dev)
-
-
-def test(test_data):
-    # Config Loader
-    train_args = ConfigSection()
-    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args})
-
-    # Define the same model
-    model = SeqLabeling(train_args)
-
-    # Dump trained parameters into the model
-    ModelLoader.load_pytorch(model, "./data_for_tests/saved_model.pkl")
-    print("model loaded!")
-
-    # Load test configuration
-    test_args = ConfigSection()
-    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})
-
-    # Tester
-    tester = SeqLabelTester(test_args)
-
-    # Start testing
-    tester.test(model, test_data)
-
-    # print test results
-    print(tester.show_matrices())
-    print("model tested!")
-
-
-if __name__ == "__main__":
-    train_test()
@@ -32,3 +32,15 @@ use_crf = true
 use_cuda = true
 rnn_hidden_units = 100
 word_emb_dim = 100
+
+[model]
+save_output = true
+validate_in_training = true
+save_dev_input = false
+save_loss = true
+batch_size = 640
+pickle_path = "./save/"
+use_crf = true
+use_cuda = true
+rnn_hidden_units = 100
+word_emb_dim = 100
@@ -125,7 +125,7 @@ def test():
     tester.test(model, dev_data)
 
     # print test results
-    print(tester.show_matrices())
+    print(tester.show_metrics())
     print("model tested!")
 
 
@@ -1,29 +1,35 @@
 [train]
-epochs = 10
-batch_size = 32
+epochs = 30
+batch_size = 64
 pickle_path = "./save/"
 validate = true
 save_best_dev = true
 model_saved_path = "./save/"
 rnn_hidden_units = 100
 rnn_layers = 2
 rnn_bi_direction = true
 word_emb_dim = 100
 dropout = 0.5
 use_crf = true
 use_cuda = true
+print_every_step = 10
 
 [test]
 save_output = true
 validate_in_training = true
 save_dev_input = false
 save_loss = true
-batch_size = 64
+batch_size = 640
 pickle_path = "./save/"
 rnn_hidden_units = 100
 rnn_layers = 1
 rnn_bi_direction = true
 word_emb_dim = 100
 dropout = 0.5
 use_crf = true
 use_cuda = true
+
+
+[POS_test]
+save_output = true
+validate_in_training = true
+save_dev_input = false
+save_loss = true
+batch_size = 640
+pickle_path = "./save/"
+use_crf = true
+use_cuda = true
+rnn_hidden_units = 100
+word_emb_dim = 100
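These named sections are consumed through ConfigLoader/ConfigSection; a sketch of the pattern, taken from train_pos_tag.py below:

```python
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection

test_args = ConfigSection()
# Load the [POS_test] block of the config into test_args; entries such as
# vocab_size and num_classes are filled in later at runtime from the pickles.
ConfigLoader("config", "").load_config("./pos_tag.cfg", {"POS_test": test_args})
print(test_args["batch_size"])  # 640, from the [POS_test] section above
```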
146 reproduction/pos_tag_model/train_pos_tag.py Normal file
@@ -0,0 +1,146 @@
+import os
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
+
+from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
+from fastNLP.core.trainer import SeqLabelTrainer
+from fastNLP.loader.dataset_loader import PeopleDailyCorpusLoader, BaseLoader
+from fastNLP.core.preprocess import SeqLabelPreprocess, load_pickle
+from fastNLP.saver.model_saver import ModelSaver
+from fastNLP.loader.model_loader import ModelLoader
+from fastNLP.core.tester import SeqLabelTester
+from fastNLP.models.sequence_modeling import AdvSeqLabel
+from fastNLP.core.predictor import SeqLabelInfer
+
+# not in the file's dir
+if len(os.path.dirname(__file__)) != 0:
+    os.chdir(os.path.dirname(__file__))
+datadir = "/home/zyfeng/data/"
+cfgfile = './pos_tag.cfg'
+data_name = "CWS_POS_TAG_NER_people_daily.txt"
+
+pos_tag_data_path = os.path.join(datadir, data_name)
+pickle_path = "save"
+data_infer_path = os.path.join(datadir, "infer.utf8")
+
+
+def infer():
+    # Config Loader
+    test_args = ConfigSection()
+    ConfigLoader("config", "").load_config(cfgfile, {"POS_test": test_args})
+
+    # fetch dictionary size and number of labels from pickle files
+    word2index = load_pickle(pickle_path, "word2id.pkl")
+    test_args["vocab_size"] = len(word2index)
+    index2label = load_pickle(pickle_path, "id2class.pkl")
+    test_args["num_classes"] = len(index2label)
+
+    # Define the same model
+    model = AdvSeqLabel(test_args)
+
+    try:
+        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
+        print('model loaded!')
+    except Exception as e:
+        print('cannot load model!')
+        raise
+
+    # Data Loader
+    raw_data_loader = BaseLoader(data_name, data_infer_path)
+    infer_data = raw_data_loader.load_lines()
+    print('data loaded')
+
+    # Inference interface
+    infer = SeqLabelInfer(pickle_path)
+    results = infer.predict(model, infer_data)
+
+    print(results)
+    print("Inference finished!")
+
+
+def train():
+    # Config Loader
+    train_args = ConfigSection()
+    test_args = ConfigSection()
+    ConfigLoader("good_name", "good_path").load_config(cfgfile, {"train": train_args, "test": test_args})
+
+    # Data Loader
+    loader = PeopleDailyCorpusLoader(pos_tag_data_path)
+    train_data, _ = loader.load()
+
+    # Preprocessor
+    preprocessor = SeqLabelPreprocess()
+    data_train, data_dev = preprocessor.run(train_data, pickle_path=pickle_path, train_dev_split=0.3)
+    train_args["vocab_size"] = preprocessor.vocab_size
+    train_args["num_classes"] = preprocessor.num_classes
+
+    # Trainer
+    trainer = SeqLabelTrainer(**train_args.data)
+
+    # Model
+    model = AdvSeqLabel(train_args)
+    try:
+        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
+        print('model parameter loaded!')
+    except Exception as e:
+        print("No saved model. Continue.")
+        pass
+
+    # Start training
+    trainer.train(model, data_train, data_dev)
+    print("Training finished!")
+
+    # Saver
+    saver = ModelSaver("./save/saved_model.pkl")
+    saver.save_pytorch(model)
+    print("Model saved!")
+
+
+def test():
+    # Config Loader
+    test_args = ConfigSection()
+    ConfigLoader("config", "").load_config(cfgfile, {"POS_test": test_args})
+
+    # fetch dictionary size and number of labels from pickle files
+    word2index = load_pickle(pickle_path, "word2id.pkl")
+    test_args["vocab_size"] = len(word2index)
+    index2label = load_pickle(pickle_path, "id2class.pkl")
+    test_args["num_classes"] = len(index2label)
+
+    # load dev data
+    dev_data = load_pickle(pickle_path, "data_dev.pkl")
+
+    # Define the same model
+    model = AdvSeqLabel(test_args)
+
+    # Dump trained parameters into the model
+    ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
+    print("model loaded!")
+
+    # Tester
+    tester = SeqLabelTester(**test_args.data)
+
+    # Start testing
+    tester.test(model, dev_data)
+
+    # print test results
+    print(tester.show_metrics())
+    print("model tested!")
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description='Run a chinese word segmentation model')
+    parser.add_argument('--mode', help='set the model\'s model', choices=['train', 'test', 'infer'])
+    args = parser.parse_args()
+    if args.mode == 'train':
+        train()
+    elif args.mode == 'test':
+        test()
+    elif args.mode == 'infer':
+        infer()
+    else:
+        print('no mode specified for model!')
+        parser.print_help()
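How the new script is meant to be driven, as a sketch; the datadir/cfgfile paths inside it are hard-coded to the author's machine and need adapting first:

```python
import subprocess
import sys

# Run each phase of reproduction/pos_tag_model/train_pos_tag.py in turn,
# matching the argparse choices defined at the bottom of the script.
for mode in ("train", "test", "infer"):
    subprocess.run([sys.executable, "train_pos_tag.py", "--mode", mode],
                   check=True)
```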
@@ -68,7 +68,7 @@ class MyNERTester(SeqLabelTester):
     def metrics(self):
         return np.mean(self.eval_history)
 
-    def show_matrices(self):
+    def show_metrics(self):
         return "dev accuracy={:.2f}".format(float(self.metrics()))
 
 
@@ -1,19 +1,13 @@
 # python: 3.5
 # pytorch: 0.4
 
-################
-# Test cross validation.
-################
-
-from fastNLP.loader.preprocess import ClassPreprocess
-
+from fastNLP.core.loss import Loss
+from fastNLP.core.optimizer import Optimizer
 from fastNLP.core.predictor import ClassificationInfer
+from fastNLP.core.preprocess import ClassPreprocess
 from fastNLP.core.trainer import ClassificationTrainer
 from fastNLP.loader.dataset_loader import ClassDatasetLoader
 from fastNLP.models.base_model import BaseModel
 from fastNLP.modules import aggregation
-from fastNLP.modules import encoder
 from fastNLP.modules import decoder
+from fastNLP.modules import encoder
 
 
 class ClassificationModel(BaseModel):
@@ -28,7 +22,7 @@ class ClassificationModel(BaseModel):
         self.enc = encoder.Conv(
             in_channels=300, out_channels=100, kernel_size=3)
         self.agg = aggregation.MaxPool()
-        self.dec = decoder.MLP(100, num_classes=num_classes)
+        self.dec = decoder.MLP(size_layer=[100, num_classes])
 
     def forward(self, x):
         x = self.emb(x)  # [N,L] -> [N,L,C]
@@ -38,18 +32,17 @@ class ClassificationModel(BaseModel):
         return x
 
 
-data_dir = 'data'  # directory to save data and model
-train_path = 'test/data_for_tests/text_classify.txt'  # training set file
+data_dir = 'save/'  # directory to save data and model
+train_path = './data_for_tests/text_classify.txt'  # training set file
 
 # load dataset
 ds_loader = ClassDatasetLoader("train", train_path)
 data = ds_loader.load()
 
 # pre-process dataset
-pre = ClassPreprocess(data, data_dir, cross_val=True, n_fold=5)
-# pre = ClassPreprocess(data, data_dir)
-n_classes = pre.num_classes
-vocab_size = pre.vocab_size
+pre = ClassPreprocess()
+train_set, dev_set = pre.run(data, train_dev_split=0.3, pickle_path=data_dir)
+n_classes, vocab_size = pre.num_classes, pre.vocab_size
 
 # construct model
 model_args = {
@@ -58,22 +51,25 @@ model_args = {
 }
 model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)
 
-# train model
+# construct trainer
 train_args = {
-    "epochs": 10,
-    "batch_size": 50,
+    "epochs": 3,
+    "batch_size": 16,
     "pickle_path": data_dir,
     "validate": False,
     "save_best_dev": False,
     "model_saved_path": None,
     "use_cuda": True,
-    "learn_rate": 1e-3,
-    "momentum": 0.9}
-trainer = ClassificationTrainer(train_args)
-# trainer.train(model, ['data_train.pkl', 'data_dev.pkl'])
-trainer.cross_validate(model)
+    "loss": Loss("cross_entropy"),
+    "optimizer": Optimizer("Adam", lr=0.001)
+}
+trainer = ClassificationTrainer(**train_args)
 
+# start training
+trainer.train(model, train_data=train_set, dev_data=dev_set)
 
 # predict using model
 data_infer = [x[0] for x in data]
 infer = ClassificationInfer(data_dir)
-labels_pred = infer.predict(model, data_infer)
+labels_pred = infer.predict(model.cpu(), data_infer)
 print(labels_pred)
@@ -134,7 +134,7 @@ def train_and_test():
     tester.test(model, data_dev)
 
     # print test results
-    print(tester.show_matrices())
+    print(tester.show_metrics())
     print("model tested!")
 
 
@@ -108,7 +108,7 @@ def train_test():
     tester.test(model, data_train)
 
     # print test results
-    print(tester.show_matrices())
+    print(tester.show_metrics())
     print("model tested!")
 
 
@@ -1,9 +1,12 @@
 import sys
 
 sys.path.append("..")
 from fastNLP.fastnlp import FastNLP
-from fastNLP.fastnlp import interpret_word_seg_results
+from fastNLP.fastnlp import interpret_word_seg_results, interpret_cws_pos_results
 
 PATH_TO_CWS_PICKLE_FILES = "/home/zyfeng/fastNLP/reproduction/chinese_word_segment/save/"
+PATH_TO_POS_TAG_PICKLE_FILES = "/home/zyfeng/data/crf_seg/"
 
 
 def word_seg():
     nlp = FastNLP(model_dir=PATH_TO_CWS_PICKLE_FILES)
@@ -39,5 +42,33 @@ def test_word_seg_interpret():
     print(interpret_word_seg_results(chars, labels))
 
 
+def test_interpret_cws_pos_results():
+    foo = [
+        [('这', 'S-r'), ('是', 'S-v'), ('最', 'S-d'), ('好', 'S-a'), ('的', 'S-u'), ('基', 'B-p'), ('于', 'E-p'), ('深', 'B-d'),
+         ('度', 'E-d'), ('学', 'B-v'), ('习', 'E-v'), ('的', 'S-u'), ('中', 'B-nz'), ('文', 'E-nz'), ('分', 'B-vn'),
+         ('词', 'E-vn'), ('系', 'B-n'), ('统', 'E-n'), ('。', 'S-w')]
+    ]
+    chars = [x[0] for x in foo[0]]
+    labels = [x[1] for x in foo[0]]
+    print(interpret_cws_pos_results(chars, labels))
+
+
+def test_pos_tag():
+    nlp = FastNLP(model_dir=PATH_TO_POS_TAG_PICKLE_FILES)
+    nlp.load("pos_tag_model", config_file="pos_tag.config", section_name="pos_tag_model")
+    text = ["这是最好的基于深度学习的中文分词系统。",
+            "大王叫我来巡山。",
+            "我党多年来致力于改善人民生活水平。"]
+    results = nlp.run(text)
+    for example in results:
+        words, labels = [], []
+        for res in example:
+            words.append(res[0])
+            labels.append(res[1])
+        print(interpret_cws_pos_results(words, labels))
+
+
 if __name__ == "__main__":
     word_seg()
@@ -5,7 +5,6 @@ from fastNLP.loader.dataset_loader import TokenizeDatasetLoader
 from fastNLP.models.sequence_modeling import SeqLabeling
 
 data_name = "pku_training.utf8"
-cws_data_path = "/home/zyfeng/Desktop/data/pku_training.utf8"
 pickle_path = "data_for_tests"
 
 
@@ -17,7 +16,8 @@ def foo():
     ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args})
 
     # Preprocessor
-    p = SeqLabelPreprocess(train_data, pickle_path)
+    p = SeqLabelPreprocess()
+    p.run(train_data)
     train_args["vocab_size"] = p.vocab_size
     train_args["num_classes"] = p.num_classes
 
@@ -30,7 +30,7 @@ def foo():
 
     print("start validation.")
     validator.test(model)
-    print(validator.show_matrices())
+    print(validator.show_metrics())
 
 
 if __name__ == "__main__":