Mirror of https://gitee.com/fastnlp/fastNLP.git (synced 2024-11-30 03:07:59 +08:00)

Commit message: first commit

This commit is contained in:
  parent cfc47392e8
  commit 6e1446beb1
action/__init__.py (Normal file, 0 lines)
action/action.py (Normal file, 16 lines)
@@ -0,0 +1,16 @@
class Action(object):
    """
    base class for Trainer and Tester
    """

    def __init__(self):
        super(Action, self).__init__()

    def load_config(self, args):
        pass

    def load_dataset(self, args):
        pass

    def log(self, args):
        pass
action/tester.py (Normal file, 9 lines)
@@ -0,0 +1,9 @@
from action.action import Action


class Tester(Action):
    """docstring for Tester"""

    def __init__(self, arg):
        super(Tester, self).__init__()
        self.arg = arg
action/trainer.py (Normal file, 14 lines)
@@ -0,0 +1,14 @@
from action.action import Action


class Trainer(Action):
    """
    Trainer for common training logic of all models
    """

    def __init__(self, arg):
        super(Trainer, self).__init__()
        self.arg = arg

    def train(self, args):
        pass
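Note (illustrative, not part of the commit): the Action base class defines the shared hooks that Trainer and Tester are expected to fill in. A subclass would be driven roughly as follows; the args dictionary and its keys are hypothetical.

# Hypothetical driver code; load_config/load_dataset/train are still stubs in this commit.
args = {"config_path": "config.cfg", "data_path": "data.conll"}

trainer = Trainer(arg=None)
trainer.load_config(args)   # inherited from Action, currently a no-op
trainer.load_dataset(args)  # inherited from Action, currently a no-op
trainer.train(args)         # to be filled in with the common training logic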
loader/__init__.py (Normal file, 0 lines)
loader/base_loader.py (Normal file, 15 lines)
@@ -0,0 +1,15 @@
class BaseLoader(object):
    """docstring for BaseLoader"""

    def __init__(self, data_name, data_path):
        super(BaseLoader, self).__init__()
        self.data_name = data_name
        self.data_path = data_path

    def load(self):
        """
        :return: string
        """
        with open(self.data_path, "r", encoding="utf-8") as f:
            text = f.read()
        return text
loader/config_loader.py (Normal file, 14 lines)
@@ -0,0 +1,14 @@
from loader.base_loader import BaseLoader


class ConfigLoader(BaseLoader):
    """loader for configuration files"""

    def __init__(self, data_name, data_path):
        super(ConfigLoader, self).__init__(data_name, data_path)
        self.config = self.parse(super(ConfigLoader, self).load())

    @staticmethod
    def parse(string):
        # TODO: implement parsing of configuration files
        return string
loader/dataset_loader.py (Normal file, 47 lines)
@@ -0,0 +1,47 @@
from loader.base_loader import BaseLoader


class DatasetLoader(BaseLoader):
    """loader for data sets"""

    def __init__(self, data_name, data_path):
        super(DatasetLoader, self).__init__(data_name, data_path)


class ConllLoader(DatasetLoader):
    """loader for conll format files"""

    def __init__(self, data_name, data_path):
        """
        :param str data_name: the name of the conll data set
        :param str data_path: the path to the conll data set
        """
        super(ConllLoader, self).__init__(data_name, data_path)
        self.data_set = self.parse(self.load())

    def load(self):
        """
        :return: list lines: all lines in a conll file
        """
        with open(self.data_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
        return lines

    @staticmethod
    def parse(lines):
        """
        :param list lines: a list containing all lines in a conll file
        :return: a 3D list
        """
        sentences = list()
        tokens = list()
        for line in lines:
            if line[0] == "#":
                # skip the comments
                continue
            if line == "\n":
                sentences.append(tokens)
                tokens = []
                continue
            tokens.append(line.split())
        return sentences
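Note (illustrative, not part of the commit): ConllLoader.parse groups the token lines between blank lines into one list per sentence, so a small CoNLL snippet comes back as a 3D list of sentences, tokens, and fields. The sample lines below are made up.

# Uses the ConllLoader defined above; the input mimics a tiny two-sentence CoNLL file.
lines = [
    "# newdoc id = example\n",
    "1\tHello\tINTJ\n",
    "2\t!\tPUNCT\n",
    "\n",
    "1\tBye\tINTJ\n",
    "\n",
]
print(ConllLoader.parse(lines))
# [[['1', 'Hello', 'INTJ'], ['2', '!', 'PUNCT']], [['1', 'Bye', 'INTJ']]]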
loader/embed_loader.py (Normal file, 8 lines)
@@ -0,0 +1,8 @@
from loader.base_loader import BaseLoader


class EmbedLoader(BaseLoader):
    """docstring for EmbedLoader"""

    def __init__(self, data_name, data_path):
        super(EmbedLoader, self).__init__(data_name, data_path)
model/empty.txt (Normal file, 0 lines)
@@ -1,21 +1,21 @@
MIT License

Copyright (c) 2017

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,40 +1,40 @@

# PyTorch-Character-Aware-Neural-Language-Model

This is the PyTorch implementation of the character-aware neural language model proposed in this [paper](https://arxiv.org/abs/1508.06615) by Yoon Kim.

## Requirements
The code is run and tested with **Python 3.5.2** and **PyTorch 0.3.1**.

## HyperParameters
| HyperParam | value |
| ------ | :-------|
| LSTM batch size | 20 |
| LSTM sequence length | 35 |
| LSTM hidden units | 300 |
| epochs | 35 |
| initial learning rate | 1.0 |
| character embedding dimension | 15 |

## Demo
Train the model with split train/valid/test data.

`python train.py`

The trained model will be saved in `cache/net.pkl`.
Test the model.

`python test.py`

Best result on test set:
PPL=127.2163
cross entropy loss=4.8459

## Acknowledgement
This implementation borrowed ideas from

https://github.com/jarfo/kchar

https://github.com/cronos123/Character-Aware-Neural-Language-Models
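Note (illustrative, not part of the original README): the two reported numbers are consistent with each other, since perplexity is simply the exponential of the average cross-entropy loss.

import math

# exp(cross entropy) gives perplexity: exp(4.8459) is roughly 127.2, matching the reported PPL.
print(math.exp(4.8459))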
@@ -1,148 +1,148 @@

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F


class Highway(nn.Module):
    """Highway network"""
    def __init__(self, input_size):
        super(Highway, self).__init__()
        self.fc1 = nn.Linear(input_size, input_size, bias=True)
        self.fc2 = nn.Linear(input_size, input_size, bias=True)

    def forward(self, x):
        t = F.sigmoid(self.fc1(x))
        return torch.mul(t, F.relu(self.fc2(x))) + torch.mul(1 - t, x)


class charLM(nn.Module):
    """CNN + highway network + LSTM
    # Input:
        4D tensor with shape [batch_size, in_channel, height, width]
    # Output:
        2D Tensor with shape [batch_size, vocab_size]
    # Arguments:
        char_emb_dim: the size of each character's embedding
        word_emb_dim: the size of each word's embedding
        vocab_size: num of unique words
        num_char: num of characters
        use_gpu: True or False
    """
    def __init__(self, char_emb_dim, word_emb_dim,
                 vocab_size, num_char, use_gpu):
        super(charLM, self).__init__()
        self.char_emb_dim = char_emb_dim
        self.word_emb_dim = word_emb_dim
        self.vocab_size = vocab_size

        # char embedding layer
        self.char_embed = nn.Embedding(num_char, char_emb_dim)

        # convolutions of filters with different sizes
        self.convolutions = []

        # list of tuples: (number of filters, width)
        self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)]

        for out_channel, filter_width in self.filter_num_width:
            self.convolutions.append(
                nn.Conv2d(
                    1,            # in_channel
                    out_channel,  # out_channel
                    kernel_size=(char_emb_dim, filter_width),  # (height, width)
                    bias=True
                )
            )

        self.highway_input_dim = sum([x for x, y in self.filter_num_width])

        self.batch_norm = nn.BatchNorm1d(self.highway_input_dim, affine=False)

        # highway net
        self.highway1 = Highway(self.highway_input_dim)
        self.highway2 = Highway(self.highway_input_dim)

        # LSTM
        self.lstm_num_layers = 2

        self.lstm = nn.LSTM(input_size=self.highway_input_dim,
                            hidden_size=self.word_emb_dim,
                            num_layers=self.lstm_num_layers,
                            bias=True,
                            dropout=0.5,
                            batch_first=True)

        # output layer
        self.dropout = nn.Dropout(p=0.5)
        self.linear = nn.Linear(self.word_emb_dim, self.vocab_size)

        if use_gpu is True:
            for x in range(len(self.convolutions)):
                self.convolutions[x] = self.convolutions[x].cuda()
            self.highway1 = self.highway1.cuda()
            self.highway2 = self.highway2.cuda()
            self.lstm = self.lstm.cuda()
            self.dropout = self.dropout.cuda()
            self.char_embed = self.char_embed.cuda()
            self.linear = self.linear.cuda()
            self.batch_norm = self.batch_norm.cuda()

    def forward(self, x, hidden):
        # Input: Variable of Tensor with shape [num_seq, seq_len, max_word_len+2]
        # Return: Variable of Tensor with shape [num_words, len(word_dict)]
        lstm_batch_size = x.size()[0]
        lstm_seq_len = x.size()[1]

        x = x.contiguous().view(-1, x.size()[2])
        # [num_seq*seq_len, max_word_len+2]

        x = self.char_embed(x)
        # [num_seq*seq_len, max_word_len+2, char_emb_dim]

        x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3)
        # [num_seq*seq_len, 1, char_emb_dim, max_word_len+2]

        x = self.conv_layers(x)
        # [num_seq*seq_len, total_num_filters]

        x = self.batch_norm(x)
        # [num_seq*seq_len, total_num_filters]

        x = self.highway1(x)
        x = self.highway2(x)
        # [num_seq*seq_len, total_num_filters]

        x = x.contiguous().view(lstm_batch_size, lstm_seq_len, -1)
        # [num_seq, seq_len, total_num_filters]

        x, hidden = self.lstm(x, hidden)
        # [num_seq, seq_len, hidden_size] (batch_first=True)

        x = self.dropout(x)
        # [num_seq, seq_len, hidden_size]

        x = x.contiguous().view(lstm_batch_size * lstm_seq_len, -1)
        # [num_seq*seq_len, hidden_size]

        x = self.linear(x)
        # [num_seq*seq_len, vocab_size]
        return x, hidden

    def conv_layers(self, x):
        chosen_list = list()
        for conv in self.convolutions:
            feature_map = F.tanh(conv(x))
            # (batch_size, out_channel, 1, max_word_len-width+1)
            chosen = torch.max(feature_map, 3)[0]
            # (batch_size, out_channel, 1)
            chosen = chosen.squeeze()
            # (batch_size, out_channel)
            chosen_list.append(chosen)

        # (batch_size, total_num_filters)
        return torch.cat(chosen_list, 1)
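Note (illustrative, not part of the commit): a minimal CPU shape check of the charLM defined above, assuming PyTorch 0.3.1-era Variable semantics. All sizes and the vocabulary below are made up; real values come from the preprocessed dictionaries.

import torch
from torch.autograd import Variable

# Hypothetical sizes for illustration only.
char_emb_dim, word_emb_dim = 15, 300
vocab_size, num_char = 1000, 60
max_word_len = 10

net = charLM(char_emb_dim, word_emb_dim, vocab_size, num_char, use_gpu=False)

batch_size, seq_len = 20, 35
x = Variable(torch.LongTensor(batch_size, seq_len, max_word_len + 2).random_(0, num_char))
hidden = (Variable(torch.zeros(2, batch_size, word_emb_dim)),
          Variable(torch.zeros(2, batch_size, word_emb_dim)))

out, hidden = net(x, hidden)
print(out.size())  # (batch_size * seq_len, vocab_size) == (700, 1000)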
@@ -1,123 +1,123 @@
import os
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from model import charLM
from utilities import *
from collections import namedtuple


def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)


def test(net, data, opt):
    net.eval()

    test_input = torch.from_numpy(data.test_input)
    test_label = torch.from_numpy(data.test_label)

    num_seq = test_input.size()[0] // opt.lstm_seq_len
    test_input = test_input[:num_seq*opt.lstm_seq_len, :]
    # [num_seq, seq_len, max_word_len+2]
    test_input = test_input.view(-1, opt.lstm_seq_len, opt.max_word_len+2)

    criterion = nn.CrossEntropyLoss()

    loss_list = []
    num_hits = 0
    total = 0
    iterations = test_input.size()[0] // opt.lstm_batch_size
    test_generator = batch_generator(test_input, opt.lstm_batch_size)
    label_generator = batch_generator(test_label, opt.lstm_batch_size*opt.lstm_seq_len)

    hidden = (to_var(torch.zeros(2, opt.lstm_batch_size, opt.word_embed_dim)),
              to_var(torch.zeros(2, opt.lstm_batch_size, opt.word_embed_dim)))

    add_loss = 0.0
    for t in range(iterations):
        batch_input = test_generator.__next__()
        batch_label = label_generator.__next__()

        net.zero_grad()
        hidden = [state.detach() for state in hidden]
        test_output, hidden = net(to_var(batch_input), hidden)

        test_loss = criterion(test_output, to_var(batch_label)).data
        loss_list.append(test_loss)
        add_loss += test_loss

    print("Test Loss={0:.4f}".format(float(add_loss) / iterations))
    print("Test PPL={0:.4f}".format(float(np.exp(add_loss / iterations))))


#############################################################

if __name__ == "__main__":

    word_embed_dim = 300
    char_embedding_dim = 15

    if os.path.exists("cache/prep.pt") is False:
        print("Cannot find prep.pt")

    objetcs = torch.load("cache/prep.pt")

    word_dict = objetcs["word_dict"]
    char_dict = objetcs["char_dict"]
    reverse_word_dict = objetcs["reverse_word_dict"]
    max_word_len = objetcs["max_word_len"]
    num_words = len(word_dict)

    print("word/char dictionary built. Start making inputs.")

    if os.path.exists("cache/data_sets.pt") is False:

        test_text = read_data("./test.txt")
        test_set = np.array(text2vec(test_text, char_dict, max_word_len))

        # Labels are next-word index in word_dict with the same length as inputs
        test_label = np.array([word_dict[w] for w in test_text[1:]] + [word_dict[test_text[-1]]])

        category = {"test": test_set, "tlabel": test_label}
        torch.save(category, "cache/data_sets.pt")
    else:
        data_sets = torch.load("cache/data_sets.pt")
        test_set = data_sets["test"]
        test_label = data_sets["tlabel"]
        train_set = data_sets["tdata"]
        train_label = data_sets["trlabel"]

    DataTuple = namedtuple("DataTuple", "test_input test_label train_input train_label")
    data = DataTuple(test_input=test_set,
                     test_label=test_label, train_label=train_label, train_input=train_set)

    print("Loaded data sets. Start building network.")

    USE_GPU = True
    cnn_batch_size = 700
    lstm_seq_len = 35
    lstm_batch_size = 20

    net = torch.load("cache/net.pkl")

    Options = namedtuple("Options", ["cnn_batch_size", "lstm_seq_len",
                                     "max_word_len", "lstm_batch_size", "word_embed_dim"])
    opt = Options(cnn_batch_size=lstm_seq_len*lstm_batch_size,
                  lstm_seq_len=lstm_seq_len,
                  max_word_len=max_word_len,
                  lstm_batch_size=lstm_batch_size,
                  word_embed_dim=word_embed_dim)

    print("Network built. Start testing.")

    test(net, data, opt)
(File diff suppressed because it is too large.)
@@ -1,268 +1,268 @@

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import os
from model import charLM
from utilities import *
from collections import namedtuple
from test import test


def preprocess():

    word_dict, char_dict = create_word_char_dict("valid.txt", "train.txt", "test.txt")
    num_words = len(word_dict)
    num_char = len(char_dict)
    char_dict["BOW"] = num_char+1
    char_dict["EOW"] = num_char+2
    char_dict["PAD"] = 0

    # dict of (int, string)
    reverse_word_dict = {value: key for key, value in word_dict.items()}
    max_word_len = max([len(word) for word in word_dict])

    objects = {
        "word_dict": word_dict,
        "char_dict": char_dict,
        "reverse_word_dict": reverse_word_dict,
        "max_word_len": max_word_len
    }

    torch.save(objects, "cache/prep.pt")
    print("Preprocess done.")


def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)


def train(net, data, opt):

    torch.manual_seed(1024)

    train_input = torch.from_numpy(data.train_input)
    train_label = torch.from_numpy(data.train_label)
    valid_input = torch.from_numpy(data.valid_input)
    valid_label = torch.from_numpy(data.valid_label)

    # [num_seq, seq_len, max_word_len+2]
    num_seq = train_input.size()[0] // opt.lstm_seq_len
    train_input = train_input[:num_seq*opt.lstm_seq_len, :]
    train_input = train_input.view(-1, opt.lstm_seq_len, opt.max_word_len+2)

    num_seq = valid_input.size()[0] // opt.lstm_seq_len
    valid_input = valid_input[:num_seq*opt.lstm_seq_len, :]
    valid_input = valid_input.view(-1, opt.lstm_seq_len, opt.max_word_len+2)

    num_epoch = opt.epochs
    num_iter_per_epoch = train_input.size()[0] // opt.lstm_batch_size

    learning_rate = opt.init_lr
    old_PPL = 100000
    best_PPL = 100000

    # Log-SoftMax
    criterion = nn.CrossEntropyLoss()

    # word_emb_dim == hidden_size / num of hidden units
    hidden = (to_var(torch.zeros(2, opt.lstm_batch_size, opt.word_embed_dim)),
              to_var(torch.zeros(2, opt.lstm_batch_size, opt.word_embed_dim)))

    for epoch in range(num_epoch):

        ################ Validation ####################
        net.eval()
        loss_batch = []
        PPL_batch = []
        iterations = valid_input.size()[0] // opt.lstm_batch_size

        valid_generator = batch_generator(valid_input, opt.lstm_batch_size)
        vlabel_generator = batch_generator(valid_label, opt.lstm_batch_size*opt.lstm_seq_len)

        for t in range(iterations):
            batch_input = valid_generator.__next__()
            batch_label = vlabel_generator.__next__()

            hidden = [state.detach() for state in hidden]
            valid_output, hidden = net(to_var(batch_input), hidden)

            length = valid_output.size()[0]

            # [num_sample-1, len(word_dict)] vs [num_sample-1]
            valid_loss = criterion(valid_output, to_var(batch_label))

            PPL = torch.exp(valid_loss.data)

            loss_batch.append(float(valid_loss))
            PPL_batch.append(float(PPL))

        PPL = np.mean(PPL_batch)
        print("[epoch {}] valid PPL={}".format(epoch, PPL))
        print("valid loss={}".format(np.mean(loss_batch)))
        print("PPL decrease={}".format(float(old_PPL - PPL)))

        # Preserve the best model
        if best_PPL > PPL:
            best_PPL = PPL
            torch.save(net.state_dict(), "cache/model.pt")
            torch.save(net, "cache/net.pkl")

        # Adjust the learning rate
        if float(old_PPL - PPL) <= 1.0:
            learning_rate /= 2
            print("halved lr:{}".format(learning_rate))

        old_PPL = PPL

        ##################################################
        #################### Training ####################
        net.train()
        optimizer = optim.SGD(net.parameters(),
                              lr=learning_rate,
                              momentum=0.85)

        # split the first dim
        input_generator = batch_generator(train_input, opt.lstm_batch_size)
        label_generator = batch_generator(train_label, opt.lstm_batch_size*opt.lstm_seq_len)

        for t in range(num_iter_per_epoch):
            batch_input = input_generator.__next__()
            batch_label = label_generator.__next__()

            # detach hidden state of LSTM from last batch
            hidden = [state.detach() for state in hidden]

            output, hidden = net(to_var(batch_input), hidden)
            # [num_word, vocab_size]

            loss = criterion(output, to_var(batch_label))

            net.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm(net.parameters(), 5, norm_type=2)
            optimizer.step()

            if (t+1) % 100 == 0:
                print("[epoch {} step {}] train loss={}, Perplexity={}".format(epoch+1,
                      t+1, float(loss.data), float(np.exp(loss.data))))

    torch.save(net.state_dict(), "cache/model.pt")
    print("Training finished.")


################################################################

if __name__ == "__main__":

    word_embed_dim = 300
    char_embedding_dim = 15

    if os.path.exists("cache/prep.pt") is False:
        preprocess()

    objetcs = torch.load("cache/prep.pt")

    word_dict = objetcs["word_dict"]
    char_dict = objetcs["char_dict"]
    reverse_word_dict = objetcs["reverse_word_dict"]
    max_word_len = objetcs["max_word_len"]
    num_words = len(word_dict)

    print("word/char dictionary built. Start making inputs.")

    if os.path.exists("cache/data_sets.pt") is False:
        train_text = read_data("./train.txt")
        valid_text = read_data("./valid.txt")
        test_text = read_data("./test.txt")

        train_set = np.array(text2vec(train_text, char_dict, max_word_len))
        valid_set = np.array(text2vec(valid_text, char_dict, max_word_len))
        test_set = np.array(text2vec(test_text, char_dict, max_word_len))

        # Labels are next-word index in word_dict with the same length as inputs
        train_label = np.array([word_dict[w] for w in train_text[1:]] + [word_dict[train_text[-1]]])
        valid_label = np.array([word_dict[w] for w in valid_text[1:]] + [word_dict[valid_text[-1]]])
        test_label = np.array([word_dict[w] for w in test_text[1:]] + [word_dict[test_text[-1]]])

        category = {"tdata": train_set, "vdata": valid_set, "test": test_set,
                    "trlabel": train_label, "vlabel": valid_label, "tlabel": test_label}
        torch.save(category, "cache/data_sets.pt")
    else:
        data_sets = torch.load("cache/data_sets.pt")
        train_set = data_sets["tdata"]
        valid_set = data_sets["vdata"]
        test_set = data_sets["test"]
        train_label = data_sets["trlabel"]
        valid_label = data_sets["vlabel"]
        test_label = data_sets["tlabel"]

    DataTuple = namedtuple("DataTuple",
                           "train_input train_label valid_input valid_label test_input test_label")
    data = DataTuple(train_input=train_set,
                     train_label=train_label,
                     valid_input=valid_set,
                     valid_label=valid_label,
                     test_input=test_set,
                     test_label=test_label)

    print("Loaded data sets. Start building network.")

    USE_GPU = True
    cnn_batch_size = 700
    lstm_seq_len = 35
    lstm_batch_size = 20
    # cnn_batch_size == lstm_seq_len * lstm_batch_size

    net = charLM(char_embedding_dim,
                 word_embed_dim,
                 num_words,
                 len(char_dict),
                 use_gpu=USE_GPU)

    for param in net.parameters():
        nn.init.uniform(param.data, -0.05, 0.05)

    Options = namedtuple("Options", [
        "cnn_batch_size", "init_lr", "lstm_seq_len",
        "max_word_len", "lstm_batch_size", "epochs",
        "word_embed_dim"])
    opt = Options(cnn_batch_size=lstm_seq_len*lstm_batch_size,
                  init_lr=1.0,
                  lstm_seq_len=lstm_seq_len,
                  max_word_len=max_word_len,
                  lstm_batch_size=lstm_batch_size,
                  epochs=35,
                  word_embed_dim=word_embed_dim)

    print("Network built. Start training.")

    # You can stop training anytime by "ctrl+C"
    try:
        train(net, data, opt)
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    torch.save(net, "cache/net.pkl")
    print("save net")

    test(net, data, opt)
(File diff suppressed because it is too large.)
@@ -1,86 +1,86 @@
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F


def batch_generator(x, batch_size):
    # x: [num_words, in_channel, height, width]
    # partitions x into batches
    num_step = x.size()[0] // batch_size
    for t in range(num_step):
        yield x[t*batch_size:(t+1)*batch_size]


def text2vec(words, char_dict, max_word_len):
    """ Return list of list of int """
    word_vec = []
    for word in words:
        vec = [char_dict[ch] for ch in word]
        if len(vec) < max_word_len:
            vec += [char_dict["PAD"] for _ in range(max_word_len - len(vec))]
        vec = [char_dict["BOW"]] + vec + [char_dict["EOW"]]
        word_vec.append(vec)
    return word_vec


def seq2vec(input_words, char_embedding, char_embedding_dim, char_table):
    """ convert the input strings into character embeddings """
    # input_words == list of string
    # char_embedding == torch.nn.Embedding
    # char_embedding_dim == int
    # char_table == list of unique chars
    # Returns: tensor of shape [len(input_words), char_embedding_dim, max_word_len+2]
    max_word_len = max([len(word) for word in input_words])
    print("max_word_len={}".format(max_word_len))
    tensor_list = []

    start_column = torch.ones(char_embedding_dim, 1)
    end_column = torch.ones(char_embedding_dim, 1)

    for word in input_words:
        # convert string to word embedding
        word_encoding = char_embedding_lookup(word, char_embedding, char_table)
        # add start and end columns
        word_encoding = torch.cat([start_column, word_encoding, end_column], 1)
        # zero-pad right columns
        word_encoding = F.pad(word_encoding, (0, max_word_len-word_encoding.size()[1]+2)).data
        # create dimension
        word_encoding = word_encoding.unsqueeze(0)

        tensor_list.append(word_encoding)

    return torch.cat(tensor_list, 0)


def read_data(file_name):
    # Return: list of strings
    with open(file_name, 'r') as f:
        corpus = f.read().lower()
    import re
    corpus = re.sub(r"<unk>", "unk", corpus)
    return corpus.split()


def get_char_dict(vocabulary):
    # vocabulary == dict of (word, int)
    # Return: dict of (char, int), starting from 1
    char_dict = dict()
    count = 1
    for word in vocabulary:
        for ch in word:
            if ch not in char_dict:
                char_dict[ch] = count
                count += 1
    return char_dict


def create_word_char_dict(*file_name):
    text = []
    for file in file_name:
        text += read_data(file)
    word_dict = {word: ix for ix, word in enumerate(set(text))}
    char_dict = get_char_dict(word_dict)
    return word_dict, char_dict
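Note (illustrative, not part of the commit): a small example of how text2vec pads each word to max_word_len with PAD indices and wraps it in BOW/EOW markers, and how batch_generator then slices the resulting tensor along its first dimension. The toy char_dict is made up; real dictionaries come from create_word_char_dict().

import torch

# Uses text2vec and batch_generator defined above.
char_dict = {"a": 1, "b": 2, "c": 3, "PAD": 0, "BOW": 4, "EOW": 5}
max_word_len = 4

vecs = text2vec(["ab", "abc"], char_dict, max_word_len)
print(vecs)  # [[4, 1, 2, 0, 0, 5], [4, 1, 2, 3, 0, 5]], i.e. max_word_len + 2 indices per word

x = torch.LongTensor(vecs)
for batch in batch_generator(x, batch_size=1):
    print(batch.size())  # (1, 6)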
(File diff suppressed because it is too large.)
saver/empty.txt (Normal file, 0 lines)
test/test_loader.py (Normal file, 10 lines)
@@ -0,0 +1,10 @@
import unittest


class MyTestCase(unittest.TestCase):
    def test_something(self):
        self.assertEqual(True, False)


if __name__ == '__main__':
    unittest.main()