diff --git a/.idea/deployment.xml b/.idea/deployment.xml new file mode 100644 index 00000000..820375f1 --- /dev/null +++ b/.idea/deployment.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/fastNLP.iml b/.idea/fastNLP.iml new file mode 100644 index 00000000..67116063 --- /dev/null +++ b/.idea/fastNLP.iml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..470d1301 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..8427b4d7 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md index d8dbb410..840c7a6a 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ # FastNLP -FastNLP +FastNLP \ No newline at end of file diff --git a/model/empty.txt b/action/README.md similarity index 93% rename from model/empty.txt rename to action/README.md index 942340d2..af0e39c3 100644 --- a/model/empty.txt +++ b/action/README.md @@ -1,4 +1,3 @@ -Some useful reference: SpaCy "Doc" https://github.com/explosion/spaCy/blob/75d2a05c2938f412f0fae44748374e4de19cc2be/spacy/tokens/doc.pyx#L80 diff --git a/action/action.py b/action/action.py index 40ec3142..8473a1a2 100644 --- a/action/action.py +++ b/action/action.py @@ -8,10 +8,10 @@ class Action(object): self.logger = None def load_config(self, args): - pass + raise NotImplementedError def load_dataset(self, args): - pass + raise NotImplementedError def log(self, args): self.logger.log(args) @@ -22,7 +22,7 @@ class Action(object): def batchify(self, X, Y=None): # a generator - pass + raise NotImplementedError def make_log(self, *args): - pass \ No newline at end of file + raise NotImplementedError diff --git a/action/tester.py b/action/tester.py index 0b78a782..591d75ce 100644 --- a/action/tester.py +++ b/action/tester.py @@ -29,7 +29,7 @@ class Tester(Action): for step in range(iterations): batch_x, batch_y = test_batch_generator.__next__() - # forward pass from test input to predicted output + # forward pass from tests input to predicted output prediction = network.data_forward(batch_x) # get the loss diff --git a/action/trainer.py b/action/trainer.py index 9ea27114..f4429a98 100644 --- a/action/trainer.py +++ b/action/trainer.py @@ -11,4 +11,4 @@ class Trainer(Action): self.arg = arg def train(self, args): - pass + raise NotImplementedError diff --git a/loader/config_loader.py b/loader/config_loader.py index b9cc0a43..fa1d446d 100644 --- a/loader/config_loader.py +++ b/loader/config_loader.py @@ -10,5 +10,4 @@ class ConfigLoader(BaseLoader): @staticmethod def parse(string): - # To do - return string + raise NotImplementedError diff --git a/model/base_model.py b/model/base_model.py new file mode 100644 index 00000000..3298c3d6 --- /dev/null +++ b/model/base_model.py @@ -0,0 +1,20 @@ +class BaseModel(object): + """base model for all models""" + + def __init__(self): + pass + + def prepare_input(self, data): + raise NotImplementedError + + def mode(self, test=False): + raise NotImplementedError + + def data_forward(self, x): + raise NotImplementedError + + def grad_backward(self): + raise NotImplementedError + + def loss(self, pred, truth): + raise NotImplementedError diff --git a/reproduction/CNN-sentence_classification/train.py b/reproduction/CNN-sentence_classification/train.py index 8b3801d2..ca4ea96e 100644 --- a/reproduction/CNN-sentence_classification/train.py +++ b/reproduction/CNN-sentence_classification/train.py @@ -1,17 +1,12 @@ import os -import torch + +import +import torch import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -import dataset as dst -from model import CNN_text +.dataset as dst +from .model import CNN_text from torch.autograd import Variable -from sklearn import cross_validation -from sklearn import datasets - - - # Hyper Parameters batch_size = 50 learning_rate = 0.0001 @@ -51,8 +46,7 @@ if cuda: criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate) - -#train and test +# train and tests best_acc = None for epoch in range(num_epochs): diff --git a/reproduction/Char-aware_NLM/test.py b/reproduction/Char-aware_NLM/test.py index 762ccf55..35e7ac54 100644 --- a/reproduction/Char-aware_NLM/test.py +++ b/reproduction/Char-aware_NLM/test.py @@ -1,13 +1,13 @@ import os -import torch -from torch.autograd import Variable -import torch.nn as nn -import torch.nn.functional as F -import numpy as np -from model import charLM -from utilities import * from collections import namedtuple +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from utilities import * + + def to_var(x): if torch.cuda.is_available(): x = x.cuda() @@ -76,18 +76,18 @@ if __name__ == "__main__": if os.path.exists("cache/data_sets.pt") is False: - - test_text = read_data("./test.txt") + + test_text = read_data("./tests.txt") test_set = np.array(text2vec(test_text, char_dict, max_word_len)) # Labels are next-word index in word_dict with the same length as inputs test_label = np.array([word_dict[w] for w in test_text[1:]] + [word_dict[test_text[-1]]]) - category = {"test": test_set, "tlabel":test_label} + category = {"tests": test_set, "tlabel": test_label} torch.save(category, "cache/data_sets.pt") else: data_sets = torch.load("cache/data_sets.pt") - test_set = data_sets["test"] + test_set = data_sets["tests"] test_label = data_sets["tlabel"] train_set = data_sets["tdata"] train_label = data_sets["trlabel"] diff --git a/reproduction/Char-aware_NLM/train.py b/reproduction/Char-aware_NLM/train.py index 99edb3c6..044786fe 100644 --- a/reproduction/Char-aware_NLM/train.py +++ b/reproduction/Char-aware_NLM/train.py @@ -13,8 +13,7 @@ from .utilities import * def preprocess(): - - word_dict, char_dict = create_word_char_dict("valid.txt", "train.txt", "test.txt") + word_dict, char_dict = create_word_char_dict("valid.txt", "train.txt", "tests.txt") num_words = len(word_dict) num_char = len(char_dict) char_dict["BOW"] = num_char+1 @@ -195,7 +194,7 @@ if __name__=="__main__": if os.path.exists("cache/data_sets.pt") is False: train_text = read_data("./train.txt") valid_text = read_data("./valid.txt") - test_text = read_data("./test.txt") + test_text = read_data("./tests.txt") train_set = np.array(text2vec(train_text, char_dict, max_word_len)) valid_set = np.array(text2vec(valid_text, char_dict, max_word_len)) @@ -206,14 +205,14 @@ if __name__=="__main__": valid_label = np.array([word_dict[w] for w in valid_text[1:]] + [word_dict[valid_text[-1]]]) test_label = np.array([word_dict[w] for w in test_text[1:]] + [word_dict[test_text[-1]]]) - category = {"tdata":train_set, "vdata":valid_set, "test": test_set, + category = {"tdata": train_set, "vdata": valid_set, "tests": test_set, "trlabel":train_label, "vlabel":valid_label, "tlabel":test_label} torch.save(category, "cache/data_sets.pt") else: data_sets = torch.load("cache/data_sets.pt") train_set = data_sets["tdata"] valid_set = data_sets["vdata"] - test_set = data_sets["test"] + test_set = data_sets["tests"] train_label = data_sets["trlabel"] valid_label = data_sets["vlabel"] test_label = data_sets["tlabel"] diff --git a/saver/base_saver.py b/saver/base_saver.py index d89e0935..d721da2c 100644 --- a/saver/base_saver.py +++ b/saver/base_saver.py @@ -5,10 +5,10 @@ class BaseSaver(object): self.save_path = save_path def save_bytes(self): - pass + raise NotImplementedError def save_str(self): - pass + raise NotImplementedError def compress(self): - pass + raise NotImplementedError diff --git a/saver/logger.py b/saver/logger.py index 9ff66866..d6af6f6a 100644 --- a/saver/logger.py +++ b/saver/logger.py @@ -8,4 +8,4 @@ class Logger(BaseSaver): super(Logger, self).__init__(save_path) def log(self, string): - pass + raise NotImplementedError diff --git a/test/test_loader.py b/tests/test_loader.py similarity index 100% rename from test/test_loader.py rename to tests/test_loader.py