mirror of
https://gitee.com/fastnlp/fastNLP.git
synced 2024-12-02 04:07:35 +08:00
add TC/MTL16Loader
This commit is contained in:
parent
97b5909f5d
commit
c78811f87f
75
reproduction/text_classification/data/MTL16Loader.py
Normal file
75
reproduction/text_classification/data/MTL16Loader.py
Normal file
@ -0,0 +1,75 @@
|
||||
from fastNLP.io.embed_loader import EmbeddingOption, EmbedLoader
|
||||
from fastNLP.core.vocabulary import VocabularyOption
|
||||
from fastNLP.io.base_loader import DataSetLoader, DataInfo
|
||||
from typing import Union, Dict, List, Iterator
|
||||
from fastNLP import DataSet
|
||||
from fastNLP import Instance
|
||||
from fastNLP import Vocabulary
|
||||
from fastNLP import Const
|
||||
from reproduction.utils import check_dataloader_paths
|
||||
from functools import partial
|
||||
|
||||
class MTL16Loader(DataSetLoader):
    """
    Load the MTL16 dataset. Each loaded DataSet contains the following fields:

        words: list(str), the tokens of the text to classify
        target: str, the label of the text

    Every non-blank line of a data file is expected to hold a label, a tab
    character, and then the whitespace-tokenized text.

    Data source: https://pan.baidu.com/s/1c2L6vdA

    """

    def __init__(self):
        super(MTL16Loader, self).__init__()

    def _load(self, path):
        """
        Read one MTL16 file into a DataSet.

        Blank lines are ignored. Lines without a tab separator are skipped with
        a warning that names the file and line number, instead of aborting the
        whole load with an ``IndexError``.

        :param path: path of a UTF-8 encoded text file.
        :return: a DataSet with one Instance (``words``, ``target``) per valid line.
        :raises RuntimeError: if the file yields no valid instance.
        """
        # Function-scope import keeps this block independent of the file-level imports.
        import warnings

        dataset = DataSet()
        with open(path, 'r', encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                # Split on the FIRST tab only, so text that itself contains tabs
                # is kept whole; the later split() treats those tabs as whitespace.
                target, sep, text = line.partition('\t')
                if not sep:
                    warnings.warn(f"{path}:{line_no} has no tab-separated text, line skipped.")
                    continue
                dataset.append(Instance(words=text.split(), target=target))
        if len(dataset) == 0:
            raise RuntimeError(f"{path} has no valid data.")

        return dataset

    def process(self,
                paths: Union[str, Dict[str, str]],
                src_vocab_opt: VocabularyOption = None,
                tgt_vocab_opt: VocabularyOption = None,
                src_embed_opt: EmbeddingOption = None):
        """
        Load every split, build the vocabularies and index the datasets.

        Both vocabularies are built from the ``train`` split only and are then
        used to index the ``words`` and ``target`` fields of all splits.

        :param paths: a single path or a mapping from split name to path; it is
            normalized by ``check_dataloader_paths``.
        :param src_vocab_opt: keyword options for the ``words`` Vocabulary;
            a default Vocabulary is used when None.
        :param tgt_vocab_opt: keyword options for the ``target`` Vocabulary;
            when None, a Vocabulary without unknown/padding entries is used.
        :param src_embed_opt: keyword options forwarded to
            ``EmbedLoader.load_with_vocab``; no embedding is loaded when None.
        :return: a DataInfo holding ``datasets``, ``vocabs`` (keys ``words`` and
            ``target``) and, if requested, ``embeddings['words']``.
        """
        paths = check_dataloader_paths(paths)
        datasets = {}
        info = DataInfo()
        for name, path in paths.items():
            dataset = self.load(path)
            datasets[name] = dataset

        # The word vocabulary is learned from the training split only.
        src_vocab = Vocabulary() if src_vocab_opt is None else Vocabulary(**src_vocab_opt)
        src_vocab.from_dataset(datasets['train'], field_name='words')
        src_vocab.index_dataset(*datasets.values(), field_name='words')

        # By default the label vocabulary is created without unknown/padding entries.
        tgt_vocab = Vocabulary(unknown=None, padding=None) \
            if tgt_vocab_opt is None else Vocabulary(**tgt_vocab_opt)
        tgt_vocab.from_dataset(datasets['train'], field_name='target')
        tgt_vocab.index_dataset(*datasets.values(), field_name='target')

        info.vocabs = {
            "words": src_vocab,
            "target": tgt_vocab
        }

        info.datasets = datasets

        if src_embed_opt is not None:
            embed = EmbedLoader.load_with_vocab(**src_embed_opt, vocab=src_vocab)
            info.embeddings['words'] = embed

        return info
|
10
reproduction/text_classification/test/sample_MTL16.txt
Normal file
10
reproduction/text_classification/test/sample_MTL16.txt
Normal file
@ -0,0 +1,10 @@
|
||||
1 the only thing better than these sunglasses is the customer service i got , after i dropped and broke the lenses on these i called 80 's purple and they actually sent me out a replacement free of charge . i was blown away
|
||||
0 this light worked for one day . i should have known better because in the past , i bought a tap light , and it worked for only a few days , too . do n't waste your money
|
||||
1 i 've tried 6 different nursing bras . this one , with the center snap closure , is the easiest to use . it is also the lightest and most comfortable , while providing good support . my only complaint is that after about 50 washes the underwire begins to poke free from the fabric . even when i try to sew it back into place , it breaks loose after a few washes . perhaps if i handwashed the bra instead of using a machine , it would last longer . this bra is less durabe than my other nursing bras ( particularly the leading lady bra , which seems to be indestructible ) , but it is well worth the sacrifice for comfort , lightness , and ease of use . it is by far my favorite
|
||||
0 i have had my bag for a couple of months . the liner on the inside has already ripped
|
||||
0 the photo is quite deceiving . this suit is made out of cheap polyester fabric that looks cheap , shiny , and is horrible to the touch . my three year olds hate the uncomfortable stiffness . spend the extra money for a decent fabric that is actually practical for a toddler if they really need a suit
|
||||
1 i had bought a bra of this model at a discount store , just got lucky . it quickly became my favorite , and i was glad to find it at amazon .
|
||||
0 lookslike it would be a nice product , but it 's only for very small babies up to 12 pounds and 23 inches . my baby is very long and just does n't fit - wish target/amazon would have been more upfront with the sizing
|
||||
0 i purchased the non-premium kit ( $ 9.99 ) with a silicone skin case cover and 2 screen protectors ( one for each screen ) , but it is the same case . the problem is that the silicone skin cover is slippery , twice as slippery as the nintendo lite without the cover . we thought that washing them in dove dish soap would wash away the slipperyness , but that did n't work . after handling the cover , your hands have a slippery residue on them . the other issue is that the cover is so thin that it is little more than scratch protection , not impact protection . the screen covers that come with the non-premium kit are ok , i guess , but one of them had 2 defect particles that were raised ( trust me , the screen was clean ) . i purchased 2 kits , and i had one screen protector defect and my wife accidentally broke one of the silicone covers hinge straps with little effort . i do not recommend this product at all
|
||||
1 good quality jeans at an affordable price . size is just right , quite comfortable
|
||||
0 not the best fabric , scratchy and see thru . you get what you pay for on these
|
10
reproduction/text_classification/test/test_MTL16Loader.py
Normal file
10
reproduction/text_classification/test/test_MTL16Loader.py
Normal file
@ -0,0 +1,10 @@
|
||||
import unittest
|
||||
from reproduction.text_classification.data.MTL16Loader import MTL16Loader
|
||||
|
||||
|
||||
class TestDataLoader(unittest.TestCase):
    """Smoke test for MTL16Loader using the bundled sample file."""

    def test_MTL16Loader(self):
        """Process the 10-line sample file and check the returned DataInfo."""
        # Function-scope import keeps this block independent of the file-level imports.
        import os

        # Resolve the sample next to this test module so the test does not
        # depend on the current working directory.
        sample_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   'sample_MTL16.txt')

        loader = MTL16Loader()
        data = loader.process(sample_path)
        print(data.datasets)

        # process() builds its vocabularies from the 'train' split, so it must exist.
        self.assertIn('train', data.datasets)
        # The sample file holds 10 labelled reviews.
        self.assertEqual(len(data.datasets['train']), 10)
        self.assertEqual(set(data.vocabs), {'words', 'target'})
|
||||
|
Loading…
Reference in New Issue
Block a user