修改词

This commit is contained in:
794757862@qq.com 2022-12-01 19:39:23 +08:00
parent 00d8124864
commit f755e929fa
5 changed files with 53 additions and 24 deletions

View File

@ -77,10 +77,10 @@ public class Talk {
List<String> words = wordTimes.get(i);
nub = getNub(words, keyWords.get(i));
if (nub == 0) {//出现了不认识的词
System.out.println("不认识的词:" + keyWords.get(i));
// System.out.println("不认识的词:" + keyWords.get(i));
wrong++;
} else {
System.out.println("认识的词:" + keyWords.get(i));
//System.out.println("认识的词:" + keyWords.get(i));
}
}
features.add(nub);

View File

@ -1,7 +1,7 @@
package org.wlld.naturalLanguage;
public class WordConst {
public static double Word_Noise = 0.7;//收缩程度
public static double Word_Noise = 0.65;//收缩程度
public static final int Water = 2;//送水
public static final int Nanny = 3;//保姆
public static final int Unlock = 4;//开锁

View File

@ -23,10 +23,28 @@ public class WordTemple {
private boolean isSplitWord = false;//是否使用拆分词模式,默认是不使用
//生成语句的参数
private int wordVectorDimension = 25;//词向量嵌入维度
private double studyPoint = 0.01;//词向量学习学习率0.02
private double studyPoint = 0.008;//词向量学习学习率0.02
private double lParam = 0.002;//词向量正则系数0.04
private boolean showLog = false;//是否打印学习数据
private int maxWordNumber = 40;//语句最大字数
private int maxSplitSize = 3;//最大切词数
private int maxWordSize = 12;//最大字数
/**
 * Returns the configured upper bound on the number of segmented words.
 *
 * @return the current value of {@code maxSplitSize}
 */
public int getMaxSplitSize() {
    return this.maxSplitSize;
}
/**
 * Replaces the configured upper bound on the number of segmented words.
 * The value is stored as given; no range check is performed here.
 *
 * @param maxSplitSize new value for the {@code maxSplitSize} field
 */
public void setMaxSplitSize(int maxSplitSize) {
this.maxSplitSize = maxSplitSize;
}
/**
 * Returns the configured upper bound on the character count.
 *
 * @return the current value of {@code maxWordSize}
 */
public int getMaxWordSize() {
    return this.maxWordSize;
}
/**
 * Replaces the configured upper bound on the character count.
 * The value is stored as given; no range check is performed here.
 *
 * @param maxWordSize new value for the {@code maxWordSize} field
 */
public void setMaxWordSize(int maxWordSize) {
this.maxWordSize = maxWordSize;
}
public int getMaxWordNumber() {
return maxWordNumber;

View File

@ -15,21 +15,26 @@ public class SentenceCreator {//语言生成器
private List<String> wordList = new ArrayList<>();//模型
private NerveManager nerveManager;//模型
private int maxWordNumber;
private int maxId;//id最大值
private WordTemple wordTemple;
/**
 * Initialises this generator from a previously saved model: keeps a reference to the
 * configuration, copies the saved word list into {@code wordList}, builds and initialises the
 * {@code NerveManager}, and finally loads the saved model parameters into it.
 *
 * @param wordTemple           configuration read here for the word-vector dimension, learning
 *                             rate, log flag and maximum word number
 * @param creatorSentenceModel saved model supplying the word list and the model parameters
 * @throws Exception declared by the signature; the visible code does not show which call raises it
 */
public void initModel(WordTemple wordTemple, CreatorSentenceModel creatorSentenceModel) throws Exception {
// Retained so that other methods (e.g. fill) can read the split/word-size limits later.
this.wordTemple = wordTemple;
List<String> modelList = creatorSentenceModel.getWordList();
int size = modelList.size();
// Copy the saved words in order into this instance's word list.
for (int i = 0; i < size; i++) {
wordList.add(modelList.get(i));
}
// NOTE(review): this text is a commit diff view, so this first construction (third argument
// wordList.size(), RZ.NOT_RZ) is presumably the removed pre-commit line; as written it is
// simply overwritten by the second assignment below — confirm against the real file.
nerveManager = new NerveManager(wordList.size(), wordTemple.getWordVectorDimension(), wordList.size()
, 1, new Tanh(), false, wordTemple.getStudyPoint(), RZ.NOT_RZ, 0);
// One past the word count: fill() treats ids below this as real words, and training passes
// this value to studyDNN as the terminal-state label.
maxId = wordList.size() + 1;
// Third argument is wordList.size() + 1 — presumably one extra output slot for the maxId
// terminal label; verify against the NerveManager constructor.
nerveManager = new NerveManager(wordList.size(), wordTemple.getWordVectorDimension(), wordList.size() + 1
, 1, new Tanh(), false, wordTemple.getStudyPoint(), RZ.L1, 0);
nerveManager.init(true, false, wordTemple.isShowLog(), true, 0, 0);
maxWordNumber = wordTemple.getMaxWordNumber();
// Load the stored parameters only after the network has been constructed and initialised.
nerveManager.insertModelParameter(creatorSentenceModel.getModelParameter());
}
public void initFirst(List<String> sentenceList, WordTemple wordTemple) throws Exception {
this.wordTemple = wordTemple;
this.sentenceList = anySort(sentenceList);
int size = this.sentenceList.size();
maxWordNumber = wordTemple.getMaxWordNumber();
@ -40,8 +45,9 @@ public class SentenceCreator {//语言生成器
for (String word : wordSet) {
wordList.add(word);
}
nerveManager = new NerveManager(wordList.size(), wordTemple.getWordVectorDimension(), wordList.size()
, 1, new Tanh(), false, wordTemple.getStudyPoint(), RZ.NOT_RZ, 0);
maxId = wordList.size() + 1;
nerveManager = new NerveManager(wordList.size(), wordTemple.getWordVectorDimension(), wordList.size() + 1
, 1, new Tanh(), false, wordTemple.getStudyPoint(), RZ.L1, 0);
nerveManager.init(true, false, wordTemple.isShowLog(), true, 0, 0);
}
@ -66,28 +72,32 @@ public class SentenceCreator {//语言生成器
}
public String fill(String sentence, Talk talk) throws Exception {
int splitSize = talk.getSplitWord(sentence).size();//切词数量
boolean isFill = splitSize < 5 && sentence.length() < 10;
int splitSize = talk.getSplitWord(sentence).get(0).size();//切词数量
boolean isFill = splitSize < wordTemple.getMaxSplitSize() && sentence.length() < wordTemple.getMaxWordSize();
String upWord = null;
while (isFill) {
CreatorWord creatorWord = new CreatorWord();
double[] feature = getFeature(sentence);
studyDNN(feature, 0, creatorWord, false);
int id = creatorWord.getId() - 1;
String word = wordList.get(id);//终止条件1 字数 2拆词
if (upWord == null) {
upWord = word;
} else {
if (upWord.equals(word)) {
isFill = false;
} else {
if (creatorWord.getId() < maxId) {
int id = creatorWord.getId() - 1;
String word = wordList.get(id);//终止条件1 字数 2拆词
if (upWord == null) {
upWord = word;
} else {
if (upWord.equals(word)) {
isFill = false;
} else {
upWord = word;
}
}
}
if (isFill) {
sentence = word + sentence;
splitSize = talk.getSplitWord(sentence).size();//切词数量
isFill = splitSize < 5 && sentence.length() < 10;
if (isFill) {
sentence = word + sentence;
splitSize = talk.getSplitWord(sentence).get(0).size();//切词数量
isFill = splitSize < wordTemple.getMaxSplitSize() && sentence.length() < wordTemple.getMaxWordSize();
}
} else {
break;
}
}
return sentence;
@ -97,6 +107,7 @@ public class SentenceCreator {//语言生成器
int index = 1;
for (String sentence : sentenceList) {
System.out.println("i===" + index);
studyDNN(getFeature(sentence), maxId, null, true);//终结态
for (int i = 0; i < sentence.length() - 1; i++) {
String response = sentence.substring(i, i + 1);//单字
String request = sentence.substring(i + 1);//后缀

View File

@ -88,7 +88,7 @@ public class RandomForest {
//一棵树属性的数量
if (dataTable.getSize() > 4) {
int kNub = (int) ArithUtil.div(Math.log(dataTable.getSize()), Math.log(2));
//int kNub = dataTable.getSize() - 1;
//int kNub = dataTable.getSize() / 2;
// System.out.println("knNub==" + kNub);
for (int i = 0; i < forest.length; i++) {
Tree tree = new Tree(getRandomData(dataTable, kNub), trustPunishment);