mirror of
https://gitee.com/dromara/easyAi.git
synced 2024-11-30 02:37:42 +08:00
修改词
This commit is contained in:
parent
00d8124864
commit
f755e929fa
@ -77,10 +77,10 @@ public class Talk {
|
||||
List<String> words = wordTimes.get(i);
|
||||
nub = getNub(words, keyWords.get(i));
|
||||
if (nub == 0) {//出现了不认识的词
|
||||
System.out.println("不认识的词:" + keyWords.get(i));
|
||||
// System.out.println("不认识的词:" + keyWords.get(i));
|
||||
wrong++;
|
||||
} else {
|
||||
System.out.println("认识的词:" + keyWords.get(i));
|
||||
//System.out.println("认识的词:" + keyWords.get(i));
|
||||
}
|
||||
}
|
||||
features.add(nub);
|
||||
|
@ -1,7 +1,7 @@
|
||||
package org.wlld.naturalLanguage;
|
||||
|
||||
public class WordConst {
|
||||
public static double Word_Noise = 0.7;//收缩程度
|
||||
public static double Word_Noise = 0.65;//收缩程度
|
||||
public static final int Water = 2;//送水
|
||||
public static final int Nanny = 3;//保姆
|
||||
public static final int Unlock = 4;//开锁
|
||||
|
@ -23,10 +23,28 @@ public class WordTemple {
|
||||
private boolean isSplitWord = false;//是否使用拆分词模式,默认是不使用
|
||||
//生成语句的参数
|
||||
private int wordVectorDimension = 25;//词向量嵌入维度
|
||||
private double studyPoint = 0.01;//词向量学习学习率0.02
|
||||
private double studyPoint = 0.008;//词向量学习学习率0.02
|
||||
private double lParam = 0.002;//词向量正则系数0.04
|
||||
private boolean showLog = false;//是否打印学习数据
|
||||
private int maxWordNumber = 40;//语句最大字数
|
||||
private int maxSplitSize = 3;//最大切词数
|
||||
private int maxWordSize = 12;//最大字数
|
||||
|
||||
/**
 * Returns the configured maximum split-word count ({@code maxSplitSize}).
 * {@code SentenceCreator.fill} keeps filling only while the sentence's
 * split-word count is strictly below this value.
 *
 * @return the current value of {@code maxSplitSize}
 */
public int getMaxSplitSize() {
|
||||
return maxSplitSize;
|
||||
}
|
||||
|
||||
/**
 * Sets the maximum split-word count ({@code maxSplitSize}).
 * No validation is performed on the supplied value.
 *
 * @param maxSplitSize the new value stored in the {@code maxSplitSize} field
 */
public void setMaxSplitSize(int maxSplitSize) {
|
||||
this.maxSplitSize = maxSplitSize;
|
||||
}
|
||||
|
||||
/**
 * Returns the configured maximum character count ({@code maxWordSize}).
 * {@code SentenceCreator.fill} keeps filling only while
 * {@code sentence.length()} is strictly below this value.
 *
 * @return the current value of {@code maxWordSize}
 */
public int getMaxWordSize() {
|
||||
return maxWordSize;
|
||||
}
|
||||
|
||||
/**
 * Sets the maximum character count ({@code maxWordSize}).
 * No validation is performed on the supplied value.
 *
 * @param maxWordSize the new value stored in the {@code maxWordSize} field
 */
public void setMaxWordSize(int maxWordSize) {
|
||||
this.maxWordSize = maxWordSize;
|
||||
}
|
||||
|
||||
public int getMaxWordNumber() {
|
||||
return maxWordNumber;
|
||||
|
@ -15,21 +15,26 @@ public class SentenceCreator {//语言生成器
|
||||
private List<String> wordList = new ArrayList<>();//模型
|
||||
private NerveManager nerveManager;//模型
|
||||
private int maxWordNumber;
|
||||
private int maxId;//id最大值
|
||||
private WordTemple wordTemple;
|
||||
|
||||
/**
 * Initializes this sentence creator from an already-trained model.
 * <p>
 * Stores the template, copies every entry of
 * {@code creatorSentenceModel.getWordList()} into {@code wordList}, builds and
 * initializes the {@code NerveManager}, reads {@code maxWordNumber} from the
 * template, and finally loads the saved parameters via
 * {@code insertModelParameter}.
 * <p>
 * NOTE(review): this text appears to come from a rendered diff. The two
 * consecutive {@code NerveManager} constructions below look like the removed
 * and the added version of the same statement shown together; confirm against
 * the real file which one is live.
 *
 * @param wordTemple           configuration source (vector dimension, study point,
 *                             log flag, max word number are read from it)
 * @param creatorSentenceModel saved model providing the word list and the
 *                             network parameters
 * @throws Exception declared by the signature; which call raises it is not
 *                   visible in this excerpt
 */
public void initModel(WordTemple wordTemple, CreatorSentenceModel creatorSentenceModel) throws Exception {
|
||||
this.wordTemple = wordTemple;
|
||||
List<String> modelList = creatorSentenceModel.getWordList();
|
||||
int size = modelList.size();
|
||||
// Copy the saved vocabulary into wordList, preserving order.
for (int i = 0; i < size; i++) {
|
||||
wordList.add(modelList.get(i));
|
||||
}
|
||||
// NOTE(review): presumably the pre-change construction (third argument wordList.size(), RZ.NOT_RZ) — confirm.
nerveManager = new NerveManager(wordList.size(), wordTemple.getWordVectorDimension(), wordList.size()
|
||||
, 1, new Tanh(), false, wordTemple.getStudyPoint(), RZ.NOT_RZ, 0);
|
||||
// One past the number of known words; fill() only treats ids below maxId as real words.
maxId = wordList.size() + 1;
|
||||
// NOTE(review): presumably the post-change construction (third argument wordList.size() + 1, RZ.L1) — confirm.
nerveManager = new NerveManager(wordList.size(), wordTemple.getWordVectorDimension(), wordList.size() + 1
|
||||
, 1, new Tanh(), false, wordTemple.getStudyPoint(), RZ.L1, 0);
|
||||
nerveManager.init(true, false, wordTemple.isShowLog(), true, 0, 0);
|
||||
maxWordNumber = wordTemple.getMaxWordNumber();
|
||||
// Load the previously trained parameters into the freshly initialized network.
nerveManager.insertModelParameter(creatorSentenceModel.getModelParameter());
|
||||
}
|
||||
|
||||
public void initFirst(List<String> sentenceList, WordTemple wordTemple) throws Exception {
|
||||
this.wordTemple = wordTemple;
|
||||
this.sentenceList = anySort(sentenceList);
|
||||
int size = this.sentenceList.size();
|
||||
maxWordNumber = wordTemple.getMaxWordNumber();
|
||||
@ -40,8 +45,9 @@ public class SentenceCreator {//语言生成器
|
||||
for (String word : wordSet) {
|
||||
wordList.add(word);
|
||||
}
|
||||
nerveManager = new NerveManager(wordList.size(), wordTemple.getWordVectorDimension(), wordList.size()
|
||||
, 1, new Tanh(), false, wordTemple.getStudyPoint(), RZ.NOT_RZ, 0);
|
||||
maxId = wordList.size() + 1;
|
||||
nerveManager = new NerveManager(wordList.size(), wordTemple.getWordVectorDimension(), wordList.size() + 1
|
||||
, 1, new Tanh(), false, wordTemple.getStudyPoint(), RZ.L1, 0);
|
||||
nerveManager.init(true, false, wordTemple.isShowLog(), true, 0, 0);
|
||||
}
|
||||
|
||||
@ -66,28 +72,32 @@ public class SentenceCreator {//语言生成器
|
||||
}
|
||||
|
||||
public String fill(String sentence, Talk talk) throws Exception {
|
||||
int splitSize = talk.getSplitWord(sentence).size();//切词数量
|
||||
boolean isFill = splitSize < 5 && sentence.length() < 10;
|
||||
int splitSize = talk.getSplitWord(sentence).get(0).size();//切词数量
|
||||
boolean isFill = splitSize < wordTemple.getMaxSplitSize() && sentence.length() < wordTemple.getMaxWordSize();
|
||||
String upWord = null;
|
||||
while (isFill) {
|
||||
CreatorWord creatorWord = new CreatorWord();
|
||||
double[] feature = getFeature(sentence);
|
||||
studyDNN(feature, 0, creatorWord, false);
|
||||
int id = creatorWord.getId() - 1;
|
||||
String word = wordList.get(id);//终止条件1 字数 2,拆词
|
||||
if (upWord == null) {
|
||||
upWord = word;
|
||||
} else {
|
||||
if (upWord.equals(word)) {
|
||||
isFill = false;
|
||||
} else {
|
||||
if (creatorWord.getId() < maxId) {
|
||||
int id = creatorWord.getId() - 1;
|
||||
String word = wordList.get(id);//终止条件1 字数 2,拆词
|
||||
if (upWord == null) {
|
||||
upWord = word;
|
||||
} else {
|
||||
if (upWord.equals(word)) {
|
||||
isFill = false;
|
||||
} else {
|
||||
upWord = word;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (isFill) {
|
||||
sentence = word + sentence;
|
||||
splitSize = talk.getSplitWord(sentence).size();//切词数量
|
||||
isFill = splitSize < 5 && sentence.length() < 10;
|
||||
if (isFill) {
|
||||
sentence = word + sentence;
|
||||
splitSize = talk.getSplitWord(sentence).get(0).size();//切词数量
|
||||
isFill = splitSize < wordTemple.getMaxSplitSize() && sentence.length() < wordTemple.getMaxWordSize();
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return sentence;
|
||||
@ -97,6 +107,7 @@ public class SentenceCreator {//语言生成器
|
||||
int index = 1;
|
||||
for (String sentence : sentenceList) {
|
||||
System.out.println("i===" + index);
|
||||
studyDNN(getFeature(sentence), maxId, null, true);//终结态
|
||||
for (int i = 0; i < sentence.length() - 1; i++) {
|
||||
String response = sentence.substring(i, i + 1);//单字
|
||||
String request = sentence.substring(i + 1);//后缀
|
||||
|
@ -88,7 +88,7 @@ public class RandomForest {
|
||||
//一棵树属性的数量
|
||||
if (dataTable.getSize() > 4) {
|
||||
int kNub = (int) ArithUtil.div(Math.log(dataTable.getSize()), Math.log(2));
|
||||
//int kNub = dataTable.getSize() - 1;
|
||||
//int kNub = dataTable.getSize() / 2;
|
||||
// System.out.println("knNub==" + kNub);
|
||||
for (int i = 0; i < forest.length; i++) {
|
||||
Tree tree = new Tree(getRandomData(dataTable, kNub), trustPunishment);
|
||||
|
Loading…
Reference in New Issue
Block a user