mirror of
https://gitee.com/dromara/easyAi.git
synced 2024-11-29 18:27:37 +08:00
transFormer增加模型获取与注入
This commit is contained in:
parent
c381437360
commit
f0beccab0b
@ -4,6 +4,7 @@ import org.wlld.function.ReLu;
|
||||
import org.wlld.i.OutBack;
|
||||
import org.wlld.matrixTools.Matrix;
|
||||
import org.wlld.matrixTools.MatrixOperation;
|
||||
import org.wlld.transFormer.model.CodecBlockModel;
|
||||
import org.wlld.transFormer.nerve.HiddenNerve;
|
||||
import org.wlld.transFormer.nerve.Nerve;
|
||||
import org.wlld.transFormer.seflAttention.LayNorm;
|
||||
@ -24,11 +25,39 @@ public class CodecBlock {
|
||||
private CodecBlock afterEncoderBlock;//后编码模块
|
||||
private CodecBlock beforeEncoderBlock;//前编码模块
|
||||
private CodecBlock lastEncoderBlock;//最后一层编码器
|
||||
private Map<Long, Matrix> outMatrixMap = new HashMap<>();
|
||||
private final Map<Long, Matrix> outMatrixMap = new HashMap<>();
|
||||
private final boolean encoder;//是否为编码器
|
||||
private LineBlock lineBlock;//解码器最后的线性分类器
|
||||
private FirstDecoderBlock firstDecoderBlock;//解码器第一层
|
||||
|
||||
public CodecBlockModel getModel() {
|
||||
List<double[][]> firstNerveModel = new ArrayList<>();
|
||||
List<double[][]> secondNerveModel = new ArrayList<>();
|
||||
for (int i = 0; i < fistHiddenNerves.size(); i++) {
|
||||
firstNerveModel.add(fistHiddenNerves.get(i).getModel());
|
||||
secondNerveModel.add(secondHiddenNerves.get(i).getModel());
|
||||
}
|
||||
CodecBlockModel codecBlockModel = new CodecBlockModel();
|
||||
codecBlockModel.setMultiSelfAttentionModel(multiSelfAttention.getModel());
|
||||
codecBlockModel.setAttentionLayNormModel(attentionLayNorm.getModel());
|
||||
codecBlockModel.setFistNervesModel(firstNerveModel);
|
||||
codecBlockModel.setSecondNervesModel(secondNerveModel);
|
||||
codecBlockModel.setLineLayNormModel(lineLayNorm.getModel());
|
||||
return codecBlockModel;
|
||||
}
|
||||
|
||||
public void insertModel(CodecBlockModel codecBlockModel) throws Exception {
|
||||
multiSelfAttention.insertModel(codecBlockModel.getMultiSelfAttentionModel());
|
||||
attentionLayNorm.insertModel(codecBlockModel.getAttentionLayNormModel());
|
||||
List<double[][]> firstNerveModel = codecBlockModel.getFistNervesModel();
|
||||
List<double[][]> secondNerveModel = codecBlockModel.getSecondNervesModel();
|
||||
for (int i = 0; i < fistHiddenNerves.size(); i++) {
|
||||
fistHiddenNerves.get(i).insertModel(firstNerveModel.get(i));
|
||||
secondHiddenNerves.get(i).insertModel(secondNerveModel.get(i));
|
||||
}
|
||||
lineLayNorm.insertModel(codecBlockModel.getLineLayNormModel());
|
||||
}
|
||||
|
||||
public void setFirstDecoderBlock(FirstDecoderBlock firstDecoderBlock) {
|
||||
this.firstDecoderBlock = firstDecoderBlock;
|
||||
}
|
||||
@ -117,14 +146,14 @@ public class CodecBlock {
|
||||
List<Nerve> secondNerves = new ArrayList<>();
|
||||
for (int i = 0; i < featureDimension; i++) {
|
||||
HiddenNerve hiddenNerve1 = new HiddenNerve(i + 1, 1, studyPoint, new ReLu(), featureDimension,
|
||||
featureDimension, true, null);
|
||||
featureDimension, null);
|
||||
fistHiddenNerves.add(hiddenNerve1);
|
||||
hiddenNerve1.setAfterLayNorm(attentionLayNorm);
|
||||
firstNerves.add(hiddenNerve1);
|
||||
}
|
||||
for (int i = 0; i < featureDimension; i++) {
|
||||
HiddenNerve hiddenNerve2 = new HiddenNerve(i + 1, 2, studyPoint, null,
|
||||
featureDimension, 1, true, null);
|
||||
featureDimension, 1, null);
|
||||
hiddenNerve2.setBeforeLayNorm(lineLayNorm);
|
||||
secondHiddenNerves.add(hiddenNerve2);
|
||||
secondNerves.add(hiddenNerve2);
|
||||
|
@ -2,6 +2,7 @@ package org.wlld.transFormer;
|
||||
|
||||
import org.wlld.i.OutBack;
|
||||
import org.wlld.matrixTools.Matrix;
|
||||
import org.wlld.transFormer.model.FirstDecoderModel;
|
||||
import org.wlld.transFormer.seflAttention.LayNorm;
|
||||
import org.wlld.transFormer.seflAttention.MultiSelfAttention;
|
||||
|
||||
@ -29,6 +30,18 @@ public class FirstDecoderBlock {//解码器模块
|
||||
this.codecBlock = codecBlock;
|
||||
}
|
||||
|
||||
public FirstDecoderModel getModel() {
|
||||
FirstDecoderModel firstDecoderModel = new FirstDecoderModel();
|
||||
firstDecoderModel.setMultiSelfAttentionModel(multiSelfAttention.getModel());
|
||||
firstDecoderModel.setAttentionLayNormModel(attentionLayNorm.getModel());
|
||||
return firstDecoderModel;
|
||||
}
|
||||
|
||||
public void insertModel(FirstDecoderModel firstDecoderModel) throws Exception {
|
||||
multiSelfAttention.insertModel(firstDecoderModel.getMultiSelfAttentionModel());
|
||||
attentionLayNorm.insertModel(firstDecoderModel.getAttentionLayNormModel());
|
||||
}
|
||||
|
||||
public void backError(long eventID, Matrix error) throws Exception {
|
||||
attentionLayNorm.backErrorFromLine(error, eventID);
|
||||
lastEncoderBlock.encoderBackStart(eventID);
|
||||
|
@ -4,6 +4,7 @@ import org.wlld.function.Tanh;
|
||||
import org.wlld.i.OutBack;
|
||||
import org.wlld.matrixTools.Matrix;
|
||||
import org.wlld.matrixTools.MatrixOperation;
|
||||
import org.wlld.transFormer.model.LineBlockModel;
|
||||
import org.wlld.transFormer.nerve.HiddenNerve;
|
||||
import org.wlld.transFormer.nerve.Nerve;
|
||||
import org.wlld.transFormer.nerve.OutNerve;
|
||||
@ -20,6 +21,32 @@ public class LineBlock {//线性层模块
|
||||
private final int featureDimension;
|
||||
private int backNumber = 0;//误差返回次数
|
||||
|
||||
public LineBlockModel getModel() {
|
||||
LineBlockModel lineBlockModel = new LineBlockModel();
|
||||
List<double[][]> hiddenNerveModel = new ArrayList<>();
|
||||
List<double[][]> outNerveModel = new ArrayList<>();
|
||||
for (HiddenNerve hiddenNerve : hiddenNerveList) {
|
||||
hiddenNerveModel.add(hiddenNerve.getModel());
|
||||
}
|
||||
for (OutNerve outNerve : outNerveList) {
|
||||
outNerveModel.add(outNerve.getModel());
|
||||
}
|
||||
lineBlockModel.setHiddenNervesModel(hiddenNerveModel);
|
||||
lineBlockModel.setOutNervesModel(outNerveModel);
|
||||
return lineBlockModel;
|
||||
}
|
||||
|
||||
public void insertModel(LineBlockModel lineBlockModel) throws Exception {
|
||||
List<double[][]> hiddenNerveModel = lineBlockModel.getHiddenNervesModel();
|
||||
List<double[][]> outNerveModel = lineBlockModel.getOutNervesModel();
|
||||
for (int i = 0; i < hiddenNerveList.size(); i++) {
|
||||
hiddenNerveList.get(i).insertModel(hiddenNerveModel.get(i));
|
||||
}
|
||||
for (int i = 0; i < outNerveList.size(); i++) {
|
||||
outNerveList.get(i).insertModel(outNerveModel.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
public LineBlock(int typeNumber, int featureDimension, double studyPoint, CodecBlock lastCodecBlock, boolean showLog) throws Exception {
|
||||
this.featureDimension = featureDimension;
|
||||
this.lastCodecBlock = lastCodecBlock;
|
||||
@ -28,7 +55,7 @@ public class LineBlock {//线性层模块
|
||||
List<Nerve> hiddenNerves = new ArrayList<>();
|
||||
for (int i = 0; i < featureDimension; i++) {
|
||||
HiddenNerve hiddenNerve = new HiddenNerve(i + 1, 1, studyPoint, new Tanh(), featureDimension,
|
||||
typeNumber, false, this);
|
||||
typeNumber, this);
|
||||
hiddenNerves.add(hiddenNerve);
|
||||
hiddenNerveList.add(hiddenNerve);
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
package org.wlld.transFormer;
|
||||
|
||||
import org.wlld.naturalLanguage.word.WordEmbedding;
|
||||
import org.wlld.transFormer.model.CodecBlockModel;
|
||||
import org.wlld.transFormer.model.TransFormerModel;
|
||||
import org.wlld.transFormer.nerve.SensoryNerve;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -17,6 +18,32 @@ public class TransFormerManager {
|
||||
return sensoryNerve;
|
||||
}
|
||||
|
||||
public TransFormerModel getModel() {
|
||||
TransFormerModel transFormerModel = new TransFormerModel();
|
||||
List<CodecBlockModel> encoderBlockModels = new ArrayList<>();
|
||||
List<CodecBlockModel> decoderBlockModels = new ArrayList<>();
|
||||
for (int i = 0; i < encoderBlocks.size(); i++) {
|
||||
encoderBlockModels.add(encoderBlocks.get(i).getModel());
|
||||
decoderBlockModels.add(decoderBlocks.get(i).getModel());
|
||||
}
|
||||
transFormerModel.setEncoderBlockModels(encoderBlockModels);
|
||||
transFormerModel.setDecoderBlockModels(decoderBlockModels);
|
||||
transFormerModel.setFirstDecoderBlockModel(firstDecoderBlock.getModel());
|
||||
transFormerModel.setLineBlockModel(lineBlock.getModel());
|
||||
return transFormerModel;
|
||||
}
|
||||
|
||||
public void insertModel(TransFormerModel transFormerModel) throws Exception {
|
||||
List<CodecBlockModel> encoderBlockModels = transFormerModel.getEncoderBlockModels();
|
||||
List<CodecBlockModel> decoderBlockModels = transFormerModel.getDecoderBlockModels();
|
||||
for (int i = 0; i < encoderBlocks.size(); i++) {
|
||||
encoderBlocks.get(i).insertModel(encoderBlockModels.get(i));
|
||||
decoderBlocks.get(i).insertModel(decoderBlockModels.get(i));
|
||||
}
|
||||
firstDecoderBlock.insertModel(transFormerModel.getFirstDecoderBlockModel());
|
||||
lineBlock.insertModel(transFormerModel.getLineBlockModel());
|
||||
}
|
||||
|
||||
/**
|
||||
* 初始化神经元参数
|
||||
*
|
||||
|
@ -0,0 +1,52 @@
|
||||
package org.wlld.transFormer.model;
|
||||
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class CodecBlockModel {
|
||||
private MultiSelfAttentionModel multiSelfAttentionModel;//注意力层model
|
||||
private LayNormModel attentionLayNormModel;//残差1层model
|
||||
private List<double[][]> fistNervesModel;//FNN层第一层model
|
||||
private List<double[][]> secondNervesModel;//FNN层第二层model
|
||||
private LayNormModel lineLayNormModel;//残差层最后2层model
|
||||
|
||||
public MultiSelfAttentionModel getMultiSelfAttentionModel() {
|
||||
return multiSelfAttentionModel;
|
||||
}
|
||||
|
||||
public void setMultiSelfAttentionModel(MultiSelfAttentionModel multiSelfAttentionModel) {
|
||||
this.multiSelfAttentionModel = multiSelfAttentionModel;
|
||||
}
|
||||
|
||||
public LayNormModel getAttentionLayNormModel() {
|
||||
return attentionLayNormModel;
|
||||
}
|
||||
|
||||
public void setAttentionLayNormModel(LayNormModel attentionLayNormModel) {
|
||||
this.attentionLayNormModel = attentionLayNormModel;
|
||||
}
|
||||
|
||||
public List<double[][]> getFistNervesModel() {
|
||||
return fistNervesModel;
|
||||
}
|
||||
|
||||
public void setFistNervesModel(List<double[][]> fistNervesModel) {
|
||||
this.fistNervesModel = fistNervesModel;
|
||||
}
|
||||
|
||||
public List<double[][]> getSecondNervesModel() {
|
||||
return secondNervesModel;
|
||||
}
|
||||
|
||||
public void setSecondNervesModel(List<double[][]> secondNervesModel) {
|
||||
this.secondNervesModel = secondNervesModel;
|
||||
}
|
||||
|
||||
public LayNormModel getLineLayNormModel() {
|
||||
return lineLayNormModel;
|
||||
}
|
||||
|
||||
public void setLineLayNormModel(LayNormModel lineLayNormModel) {
|
||||
this.lineLayNormModel = lineLayNormModel;
|
||||
}
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
package org.wlld.transFormer.model;
|
||||
|
||||
public class FirstDecoderModel {
|
||||
private MultiSelfAttentionModel multiSelfAttentionModel;//注意力层model
|
||||
private LayNormModel attentionLayNormModel;//残差1层model
|
||||
|
||||
public MultiSelfAttentionModel getMultiSelfAttentionModel() {
|
||||
return multiSelfAttentionModel;
|
||||
}
|
||||
|
||||
public void setMultiSelfAttentionModel(MultiSelfAttentionModel multiSelfAttentionModel) {
|
||||
this.multiSelfAttentionModel = multiSelfAttentionModel;
|
||||
}
|
||||
|
||||
public LayNormModel getAttentionLayNormModel() {
|
||||
return attentionLayNormModel;
|
||||
}
|
||||
|
||||
public void setAttentionLayNormModel(LayNormModel attentionLayNormModel) {
|
||||
this.attentionLayNormModel = attentionLayNormModel;
|
||||
}
|
||||
}
|
23
src/main/java/org/wlld/transFormer/model/LayNormModel.java
Normal file
23
src/main/java/org/wlld/transFormer/model/LayNormModel.java
Normal file
@ -0,0 +1,23 @@
|
||||
package org.wlld.transFormer.model;
|
||||
|
||||
|
||||
/**
 * Plain data holder for the two matrices a layer-norm stage persists as its model.
 * <p>
 * Getters and setters store and return the given arrays as-is; no copies are made.
 */
public class LayNormModel {
    /** The {@code bTa} matrix; must be saved as part of the model. */
    private double[][] bTa;
    /** The {@code power} matrix; must be saved as part of the model. */
    private double[][] power;

    /** @return the stored {@code bTa} matrix, or {@code null} if none was set */
    public double[][] getbTa() {
        return bTa;
    }

    /** @param bTa the {@code bTa} matrix to store */
    public void setbTa(double[][] bTa) {
        this.bTa = bTa;
    }

    /** @return the stored {@code power} matrix, or {@code null} if none was set */
    public double[][] getPower() {
        return power;
    }

    /** @param power the {@code power} matrix to store */
    public void setPower(double[][] power) {
        this.power = power;
    }
}
|
24
src/main/java/org/wlld/transFormer/model/LineBlockModel.java
Normal file
24
src/main/java/org/wlld/transFormer/model/LineBlockModel.java
Normal file
@ -0,0 +1,24 @@
|
||||
package org.wlld.transFormer.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Plain data holder for the parameters of the linear block.
 * <p>
 * Getters and setters store and return the given lists as-is; no copies are made.
 */
public class LineBlockModel {
    /** Per-nerve models of the hidden layer. */
    private List<double[][]> hiddenNervesModel;
    /** Per-nerve models of the output layer. */
    private List<double[][]> outNervesModel;

    /** @return the hidden layer models, or {@code null} if none were set */
    public List<double[][]> getHiddenNervesModel() {
        return hiddenNervesModel;
    }

    /** @param hiddenNervesModel the hidden layer models to store */
    public void setHiddenNervesModel(List<double[][]> hiddenNervesModel) {
        this.hiddenNervesModel = hiddenNervesModel;
    }

    /** @return the output layer models, or {@code null} if none were set */
    public List<double[][]> getOutNervesModel() {
        return outNervesModel;
    }

    /** @param outNervesModel the output layer models to store */
    public void setOutNervesModel(List<double[][]> outNervesModel) {
        this.outNervesModel = outNervesModel;
    }
}
|
@ -1,4 +1,4 @@
|
||||
package org.wlld.transFormer.seflAttention;
|
||||
package org.wlld.transFormer.model;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package org.wlld.transFormer.seflAttention;
|
||||
package org.wlld.transFormer.model;
|
||||
|
||||
public class QKVModel {
|
||||
private double[][] Q;
|
@ -0,0 +1,42 @@
|
||||
package org.wlld.transFormer.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class TransFormerModel {
|
||||
private List<CodecBlockModel> encoderBlockModels;//编码器模块
|
||||
private List<CodecBlockModel> decoderBlockModels;//解码器模块
|
||||
private FirstDecoderModel firstDecoderBlockModel;//第一个解码器模块
|
||||
private LineBlockModel lineBlockModel;//线性分类层
|
||||
|
||||
public List<CodecBlockModel> getEncoderBlockModels() {
|
||||
return encoderBlockModels;
|
||||
}
|
||||
|
||||
public void setEncoderBlockModels(List<CodecBlockModel> encoderBlockModels) {
|
||||
this.encoderBlockModels = encoderBlockModels;
|
||||
}
|
||||
|
||||
public List<CodecBlockModel> getDecoderBlockModels() {
|
||||
return decoderBlockModels;
|
||||
}
|
||||
|
||||
public void setDecoderBlockModels(List<CodecBlockModel> decoderBlockModels) {
|
||||
this.decoderBlockModels = decoderBlockModels;
|
||||
}
|
||||
|
||||
public FirstDecoderModel getFirstDecoderBlockModel() {
|
||||
return firstDecoderBlockModel;
|
||||
}
|
||||
|
||||
public void setFirstDecoderBlockModel(FirstDecoderModel firstDecoderBlockModel) {
|
||||
this.firstDecoderBlockModel = firstDecoderBlockModel;
|
||||
}
|
||||
|
||||
public LineBlockModel getLineBlockModel() {
|
||||
return lineBlockModel;
|
||||
}
|
||||
|
||||
public void setLineBlockModel(LineBlockModel lineBlockModel) {
|
||||
this.lineBlockModel = lineBlockModel;
|
||||
}
|
||||
}
|
@ -21,9 +21,9 @@ import java.util.Map;
|
||||
public class HiddenNerve extends Nerve {
|
||||
|
||||
public HiddenNerve(int id, int depth, double studyPoint, ActiveFunction activeFunction, int sensoryNerveNub,
|
||||
int outNerveNub, boolean isEncoder, LineBlock lineBlock) throws Exception {//隐层神经元
|
||||
int outNerveNub, LineBlock lineBlock) throws Exception {//隐层神经元
|
||||
super(id, "HiddenNerve", studyPoint, activeFunction, sensoryNerveNub, 0,
|
||||
outNerveNub, isEncoder, lineBlock);
|
||||
outNerveNub, lineBlock);
|
||||
this.depth = depth;
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ public class HiddenNerve extends Nerve {
|
||||
protected void input(long eventId, Matrix parameter, boolean isStudy, Matrix allFeature, OutBack outBack,
|
||||
List<Integer> E, Matrix encoderFeature) throws Exception {//第二层收到参数
|
||||
boolean allReady = insertMatrixParameter(eventId, parameter);
|
||||
if (allReady) {//参数齐了,开始计算 sigma - threshold
|
||||
if (allReady) {//参数齐了,开始计算
|
||||
Matrix out = opMatrix(reMatrixFeatures.get(eventId), isStudy);
|
||||
reMatrixFeatures.remove(eventId);
|
||||
beforeLayNorm.addNormFromNerve(eventId, isStudy, out, allFeature, outBack, E, encoderFeature);
|
||||
|
@ -21,16 +21,12 @@ public abstract class Nerve {
|
||||
private final List<Nerve> father = new ArrayList<>();//树突上一层的连接神经元
|
||||
protected LayNorm beforeLayNorm;//多头自注意力层
|
||||
protected LayNorm afterLayNorm;//多头自注意力层
|
||||
protected Map<Integer, Double> dendrites = new HashMap<>();//上一层权重(需要取出)
|
||||
protected Matrix powerMatrix;//权重矩阵
|
||||
protected Matrix powerMatrix;//权重矩阵 作为模型取出
|
||||
private final int id;//同级神经元编号,注意在同层编号中ID应有唯一性
|
||||
private final int hiddenNerveNub;//隐层神经元个数
|
||||
private final int sensoryNerveNub;//输入神经元个数
|
||||
private final int outNerveNub;//输出神经元个数
|
||||
private final boolean encoder;
|
||||
protected Map<Long, List<Double>> features = new HashMap<>();//上一层神经元输入的数值
|
||||
protected Map<Long, Matrix> reMatrixFeatures = new HashMap<>();
|
||||
protected double threshold;//此神经元的阈值需要取出
|
||||
protected String name;//该神经元所属类型
|
||||
protected Matrix featureMatrix;
|
||||
protected double E;//模板期望值
|
||||
@ -47,23 +43,6 @@ public abstract class Nerve {
|
||||
return depth;
|
||||
}
|
||||
|
||||
public Map<Integer, Double> getDendrites() {
|
||||
return dendrites;
|
||||
}
|
||||
|
||||
|
||||
public void setDendrites(Map<Integer, Double> dendrites) {
|
||||
this.dendrites = dendrites;
|
||||
}
|
||||
|
||||
public double getThreshold() {
|
||||
return threshold;
|
||||
}
|
||||
|
||||
public void setThreshold(double threshold) {
|
||||
this.threshold = threshold;
|
||||
}
|
||||
|
||||
public void setBeforeLayNorm(LayNorm beforeLayNorm) {
|
||||
this.beforeLayNorm = beforeLayNorm;
|
||||
}
|
||||
@ -73,10 +52,9 @@ public abstract class Nerve {
|
||||
}
|
||||
|
||||
protected Nerve(int id, String name, double studyPoint, ActiveFunction activeFunction, int sensoryNerveNub,
|
||||
int hiddenNerveNub, int outNerveNub, boolean encoder, LineBlock lineBlock) throws Exception {//该神经元在同层神经元中的编号
|
||||
int hiddenNerveNub, int outNerveNub, LineBlock lineBlock) throws Exception {//该神经元在同层神经元中的编号
|
||||
this.id = id;
|
||||
this.lineBlock = lineBlock;
|
||||
this.encoder = encoder;
|
||||
this.hiddenNerveNub = hiddenNerveNub;//隐层神经元个数
|
||||
this.sensoryNerveNub = sensoryNerveNub;//输入神经元个数
|
||||
this.outNerveNub = outNerveNub;//输出神经元个数
|
||||
@ -86,10 +64,17 @@ public abstract class Nerve {
|
||||
initPower();//生成随机权重
|
||||
}
|
||||
|
||||
protected void setStudyPoint(double studyPoint) {
|
||||
this.studyPoint = studyPoint;
|
||||
public double[][] getModel() {
|
||||
return powerMatrix.getMatrix();
|
||||
}
|
||||
|
||||
public void insertModel(double[][] modelPower) throws Exception {
|
||||
for (int i = 0; i < powerMatrix.getX(); i++) {
|
||||
for (int j = 0; j < powerMatrix.getY(); j++) {
|
||||
powerMatrix.setNub(i, j, modelPower[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void sendMessage(long eventId, Matrix parameter, boolean isStudy, Matrix allFeature, OutBack outBack,
|
||||
List<Integer> E, Matrix encoderFeature) throws Exception {
|
||||
@ -208,19 +193,6 @@ public abstract class Nerve {
|
||||
return sigma;
|
||||
}
|
||||
|
||||
protected double calculation(long eventId) throws Exception {//计算当前输出结果
|
||||
double sigma = 0;
|
||||
List<Double> featuresList = features.get(eventId);
|
||||
if (dendrites.size() != featuresList.size()) {
|
||||
throw new Exception("隐层参数数量与权重数量不一致");
|
||||
}
|
||||
for (int i = 0; i < featuresList.size(); i++) {
|
||||
double value = featuresList.get(i);
|
||||
double w = dendrites.get(i + 1);//当value不为0的时候把w取出来
|
||||
sigma = w * value + sigma;
|
||||
}
|
||||
return sigma - threshold;
|
||||
}
|
||||
|
||||
private void initPower() throws Exception {//初始化权重及阈值
|
||||
Random random = new Random();
|
||||
@ -235,14 +207,12 @@ public abstract class Nerve {
|
||||
if (myUpNumber > 0) {//输入个数
|
||||
powerMatrix = new Matrix(myUpNumber + 1, 1);
|
||||
double sh = Math.sqrt(myUpNumber);
|
||||
for (int i = 1; i < myUpNumber + 1; i++) {
|
||||
for (int i = 0; i < myUpNumber; i++) {
|
||||
double nub = random.nextDouble() / sh;
|
||||
dendrites.put(i, nub);//random.nextDouble()
|
||||
powerMatrix.setNub(i - 1, 0, nub);
|
||||
powerMatrix.setNub(i, 0, nub);
|
||||
}
|
||||
//生成随机阈值
|
||||
threshold = random.nextDouble() / sh;
|
||||
powerMatrix.setNub(myUpNumber, 0, threshold);
|
||||
powerMatrix.setNub(myUpNumber, 0, random.nextDouble() / sh);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -18,7 +18,7 @@ public class OutNerve extends Nerve {
|
||||
|
||||
public OutNerve(int id, double studyPoint, int sensoryNerveNub, int hiddenNerveNub, int outNerveNub, SoftMax softMax) throws Exception {
|
||||
super(id, "OutNerve", studyPoint, null, sensoryNerveNub,
|
||||
hiddenNerveNub, outNerveNub, false, null);
|
||||
hiddenNerveNub, outNerveNub, null);
|
||||
this.softMax = softMax;
|
||||
}
|
||||
|
||||
|
@ -1,14 +1,12 @@
|
||||
package org.wlld.transFormer.nerve;
|
||||
|
||||
|
||||
import org.wlld.config.RZ;
|
||||
import org.wlld.i.OutBack;
|
||||
import org.wlld.matrixTools.Matrix;
|
||||
import org.wlld.matrixTools.MatrixOperation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class SoftMax extends Nerve {
|
||||
private final List<OutNerve> outNerves;
|
||||
@ -16,8 +14,7 @@ public class SoftMax extends Nerve {
|
||||
|
||||
public SoftMax(List<OutNerve> outNerves, boolean isShowLog
|
||||
, int sensoryNerveNub, int hiddenNerveNub, int outNerveNub) throws Exception {
|
||||
super(0, "softMax", 0, null, sensoryNerveNub, hiddenNerveNub, outNerveNub,
|
||||
false, null);
|
||||
super(0, "softMax", 0, null, sensoryNerveNub, hiddenNerveNub, outNerveNub, null);
|
||||
this.outNerves = outNerves;
|
||||
this.isShowLog = isShowLog;
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ import org.wlld.matrixTools.Matrix;
|
||||
import org.wlld.matrixTools.MatrixOperation;
|
||||
import org.wlld.transFormer.CodecBlock;
|
||||
import org.wlld.transFormer.FirstDecoderBlock;
|
||||
import org.wlld.transFormer.model.LayNormModel;
|
||||
import org.wlld.transFormer.nerve.HiddenNerve;
|
||||
|
||||
import java.util.HashMap;
|
||||
@ -17,16 +18,36 @@ public class LayNorm {//残差与归一化
|
||||
private final CodecBlock myEncoderBlock;
|
||||
private final int featureDimension;//特征维度
|
||||
private List<HiddenNerve> hiddenNerves;//第一层隐层
|
||||
private final int type;//类别层
|
||||
private final int type;//类别层模型需要保存
|
||||
private final Map<Long, Matrix> reMatrixMap = new HashMap<>();
|
||||
private final FirstDecoderBlock firstDecoderBlock;
|
||||
private Matrix bTa;
|
||||
private Matrix power;
|
||||
private Matrix bTa;//模型需要保存
|
||||
private Matrix power;//模型需要保存
|
||||
private Matrix myNormData;//第一步归一化后的数据
|
||||
private final double study;//学习率
|
||||
private Matrix myFinalError;//从FNN传来的总误差
|
||||
private int number;//记录fnn传来的误差次数
|
||||
|
||||
public LayNormModel getModel() {
|
||||
LayNormModel layNormModel = new LayNormModel();
|
||||
layNormModel.setbTa(bTa.getMatrix());
|
||||
layNormModel.setPower(power.getMatrix());
|
||||
return layNormModel;
|
||||
}
|
||||
|
||||
public void insertModel(LayNormModel layNormModel) throws Exception {
|
||||
insertPower(layNormModel.getPower(), power);
|
||||
insertPower(layNormModel.getbTa(), bTa);
|
||||
}
|
||||
|
||||
private void insertPower(double[][] modelPower, Matrix power) throws Exception {
|
||||
for (int i = 0; i < power.getX(); i++) {
|
||||
for (int j = 0; j < power.getY(); j++) {
|
||||
power.setNub(i, j, modelPower[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public LayNorm(int type, int featureDimension, CodecBlock myEncoderBlock, FirstDecoderBlock firstDecoderBlock
|
||||
, double study) throws Exception {
|
||||
this.study = study;
|
||||
|
@ -3,9 +3,9 @@ package org.wlld.transFormer.seflAttention;
|
||||
import org.wlld.matrixTools.Matrix;
|
||||
import org.wlld.matrixTools.MatrixOperation;
|
||||
import org.wlld.i.OutBack;
|
||||
import org.wlld.tools.Frequency;
|
||||
import org.wlld.transFormer.CodecBlock;
|
||||
import org.wlld.transFormer.nerve.HiddenNerve;
|
||||
import org.wlld.transFormer.model.MultiSelfAttentionModel;
|
||||
import org.wlld.transFormer.model.QKVModel;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
@ -2,6 +2,7 @@ package org.wlld.transFormer.seflAttention;
|
||||
|
||||
import org.wlld.matrixTools.Matrix;
|
||||
import org.wlld.matrixTools.MatrixOperation;
|
||||
import org.wlld.transFormer.model.QKVModel;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
Loading…
Reference in New Issue
Block a user