增加图片摘要id生成,可进行区域内图片快速相似度对比。

This commit is contained in:
794757862@qq.com 2022-11-21 16:21:38 +08:00
parent 6231926a71
commit 5d88a5157b
6 changed files with 625 additions and 0 deletions

View File

@ -123,6 +123,20 @@
本演示训练素材位置在: src/test/image
注意:以上图片识别代码样例为训练素材为物品全图充满图片(自己看能看到橘子训练图片为全图充满,苹果也是).自行开发时用以上代码样例时请也使用全图充满训练物品的图片来做训练非全图充满训练素材图训练api有变化
```
### 通过给图片生成摘要id进行快速相似度对比
``` java
//参数分别为:
//第一个参数:threeChannelMatrix,图片矩阵(图片矩阵如何提取上文已讲,不再赘述)
//第二个参数:boxSize,将一张图片横纵各分为几个区域提取特征
//参数说明:该值越大,摘要id敏感度越高。该参数有最大值,最大值为图片最小边长/5,超过会报数组越界错误
//第三个参数:regionSize,相似特征区域分区种类数量
//参数说明:该值越大,摘要id敏感度越高
//返回值name即为该图片的摘要id,将两个id逐位对比即可得到图片的相似程度
//什么是id敏感度
//id敏感度越高对图片变化越敏感越适合越大的检索区域匹配即特征越细致但缺点id长度越长。
//id敏感度越低对图片变化越不敏感越适合越小的检索区域匹配,特征越粗优点是id长度越短。
String name = creatImageName(threeChannelMatrix, 5, 10);
```
### 自然语言分类最简API 说明:
``` java
//通过txt默认格式进行读取

View File

@ -0,0 +1,105 @@
package org.wlld.regressionForest;
import org.wlld.MatrixTools.Matrix;
import org.wlld.MatrixTools.MatrixOperation;
/**
 * Least-squares fit of the plane {@code value = w1 * x + w2 * y + b} over a set of
 * (x, y, value) samples.
 *
 * <p>Usage: construct with the expected number of samples, call
 * {@link #insertXY(double[], double)} once per sample, then {@link #regression()}.
 * When the fit cannot be computed, {@link #getValue(double, double)} falls back to the
 * mean of the inserted values.
 *
 * @author LiDaPeng
 */
public class LinearRegression {
    private double w1;
    private double w2;
    private double b;
    private Matrix XY;// design matrix, one row per sample: [x, y, 1]
    private Matrix NormSequence;// column of observed values, one row per sample
    private int xIndex = 0;// number of samples inserted so far
    private boolean isRegression = false;// true once a fit has been computed successfully
    private double avg;// mean of the observed values, used when the fit is unavailable

    /**
     * Creates a regression with room for {@code size} samples.
     *
     * @param size number of rows allocated for samples
     */
    public LinearRegression(int size) {
        XY = new Matrix(size, 3);
        NormSequence = new Matrix(size, 1);
        xIndex = 0;
        avg = 0;
    }

    /**
     * Creates an instance without sample storage. Only the coefficient getters/setters,
     * {@link #getValue(double, double)} and {@link #getCos(Matrix)} are usable on it;
     * inserting samples requires the sized constructor.
     */
    public LinearRegression() {
    }

    /**
     * Appends one sample.
     *
     * @param xy       the two coordinates {@code {x, y}}
     * @param sequence the value observed at that coordinate
     * @throws Exception if {@code xy} does not have exactly two elements, or if the
     *                   underlying matrix rejects the write
     */
    public void insertXY(double[] xy, double sequence) throws Exception {
        if (xy.length != 2) {
            throw new Exception("xy length is not equal to two");
        }
        XY.setNub(xIndex, 0, xy[0]);
        XY.setNub(xIndex, 1, xy[1]);
        XY.setNub(xIndex, 2, 1.0);// constant column for the intercept b
        NormSequence.setNub(xIndex, 0, sequence);
        xIndex++;
    }

    /**
     * Evaluates the fitted plane at {@code (x, y)}.
     *
     * @return {@code w1 * x + w2 * y + b} after a successful {@link #regression()},
     *         otherwise the stored average
     */
    public double getValue(double x, double y) {
        if (isRegression) {
            return w1 * x + w2 * y + b;
        }
        return avg;
    }

    /**
     * Returns the cosine between the coefficient vector {@code (w1, w2, b)} and
     * {@code vector}, as computed by {@code MatrixOperation.getNormCos}.
     *
     * @param vector the vector to compare against
     */
    public double getCos(Matrix vector) throws Exception {
        Matrix matrix = new Matrix(1, 3);
        matrix.setNub(0, 0, w1);
        matrix.setNub(0, 1, w2);
        matrix.setNub(0, 2, b);
        return MatrixOperation.getNormCos(matrix, vector);
    }

    /**
     * Runs the regression over the inserted samples.
     *
     * @throws Exception if no sample has been inserted, or if the matrix operations fail
     */
    public void regression() throws Exception {
        if (xIndex <= 0) {
            throw new Exception("regression matrix size is zero");
        }
        Matrix ws = MatrixOperation.getLinearRegression(XY, NormSequence);
        // A 1x1 result is treated as "matrix is singular" (per the original author's note).
        if (ws.getX() == 1 && ws.getY() == 1) {
            isRegression = false;
            // Fall back to the mean of the observed values. Use a fresh sum so repeated
            // calls do not accumulate, and index by i so every inserted sample is read.
            double sum = 0;
            for (int i = 0; i < xIndex; i++) {
                sum += NormSequence.getNumber(i, 0);
            }
            avg = sum / xIndex;
        } else {
            w1 = ws.getNumber(0, 0);
            w2 = ws.getNumber(1, 0);
            b = ws.getNumber(2, 0);
            isRegression = true;
        }
    }

    public double getW1() {
        return w1;
    }

    public void setW1(double w1) {
        this.w1 = w1;
    }

    public double getW2() {
        return w2;
    }

    public void setW2(double w2) {
        this.w2 = w2;
    }

    public double getB() {
        return b;
    }

    public void setB(double b) {
        this.b = b;
    }
}

View File

@ -0,0 +1,72 @@
package org.wlld.tools;
import org.wlld.MatrixTools.Matrix;
import org.wlld.entity.ThreeChannelMatrix;
import org.wlld.regressionForest.LinearRegression;
/**
 * Generates a short "excerpt id" for an image so that images can be compared quickly
 * by comparing their ids position by position.
 *
 * <p>Example: {@code String name = creatImageName(threeChannelMatrix, 5, 10);}
 */
public class FastPictureExcerpt {
    /** Side length of the square cells sampled inside each region. */
    private static final int CELL_SIZE = 5;

    /**
     * Builds the excerpt id of an image.
     *
     * <p>The {@code H} matrix of the image is cropped (centred) to a multiple of
     * {@code boxSize} in both directions and split into {@code boxSize x boxSize} regions.
     * A plane is fitted to each region, and the cosine between its coefficients and the
     * reference vector (1, 0, 0) is quantised into one of {@code regionSize} buckets. The
     * decimal bucket indices are concatenated in scan order.
     *
     * <p>Each bucket index is appended with {@code String.valueOf}, so a {@code regionSize}
     * above 10 produces multi-digit entries and ids whose positions no longer line up
     * one character per region.
     *
     * @param threeChannelMatrix image whose {@code H} matrix is analysed
     * @param boxSize            number of regions along each axis; every region must be at
     *                           least {@value #CELL_SIZE} elements wide in both directions
     * @param regionSize         number of cosine buckets, at least 1
     * @return the concatenated bucket indices of all regions
     * @throws IllegalArgumentException if {@code boxSize} or {@code regionSize} is below 1,
     *                                  or {@code boxSize} is too large for the image
     * @throws Exception                if a matrix or regression operation fails
     */
    public String creatImageName(ThreeChannelMatrix threeChannelMatrix, int boxSize, int regionSize) throws Exception {
        if (boxSize < 1) {
            throw new IllegalArgumentException("boxSize must be at least 1, got " + boxSize);
        }
        if (regionSize < 1) {
            throw new IllegalArgumentException("regionSize must be at least 1, got " + regionSize);
        }
        Matrix reference = new Matrix(1, 3);
        reference.setNub(0, 0, 1);
        reference.setNub(0, 1, 0);
        reference.setNub(0, 2, 0);
        Matrix h = threeChannelMatrix.getH();
        int xf = h.getX();
        int yf = h.getY();
        int xSize = xf / boxSize;
        int ySize = yf / boxSize;
        // A region narrower than one sampling cell yields no samples and could only fail
        // later inside the regression; report the real cause up front.
        if (xSize < CELL_SIZE || ySize < CELL_SIZE) {
            throw new IllegalArgumentException("boxSize " + boxSize + " is too large for a "
                    + xf + "x" + yf + " matrix: each region must span at least "
                    + CELL_SIZE + " elements per side");
        }
        // Drop the remainder evenly from both edges so the regions tile the crop exactly.
        int xMargin = (xf % boxSize) / 2;
        int yMargin = (yf % boxSize) / 2;
        Matrix cropped = h.getSonOfMatrix(xMargin, yMargin, xSize * boxSize, ySize * boxSize);
        int x = cropped.getX();
        int y = cropped.getY();
        StringBuilder id = new StringBuilder();
        for (int i = 0; i <= x - xSize; i += xSize) {
            for (int j = 0; j <= y - ySize; j += ySize) {
                Matrix region = cropped.getSonOfMatrix(i, j, xSize, ySize);
                id.append(getName(region, CELL_SIZE, reference, regionSize));
            }
        }
        return id.toString();
    }

    /**
     * Computes the bucket index of one region: samples one value per
     * {@code iSize x iSize} cell, fits a plane over the normalised cell coordinates and
     * quantises the cosine between the fitted coefficients and {@code vector}.
     */
    private String getName(Matrix h, int iSize, Matrix vector, int regionSize) throws Exception {
        int x = h.getX();
        int y = h.getY();
        int cellCount = (x / iSize) * (y / iSize);
        LinearRegression linearRegression = new LinearRegression(cellCount);
        // Offset of the sampled element inside each cell (3 for a 5x5 cell); kept as-is
        // because changing it would change every id generated so far.
        int cPoint = iSize / 2 + 1;
        double maxXSize = (double) x / iSize;
        double maxYSize = (double) y / iSize;
        double[] xy = new double[2];
        for (int i = 0; i <= x - iSize; i += iSize) {
            for (int j = 0; j <= y - iSize; j += iSize) {
                double value = h.getSonOfMatrix(i, j, iSize, iSize).getNumber(cPoint, cPoint);
                // Cell coordinates scaled by the number of cells along each axis.
                xy[0] = i / (double) iSize / maxXSize;
                xy[1] = j / (double) iSize / maxYSize;
                linearRegression.insertXY(xy, value);
            }
        }
        linearRegression.regression();
        double cosine = linearRegression.getCos(vector);
        return String.valueOf(nearestBucket(cosine, regionSize));
    }

    /**
     * Returns the index {@code i} in {@code [0, regionSize)} whose reference value
     * {@code cos(PI * i / regionSize)} is closest to {@code cosine}; the lowest index wins
     * on ties, and 0 is returned when nothing compares closer (for example a NaN cosine).
     */
    private static int nearestBucket(double cosine, int regionSize) {
        double step = 1 / (double) regionSize;
        int best = 0;
        double bestGap = -1;// -1 marks "no candidate yet"; real gaps are never negative
        for (int i = 0; i < regionSize; i++) {
            double gap = Math.abs(Math.cos(Math.PI * step * i) - cosine);
            if (bestGap == -1 || gap < bestGap) {
                bestGap = gap;
                best = i;
            }
        }
        return best;
    }
}

View File

@ -0,0 +1,299 @@
package org.wlld.voice;
import java.io.IOException;
import java.io.InputStream;
/**
 * Minimal sequential reader for MPEG audio (MP3) frames on top of an {@link InputStream}.
 *
 * <p>Call {@link #parserMp3Header()} first: it skips an optional leading ID3v2 tag and
 * reads the MPEG version, layer, sampling frequency and bitrate from the first frame
 * header. {@link #ParserFrame()} then returns one frame per call. Version, layer and
 * sampling frequency are taken from the first frame only; bitrate and padding are
 * re-read for each frame.
 */
public class MP3 {
    /**
     * Samples per frame, indexed as {@code [layer - 1][mpegVersion - 1]} (version 3 stands
     * for MPEG-2.5); e.g. {@code Mp3_Sample[2][0]} is the value for MPEG-1 Layer 3.
     */
    private final static int[][] Mp3_Sample = {{384, 384, 384},
            {1152, 1152, 1152}, {1152, 576, 576}};
    /**
     * MPEG-1 bitrates in kbps. Rows are header bitrate index 1..14 (stored at 0..13);
     * columns are Layer 1, Layer 2, Layer 3.
     */
    private final static int[][] MPeg1ByteRate = {{32, 32, 32},
            {64, 48, 40}, {96, 56, 48}, {128, 64, 56}, {160, 80, 64},
            {192, 96, 80}, {224, 112, 96}, {256, 128, 112},
            {288, 160, 128}, {320, 192, 160}, {352, 224, 192},
            {384, 256, 224}, {416, 320, 256}, {448, 384, 320}};
    /**
     * Bitrates in kbps for every non-MPEG-1 version. Rows are header bitrate index 1..14
     * (stored at 0..13); column 0 is Layer 1, column 1 is shared by Layer 2 and Layer 3.
     */
    private final static int[][] MPeg2ByteRate = {{32, 8}, {48, 16},
            {56, 24}, {64, 32}, {80, 40}, {96, 48}, {112, 56},
            {128, 64}, {144, 80}, {160, 96}, {176, 112}, {192, 128},
            {224, 144}, {256, 160}};
    /**
     * Sampling frequencies in Hz, indexed as {@code [mpegVersion - 1][frequencyIndex]}.
     */
    private final static int[][] SampleFrequency = {{44100, 48000, 32000},
            {22050, 24000, 16000}, {11025, 12000, 8000}};

    private InputStream stream;

    /** MPEG version: 1, 2, or 3 for MPEG-2.5. */
    private int Mpeg_Version;
    /** Layer: 1, 2 or 3. */
    private int Layer_Version;
    /** Bitrate of the current frame, in kbps. */
    private int ByteRate;
    /** Sampling frequency in Hz. */
    private int Frequency;
    /** Frame length adjustment, 0 or 1. */
    private int Padding;
    /** Samples per frame, looked up from {@link #Mp3_Sample}. */
    private int Sample;
    /**
     * First three bytes of the frame header read by {@link #parserMp3Header()}, kept so
     * that the next {@link #ParserFrame()} call can return that frame; null once consumed.
     */
    private int[] FrameHeader;

    /**
     * @param stream source of the MP3 data; it is read sequentially and never closed here
     */
    public MP3(InputStream stream) {
        this.stream = stream;
    }

    /**
     * Skips an optional ID3v2 tag and parses the first frame header.
     *
     * @return the size of the ID3v2 tag including its 10-byte header (0 when there is no
     *         tag), or -1 when the stream ends early or the header is not a valid or
     *         supported MPEG audio frame header
     * @throws IOException if reading from the stream fails
     */
    public int parserMp3Header() throws IOException {
        FrameHeader = new int[3];
        if (!readIntoFrameHeader()) {
            return -1;
        }
        int TagHeaderSize = 0;
        if (FrameHeader[0] == 'I' && FrameHeader[1] == 'D'
                && FrameHeader[2] == '3') {
            // Remaining 7 bytes of the tag header: version (2), flags (1), size (4).
            byte[] tagHeader = readFull(7);
            if (tagHeader == null || tagHeader.length < 7) {
                return -1;
            }
            // The size is stored as four 7-bit groups.
            int tagHeaderSize = ((tagHeader[3] & 0x7F) << 21)
                    + ((tagHeader[4] & 0x7F) << 14)
                    + ((tagHeader[5] & 0x7F) << 7) + (tagHeader[6] & 0x7F);
            if (!skipBytes(tagHeaderSize)) {
                return -1;
            }
            // The stored size excludes the 10 header bytes; add them for the full tag length.
            TagHeaderSize = tagHeaderSize + 10;
            if (!readIntoFrameHeader()) {
                return -1;
            }
        }
        // Frame sync: first byte and the top three bits of the second must all be 1.
        if (FrameHeader[0] != 0xFF || (FrameHeader[1] >> 5) != 7) {
            return -1;
        }
        switch ((FrameHeader[1] & 24) >> 3) {
            case 0:
                Mpeg_Version = 3;// MPEG-2.5
                break;
            case 2:
                Mpeg_Version = 2;
                break;
            case 3:
                Mpeg_Version = 1;
                break;
            default:
                return -1;
        }
        switch ((FrameHeader[1] & 6) >> 1) {
            case 1:
                Layer_Version = 3;
                break;
            case 2:
                Layer_Version = 2;
                break;
            case 3:
                Layer_Version = 1;
                break;
            default:
                return -1;
        }
        int index = FrameHeader[2] >> 4;
        if (index < 1 || index > 14) {
            return -1;
        }
        updateByteRate(index - 1);
        int frequencyIndex = (FrameHeader[2] & 12) >> 2;
        if (frequencyIndex >= SampleFrequency[Mpeg_Version - 1].length) {
            return -1;// index 3 has no table entry
        }
        Frequency = SampleFrequency[Mpeg_Version - 1][frequencyIndex];
        Padding = (FrameHeader[2] & 2) >> 1;
        Sample = Mp3_Sample[Layer_Version - 1][Mpeg_Version - 1];
        return TagHeaderSize;
    }

    /**
     * Reads and discards up to {@code num} frames.
     *
     * @param num number of frames to skip
     * @return the total number of bytes in the frames that were skipped
     * @throws IOException if reading from the stream fails
     */
    public long SkipFrame(long num) throws IOException {
        long skipped = 0;
        for (int i = 0; i < num; i++) {
            byte[] frame = ParserFrame();
            if (frame == null) {
                return skipped;
            }
            skipped += frame.length;
        }
        return skipped;
    }

    /**
     * Reads the next frame. The first call after {@link #parserMp3Header()} returns the
     * frame whose header that method already consumed.
     *
     * <p>If the stream ends in the middle of a frame, the missing tail of the returned
     * array is left zero-filled.
     *
     * @return the frame bytes including the 3 header bytes, or null at end of stream, on
     *         an ID3v1 "TAG" marker, or when the header is invalid
     * @throws IOException           if reading from the stream fails
     * @throws IllegalStateException if no frame header has been parsed successfully yet
     */
    public byte[] ParserFrame() throws IOException {
        byte[] byteFrameHeader;
        int index;
        if (FrameHeader == null) {
            byteFrameHeader = readFull(3);
            if (byteFrameHeader == null || byteFrameHeader.length < 3
                    || isId3v1Tag(byteFrameHeader[0] & 0xFF,
                    byteFrameHeader[1] & 0xFF, byteFrameHeader[2] & 0xFF)) {
                return null;
            }
            if ((byteFrameHeader[0] & 0xFF) != 0xFF
                    || ((byteFrameHeader[1] & 0xFF) >> 5) != 7) {
                System.out.println("该MP3文件非法.");
                return null;// frame sync bits are not all set
            }
            index = (byteFrameHeader[2] & 0xFF) >> 4;
            Padding = (byteFrameHeader[2] & 2) >> 1;
        } else {
            if (FrameHeader.length < 3
                    || isId3v1Tag(FrameHeader[0], FrameHeader[1], FrameHeader[2])) {
                return null;
            }
            index = FrameHeader[2] >> 4;
            // Re-emit the cached header: each byte goes to its own position.
            byteFrameHeader = new byte[]{(byte) FrameHeader[0],
                    (byte) FrameHeader[1], (byte) FrameHeader[2]};
            FrameHeader = null;
        }
        if (index < 1 || index > 14) {
            return null;// invalid bitrate index
        }
        updateByteRate(index - 1);
        if (Frequency == 0) {
            throw new IllegalStateException(
                    "parserMp3Header() must succeed before ParserFrame() is called");
        }
        // Frame length in bytes; ByteRate and Padding can change from frame to frame.
        // NOTE(review): Layer 1 padding is normally a 4-byte slot — confirm if Layer 1 matters.
        int frameSize = Sample / 8 * ByteRate * 1000 / Frequency + Padding;
        byte[] body = readFull(frameSize - 3);
        if (body == null) {
            return null;
        }
        byte[] data = new byte[frameSize];
        System.arraycopy(byteFrameHeader, 0, data, 0, 3);
        System.arraycopy(body, 0, data, 3, body.length);
        return data;
    }

    /**
     * Reads up to {@code size} bytes, blocking until they are available or the stream ends.
     *
     * @param size number of bytes wanted
     * @return an array of exactly {@code size} bytes, a shorter array when the stream
     *         ended early, or null when no byte could be read
     * @throws IOException if reading from the stream fails
     */
    public byte[] readFull(int size) throws IOException {
        byte[] data = new byte[size];
        int n = 0;
        while (n < size) {
            int k = stream.read(data, n, size - n);
            if (k < 0) {
                break;
            }
            n += k;
        }
        if (n <= 0) {
            return null;
        }
        if (n < size) {
            byte[] partial = new byte[n];
            System.arraycopy(data, 0, partial, 0, n);
            return partial;
        }
        return data;
    }

    /**
     * Skips exactly {@code skipLength} bytes.
     *
     * @param skipLength number of bytes to skip
     * @return true when all bytes were skipped, false when the stream ended first
     * @throws IOException if the stream fails
     */
    public boolean skipBytes(long skipLength) throws IOException {
        long k = 0;
        while (k < skipLength) {
            long n = stream.skip(skipLength - k);
            if (n < 0) {
                return false;
            }
            if (n == 0) {
                // skip() may return 0 without being at EOF; a single read() tells the two
                // apart and guarantees progress, so the loop cannot spin forever.
                if (stream.read() < 0) {
                    return false;
                }
                n = 1;
            }
            k += n;
        }
        return true;
    }

    /** Reads three bytes into {@link #FrameHeader}; false when fewer than three are available. */
    private boolean readIntoFrameHeader() throws IOException {
        byte[] bytes = readFull(3);
        if (bytes == null || bytes.length < 3) {
            return false;
        }
        for (int i = 0; i < 3; i++) {
            FrameHeader[i] = bytes[i] & 0xFF;
        }
        return true;
    }

    /** True when the three values spell the ID3v1 marker "TAG". */
    private static boolean isId3v1Tag(int b0, int b1, int b2) {
        return b0 == 'T' && b1 == 'A' && b2 == 'G';
    }

    /**
     * Sets {@link #ByteRate} from the bitrate tables for the current version and layer;
     * leaves it unchanged when the layer is not 1, 2 or 3.
     *
     * @param index zero-based row in the bitrate tables
     */
    private void updateByteRate(int index) {
        if (Layer_Version < 1 || Layer_Version > 3) {
            return;
        }
        if (Mpeg_Version == 1) {
            ByteRate = MPeg1ByteRate[index][Layer_Version - 1];
        } else {
            ByteRate = MPeg2ByteRate[index][Layer_Version == 1 ? 0 : 1];
        }
    }
}

View File

@ -0,0 +1,15 @@
package org.wlld.voice;
import javax.sound.sampled.UnsupportedAudioFileException;
import java.io.File;
import java.io.IOException;
/**
 * Manual smoke test for {@link WaveFile}: loads a WAV file and prints its frame count
 * and the value of sample 140.
 */
public class VoiceTest {
    /** File used when no path is given on the command line. */
    private static final String DEFAULT_WAV_PATH = "D:\\sondTest\\wo.wav";
    /** Index of the sample that is printed. */
    private static final int SAMPLE_INDEX = 140;

    /**
     * @param args optional; {@code args[0]} is the path of the WAV file to inspect
     * @throws UnsupportedAudioFileException if the file is not a supported audio file
     * @throws IOException                   if the file cannot be read
     */
    public static void main(String[] args) throws UnsupportedAudioFileException, IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_WAV_PATH;
        WaveFile wav = new WaveFile(new File(path));
        int amplitudeExample = wav.getSampleInt(SAMPLE_INDEX);
        System.out.println("帧数:" + wav.getFramesCount());
        System.out.println("140帧的幅度" + amplitudeExample);
    }
}

View File

@ -0,0 +1,120 @@
package org.wlld.voice;
import javax.sound.sampled.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
/**
 * Loads a sampled audio file fully into memory, exposes its individual sample values and,
 * when an audio line is available, allows playing it through a {@link Clip}.
 */
public class WaveFile {
    public final int NOT_SPECIFIED = AudioSystem.NOT_SPECIFIED; // -1
    public final int INT_SIZE = 4;
    private int sampleSize = NOT_SPECIFIED;// bytes per sample of one channel
    private long framesCount = NOT_SPECIFIED;
    private int sampleRate = NOT_SPECIFIED;
    private int channelsNum;
    private byte[] data; // raw audio bytes, channels interleaved
    private AudioFormat af;
    private Clip clip;
    private boolean canPlay;

    /**
     * Reads the whole file into memory and tries to prepare a clip for playback.
     * Playback being unavailable does not prevent the sample data from being read; check
     * {@link #isCanPlay()} before calling {@link #play()}.
     *
     * @param file the audio file to load
     * @throws FileNotFoundException         if {@code file} does not exist
     * @throws UnsupportedAudioFileException if the file is not a supported audio file, or
     *                                       its length is unknown or too large to buffer
     * @throws IOException                   if reading the file fails
     */
    public WaveFile(File file) throws UnsupportedAudioFileException, IOException {
        if (!file.exists()) {
            throw new FileNotFoundException(file.getAbsolutePath());
        }
        try (AudioInputStream ais = AudioSystem.getAudioInputStream(file)) {
            af = ais.getFormat();
            framesCount = ais.getFrameLength();
            sampleRate = (int) af.getSampleRate();
            sampleSize = af.getSampleSizeInBits() / 8;
            channelsNum = af.getChannels();
            long dataLength = framesCount * af.getSampleSizeInBits() * af.getChannels() / 8;
            if (framesCount < 0 || dataLength < 0 || dataLength > Integer.MAX_VALUE) {
                throw new UnsupportedAudioFileException(
                        "unknown or unsupported audio length: " + file.getAbsolutePath());
            }
            data = new byte[(int) dataLength];
            // A single read() may return fewer bytes than requested; loop until full or EOF.
            int filled = 0;
            while (filled < data.length) {
                int read = ais.read(data, filled, data.length - filled);
                if (read <= 0) {
                    break;
                }
                filled += read;
            }
        }
        try (AudioInputStream aisForPlay = AudioSystem.getAudioInputStream(file)) {
            clip = AudioSystem.getClip();
            clip.open(aisForPlay);
            clip.setFramePosition(0);
            canPlay = true;
        } catch (LineUnavailableException | IllegalArgumentException | SecurityException e) {
            // No usable audio line (e.g. a machine without a sound device): keep the
            // decoded samples available and only disable playback.
            canPlay = false;
            System.out.println("I can play only 8bit and 16bit music.");
        }
    }

    /** @return true when a clip was opened successfully and playback is possible */
    public boolean isCanPlay() {
        return canPlay;
    }

    /**
     * Starts playback.
     *
     * @throws IllegalStateException if no clip could be obtained for this file
     */
    public void play() {
        requireClip().start();
    }

    /**
     * Stops playback.
     *
     * @throws IllegalStateException if no clip could be obtained for this file
     */
    public void stop() {
        requireClip().stop();
    }

    public AudioFormat getAudioFormat() {
        return af;
    }

    /** @return the size of one sample of one channel, in bytes */
    public int getSampleSize() {
        return sampleSize;
    }

    /** @return the frame count divided by the format's frame rate */
    public double getDurationTime() {
        return getFramesCount() / getAudioFormat().getFrameRate();
    }

    public long getFramesCount() {
        return framesCount;
    }

    /**
     * Returns one sample (amplitude value). For multi-channel audio the samples are
     * interleaved: with stereo, 0 is the first sample of the left channel, 1 the first
     * sample of the right channel, 2 the second sample of the left channel, and so on.
     *
     * <p>The bytes are combined using the byte order reported by the audio format, and
     * the result is sign-extended when the encoding is {@code PCM_SIGNED}.
     *
     * @param sampleNumber zero-based index into the interleaved samples
     * @return the sample value
     * @throws IllegalArgumentException if {@code sampleNumber} is out of range
     * @throws IllegalStateException    if the sample size is not between 1 and 4 bytes
     */
    public int getSampleInt(int sampleNumber) {
        if (sampleSize < 1 || sampleSize > INT_SIZE) {
            throw new IllegalStateException("unsupported sample size in bytes: " + sampleSize);
        }
        if (sampleNumber < 0 || sampleNumber >= data.length / sampleSize) {
            throw new IllegalArgumentException(
                    "sample number can't be < 0 or >= data.length/"
                            + sampleSize);
        }
        // Samples are interleaved, so sample k starts at byte k * sampleSize.
        int offset = sampleNumber * sampleSize;
        boolean bigEndian = af.isBigEndian();
        int sample = 0;
        for (int i = 0; i < sampleSize; i++) {
            int shift = 8 * (bigEndian ? sampleSize - 1 - i : i);
            sample |= (data[offset + i] & 0xFF) << shift;
        }
        if (sampleSize < INT_SIZE && AudioFormat.Encoding.PCM_SIGNED.equals(af.getEncoding())) {
            // Propagate the sample's sign bit into the unused high bits of the int.
            int unusedBits = 32 - 8 * sampleSize;
            sample = (sample << unusedBits) >> unusedBits;
        }
        return sample;
    }

    public int getSampleRate() {
        return sampleRate;
    }

    /** @return the playback clip, or null when none could be obtained */
    public Clip getClip() {
        return clip;
    }

    /** Returns the clip, failing with a clear message when none could be obtained. */
    private Clip requireClip() {
        if (clip == null) {
            throw new IllegalStateException("playback is not available for this file");
        }
        return clip;
    }
}