From 5d88a5157b4fc8c8b6613a7f4627178b2e23019e Mon Sep 17 00:00:00 2001 From: "794757862@qq.com" <6205194max> Date: Mon, 21 Nov 2022 16:21:38 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=9B=BE=E7=89=87=E6=91=98?= =?UTF-8?q?=E8=A6=81id=E7=94=9F=E6=88=90=EF=BC=8C=E5=8F=AF=E8=BF=9B?= =?UTF-8?q?=E8=A1=8C=E5=8C=BA=E5=9F=9F=E5=86=85=E5=9B=BE=E7=89=87=E5=BF=AB?= =?UTF-8?q?=E9=80=9F=E7=9B=B8=E4=BC=BC=E5=BA=A6=E5=AF=B9=E6=AF=94=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 14 + .../regressionForest/LinearRegression.java | 105 ++++++ .../org/wlld/tools/FastPictureExcerpt.java | 72 +++++ src/main/java/org/wlld/voice/MP3.java | 299 ++++++++++++++++++ src/main/java/org/wlld/voice/VoiceTest.java | 15 + src/main/java/org/wlld/voice/WaveFile.java | 120 +++++++ 6 files changed, 625 insertions(+) create mode 100644 src/main/java/org/wlld/regressionForest/LinearRegression.java create mode 100644 src/main/java/org/wlld/tools/FastPictureExcerpt.java create mode 100644 src/main/java/org/wlld/voice/MP3.java create mode 100644 src/main/java/org/wlld/voice/VoiceTest.java create mode 100644 src/main/java/org/wlld/voice/WaveFile.java diff --git a/README.md b/README.md index 2436c57..1f4767a 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,20 @@ 本演示训练素材位置在: src/test/image 注意:以上图片识别代码样例为训练素材为物品全图充满图片(自己看能看到橘子训练图片为全图充满,苹果也是).自行开发时用以上代码样例时,请也使用全图充满训练物品的图片来做训练,非全图充满训练素材图训练api有变化! 
``` +### 通过给图片生成摘要id进行快速相似度对比 +``` java +//参数分别为: +//第一个参数:threeChannelMatrix,图片矩阵(图片矩阵如何提取,上文有讲不在阐述) +//第二个参数:boxSize,将一张图片横纵各分为几个区域提取特征 +参数说明:该值越大,摘要id敏感度越高,该参数有最大值。最大值为图片:图片最小边长/5,超过会报错数组越界 +//第三个参数:regionSize,相似特征区域分区种类数量 +参数说明:该值越大,摘要id敏感度越高 +//返回name 即为该图片摘要id,通过id逐位对比即可对比相似程度 +//什么是id敏感度: +//id敏感度越高,对图片变化越敏感,越适合越大的检索区域匹配,即特征越细致,但缺点id长度越长。 +//id敏感度越低,对图片变化越不敏感,越适合越小的检索区域匹配,特征越粗,优点是id长度越短。 +String name = creatImageName(threeChannelMatrix, 5, 10); +``` ### 自然语言分类最简API 说明: ``` java //通过txt默认格式进行读取 diff --git a/src/main/java/org/wlld/regressionForest/LinearRegression.java b/src/main/java/org/wlld/regressionForest/LinearRegression.java new file mode 100644 index 0000000..1d4b45d --- /dev/null +++ b/src/main/java/org/wlld/regressionForest/LinearRegression.java @@ -0,0 +1,105 @@ +package org.wlld.regressionForest; + + +import org.wlld.MatrixTools.Matrix; +import org.wlld.MatrixTools.MatrixOperation; + +/** + * @param + * @DATA + * @Author LiDaPeng + * @Description rgb回归 Y = r *wr + g * wg + b* wb + */ +public class LinearRegression { + private double w1; + private double w2; + private double b; + private Matrix XY;//坐标矩阵 + private Matrix NormSequence;//内积序列矩阵 + private int xIndex = 0;//记录插入数量 + private boolean isRegression = false;//是否进行了回归 + private double avg;//结果平均值 + + public LinearRegression(int size) {//初始化rgb矩阵 + XY = new Matrix(size, 3); + NormSequence = new Matrix(size, 1); + xIndex = 0; + avg = 0; + } + + public LinearRegression() { + + } + + public void insertXY(double[] xy, double sequence) throws Exception {//rgb插入矩阵 + if (xy.length == 2) { + XY.setNub(xIndex, 0, xy[0]); + XY.setNub(xIndex, 1, xy[1]); + XY.setNub(xIndex, 2, 1.0); + NormSequence.setNub(xIndex, 0, sequence); + xIndex++; + } else { + throw new Exception("rgb length is not equals three"); + } + } + + public double getValue(double x, double y) {//获取值 + if (isRegression) { + return w1 * x + w2 * y + b; + } + return avg; + } + + public double getCos(Matrix vector) throws Exception 
{//获取该直线与指定向量之间的余弦 + Matrix matrix = new Matrix(1, 3); + matrix.setNub(0, 0, w1); + matrix.setNub(0, 1, w2); + matrix.setNub(0, 2, b); + return MatrixOperation.getNormCos(matrix, vector); + } + + public void regression() throws Exception {//开始进行回归 + if (xIndex > 0) { + Matrix ws = MatrixOperation.getLinearRegression(XY, NormSequence); + if (ws.getX() == 1 && ws.getY() == 1) {//矩阵奇异 + isRegression = false; + for (int i = 0; i < xIndex; i++) { + avg = avg + NormSequence.getNumber(xIndex, 0); + } + avg = avg / xIndex; + } else { + w1 = ws.getNumber(0, 0); + w2 = ws.getNumber(1, 0); + b = ws.getNumber(2, 0); + isRegression = true; + } + // System.out.println("wr==" + wr + ",wg==" + wg + ",b==" + b); + } else { + throw new Exception("regression matrix size is zero"); + } + } + + public double getW1() { + return w1; + } + + public void setW1(double w1) { + this.w1 = w1; + } + + public double getW2() { + return w2; + } + + public void setW2(double w2) { + this.w2 = w2; + } + + public double getB() { + return b; + } + + public void setB(double b) { + this.b = b; + } +} diff --git a/src/main/java/org/wlld/tools/FastPictureExcerpt.java b/src/main/java/org/wlld/tools/FastPictureExcerpt.java new file mode 100644 index 0000000..1ad7e26 --- /dev/null +++ b/src/main/java/org/wlld/tools/FastPictureExcerpt.java @@ -0,0 +1,72 @@ +package org.wlld.tools; + +import org.wlld.MatrixTools.Matrix; +import org.wlld.entity.ThreeChannelMatrix; +import org.wlld.regressionForest.LinearRegression; + +public class FastPictureExcerpt {//图片摘要id生成 + + //String name = creatImageName(threeChannelMatrix, 5, 10); + // 图像矩阵,横纵各分多少个区域,余弦区域分几份 + public String creatImageName(ThreeChannelMatrix threeChannelMatrix, int boxSize, int regionSize) throws Exception {//生成文件名 + int iSize = 5; + Matrix vector = new Matrix(1, 3); + vector.setNub(0, 0, 1); + vector.setNub(0, 1, 0); + vector.setNub(0, 2, 0); + Matrix h = threeChannelMatrix.getH(); + int xf = h.getX(); + int yf = h.getY(); + int xMO = (xf % boxSize) / 
2; + int yMO = (yf % boxSize) / 2; + int xSize = xf / boxSize; + int ySize = yf / boxSize; + Matrix hr = h.getSonOfMatrix(xMO, yMO, xSize * boxSize, ySize * boxSize); + int x = hr.getX(); + int y = hr.getY(); + StringBuilder stringBuilder = new StringBuilder(); + for (int i = 0; i <= x - xSize; i += xSize) { + for (int j = 0; j <= y - ySize; j += ySize) { + Matrix sonH = hr.getSonOfMatrix(i, j, xSize, ySize); + String name = getName(sonH, iSize, vector, regionSize); + stringBuilder.append(name); + } + } + return stringBuilder.toString(); + } + + private String getName(Matrix h, int iSize, Matrix vector, int regionSize) throws Exception { + int x = h.getX(); + int y = h.getY(); + int size = (x / iSize) * (y / iSize); + LinearRegression linearRegression = new LinearRegression(size); + int cPoint = iSize / 2 + 1; + double maxXSize = (double) x / iSize; + double maxYSize = (double) y / iSize; + double[] xy = new double[2]; + for (int i = 0; i <= x - iSize; i += iSize) { + for (int j = 0; j <= y - iSize; j += iSize) { + double value = h.getSonOfMatrix(i, j, iSize, iSize).getNumber(cPoint, cPoint);//灰度值 + double px = i / (double) iSize / maxXSize; + double py = j / (double) iSize / maxYSize; + xy[0] = px; + xy[1] = py; + linearRegression.insertXY(xy, value); + } + } + linearRegression.regression(); + double myCos = linearRegression.getCos(vector);//余弦 + double oneSize = 1 / (double) regionSize;//分几个区间 + int index = 0; + double minSub = -1; + for (int i = 0; i < regionSize; i++) { + double cos = Math.cos(Math.PI * oneSize * i); + double sub = Math.abs(cos - myCos); + if (minSub == -1 || sub < minSub) { + minSub = sub; + index = i; + } + } + return String.valueOf(index); + } +} diff --git a/src/main/java/org/wlld/voice/MP3.java b/src/main/java/org/wlld/voice/MP3.java new file mode 100644 index 0000000..2d3bd6a --- /dev/null +++ b/src/main/java/org/wlld/voice/MP3.java @@ -0,0 +1,299 @@ +package org.wlld.voice; + + +import java.io.IOException; +import java.io.InputStream; + 
/**
 * Minimal MPEG audio frame reader on top of an {@link InputStream}.
 *
 * <p>Call {@link #parserMp3Header()} once to skip an optional ID3v2 tag and to
 * learn the stream parameters from the first frame header, then call
 * {@link #ParserFrame()} repeatedly to obtain one frame per call. The class
 * does not detect the end of the audio data by itself beyond recognising an
 * ID3v1 {@code TAG} marker or running out of bytes.
 */
public class MP3 {

    /**
     * Samples per frame, indexed as {@code [layer - 1][version - 1]} where
     * version 1 = MPEG-1, 2 = MPEG-2 and 3 = MPEG-2.5.
     */
    private final static int[][] Mp3_Sample = {{384, 384, 384},
            {1152, 1152, 1152}, {1152, 576, 576}};

    /**
     * MPEG-1 bit rates in kbps. Row = bit-rate index from the header minus one
     * (valid header indices are 1..14), column = layer minus one.
     */
    private final static int[][] MPeg1ByteRate = {{32, 32, 32},
            {64, 48, 40}, {96, 56, 48}, {128, 64, 56}, {160, 80, 64},
            {192, 96, 80}, {224, 112, 96}, {256, 128, 112},
            {288, 160, 128}, {320, 192, 160}, {352, 224, 192},
            {384, 256, 224}, {416, 320, 256}, {448, 384, 320}};

    /**
     * MPEG-2 / MPEG-2.5 bit rates in kbps. Row = bit-rate index minus one,
     * column 0 = Layer 1, column 1 = Layer 2 and Layer 3.
     */
    private final static int[][] MPeg2ByteRate = {{32, 8}, {48, 16},
            {56, 24}, {64, 32}, {80, 40}, {96, 48}, {112, 56},
            {128, 64}, {144, 80}, {160, 96}, {176, 112}, {192, 128},
            {224, 144}, {256, 160}};

    /**
     * Sampling frequencies in Hz, indexed as {@code [version - 1][frequency index]}.
     * The header can also carry frequency index 3, which has no entry here.
     */
    private final static int[][] SampleFrequency = {{44100, 48000, 32000},
            {22050, 24000, 16000}, {11025, 12000, 8000}};

    // ==================================================================================
    private InputStream stream;

    /**
     * @param stream source of the MP3 bytes; it is read sequentially and never closed here
     */
    public MP3(InputStream stream) {
        this.stream = stream;
    }

    /** MPEG version: 1 = MPEG-1, 2 = MPEG-2, 3 = MPEG-2.5. */
    private int Mpeg_Version;

    /** Layer: 1, 2 or 3. */
    private int Layer_Version;

    /** Bit rate of the current frame in kbps. */
    private int ByteRate;

    /** Sampling frequency in Hz. */
    private int Frequency;

    /** Padding bit of the current frame, 0 or 1. */
    private int Padding;

    /** Samples per frame. */
    private int Sample;

    /**
     * First three bytes of the first frame, cached by {@link #parserMp3Header()} so
     * that the first {@link #ParserFrame()} call can return a complete frame.
     * {@code null} once consumed.
     */
    private int[] FrameHeader;

    /**
     * Skips an optional ID3v2 tag and parses the first frame header.
     *
     * @return size in bytes of the ID3v2 tag including its 10-byte header (0 when
     *         there is no tag), or -1 when the stream is too short, the frame sync
     *         is missing, or the header uses a reserved version, layer, bit-rate
     *         or sampling-frequency value
     * @throws IOException if reading the stream fails
     */
    public int parserMp3Header() throws IOException {
        FrameHeader = new int[3];
        byte[] tempBytes = readFull(3);
        // readFull may hand back fewer bytes than requested at end of stream.
        if (tempBytes == null || tempBytes.length < 3) {
            return -1;
        }
        for (int i = 0; i < 3; i++) {
            FrameHeader[i] = tempBytes[i] & 0xFF;
        }
        int TagHeaderSize = 0;
        if (FrameHeader[0] == 'I' && FrameHeader[1] == 'D'
                && FrameHeader[2] == '3') {
            // Remaining 7 bytes of the tag header: version (2), flags (1), size (4).
            byte[] tagHeader = readFull(7);
            if (tagHeader == null || tagHeader.length < 7) {
                return -1;
            }
            // The size is stored as four 7-bit groups.
            int tagHeaderSize = ((tagHeader[3] & 0x7F) << 21)
                    + ((tagHeader[4] & 0x7F) << 14)
                    + ((tagHeader[5] & 0x7F) << 7) + (tagHeader[6] & 0x7F);
            if (!skipBytes(tagHeaderSize)) {
                return -1;
            }
            // The stored size excludes the 10 header bytes already consumed.
            TagHeaderSize = tagHeaderSize + 10;
            tempBytes = readFull(3);
            if (tempBytes == null || tempBytes.length < 3) {
                return -1;
            }
            for (int i = 0; i < 3; i++) {
                FrameHeader[i] = tempBytes[i] & 0xFF;
            }
        }
        // Frame sync: the first 11 bits must all be set.
        if (FrameHeader[0] != 0xFF || (FrameHeader[1] >> 5) != 7) {
            return -1;
        }
        switch ((FrameHeader[1] & 24) >> 3) {
            case 0:
                Mpeg_Version = 3; // MPEG-2.5
                break;
            case 2:
                Mpeg_Version = 2;
                break;
            case 3:
                Mpeg_Version = 1;
                break;
            default:
                return -1;
        }
        switch ((FrameHeader[1] & 6) >> 1) {
            case 1:
                Layer_Version = 3;
                break;
            case 2:
                Layer_Version = 2;
                break;
            case 3:
                Layer_Version = 1;
                break;
            default:
                return -1;
        }
        int index = FrameHeader[2] >> 4;
        if (index < 1 || index > 14) {
            return -1;
        }
        ByteRate = lookupBitRate(index - 1);
        int frequencyIndex = (FrameHeader[2] & 12) >> 2;
        if (frequencyIndex >= SampleFrequency[Mpeg_Version - 1].length) {
            return -1; // index 3 has no table entry
        }
        Frequency = SampleFrequency[Mpeg_Version - 1][frequencyIndex];
        Padding = (FrameHeader[2] & 2) >> 1;
        Sample = Mp3_Sample[Layer_Version - 1][Mpeg_Version - 1];
        return TagHeaderSize;
    }

    /**
     * Reads and discards up to {@code num} frames.
     *
     * @param num number of frames to skip
     * @return total number of bytes in the frames that were skipped; smaller than
     *         expected when the stream ends early
     * @throws IOException if reading the stream fails
     */
    public long SkipFrame(long num) throws IOException {
        long skipped = 0;
        for (long i = 0; i < num; i++) {
            byte[] frame = ParserFrame();
            if (frame == null) {
                return skipped;
            }
            skipped += frame.length;
        }
        return skipped;
    }

    /**
     * Reads the next frame, header included.
     *
     * <p>The frame length is {@code Sample / 8 * ByteRate * 1000 / Frequency + Padding};
     * version, layer and sampling frequency come from {@link #parserMp3Header()},
     * while bit rate and padding are taken from each frame's own header. If the
     * stream ends inside a frame, the missing tail of the returned array is left
     * as zero bytes.
     *
     * @return the frame bytes, or {@code null} at end of stream, on an ID3v1
     *         {@code TAG} marker, or when the header is not a valid frame header
     * @throws IOException           if reading the stream fails
     * @throws IllegalStateException if no header has been parsed successfully yet
     */
    public byte[] ParserFrame() throws IOException {
        byte[] byteFrameHeader;
        int index;
        if (FrameHeader == null) {
            byteFrameHeader = readFull(3);
            if (byteFrameHeader == null || byteFrameHeader.length < 3
                    || (byteFrameHeader[0] & 0xFF) == 'T'
                    && (byteFrameHeader[1] & 0xFF) == 'A'
                    && (byteFrameHeader[2] & 0xFF) == 'G') {
                return null;
            }
            if ((byteFrameHeader[0] & 0xFF) != 0xFF
                    || ((byteFrameHeader[1] & 0xFF) >> 5) != 7) {
                System.out.println("该MP3文件非法.");
                return null; // frame sync missing
            }
            index = (byteFrameHeader[2] & 0xFF) >> 4;
            Padding = (byteFrameHeader[2] & 2) >> 1;
        } else {
            // First call after parserMp3Header(): reuse the header bytes it already consumed.
            if (FrameHeader.length < 3
                    || FrameHeader[0] == 'T' && FrameHeader[1] == 'A'
                    && FrameHeader[2] == 'G') {
                return null;
            }
            index = FrameHeader[2] >> 4;
            Padding = (FrameHeader[2] & 2) >> 1;
            // Each cached byte goes to its own slot (all three used to be written to slot 0).
            byteFrameHeader = new byte[]{(byte) FrameHeader[0],
                    (byte) FrameHeader[1], (byte) FrameHeader[2]};
            FrameHeader = null;
        }
        if (index < 1 || index > 14) {
            return null; // free-format or invalid bit-rate index
        }
        ByteRate = lookupBitRate(index - 1);
        if (Frequency <= 0) {
            // Without a parsed header the division below would fail with ArithmeticException.
            throw new IllegalStateException("parserMp3Header() must succeed before ParserFrame()");
        }
        int frameSize = Sample / 8 * ByteRate * 1000 / Frequency + Padding;
        byte[] temp = readFull(frameSize - 3);
        if (temp == null) {
            return null;
        }
        byte[] data = new byte[frameSize];
        System.arraycopy(byteFrameHeader, 0, data, 0, 3);
        System.arraycopy(temp, 0, data, 3, temp.length);
        return data;
    }

    /**
     * Looks up the bit rate for the current version and layer.
     *
     * @param index zero-based row of the bit-rate tables
     * @return bit rate in kbps, or the current value when the layer is not set
     */
    private int lookupBitRate(int index) {
        if (Mpeg_Version == 1) {
            switch (Layer_Version) {
                case 1:
                    return MPeg1ByteRate[index][0];
                case 2:
                    return MPeg1ByteRate[index][1];
                case 3:
                    return MPeg1ByteRate[index][2];
                default:
                    return ByteRate;
            }
        }
        switch (Layer_Version) {
            case 1:
                return MPeg2ByteRate[index][0];
            case 2:
            case 3:
                return MPeg2ByteRate[index][1];
            default:
                return ByteRate;
        }
    }

    /**
     * Reads up to {@code size} bytes, blocking until they are available or the
     * stream ends.
     *
     * @param size number of bytes wanted
     * @return an array of exactly {@code size} bytes, a shorter array when the
     *         stream ended early, or {@code null} when nothing could be read or
     *         {@code size} is not positive
     * @throws IOException if reading the stream fails
     */
    public byte[] readFull(int size) throws IOException {
        if (size <= 0) {
            return null;
        }
        byte[] data = new byte[size];
        int n = 0;
        while (n < size) {
            int k = stream.read(data, n, size - n);
            if (k < 0) {
                break;
            }
            n += k;
        }
        if (n <= 0) {
            return null;
        }
        if (n < size) {
            byte[] temp = new byte[n];
            System.arraycopy(data, 0, temp, 0, n);
            return temp;
        }
        return data;
    }

    /**
     * Skips exactly {@code skipLength} bytes.
     *
     * @param skipLength number of bytes to skip
     * @return {@code true} when all bytes were skipped, {@code false} when the
     *         stream ended first
     * @throws IOException if reading the stream fails
     */
    public boolean skipBytes(long skipLength) throws IOException {
        long remaining = skipLength;
        while (remaining > 0) {
            long n = stream.skip(remaining);
            if (n > 0) {
                remaining -= n;
                continue;
            }
            // skip() reports 0 both for "nothing skipped right now" and for end of
            // stream; a single read() tells the two apart and prevents an endless loop.
            if (stream.read() < 0) {
                return false;
            }
            remaining--;
        }
        return true;
    }

}
/**
 * Loads the sample data of an audio file into memory and, when the platform
 * offers a playback line, prepares a {@link Clip} for it.
 *
 * <p>Samples are decoded as little-endian, which matches WAV files.
 */
public class WaveFile {
    public final int NOT_SPECIFIED = AudioSystem.NOT_SPECIFIED; // -1
    public final int INT_SIZE = 4;

    private int sampleSize = NOT_SPECIFIED; // bytes per sample
    private long framesCount = NOT_SPECIFIED;
    private int sampleRate = NOT_SPECIFIED;
    private int channelsNum;
    private byte[] data; // raw audio bytes, channels interleaved
    private AudioFormat af;

    private Clip clip;
    private boolean canPlay;

    /**
     * Reads the whole file into memory and tries to open a playback clip.
     *
     * <p>Failure to obtain a clip is not an error: the instance is still usable
     * for sample access and {@link #isCanPlay()} reports {@code false}.
     *
     * @param file audio file to load
     * @throws FileNotFoundException         if the file does not exist
     * @throws UnsupportedAudioFileException if the file format is not recognised
     * @throws IOException                   if reading fails, or the length of the
     *                                       audio data is unknown or exceeds what a
     *                                       byte array can hold
     */
    public WaveFile(File file) throws UnsupportedAudioFileException, IOException {
        if (!file.exists()) {
            throw new FileNotFoundException(file.getAbsolutePath());
        }

        AudioInputStream ais = AudioSystem.getAudioInputStream(file);
        try {
            af = ais.getFormat();
            framesCount = ais.getFrameLength();
            sampleRate = (int) af.getSampleRate();
            sampleSize = af.getSampleSizeInBits() / 8;
            channelsNum = af.getChannels();

            long dataLength = framesCount * af.getSampleSizeInBits() * af.getChannels() / 8;
            if (dataLength < 0 || dataLength > Integer.MAX_VALUE) {
                throw new IOException("Unsupported audio data length: " + dataLength);
            }
            data = new byte[(int) dataLength];
            readFully(ais, data);
        } finally {
            ais.close();
        }

        AudioInputStream aisForPlay = AudioSystem.getAudioInputStream(file);
        try {
            clip = AudioSystem.getClip();
            clip.open(aisForPlay);
            clip.setFramePosition(0);
            canPlay = true;
        } catch (LineUnavailableException e) {
            canPlay = false;
            System.out.println("I can play only 8bit and 16bit music.");
        } catch (IllegalArgumentException e) {
            // Thrown when no installed mixer supports a clip, e.g. on headless machines.
            canPlay = false;
            System.out.println("Audio playback is unavailable: " + e.getMessage());
        } catch (SecurityException e) {
            canPlay = false;
            System.out.println("Audio playback is unavailable: " + e.getMessage());
        } finally {
            // A clip loads its audio data when it is opened, so the stream is no longer needed.
            aisForPlay.close();
        }
    }

    /** Fills {@code buffer} from {@code in}; a single read() call may deliver only part of the data. */
    private static void readFully(AudioInputStream in, byte[] buffer) throws IOException {
        int offset = 0;
        while (offset < buffer.length) {
            int n = in.read(buffer, offset, buffer.length - offset);
            if (n <= 0) {
                break; // end of stream, or less than one whole frame left to read
            }
            offset += n;
        }
    }

    /** @return {@code true} when a clip was opened and playback is possible */
    public boolean isCanPlay() {
        return canPlay;
    }

    /**
     * Starts playback.
     *
     * @throws IllegalStateException if no clip could be obtained for this file
     */
    public void play() {
        if (clip == null) {
            throw new IllegalStateException("Playback is not available for this file");
        }
        clip.start();
    }

    /** Stops playback; does nothing when no clip could be obtained. */
    public void stop() {
        if (clip != null) {
            clip.stop();
        }
    }

    /** @return format reported by the audio system for this file */
    public AudioFormat getAudioFormat() {
        return af;
    }

    /** @return size of one sample in bytes */
    public int getSampleSize() {
        return sampleSize;
    }

    /** @return frame count divided by the frame rate of the format */
    public double getDurationTime() {
        return getFramesCount() / getAudioFormat().getFrameRate();
    }

    /** @return number of frames reported for the file */
    public long getFramesCount() {
        return framesCount;
    }


    /**
     * Returns sample (amplitude value). Note that in case of stereo samples
     * go one after another. I.e. 0 - first sample of left channel, 1 - first
     * sample of the right channel, 2 - second sample of the left channel, 3 -
     * second sample of the right channel, etc.
     *
     * <p>The sample bytes are placed in the low-order bytes of the result and the
     * remaining bytes are zero, so samples narrower than four bytes are NOT
     * sign-extended: a negative 16-bit sample is returned as a value in
     * 32768..65535.
     *
     * @param sampleNumber zero-based index into the interleaved samples
     * @return the sample at that index
     * @throws IllegalArgumentException if {@code sampleNumber} is negative or not
     *                                  smaller than the number of samples
     * @throws IllegalStateException    if the sample size is not 1 to 4 bytes
     */
    public int getSampleInt(int sampleNumber) {
        if (sampleSize <= 0 || sampleSize > INT_SIZE) {
            throw new IllegalStateException("Unsupported sample size in bytes: " + sampleSize);
        }

        if (sampleNumber < 0 || sampleNumber >= data.length / sampleSize) {
            throw new IllegalArgumentException(
                    "sample number can't be < 0 or >= data.length/"
                            + sampleSize);
        }

        byte[] sampleBytes = new byte[INT_SIZE];

        // Samples are interleaved, so the byte offset is simply index * sample size.
        // Multiplying by the channel count as well skipped samples and ran past the
        // end of the data for multi-channel files.
        int offset = sampleNumber * sampleSize;
        for (int i = 0; i < sampleSize; i++) {
            sampleBytes[i] = data[offset + i];
        }

        return ByteBuffer.wrap(sampleBytes)
                .order(ByteOrder.LITTLE_ENDIAN).getInt();
    }

    /** @return sample rate of the format, truncated to an integer */
    public int getSampleRate() {
        return sampleRate;
    }

    /** @return the playback clip, or {@code null} when none could be obtained */
    public Clip getClip() {
        return clip;
    }
}