remove archive projects.

This commit is contained in:
Calvin 2023-07-19 16:59:33 +08:00
parent 9d08d74278
commit 9c12e53888
44 changed files with 0 additions and 3823 deletions

View File

@ -1,119 +0,0 @@
### Download the model and place it in the models directory
- Link: https://github.com/mymagicpower/AIAS/releases/download/apps/deep_speech.zip
### Speech Recognition (ASR) [Long Speech]
Speech recognition (Automatic Speech Recognition) is an advanced technology that focuses on processing and identifying human speech through speech signal processing and pattern recognition, which automatically recognizes and understands human oral language and converts it into corresponding texts or commands. Speech recognition is a cross-disciplinary field that has close relationships with acoustics, phonetics, linguistics, information theory, pattern recognition theory, and neuroscience.
The SDK uses the DeepSpeech2 model for Chinese speech recognition and achieves good recognition accuracy. DeepSpeech2 is an end-to-end automatic speech recognition (ASR) engine built on PaddlePaddle.
Audio segmentation is added on top of short-speech recognition: voice activity detection (VAD) is used to detect silence and split the audio.
- Deep Speech 2 Paper
[Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](http://proceedings.mlr.press/v48/amodei16.pdf)
#### Run example - SpeechRecognitionExampleL
After a successful run, the command line should display the following output:
```text
...
[INFO ] - 第1个分割音频, 得分: 99.28923, 识别结果: 近几年不但我用输给女儿压岁
音频均方根能量: -30.505535
[INFO ] - 第2个分割音频, 得分: 88.94682, 识别结果: 劝说清朋不要给女儿压岁钱玩改送压岁书
[INFO ] - 最终识别结果:,近几年不但我用输给女儿压岁,劝说清朋不要给女儿压岁钱玩改送压岁书
```
#### Parameter Settings
The setting of audio segmentation parameters will affect the accuracy of detection results. Therefore, please set the parameters reasonably.
padding_duration_ms: 300
frame_duration_ms: 30
#### The VAD used in this example only supports Mac & Linux. For VAD support on Windows, please refer to:
https://gitee.com/endlesshh/ttskit-java
### Open Source Algorithm
#### 1. The open source algorithm used by the SDK
- [PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)
#### 2. How to export the model?
- [how_to_create_paddlepaddle_model](http://docs.djl.ai/docs/paddlepaddle/how_to_create_paddlepaddle_model_zh.html)
- export_model.py
```text
# export_model.py: export a trained DeepSpeech2 checkpoint as an inference model.
import argparse
import functools

import paddle
from model_utils.model import DeepSpeech2Model
from utils.utility import add_arguments

parser = argparse.ArgumentParser(description=__doc__)
# add_arg is add_arguments bound to this parser; presumably each call below
# registers one command-line option as (name, type, default, help text).
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('num_conv_layers', int, 2, "卷积层数量")
add_arg('num_rnn_layers', int, 3, "循环神经网络的数量")
add_arg('rnn_layer_size', int, 1024, "循环神经网络的大小")
add_arg('use_gpu', bool, False, "是否使用GPU加载模型")
add_arg('vocab_path', str, './dataset/zh_vocab.txt', "数据集的词汇表文件路径")
add_arg('resume_model', str, './models/param/50.pdparams', "恢复模型文件路径")
add_arg('save_model_path', str, './models/infer/', "保存导出的预测模型文件夹路径")
args = parser.parse_args()

# Whether to use the GPU: device 0 when use_gpu is set, otherwise the CPU.
place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()

# The vocabulary size is the number of lines in the vocabulary file.
with open(args.vocab_path, 'r', encoding='utf-8') as f:
    vocab_size = len(f.readlines())

# Get the DeepSpeech2 model and set it up for inference.
ds2_model = DeepSpeech2Model(vocab_size=vocab_size,
                             num_conv_layers=args.num_conv_layers,
                             num_rnn_layers=args.num_rnn_layers,
                             rnn_layer_size=args.rnn_layer_size,
                             resume_model=args.resume_model,
                             place=place)
ds2_model.export_model(model_path=args.save_model_path)
print('成功导出模型,模型保存在:%s' % args.save_model_path)
```
- export_model_1300.py
```text
# export_model_1300.py: export the DeepSpeech-1300 checkpoint as an inference model.
import argparse
import functools

import paddle
from model_utils.model import DeepSpeech2Model
from utils.utility import add_arguments

parser = argparse.ArgumentParser(description=__doc__)
# add_arg is add_arguments bound to this parser; presumably each call below
# registers one command-line option as (name, type, default, help text).
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('num_conv_layers', int, 2, "卷积层数量")
add_arg('num_rnn_layers', int, 3, "循环神经网络的数量")
add_arg('rnn_layer_size', int, 1024, "循环神经网络的大小")
add_arg('use_gpu', bool, False, "是否使用GPU加载模型")
add_arg('vocab_path', str, './models/DeepSpeech-1300/dataset/zh_vocab.txt', "数据集的词汇表文件路径")
add_arg('resume_model', str, './models/DeepSpeech-1300/models/step_final/params.pdparams', "恢复模型文件路径")
add_arg('save_model_path', str, './models/infer-1300/', "保存导出的预测模型文件夹路径")
args = parser.parse_args()

# Whether to use the GPU: device 0 when use_gpu is set, otherwise the CPU.
place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()

# The vocabulary size is the number of lines in the vocabulary file.
with open(args.vocab_path, 'r', encoding='utf-8') as f:
    vocab_size = len(f.readlines())

# Get the DeepSpeech2 model and set it up for inference.
ds2_model = DeepSpeech2Model(vocab_size=vocab_size,
                             num_conv_layers=args.num_conv_layers,
                             num_rnn_layers=args.num_rnn_layers,
                             rnn_layer_size=args.rnn_layer_size,
                             resume_model=args.resume_model,
                             place=place)
ds2_model.export_model(model_path=args.save_model_path)
print('成功导出模型,模型保存在:%s' % args.save_model_path)
```
### jlibrosa
https://github.com/Subtitle-Synchronizer/jlibrosa
https://github.com/Subtitle-Synchronizer/jlibrosa/blob/master/binaries/jlibrosa-1.1.8-SNAPSHOT-jar-with-dependencies.jar

View File

@ -1,196 +0,0 @@
### 官网:
[官网链接](http://www.aias.top/)
### 下载模型放置于models目录
- 链接: https://pan.baidu.com/s/1pGpTprOOukl7Kf0QdNma0w?pwd=syp9
### 语音识别ASR【长语音】
语音识别(Automatic Speech Recognition)是以语音为研究对象,通过语音信号处理和模式识别让机器自动识别和理解人类口述的语言。
语音识别技术就是让机器通过识别和理解过程把语音信号转变为相应的文本或命令的高技术。
语音识别是一门涉及面很广的交叉学科,它与声学、语音学、语言学、信息理论、模式识别理论以及神经生物学等学科都有非常密切的关系。
sdk基于DeepSpeech2模型实现中文语音识别,识别效果不错。
DeepSpeech2是基于PaddlePaddle实现的端到端自动语音识别(ASR)引擎。
在短语音识别的基础上增加了音频分割。使用了语音活动检测(VAD)检测静音。
- Deep Speech 2 论文
[Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](http://proceedings.mlr.press/v48/amodei16.pdf)
#### 运行例子 - SpeechRecognitionExampleL
运行成功后,命令行应该看到下面的信息:
```text
...
[INFO ] - 第1个分割音频, 得分: 99.28923, 识别结果: 近几年不但我用输给女儿压岁
音频均方根能量: -30.505535
[INFO ] - 第2个分割音频, 得分: 88.94682, 识别结果: 劝说清朋不要给女儿压岁钱玩改送压岁书
[INFO ] - 最终识别结果:,近几年不但我用输给女儿压岁,劝说清朋不要给女儿压岁钱玩改送压岁书
```
#### 1. 采样率
采样率(samplerate)为 16000 Hz,表示每秒 16000 个采样点。
##### MP3
mp3 每帧均为1152个字节(一个字节对应一个采样点) 则:
每帧播放时长 = 1152 * 1000 / sample_rate
例如:sample_rate = 44100 Hz 时,
计算出的时长为: 1152 * 1000 / 44100 = 26.122ms
这就是经常听到的mp3每帧播放时间固定为26ms的由来。
#### 2. 量化精度(位宽)
量化精度又叫量化深度上图中每一个红色的采样点都需要用一个数值来表示大小这个数值的数据类型大小可以是4bit、8bit、16bit、32bit等等位数越多表示得就越精细声音质量自然就越好当然数据量也会成倍增大。
常见的位宽是8bit 或者 16bit
#### 3. 声道数channels
由于音频的采集和播放是可以叠加的,因此,可以同时从多个音频源采集声音,并分别输出到不同的扬声器,故声道数一般表示声音录制时的音源数量或回放时相应的扬声器数量。
单声道(Mono)和双声道(Stereo)比较常见,顾名思义,前者的声道数为1,后者为2。
#### 4. 音频帧frame
是用于测量显示帧数的度量。所谓的测量单位为每秒显示帧数(Frames per Second,简称:FPS)或“赫兹”(Hz)。
音频跟视频很不一样,视频每一帧就是一张图像,而从上面的正弦波可以看出,音频数据是流式的,本身没有明确的一帧帧的概念,在实际的应用中,为了音频算法处理/传输的方便,一般约定俗成取2.5ms~60ms为单位的数据量为一帧音频。
这个时间被称之为“采样时间”,其长度没有特别的标准,它是根据具体应用的需求来决定的,我们可以计算一下一帧音频帧的大小:
假设某通道的音频信号是采样率为8kHz,位宽为16bit,20ms一帧,双通道,则一帧音频数据的大小为:
int size = 8000 x 16bit x 0.02s x 2 = 5120 bit = 640 byte
#### 5. 常见的音频编码方式有哪些?
模拟的音频信号转换为数字信号需要经过采样和量化量化的过程被称之为编码根据不同的量化策略产生了许多不同的编码方式常见的编码方式有PCM 和 ADPCM这些数据代表着无损的原始数字音频信号添加一些文件头信息就可以存储为WAV文件了它是一种由微软和IBM联合开发的用于音频数字存储的标准可以很容易地被解析和播放。
#### 6. 常见的音频压缩格式有哪些?
首先简单介绍一下音频数据压缩的最基本的原理:因为有冗余信息,所以可以压缩。
1) 频谱掩蔽效应: 人耳所能察觉的声音信号的频率范围为20Hz~20kHz,在这个频率范围以外的音频信号属于冗余信号。
2 时域掩蔽效应: 当强音信号和弱音信号同时出现时,弱信号会听不到,因此,弱音信号也属于冗余信号。
下面简单列出常见的音频压缩格式:
MP3、AAC、OGG、WMA、Opus、FLAC、APE、m4a、AMR等等。
#### 参数设置
音频分割参数的设置,会影响检测结果的精度。所以请合理设置参数。
padding_duration_ms: 300
frame_duration_ms: 30
#### 帮助
##### 共享库文件:
- linux: vad4j_sdk/lib/linux
- -libfvad.so
- -libwebrtcvadwrapper.so
- windows: vad4j_sdk/lib/windows
- -libfvad.dll
- -libwebrtcvadwrapper.dll
##### linux/mac 设置环境变量
- 共享库文件需添加到 java.library.path
LD_LIBRARY_PATH to /path/to/shared/libraries:$LD_LIBRARY_PATH.
##### windows 设置环境变量
- 共享库文件需添加到 PATH
##### 音频数据
输入的数据需是16-bit PCM audio数据,详细信息请参考下面的链接:
https://github.com/jitsi/jitsi-webrtc-vad-wrapper/blob/master/readme.md
### 开源算法
#### 1. sdk使用的开源算法
- [PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)
#### 2. 模型如何导出 ?
- [how_to_create_paddlepaddle_model](http://docs.djl.ai/docs/paddlepaddle/how_to_create_paddlepaddle_model_zh.html)
- export_model.py
```text
# export_model.py: export a trained DeepSpeech2 checkpoint as an inference model.
import argparse
import functools

import paddle
from model_utils.model import DeepSpeech2Model
from utils.utility import add_arguments

parser = argparse.ArgumentParser(description=__doc__)
# add_arg is add_arguments bound to this parser; presumably each call below
# registers one command-line option as (name, type, default, help text).
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('num_conv_layers', int, 2, "卷积层数量")
add_arg('num_rnn_layers', int, 3, "循环神经网络的数量")
add_arg('rnn_layer_size', int, 1024, "循环神经网络的大小")
add_arg('use_gpu', bool, False, "是否使用GPU加载模型")
add_arg('vocab_path', str, './dataset/zh_vocab.txt', "数据集的词汇表文件路径")
add_arg('resume_model', str, './models/param/50.pdparams', "恢复模型文件路径")
add_arg('save_model_path', str, './models/infer/', "保存导出的预测模型文件夹路径")
args = parser.parse_args()

# Whether to use the GPU: device 0 when use_gpu is set, otherwise the CPU.
place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()

# The vocabulary size is the number of lines in the vocabulary file.
with open(args.vocab_path, 'r', encoding='utf-8') as f:
    vocab_size = len(f.readlines())

# Get the DeepSpeech2 model and set it up for inference.
ds2_model = DeepSpeech2Model(vocab_size=vocab_size,
                             num_conv_layers=args.num_conv_layers,
                             num_rnn_layers=args.num_rnn_layers,
                             rnn_layer_size=args.rnn_layer_size,
                             resume_model=args.resume_model,
                             place=place)
ds2_model.export_model(model_path=args.save_model_path)
print('成功导出模型,模型保存在:%s' % args.save_model_path)
```
- export_model_1300.py
```text
# export_model_1300.py: export the DeepSpeech-1300 checkpoint as an inference model.
import argparse
import functools

import paddle
from model_utils.model import DeepSpeech2Model
from utils.utility import add_arguments

parser = argparse.ArgumentParser(description=__doc__)
# add_arg is add_arguments bound to this parser; presumably each call below
# registers one command-line option as (name, type, default, help text).
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('num_conv_layers', int, 2, "卷积层数量")
add_arg('num_rnn_layers', int, 3, "循环神经网络的数量")
add_arg('rnn_layer_size', int, 1024, "循环神经网络的大小")
add_arg('use_gpu', bool, False, "是否使用GPU加载模型")
add_arg('vocab_path', str, './models/DeepSpeech-1300/dataset/zh_vocab.txt', "数据集的词汇表文件路径")
add_arg('resume_model', str, './models/DeepSpeech-1300/models/step_final/params.pdparams', "恢复模型文件路径")
add_arg('save_model_path', str, './models/infer-1300/', "保存导出的预测模型文件夹路径")
args = parser.parse_args()

# Whether to use the GPU: device 0 when use_gpu is set, otherwise the CPU.
place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()

# The vocabulary size is the number of lines in the vocabulary file.
with open(args.vocab_path, 'r', encoding='utf-8') as f:
    vocab_size = len(f.readlines())

# Get the DeepSpeech2 model and set it up for inference.
ds2_model = DeepSpeech2Model(vocab_size=vocab_size,
                             num_conv_layers=args.num_conv_layers,
                             num_rnn_layers=args.num_rnn_layers,
                             rnn_layer_size=args.rnn_layer_size,
                             resume_model=args.resume_model,
                             place=place)
ds2_model.export_model(model_path=args.save_model_path)
print('成功导出模型,模型保存在:%s' % args.save_model_path)
```
### 其它帮助信息
http://aias.top/guides.html
#### 帮助文档:
- http://aias.top/guides.html
- 1.性能优化常见问题:
- http://aias.top/AIAS/guides/performance.html
- 2.引擎配置(包括CPU,GPU在线自动加载,及本地配置):
- http://aias.top/AIAS/guides/engine_config.html
- 3.模型加载方式(在线自动加载,及本地配置):
- http://aias.top/AIAS/guides/load_model.html
- 4.Windows环境常见问题:
- http://aias.top/AIAS/guides/windows.html
### Git地址
[Github链接](https://github.com/mymagicpower/AIAS)
[Gitee链接](https://gitee.com/mymagicpower/AIAS)
### jlibrosa 地址:
https://github.com/Subtitle-Synchronizer/jlibrosa
https://github.com/Subtitle-Synchronizer/jlibrosa/blob/master/binaries/jlibrosa-1.1.8-SNAPSHOT-jar-with-dependencies.jar

View File

@ -1,243 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="CheckStyle-IDEA-Module">
<option name="configuration">
<map />
</option>
</component>
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="aias-image-text-search-lib-0.1.0" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.8.6" level="project" />
<orderEntry type="library" name="Maven: commons-cli:commons-cli:1.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-slf4j-impl:2.15.0" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.25" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.15.0" level="project" />
<orderEntry type="library" scope="RUNTIME" name="Maven: org.apache.logging.log4j:log4j-core:2.15.0" level="project" />
<orderEntry type="library" name="Maven: ai.djl:api:0.14.0" level="project" />
<orderEntry type="library" name="Maven: net.java.dev.jna:jna:5.9.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.21" level="project" />
<orderEntry type="library" name="Maven: ai.djl:basicdataset:0.14.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-csv:1.8" level="project" />
<orderEntry type="library" name="Maven: ai.djl:model-zoo:0.14.0" level="project" />
<orderEntry type="library" name="Maven: ai.djl.paddlepaddle:paddlepaddle-engine:0.14.0" level="project" />
<orderEntry type="library" name="Maven: ai.djl.paddlepaddle:paddlepaddle-native-auto:2.0.2" level="project" />
<orderEntry type="library" name="Maven: ai.djl.paddlepaddle:paddlepaddle-model-zoo:0.14.0" level="project" />
<orderEntry type="library" name="Maven: ai.djl.pytorch:pytorch-model-zoo:0.14.0" level="project" />
<orderEntry type="library" name="Maven: ai.djl.pytorch:pytorch-engine:0.14.0" level="project" />
<orderEntry type="library" name="Maven: ai.djl.pytorch:pytorch-native-auto:1.9.1" level="project" />
<orderEntry type="library" name="Maven: com.github.wendykierp:JTransforms:3.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.5" level="project" />
<orderEntry type="library" name="Maven: pl.edu.icm:JLargeArrays:1.5" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacv-platform:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacv:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:videoinput:0.200-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas-platform:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp-platform:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:android-arm:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:android-arm64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:android-x86:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:android-x86_64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:ios-arm64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:ios-x86_64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:linux-armhf:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:linux-arm64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:linux-ppc64le:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:linux-x86:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:linux-x86_64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:macosx-x86_64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:windows-x86:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:windows-x86_64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:android-arm:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:android-arm64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:android-x86:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:android-x86_64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:ios-arm64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:ios-x86_64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:linux-x86:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:linux-x86_64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:linux-armhf:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:linux-arm64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:linux-ppc64le:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:macosx-x86_64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:windows-x86:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:windows-x86_64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv-platform:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:android-arm:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:android-arm64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:android-x86:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:android-x86_64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:ios-arm64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:ios-x86_64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:linux-x86:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:linux-x86_64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:linux-armhf:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:linux-arm64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:linux-ppc64le:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:macosx-x86_64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:windows-x86:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:windows-x86_64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg-platform:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:android-arm:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:android-arm64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:android-x86:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:android-x86_64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:linux-x86:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:linux-x86_64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:linux-armhf:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:linux-arm64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:linux-ppc64le:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:macosx-x86_64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:windows-x86:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:windows-x86_64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture-platform:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:linux-x86:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:linux-x86_64:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:linux-armhf:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:linux-arm64:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:windows-x86:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:windows-x86_64:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394-platform:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:linux-x86:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:linux-x86_64:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:linux-armhf:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:linux-arm64:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:linux-ppc64le:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:macosx-x86_64:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:windows-x86:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:windows-x86_64:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect-platform:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:linux-x86:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:linux-x86_64:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:linux-armhf:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:linux-arm64:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:linux-ppc64le:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:macosx-x86_64:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:windows-x86:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:windows-x86_64:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2-platform:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2:linux-x86:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2:linux-x86_64:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2:macosx-x86_64:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2:windows-x86_64:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense-platform:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:linux-x86:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:linux-x86_64:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:macosx-x86_64:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:windows-x86:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:windows-x86_64:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2-platform:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:linux-x86:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:linux-x86_64:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:macosx-x86_64:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:windows-x86:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:windows-x86_64:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:videoinput-platform:0.200-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:videoinput:windows-x86:0.200-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:videoinput:windows-x86_64:0.200-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus-platform:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:android-arm:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:android-arm64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:android-x86:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:android-x86_64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:linux-x86:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:linux-x86_64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:linux-armhf:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:linux-arm64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:linux-ppc64le:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:macosx-x86_64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:windows-x86:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:windows-x86_64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark-platform:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:android-arm:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:android-arm64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:android-x86:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:android-x86_64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:linux-x86:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:linux-x86_64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:linux-armhf:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:linux-arm64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:linux-ppc64le:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:macosx-x86_64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:windows-x86:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:windows-x86_64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica-platform:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:android-arm:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:android-arm64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:android-x86:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:android-x86_64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:linux-x86:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:linux-x86_64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:linux-armhf:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:linux-arm64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:linux-ppc64le:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:macosx-x86_64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:windows-x86:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:windows-x86_64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract-platform:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:android-arm:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:android-arm64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:android-x86:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:android-x86_64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:linux-x86:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:linux-x86_64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:linux-armhf:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:linux-arm64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:linux-ppc64le:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:macosx-x86_64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:windows-x86:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:windows-x86_64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.aspectj:aspectjweaver:1.8.10" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:21.0" level="project" />
<orderEntry type="module-library">
<library name="Maven: jlibrosa:jlibrosa:1.1.8-SNAPSHOT">
<CLASSES>
<root url="jar://$MODULE_DIR$/lib/jlibrosa-1.1.8-SNAPSHOT.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="library" name="Maven: com.googlecode.soundlibs:tritonus-share:0.3.7.4" level="project" />
<orderEntry type="library" name="Maven: com.googlecode.soundlibs:tritonus-all:0.3.7.2" level="project" />
<orderEntry type="library" name="Maven: com.googlecode.soundlibs:jlayer:1.0.1.4" level="project" />
<orderEntry type="library" name="Maven: junit:junit:3.8.2" level="project" />
<orderEntry type="library" name="Maven: com.googlecode.soundlibs:mp3spi:1.9.5.4" level="project" />
<orderEntry type="library" name="Maven: com.googlecode.soundlibs:jorbis:0.0.17.4" level="project" />
<orderEntry type="library" name="Maven: com.googlecode.soundlibs:vorbisspi:1.0.3.3" level="project" />
<orderEntry type="library" name="Maven: com.googlecode.soundlibs:basicplayer:3.0.0.0" level="project" />
<orderEntry type="library" name="Maven: fr.delthas:javamp3:1.0.1" level="project" />
<orderEntry type="library" name="Maven: com.orctom:vad4j:1.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.4" level="project" />
<orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:3.8.0" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.projectlombok:lombok:1.18.18" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.testng:testng:6.8.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.beanshell:bsh:2.0b4" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.beust:jcommander:1.27" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.yaml:snakeyaml:1.6" level="project" />
</component>
</module>

View File

@ -1,60 +0,0 @@
#### Common Model Loading Methods
1. How to load a model online via URL?
```text
# Use optModelUrls to load a model via URL
Criteria<Image, DetectedObjects> criteria =
Criteria.builder()
.optEngine("PaddlePaddle")
.setTypes(Image.class, DetectedObjects.class)
.optModelUrls("https://aias-home.oss-cn-beijing.aliyuncs.com/models/ocr_models/ch_ppocr_mobile_v2.0_det_infer.zip")
.optTranslator(new PpWordDetectionTranslator(new ConcurrentHashMap<String, String>()))
.optProgress(new ProgressBar())
.build();
```
2. How to load a model locally?
```text
# Use optModelPath to load a model from a zipped file
Path modelPath = Paths.get("src/test/resources/ch_ppocr_mobile_v2.0_det_infer.zip");
Criteria<Image, DetectedObjects> criteria =
Criteria.builder()
.optEngine("PaddlePaddle")
.setTypes(Image.class, DetectedObjects.class)
.optModelPath(modelPath)
.optTranslator(new PpWordDetectionTranslator(new ConcurrentHashMap<String, String>()))
.optProgress(new ProgressBar())
.build();
# Use optModelPath to load a model from a local directory
Path modelPath = Paths.get("src/test/resources/ch_ppocr_mobile_v2.0_det_infer/");
Criteria<Image, DetectedObjects> criteria =
Criteria.builder()
.optEngine("PaddlePaddle")
.setTypes(Image.class, DetectedObjects.class)
.optModelPath(modelPath)
.optTranslator(new PpWordDetectionTranslator(new ConcurrentHashMap<String, String>()))
.optProgress(new ProgressBar())
.build();
```
3. How to load a model packed into a JAR file?
```text
# Use optModelUrls to load a model
# Assuming the model is located in the JAR file at:
# BOOT-INF/classes/ch_ppocr_mobile_v2.0_det_infer.zip
Criteria<Image, DetectedObjects> criteria =
Criteria.builder()
.optEngine("PaddlePaddle")
.setTypes(Image.class, DetectedObjects.class)
.optModelUrls("jar:///ch_ppocr_mobile_v2.0_det_infer.zip")
.optTranslator(new PpWordDetectionTranslator(new ConcurrentHashMap<String, String>()))
.optProgress(new ProgressBar())
.build();
```

View File

@ -1,227 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>aias</groupId>
<artifactId>asr-long-audio-sdk</artifactId>
<version>0.23.0</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<djl.version>0.22.1</djl.version>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
<version>3.8.1</version>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.6</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.17.2</version>
</dependency>
<!-- Server-side inference engine (DJL core libraries) -->
<dependency>
<groupId>ai.djl</groupId>
<artifactId>api</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>ai.djl</groupId>
<artifactId>basicdataset</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>ai.djl</groupId>
<artifactId>model-zoo</artifactId>
<version>${djl.version}</version>
</dependency>
<!-- PaddlePaddle -->
<dependency>
<groupId>ai.djl.paddlepaddle</groupId>
<artifactId>paddlepaddle-engine</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>ai.djl.paddlepaddle</groupId>
<artifactId>paddlepaddle-model-zoo</artifactId>
<version>${djl.version}</version>
</dependency>
<!-- Pytorch -->
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-model-zoo</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-engine</artifactId>
<version>${djl.version}</version>
</dependency>
<!-- ONNX -->
<dependency>
<groupId>ai.djl.onnxruntime</groupId>
<artifactId>onnxruntime-engine</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>com.github.wendykierp</groupId>
<artifactId>JTransforms</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>javacv-platform</artifactId>
<version>1.5.7</version>
</dependency>
<dependency>
<groupId>org.aspectj</groupId>
<artifactId>aspectjweaver</artifactId>
<version>1.8.10</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>21.0</version>
</dependency>
<dependency>
<groupId>jlibrosa</groupId>
<artifactId>jlibrosa</artifactId>
<version>1.1.8-SNAPSHOT</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/jlibrosa-1.1.8-SNAPSHOT.jar</systemPath>
</dependency>
<dependency>
<groupId>org.jitsi</groupId>
<artifactId>TarsosDSP-bin</artifactId>
<version>1.0-SNAPSHOT</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/jitsi-webrtcvadwrapper-1.0-SNAPSHOT.jar</systemPath>
</dependency>
<dependency>
<groupId>com.googlecode.soundlibs</groupId>
<artifactId>tritonus-share</artifactId>
<version>0.3.7.4</version>
</dependency>
<dependency>
<groupId>com.googlecode.soundlibs</groupId>
<artifactId>tritonus-all</artifactId>
<version>0.3.7.2</version>
</dependency>
<dependency>
<groupId>com.googlecode.soundlibs</groupId>
<artifactId>jlayer</artifactId>
<version>1.0.1.4</version>
</dependency>
<dependency>
<groupId>com.googlecode.soundlibs</groupId>
<artifactId>mp3spi</artifactId>
<version>1.9.5.4</version>
</dependency>
<dependency>
<groupId>com.googlecode.soundlibs</groupId>
<artifactId>jorbis</artifactId>
<version>0.0.17.4</version>
</dependency>
<dependency>
<groupId>com.googlecode.soundlibs</groupId>
<artifactId>vorbisspi</artifactId>
<version>1.0.3.3</version>
</dependency>
<dependency>
<groupId>com.googlecode.soundlibs</groupId>
<artifactId>basicplayer</artifactId>
<version>3.0.0.0</version>
</dependency>
<dependency>
<groupId>fr.delthas</groupId>
<artifactId>javamp3</artifactId>
<version>1.0.1</version>
</dependency>
<dependency>
<groupId>com.orctom</groupId>
<artifactId>vad4j</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>3.8.0</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.18</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@ -1,65 +0,0 @@
package me.aias.example;
import ai.djl.Device;
import ai.djl.inference.Predictor;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDManager;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ZooModel;
import me.aias.example.utils.AudioProcess;
import me.aias.example.utils.AudioUtils;
import me.aias.example.utils.AudioVadUtils;
import me.aias.example.utils.SpeechRecognition;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Queue;
/**
 * Long-audio speech recognition example for Linux and macOS.
 *
 * <p>Splits {@code src/test/resources/test.wav} into voiced segments with
 * {@link AudioVadUtils#cropAudioVad}, runs every segment through the model described by
 * {@link SpeechRecognition#criteria()} and logs the per-segment and combined results.
 *
 * <p>https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech
 *
 * @author calvin
 * @mail 179209347@qq.com
 */
public class SpeechRecognitionExampleL {
    private static final Logger logger = LoggerFactory.getLogger(SpeechRecognitionExampleL.class);

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws Exception if audio loading, model loading or inference fails
     */
    public static void main(String[] args) throws Exception {
        String os = System.getProperty("os.name");
        if (os.contains("Windows")) {
            System.out.println(
                    "Only support Linux & Mac");
            // Stop here instead of continuing on an unsupported platform.
            return;
        }

        Path path = Paths.get("src/test/resources/test.wav");
        // 300 ms of padding, 30 ms frames.
        Queue<byte[]> segments = AudioVadUtils.cropAudioVad(path, 300, 30);

        SpeechRecognition speakerEncoder = new SpeechRecognition();
        Criteria<NDArray, Pair> criteria = speakerEncoder.criteria();

        // The manager owns every NDArray created below, so close it together with the model.
        try (NDManager manager = NDManager.newBaseManager(Device.cpu());
                ZooModel<NDArray, Pair> model = criteria.loadModel();
                Predictor<NDArray, Pair> predictor = model.newPredictor()) {
            logger.info("input audio: {}", "src/test/resources/test.wav");

            int index = 1;
            // Each result is prefixed with "," to keep the original output format.
            StringBuilder texts = new StringBuilder();
            for (byte[] que : segments) {
                NDArray array = AudioUtils.bytesToFloatArray(manager, que);
                NDArray audioFeature = AudioProcess.processUtterance(manager, array);
                Pair result = predictor.predict(audioFeature);
                texts.append(",").append(result.getRight());
                // Both log lines describe the same segment, so the counter is advanced once,
                // after both have been written.
                logger.info("Segmented audio {} with score: {}, recognition result: {}", index, result.getLeft(), result.getRight());
                logger.info("第{}个分割音频, 得分: {}, 识别结果: {}", index, result.getLeft(), result.getRight());
                index++;
            }

            logger.info("最终识别结果:" + texts);
            logger.info("Final recognition result: " + texts);
        }
    }
}

View File

@ -1,63 +0,0 @@
package me.aias.example;
import ai.djl.Device;
import ai.djl.inference.Predictor;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDManager;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ZooModel;
import me.aias.example.utils.AudioProcess;
import me.aias.example.utils.AudioUtils;
import me.aias.example.utils.AudioVadUtils;
import me.aias.example.utils.SpeechRecognition;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Queue;
/**
 * Long-audio speech recognition example for Windows.
 *
 * <p>Splits {@code src/test/resources/test.wav} into voiced segments with
 * {@link AudioVadUtils#cropAudioVad}, runs every segment through the model described by
 * {@link SpeechRecognition#criteria()} and logs the per-segment and combined results.
 *
 * <p>https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech
 *
 * @author calvin
 * @mail 179209347@qq.com
 */
public class SpeechRecognitionExampleLWin {
    private static final Logger logger = LoggerFactory.getLogger(SpeechRecognitionExampleLWin.class);

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws Exception if audio loading, model loading or inference fails
     */
    public static void main(String[] args) throws Exception {
        String os = System.getProperty("os.name");
        if (!os.contains("Windows")) {
            System.out.println(
                    "Only support Windows");
            // Stop here instead of continuing on an unsupported platform.
            return;
        }

        Path path = Paths.get("src/test/resources/test.wav");
        // 300 ms of padding, 64 ms frames.
        Queue<byte[]> segments = AudioVadUtils.cropAudioVad(path, 300, 64);

        SpeechRecognition speakerEncoder = new SpeechRecognition();
        Criteria<NDArray, Pair> criteria = speakerEncoder.criteria();

        // The manager owns every NDArray created below, so close it together with the model.
        try (NDManager manager = NDManager.newBaseManager(Device.cpu());
                ZooModel<NDArray, Pair> model = criteria.loadModel();
                Predictor<NDArray, Pair> predictor = model.newPredictor()) {
            logger.info("input audio: {}", "src/test/resources/test.wav");

            int index = 1;
            // Each result is prefixed with "," to keep the original output format.
            StringBuilder texts = new StringBuilder();
            for (byte[] que : segments) {
                NDArray array = AudioUtils.bytesToFloatArray(manager, que);
                NDArray audioFeature = AudioProcess.processUtterance(manager, array);
                Pair result = predictor.predict(audioFeature);
                texts.append(",").append(result.getRight());
                logger.info("第{}个分割音频, 得分: {}, 识别结果: {}", index++, result.getLeft(), result.getRight());
            }

            logger.info("最终识别结果:" + texts);
        }
    }
}

View File

@ -1,50 +0,0 @@
package me.aias.example;
import ai.djl.Device;
import ai.djl.inference.Predictor;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDManager;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ZooModel;
import me.aias.example.utils.AudioProcess;
import me.aias.example.utils.SpeechRecognition;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Short-audio speech recognition example.
 *
 * <p>Extracts features from {@code src/test/resources/test.wav} in one pass (no segmentation),
 * runs them through the model described by {@link SpeechRecognition#criteria()} and logs the
 * score and the recognized text.
 *
 * <p>https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech
 *
 * @author calvin
 * @mail 179209347@qq.com
 * @website www.aias.top
 */
public final class SpeechRecognitionExampleS {

    private static final Logger logger = LoggerFactory.getLogger(SpeechRecognitionExampleS.class);

    private SpeechRecognitionExampleS() {
    }

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws Exception if audio loading, model loading or inference fails
     */
    public static void main(String[] args) throws Exception {
        // The manager owns the feature array, so it is released once inference is done.
        try (NDManager manager = NDManager.newBaseManager(Device.cpu())) {
            NDArray audioFeature = AudioProcess.processUtterance(manager, "src/test/resources/test.wav");

            SpeechRecognition speakerEncoder = new SpeechRecognition();
            Criteria<NDArray, Pair> criteria = speakerEncoder.criteria();

            try (ZooModel<NDArray, Pair> model = criteria.loadModel();
                    Predictor<NDArray, Pair> predictor = model.newPredictor()) {
                logger.info("input audio: {}", "src/test/resources/test.wav");
                Pair result = predictor.predict(audioFeature);
                logger.info("Score : " + result.getLeft());
                logger.info("Words : " + result.getRight());
            }
        }
    }
}

View File

@ -1,241 +0,0 @@
package me.aias.example.utils;
import org.bytedeco.javacv.*;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.ShortBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Helpers that decode an audio file into sample arrays or per-frame buffers, and that write
 * samples back to an audio file.
 *
 * @author Calvin
 */
public class AudioArrayUtils {

    /** Factor that maps signed 16-bit PCM values to floats in [-1, 1): 1 / 2^15. */
    private static final float PCM16_SCALE = 1.0f / (1 << ((8 * 2) - 1));

    public static void main(String[] args) throws FrameGrabber.Exception {
        System.out.println(
                Arrays.toString(AudioArrayUtils.audioSegment("src/test/resources/test.wav").samples));
    }

    /** Decoded samples of an audio file together with its sample rate and channel count. */
    public static final class AudioSegment {
        public final float[] samples;
        public final Integer sampleRate;
        public final Integer audioChannels;

        public AudioSegment(float[] samples, Integer sampleRate, Integer audioChannels) {
            this.samples = samples;
            this.sampleRate = sampleRate;
            this.audioChannels = audioChannels;
        }
    }

    /** Sample buffers of a single grabbed frame with that frame's sample rate and channel count. */
    public static final class FrameData {
        public final Buffer[] samples;
        public final Integer sampleRate;
        public final Integer audioChannels;

        public FrameData(Buffer[] samples, Integer sampleRate, Integer audioChannels) {
            this.samples = samples;
            this.sampleRate = sampleRate;
            this.audioChannels = audioChannels;
        }
    }

    /**
     * Decodes an audio file into a float array plus its sample rate and channel count.
     *
     * <p>Only the first sample buffer of every frame is read; each 16-bit value is scaled by
     * 1/32768. A decoding error is printed and whatever was decoded so far is returned; the
     * sample rate and channel count stay at -1 if the grabber failed before reporting them.
     *
     * @param path path of the audio file
     * @return the decoded samples, sample rate and channel count
     * @throws FrameGrabber.Exception if closing the grabber fails
     */
    public static AudioSegment audioSegment(String path) throws FrameGrabber.Exception {
        int sampleRate = -1;
        int audioChannels = -1;
        // Growable primitive buffer; avoids boxing every sample into a Float.
        float[] samples = new float[0];
        int count = 0;

        try (FFmpegFrameGrabber audioGrabber = new FFmpegFrameGrabber(path)) {
            try {
                audioGrabber.start();
                sampleRate = audioGrabber.getSampleRate();
                audioChannels = audioGrabber.getAudioChannels();
                Frame frame;
                while ((frame = audioGrabber.grabFrame()) != null) {
                    Buffer[] buffers = frame.samples;
                    if (buffers == null || buffers.length == 0) {
                        // Frame without audio samples: nothing to collect.
                        continue;
                    }
                    ShortBuffer sb = (ShortBuffer) buffers[0];
                    int n = sb.remaining();
                    if (count + n > samples.length) {
                        samples = Arrays.copyOf(samples, Math.max(samples.length * 2, count + n));
                    }
                    for (int i = 0; i < n; i++) {
                        samples[count++] = sb.get() * PCM16_SCALE;
                    }
                }
            } catch (FrameGrabber.Exception e) {
                // Best effort: report the problem and return what has been decoded.
                e.printStackTrace();
            }

            return new AudioSegment(Arrays.copyOf(samples, count), sampleRate, audioChannels);
        }
    }

    /**
     * Copies the remaining content of a {@link ShortBuffer} without disturbing the source.
     *
     * @param source buffer to copy; its position and limit are restored before returning
     * @param target buffer to fill, or {@code null} to allocate one of the required size
     * @return the filled target, flipped so it is ready for reading
     */
    private static ShortBuffer deepCopy(ShortBuffer source, ShortBuffer target) {
        int sourceP = source.position();
        int sourceL = source.limit();

        if (null == target) {
            target = ShortBuffer.allocate(source.remaining());
        }
        target.put(source);
        target.flip();

        source.position(sourceP);
        source.limit(sourceL);
        return target;
    }

    /**
     * Copies the remaining content of a {@link ByteBuffer} without disturbing the source.
     *
     * @param source buffer to copy; its position and limit are restored before returning
     * @param target buffer to fill, or {@code null} to allocate one of the required size
     * @return the filled target, flipped so it is ready for reading
     */
    private static ByteBuffer deepCopy(ByteBuffer source, ByteBuffer target) {
        int sourceP = source.position();
        int sourceL = source.limit();

        if (null == target) {
            target = ByteBuffer.allocate(source.remaining());
        }
        target.put(source);
        target.flip();

        source.position(sourceP);
        source.limit(sourceL);
        return target;
    }

    /**
     * Decodes an audio file into a list with one {@link FrameData} per grabbed audio frame.
     *
     * <p>A decoding error is printed and the frames collected so far are returned.
     *
     * @param path path of the audio file
     * @return copies of the sample buffers of every frame, in file order
     * @throws FrameGrabber.Exception if closing the grabber fails
     */
    public static List<FrameData> frameData(String path) throws FrameGrabber.Exception {
        List<FrameData> audioData = new ArrayList<>();

        try (FFmpegFrameGrabber audioGrabber = new FFmpegFrameGrabber(path)) {
            try {
                audioGrabber.start();
                Frame frame;
                while ((frame = audioGrabber.grabFrame()) != null) {
                    Buffer[] buffers = frame.samples;
                    if (buffers == null) {
                        // Frame without audio samples: nothing to store.
                        continue;
                    }
                    Buffer[] copiedBuffers = new Buffer[buffers.length];
                    for (int i = 0; i < buffers.length; i++) {
                        // Keep the copy: deepCopy allocates and returns it, it cannot fill
                        // the array slot on its own.
                        // NOTE(review): copying presumably guards against the grabber reusing
                        // its buffers between grabFrame() calls - confirm against JavaCV.
                        copiedBuffers[i] = deepCopy((ShortBuffer) buffers[i], null);
                    }
                    FrameData frameData = new FrameData(copiedBuffers, frame.sampleRate, frame.audioChannels);
                    audioData.add(frameData);
                }
            } catch (FrameGrabber.Exception e) {
                // Best effort: report the problem and return what has been decoded.
                e.printStackTrace();
            }
            return audioData;
        }
    }

    /**
     * Writes samples to a 16-bit signed little-endian WAV file.
     *
     * <p>Each sample is truncated to an int and its low 16 bits are written; no scaling is
     * applied, so values are expected to be in 16-bit PCM range already.
     *
     * @param buffer samples to write
     * @param sampleRate sample rate in Hz; 0 selects 22050
     * @param audioChannels number of channels; 0 selects 1
     * @param outs destination file
     * @throws Exception if the file cannot be written
     */
    public static void toWavFile(float[] buffer, float sampleRate, int audioChannels, File outs)
            throws Exception {
        if (sampleRate == 0.0) {
            sampleRate = 22050;
        }

        if (audioChannels == 0) {
            audioChannels = 1;
        }

        final byte[] byteBuffer = new byte[buffer.length * 2];
        for (int i = 0; i < buffer.length; i++) {
            final int x = (int) (buffer[i]);
            // Little-endian: low byte first.
            byteBuffer[2 * i] = (byte) x;
            byteBuffer[2 * i + 1] = (byte) (x >>> 8);
        }

        AudioFormat format = new AudioFormat(sampleRate, 16, audioChannels, true, false);
        try (ByteArrayInputStream bais = new ByteArrayInputStream(byteBuffer);
                AudioInputStream audioInputStream = new AudioInputStream(bais, format, buffer.length)) {
            AudioSystem.write(audioInputStream, AudioFileFormat.Type.WAVE, outs);
        }
    }

    /**
     * Records a list of frames to an audio file.
     *
     * <p>Failures are printed and otherwise ignored, so the method never throws a recorder
     * exception.
     *
     * @param audioData frames to record, in order
     * @param path destination file
     * @param audioChannels number of channels of the output
     */
    public void toWavFile(List<FrameData> audioData, String path, int audioChannels) {
        try (FFmpegFrameRecorder recorder = new FFmpegFrameRecorder(path, audioChannels)) {
            // The recorder has to be started before any frame can be recorded.
            recorder.start();
            for (FrameData frameData : audioData) {
                Frame frame = new Frame();
                frame.sampleRate = frameData.sampleRate;
                frame.audioChannels = frameData.audioChannels;
                frame.samples = frameData.samples;
                recorder.record(frame);
            }
        } catch (FrameRecorder.Exception e) {
            // Same reporting style as the grabber methods instead of hiding the failure.
            e.printStackTrace();
        }
    }
}

View File

@ -1,50 +0,0 @@
package me.aias.example.utils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDManager;
/**
 * Audio featurizer: turns raw audio samples into a spectrogram feature array.
 *
 * @author Calvin <179209347@qq.com>
 */
public class AudioFeaturizer {

    /** RMS level, in decibels, the audio is normalized to before feature extraction. */
    private static final float TARGET_DB = -20f;

    /** Step between consecutive frames, in milliseconds. */
    private static final float STRIDE_MS = 10f;

    /** Length of the window used for each frame, in milliseconds. */
    private static final float WINDOW_MS = 20f;

    /**
     * Extracts spectrogram features from audio samples.
     *
     * <p>The samples are normalized to {@code -20 dB} with {@link AudioUtils#normalize} and then
     * passed to {@link AudioUtils#linearSpecgram} with a 10 ms stride and a 20 ms window.
     *
     * @param manager manager used to create the arrays
     * @param floatArray audio samples
     * @return the spectrogram produced by {@link AudioUtils#linearSpecgram}
     */
    public static NDArray featurize(NDManager manager, float[] floatArray) {
        NDArray samples = manager.create(floatArray);
        // normalize() measures the current RMS level itself, so no separate rmsDb call is needed.
        samples = AudioUtils.normalize(samples, TARGET_DB);
        return AudioUtils.linearSpecgram(manager, samples, STRIDE_MS, WINDOW_MS);
    }
}

View File

@ -1,66 +0,0 @@
package me.aias.example.utils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Audio preprocessing utility: turns audio into normalized spectrogram features.
 *
 * @author Calvin
 *
 * @email 179209347@qq.com
 **/
public class AudioProcess {
    private static final Logger logger = LoggerFactory.getLogger(AudioProcess.class);

    /** File passed to {@link FeatureNormalizer#apply} to normalize the features. */
    private static final String MEAN_STD_NPZ_PATH = "src/test/resources/mean_std.npz";

    /**
     * Computes normalized features for an audio file.
     *
     * @param manager manager used to create the arrays
     * @param path path of the audio file
     * @return the normalized feature array
     * @throws Exception if decoding or normalization fails
     */
    public static NDArray processUtterance(NDManager manager, String path) throws Exception {
        // Decode the file into a float array, then share the common pipeline.
        return processUtterance(manager, AudioArrayUtils.audioSegment(path).samples);
    }

    /**
     * Computes normalized features for raw audio samples.
     *
     * @param manager manager used to create the arrays
     * @param floatArray audio samples
     * @return the normalized feature array
     * @throws Exception if normalization fails
     */
    public static NDArray processUtterance(NDManager manager, float[] floatArray) throws Exception {
        // Extract the spectrogram of the audio segment.
        NDArray features = AudioFeaturizer.featurize(manager, floatArray);
        // Normalize the features with the stored mean and standard deviation values.
        return FeatureNormalizer.apply(manager, MEAN_STD_NPZ_PATH, features);
    }

    /**
     * Computes normalized features for audio samples held in an {@link NDArray}.
     *
     * @param manager manager used to create the arrays
     * @param array audio samples
     * @return the normalized feature array
     * @throws Exception if normalization fails
     */
    public static NDArray processUtterance(NDManager manager, NDArray array) throws Exception {
        return processUtterance(manager, array.toFloatArray());
    }
}

View File

@ -1,75 +0,0 @@
package me.aias.example.utils;
import ai.djl.Model;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDArrays;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.Shape;
import ai.djl.translate.Batchifier;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import ai.djl.util.Utils;
import org.apache.commons.lang3.tuple.Pair;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
/**
 * Translator between audio feature arrays and the (score, text) pair decoded from the
 * model output.
 *
 * @author Calvin
 *
 * @email 179209347@qq.com
 **/
public final class AudioTranslator implements Translator<NDArray, Pair> {

    /** Number of times the mask plane is repeated along the first axis. */
    private static final int MASK_CHANNELS = 32;

    /** Lines of {@code zh_vocab.txt}, loaded in {@link #prepare}. */
    private List<String> vocabulary = null;

    AudioTranslator() {}

    /**
     * Loads the vocabulary from the model artifact {@code zh_vocab.txt}.
     *
     * @param ctx translator context giving access to the model
     * @throws IOException if the artifact cannot be read
     */
    @Override
    public void prepare(TranslatorContext ctx) throws IOException {
        try (InputStream vocabStream = ctx.getModel().getArtifact("zh_vocab.txt").openStream()) {
            vocabulary = Utils.readLines(vocabStream, true);
        }
    }

    /**
     * Builds the three model inputs: the feature array with a leading axis, the sequence
     * length, and a mask of shape {@code (1, 32, rows, cols)}.
     *
     * @param ctx translator context giving access to the manager
     * @param audioFeature feature array; its first two dimensions are read
     * @return the list {@code (audio, sequence length, mask)}
     */
    @Override
    public NDList processInput(TranslatorContext ctx, NDArray audioFeature) {
        NDManager manager = ctx.getNDManager();
        Shape featureShape = audioFeature.getShape();

        long audioLen = featureShape.get(1);
        long maskRows = (featureShape.get(0) - 1) / 2 + 1;
        long maskCols = (featureShape.get(1) - 1) / 3 + 1;
        long maskMaxLen = (audioLen - 1) / 3 + 1;

        // maskMaxLen and maskCols come from the same dimension, so the zero block has width 0
        // for a single utterance; it is concatenated anyway.
        NDArray onesBlock = manager.ones(new Shape(maskRows, maskCols));
        NDArray zerosBlock = manager.zeros(new Shape(maskRows, maskMaxLen - maskCols));
        NDArray plane =
                NDArrays.concat(new NDList(onesBlock, zerosBlock), 1)
                        .reshape(1, maskRows, maskMaxLen);

        NDList planes = new NDList();
        int remaining = MASK_CHANNELS;
        while (remaining-- > 0) {
            planes.add(plane);
        }
        NDArray stackedMask = NDArrays.concat(planes, 0);

        return new NDList(
                audioFeature.expandDims(0),
                manager.create(new long[] {audioLen}),
                stackedMask.expandDims(0));
    }

    /**
     * Decodes the single output array with {@link CTCGreedyDecoder#greedyDecoder}.
     *
     * @param ctx translator context giving access to the manager
     * @param list model output, expected to hold exactly one array
     * @return the pair produced by the decoder
     */
    @Override
    public Pair processOutput(TranslatorContext ctx, NDList list) {
        NDArray probabilities = list.singletonOrThrow();
        return CTCGreedyDecoder.greedyDecoder(ctx.getNDManager(), probabilities, vocabulary, 0);
    }

    /** Returns {@code null}: no batchifier is used. */
    @Override
    public Batchifier getBatchifier() {
        return null;
    }
}

View File

@ -1,259 +0,0 @@
package me.aias.example.utils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDArrays;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.DataType;
import ai.djl.ndarray.types.Shape;
import com.jlibrosa.audio.JLibrosa;
/**
 * Audio signal helpers: silence generation and padding, level normalization, spectrogram
 * computation, window functions and PCM byte conversion.
 */
public class AudioUtils {
    // Frame step, in milliseconds, used for the mel spectrogram hop length.
    static int mel_window_step = 10;
    // Upper bound for the gain applied by normalize(), in decibels.
    static float max_gain_db = 300.0f;
    // Sample rate, in Hz, assumed by the spectrogram functions.
    static int sample_rate = 16000;
    // Small constant added before taking the logarithm of the spectrogram.
    static float eps = 1e-14f;
    // Factor applied to 16-bit PCM values when converting them to floats: 1 / 2^15.
    static float scale = (float) 1.0 / Float.valueOf(1 << ((8 * 2) - 1));
    // Number of bytes per PCM sample.
    static int n_bytes = 2;

    /**
     * Creates a silent audio segment of the given duration and sample rate.
     *
     * @param manager manager used to create the array
     * @param duration length of the silence, in seconds
     * @param sampleRate sample rate
     * @return an array of {@code duration * sampleRate} zeros
     */
    public static NDArray makeSilence(NDManager manager, long duration, int sampleRate) {
        return manager.zeros(new Shape(duration * sampleRate));
    }

    /**
     * Pads an audio sample with a segment of silence.
     *
     * @param manager manager used to create the silence
     * @param wav audio samples
     * @param padLength number of silent samples to add on each padded side
     * @param sides padding location: {@code "beginning"} adds silence in front, {@code "end"}
     *     adds it at the end, {@code "both"} adds it on both sides
     * @return the padded audio
     * @throws Exception if {@code sides} is not one of the supported values
     */
    public static NDArray padSilence(NDManager manager, NDArray wav, long padLength, String sides)
            throws Exception {
        NDArray pad = manager.zeros(new Shape(padLength));
        if (sides.equals("beginning")) {
            wav = pad.concat(wav);
        } else if (sides.equals("end")) {
            wav = wav.concat(pad);
        } else if (sides.equals("both")) {
            wav = pad.concat(wav);
            wav = wav.concat(pad);
        } else {
            throw new Exception("Unknown value for the sides " + sides);
        }

        return wav;
    }

    /**
     * Concatenates any number of audio segments, in list order.
     *
     * @param segments the audio segments to concatenate; must contain at least one element
     * @return the concatenated audio
     */
    public static NDArray concatenate(NDList segments) {
        NDArray array = segments.get(0);
        for (int i = 1; i < segments.size(); i++) {
            array = array.concat(segments.get(i));
        }
        return array;
    }

    /**
     * Computes the root mean square energy of the audio in decibels:
     * {@code 10 * log10(mean(samples^2))}.
     *
     * @param samples audio samples
     * @return the RMS energy in decibels
     */
    public static float rmsDb(NDArray samples) {
        samples = samples.pow(2);
        samples = samples.mean();
        samples = samples.log10().mul(10);
        return samples.toFloatArray()[0];
    }

    /**
     * Normalizes the audio to a target RMS level. The target should be less than 0.0, as 0.0
     * is full-scale audio. The applied gain is capped at {@code max_gain_db}.
     *
     * @param samples audio samples
     * @param target_db target RMS value in decibels
     * @return the scaled samples
     */
    public static NDArray normalize(NDArray samples, float target_db) {
        float gain = target_db - rmsDb(samples);
        gain = Math.min(gain, max_gain_db);
        // Convert the gain in decibels into a linear amplitude factor.
        float factor = (float) Math.pow(10f, gain / 20f);
        samples = samples.mul(factor);
        return samples;
    }

    /**
     * Computes the log linear spectrogram of the audio with a fast Fourier transform.
     *
     * <p>The audio is cut into overlapping windows, each window is weighted with a Hanning
     * window and transformed, and the squared magnitudes are scaled by the window energy and
     * sample rate. Bins above half the sample rate are dropped, {@code eps} is added and the
     * natural logarithm is taken.
     *
     * @param manager manager used to create the arrays
     * @param samples audio samples at {@code sample_rate}
     * @param stride_ms step between consecutive windows, in milliseconds
     * @param window_ms length of each window, in milliseconds
     * @return the spectrogram, frequency bins along the first axis and frames along the second
     * @throws IllegalArgumentException if the audio is shorter than one window
     */
    public static NDArray linearSpecgram(
            NDManager manager, NDArray samples, float stride_ms, float window_ms) {
        int strideSize = (int) (0.001 * sample_rate * stride_ms);
        int windowSize = (int) (0.001 * sample_rate * window_ms);
        if (samples.size() < windowSize) {
            // Without this check the framing below fails with an opaque array-size error.
            throw new IllegalArgumentException(
                    "Audio has " + samples.size() + " samples but one window needs " + windowSize);
        }

        // Drop trailing samples that do not fill a complete stride.
        long truncateSize = (samples.size() - windowSize) % strideSize;
        long len = samples.size() - truncateSize;
        samples = samples.get(":" + len);

        // Equivalent of numpy's as_strided framing: column c holds the window that starts at
        // sample c * strideSize.
        int rows = windowSize;
        int cols = ((int) samples.size() - windowSize) / strideSize + 1;

        float[] floatArray = samples.toFloatArray();
        float[] weighting = hanningWindow(windowSize);

        NDList fftList = new NDList();
        for (int col = 0; col < cols; col++) {
            double[] arr = new double[rows];
            for (int row = 0; row < rows; row++) {
                // Weight in float precision first, then widen for the transform.
                arr[row] = floatArray[row + col * strideSize] * weighting[row];
            }
            double[] spectrum = FFT.fft(arr);
            float[][] complex = FFT.rfft(spectrum);
            NDArray array = manager.create(FFT.abs(complex));
            fftList.add(array);
        }

        NDArray fft = NDArrays.stack(fftList).transpose();
        fft = fft.pow(2);

        NDArray weightingArray = manager.create(weighting);
        weightingArray = weightingArray.pow(2);
        // Named so that it does not shadow the static field "scale".
        NDArray powerScale = weightingArray.sum().mul(sample_rate);

        // The first and last bins are scaled once, every bin in between is doubled.
        NDArray middle = fft.get("1:-1,:");
        middle = middle.mul(2).div(powerScale);
        NDArray head = fft.get("0,:").div(powerScale).reshape(1, fft.getShape().get(1));
        NDArray tail = fft.get("-1,:").div(powerScale).reshape(1, fft.getShape().get(1));
        NDList list = new NDList(head, middle, tail);
        fft = NDArrays.concat(list, 0);

        // Frequency of every bin. Floating-point division keeps the bin width exact when the
        // window size does not divide the sample rate.
        NDArray freqsArray = manager.arange(fft.getShape().get(0));
        freqsArray = freqsArray.mul((float) sample_rate / windowSize);

        float[] freqs = freqsArray.toFloatArray();
        int ind = 0;
        for (int i = 0; i < freqs.length; i++) {
            if (freqs[i] <= (sample_rate / 2f)) {
                ind = i;
            } else {
                break;
            }
        }
        ind = ind + 1;

        fft = fft.get(":" + ind + ",:").add(eps);
        fft = fft.log();

        return fft;
    }

    /**
     * Creates a Hanning window, a taper formed by using a weighted cosine:
     * {@code w[n] = 0.5 * (1 - cos(2 * pi * n / (size - 1)))}.
     *
     * @param size number of points of the window
     * @return the window; a single-point window is {@code [1]}
     */
    public static float[] hanningWindow(int size) {
        float[] data = new float[size];
        if (size == 1) {
            // The formula divides by size - 1; a one-point window is defined as 1.
            data[0] = 1f;
            return data;
        }
        for (int n = 0; n < size; n++) {
            data[n] = (float) (0.5 * (1 - Math.cos((2 * Math.PI * n) / (size - 1))));
        }
        return data;
    }

    /**
     * Applies a Hanning window to the given data in place.
     *
     * @param recordedData data to weight; modified and returned
     * @return {@code recordedData}, with every element multiplied by the window value
     */
    public static float[] hanningWindow(float[] recordedData) {
        if (recordedData.length <= 1) {
            // A one-point window is 1, so there is nothing to change.
            return recordedData;
        }
        // Start at 0 so that the first element is weighted as well (its window value is 0).
        for (int n = 0; n < recordedData.length; n++) {
            recordedData[n] *= 0.5 * (1 - Math.cos((2 * Math.PI * n) / (recordedData.length - 1)));
        }
        return recordedData;
    }

    /**
     * Extracts mel-frequency spectrogram features from audio samples using JLibrosa.
     *
     * @param samples audio samples at {@code sample_rate}
     * @param n_fft FFT size, e.g. 1024
     * @param n_mels number of mel bands, e.g. 40
     * @return the mel spectrogram returned by JLibrosa
     */
    public static float[][] melSpecgram(NDArray samples, int n_fft, int n_mels) {
        JLibrosa librosa = new JLibrosa();
        float[][] melSpectrogram =
                librosa.generateMelSpectroGram(
                        samples.toFloatArray(),
                        sample_rate,
                        n_fft,
                        n_mels,
                        (sample_rate * mel_window_step / 1000));
        return melSpectrogram;
    }

    /**
     * Converts PCM bytes into a float array: every pair of bytes becomes one sample that is
     * multiplied by {@code scale}. A trailing odd byte is ignored.
     *
     * @param manager manager used to create the array
     * @param frame PCM bytes, two per sample
     * @return the samples as a FLOAT32 array
     */
    public static NDArray bytesToFloatArray(NDManager manager, byte[] frame) {
        int size = frame.length / n_bytes;
        int[] framei = new int[size];
        for (int i = 0; i < size; i++) {
            framei[i] = IntegerConversion.convertTwoBytesToInt1(frame[2 * i], frame[2 * i + 1]);
        }
        NDArray ans = manager.create(framei).toType(DataType.FLOAT32, false).mul(scale);
        return ans;
    }
}

View File

@ -1,139 +0,0 @@
package me.aias.example.utils;
import com.orctom.vad4j.VAD;
import org.jitsi.webrtcvadwrapper.WebRTCVad;
import java.nio.file.Path;
import java.util.*;
/**
 * Tools for audio preprocessing: silence removal and audio segmentation using voice activity
 * detection (VAD).
 *
 * @author Calvin <179209347@qq.com>
 */
public class AudioVadUtils {
    /**
     * Reads an audio file, splits it into fixed-duration frames and returns the voiced segments.
     *
     * @param path audio file to process
     * @param padding_duration_ms length of the sliding window used to switch state, in ms
     * @param frame_duration_ms duration of each frame, in ms
     * @return voiced segments, each one the concatenated bytes of its frames
     * @throws Exception if the file cannot be read or converted
     */
    public static Queue<byte[]> cropAudioVad(
            Path path, int padding_duration_ms, int frame_duration_ms) throws Exception {
        float sampleRate = SoundUtils.getSampleRate(path.toFile());
        byte[] bytes = SoundUtils.convertAsByteArray(path.toFile(), SoundUtils.WAV_PCM_SIGNED);
        List<byte[]> frames = SoundUtils.frameGenerator(bytes, frame_duration_ms, sampleRate);
        return vadCollector(frames, padding_duration_ms, frame_duration_ms);
    }

    /**
     * Filters out non-voiced audio frames and groups the voiced ones into segments.
     *
     * <p>A sliding window (ring buffer) of the most recent frames drives a two-state machine:
     * NOTTRIGGERED becomes TRIGGERED once more than 90% of the window is voiced, and TRIGGERED
     * becomes NOTTRIGGERED (emitting the collected segment) once more than 90% of the window is
     * unvoiced. Both counts consider only the frames currently inside the window.
     *
     * @param frames audio frames to classify
     * @param padding_duration_ms length of the sliding window, in ms
     * @param frame_duration_ms duration of each frame, in ms
     * @return voiced segments in the order they were detected
     */
    public static Queue<byte[]> vadCollector(
            List<byte[]> frames, int padding_duration_ms, int frame_duration_ms) {
        Queue<byte[]> segments = new LinkedList<>();
        Queue<byte[]> voicedFrames = new LinkedList<>();
        int numPaddingFrames = padding_duration_ms / frame_duration_ms;
        // We use a fixed queue for our sliding window/ring buffer.
        FixedQueue<byte[]> ring = new FixedQueue<byte[]>(numPaddingFrames);
        // Speech flag of every frame in the ring; same capacity, so both evict in lock-step.
        FixedQueue<Boolean> ringFlags = new FixedQueue<Boolean>(numPaddingFrames);
        // We have two states: TRIGGERED and NOTTRIGGERED. We start in the NOTTRIGGERED state.
        boolean triggered = false;
        // NOTE(review): the detector is fixed to 16000 regardless of the file's sample rate - confirm.
        WebRTCVad vad = new WebRTCVad(16000, 3);
        for (byte[] frame : frames) {
            boolean isSpeech = detectSpeech(vad, frame);
            if (!triggered) {
                ring.offer(frame);
                ringFlags.offer(isSpeech);
                // More than 90% of the frames in the window are voiced: enter TRIGGERED and
                // keep the buffered frames as the start of the segment.
                if (countFlags(ringFlags, true) > 0.9 * ring.getSize()) {
                    triggered = true;
                    voicedFrames.addAll(ring.getQueue());
                    ring.clear();
                    ringFlags.clear();
                }
            } else {
                // TRIGGERED: collect the audio and keep tracking the window.
                voicedFrames.add(frame);
                ring.offer(frame);
                ringFlags.offer(isSpeech);
                // More than 90% of the frames in the window are unvoiced: leave TRIGGERED and
                // emit whatever audio has been collected.
                if (countFlags(ringFlags, false) > 0.9 * ring.getSize()) {
                    triggered = false;
                    segments.add(joinFrames(voicedFrames));
                    ring.clear();
                    ringFlags.clear();
                    voicedFrames.clear();
                }
            }
        }
        // If we have any leftover voiced audio when we run out of input, yield it.
        if (!voicedFrames.isEmpty()) {
            segments.add(joinFrames(voicedFrames));
        }
        return segments;
    }

    /**
     * Keeps only the frames the VAD classifies as speech.
     *
     * @param frames audio frames to classify
     * @return the voiced frames, in input order
     */
    public static List<byte[]> vadCollector(List<byte[]> frames) {
        List<byte[]> voicedFrames = new ArrayList<>();
        WebRTCVad vad = new WebRTCVad(16000, 3);
        for (byte[] frame : frames) {
            if (detectSpeech(vad, frame)) {
                voicedFrames.add(frame);
            }
        }
        return voicedFrames;
    }

    /** Decodes a frame's byte pairs into integer samples and asks the VAD whether it is speech. */
    private static boolean detectSpeech(WebRTCVad vad, byte[] frame) {
        int size = frame.length / 2;
        int[] pcmWave = new int[size];
        for (int i = 0; i < size; i++) {
            pcmWave[i] = IntegerConversion.convertTwoBytesToInt1(frame[2 * i], frame[2 * i + 1]);
        }
        // Only the first 160 samples are passed on (zero-padded if the frame is shorter).
        return vad.isSpeech(Arrays.copyOf(pcmWave, 160)); // 160, 320 or 480 integer values
    }

    /** Counts how many flags in the window equal {@code wanted}. */
    private static int countFlags(Iterable<Boolean> flags, boolean wanted) {
        int count = 0;
        for (boolean flag : flags) {
            if (flag == wanted) {
                count++;
            }
        }
        return count;
    }

    /** Concatenates the given frames, in iteration order, into a single byte array. */
    private static byte[] joinFrames(Collection<byte[]> frames) {
        int total = 0;
        for (byte[] frame : frames) {
            total += frame.length;
        }
        byte[] joined = new byte[total];
        int offset = 0;
        for (byte[] frame : frames) {
            System.arraycopy(frame, 0, joined, offset, frame.length);
            offset += frame.length;
        }
        return joined;
    }
}

View File

@ -1,126 +0,0 @@
package me.aias.example.utils;
import com.orctom.vad4j.VAD;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
/**
 * Tools for audio preprocessing: silence removal and audio segmentation using voice activity
 * detection (VAD). This variant uses the vad4j {@code VAD} detector.
 *
 * @author Calvin <179209347@qq.com>
 */
public class AudioVadUtils_bak {
    /**
     * Reads an audio file, splits it into fixed-duration frames and returns the voiced segments.
     *
     * @param path audio file to process
     * @param padding_duration_ms length of the sliding window used to switch state, in ms
     * @param frame_duration_ms duration of each frame, in ms
     * @return voiced segments, each one the concatenated bytes of its frames
     * @throws Exception if the file cannot be read or converted
     */
    public static Queue<byte[]> cropAudioVad(
            Path path, int padding_duration_ms, int frame_duration_ms) throws Exception {
        float sampleRate = SoundUtils.getSampleRate(path.toFile());
        byte[] bytes = SoundUtils.convertAsByteArray(path.toFile(), SoundUtils.WAV_PCM_SIGNED);
        List<byte[]> frames = SoundUtils.frameGenerator(bytes, frame_duration_ms, sampleRate);
        return vadCollector(frames, padding_duration_ms, frame_duration_ms);
    }

    /**
     * Keeps only the frames the VAD classifies as speech.
     *
     * @param frames audio frames to classify
     * @return the voiced frames, in input order
     */
    public static List<byte[]> vadCollector(List<byte[]> frames) {
        List<byte[]> voicedFrames = new ArrayList<>();
        try (VAD vad = new VAD()) {
            for (byte[] frame : frames) {
                if (vad.isSpeech(frame)) {
                    voicedFrames.add(frame);
                }
            }
        }
        return voicedFrames;
    }

    /**
     * Filters out non-voiced audio frames and groups the voiced ones into segments.
     *
     * <p>A sliding window (ring buffer) of the most recent frames drives a two-state machine:
     * NOTTRIGGERED becomes TRIGGERED once more than 90% of the window is voiced, and TRIGGERED
     * becomes NOTTRIGGERED (emitting the collected segment) once more than 90% of the window is
     * unvoiced. Both counts consider only the frames currently inside the window.
     *
     * @param frames audio frames to classify
     * @param padding_duration_ms length of the sliding window, in ms
     * @param frame_duration_ms duration of each frame, in ms
     * @return voiced segments in the order they were detected
     */
    public static Queue<byte[]> vadCollector(
            List<byte[]> frames, int padding_duration_ms, int frame_duration_ms) {
        Queue<byte[]> segments = new LinkedList<>();
        Queue<byte[]> voicedFrames = new LinkedList<>();
        int numPaddingFrames = padding_duration_ms / frame_duration_ms;
        // We use a fixed queue for our sliding window/ring buffer.
        FixedQueue<byte[]> ring = new FixedQueue<byte[]>(numPaddingFrames);
        // Speech flag of every frame in the ring; same capacity, so both evict in lock-step.
        FixedQueue<Boolean> ringFlags = new FixedQueue<Boolean>(numPaddingFrames);
        // We have two states: TRIGGERED and NOTTRIGGERED. We start in the NOTTRIGGERED state.
        boolean triggered = false;
        try (VAD vad = new VAD()) {
            for (byte[] frame : frames) {
                boolean isSpeech = vad.isSpeech(frame);
                if (!triggered) {
                    ring.offer(frame);
                    ringFlags.offer(isSpeech);
                    // More than 90% of the frames in the window are voiced: enter TRIGGERED
                    // and keep the buffered frames as the start of the segment.
                    if (countFlags(ringFlags, true) > 0.9 * ring.getSize()) {
                        triggered = true;
                        voicedFrames.addAll(ring.getQueue());
                        ring.clear();
                        ringFlags.clear();
                    }
                } else {
                    // TRIGGERED: collect the audio and keep tracking the window.
                    voicedFrames.add(frame);
                    ring.offer(frame);
                    ringFlags.offer(isSpeech);
                    // More than 90% of the frames in the window are unvoiced: leave TRIGGERED
                    // and emit whatever audio has been collected.
                    if (countFlags(ringFlags, false) > 0.9 * ring.getSize()) {
                        triggered = false;
                        segments.add(joinFrames(voicedFrames));
                        ring.clear();
                        ringFlags.clear();
                        voicedFrames.clear();
                    }
                }
            }
        }
        // If we have any leftover voiced audio when we run out of input, yield it.
        if (!voicedFrames.isEmpty()) {
            segments.add(joinFrames(voicedFrames));
        }
        return segments;
    }

    /** Counts how many flags in the window equal {@code wanted}. */
    private static int countFlags(Iterable<Boolean> flags, boolean wanted) {
        int count = 0;
        for (boolean flag : flags) {
            if (flag == wanted) {
                count++;
            }
        }
        return count;
    }

    /** Concatenates the given frames, in iteration order, into a single byte array. */
    private static byte[] joinFrames(Queue<byte[]> frames) {
        int total = 0;
        for (byte[] frame : frames) {
            total += frame.length;
        }
        byte[] joined = new byte[total];
        int offset = 0;
        for (byte[] frame : frames) {
            System.arraycopy(frame, 0, joined, offset, frame.length);
            offset += frame.length;
        }
        return joined;
    }
}

View File

@ -1,86 +0,0 @@
package me.aias.example.utils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDManager;
import org.apache.commons.lang3.tuple.Pair;
import java.util.ArrayList;
import java.util.List;
/**
 * CTC Greedy (Best Path) Decoder.
 *
 * @author Calvin <179209347@qq.com>
 */
public class CTCGreedyDecoder {
    /**
     * Decodes the most probable path: takes the best index at every time step, collapses
     * consecutive duplicates and then removes all blanks.
     *
     * <p>The score is the mean probability of the non-blank time steps multiplied by 100, or 0
     * when every time step is blank or the sequence is empty.
     *
     * @param manager not used by this method; kept for interface compatibility
     * @param probs_seq 2D table of probabilities: one row per time step, one column per
     *     vocabulary entry
     * @param vocabulary vocabulary list, indexed by column
     * @param blank_index index of the blank token to remove
     * @return a pair of (score as Float, decoded String)
     */
    public static Pair greedyDecoder(
            NDManager manager, NDArray probs_seq, List<String> vocabulary, long blank_index) {
        int rows = (int) probs_seq.getShape().get(0);
        if (rows == 0) {
            // Nothing to decode; avoids indexing into an empty arg-max result.
            return Pair.of(0f, "");
        }
        // Get the best index for each time step
        long[] maxIndexList = probs_seq.argMax(1).toLongArray();

        float probSum = 0;
        int probCount = 0;
        StringBuilder text = new StringBuilder();
        for (int i = 0; i < maxIndexList.length; i++) {
            long index = maxIndexList[i];
            if (index == blank_index) {
                continue;
            }
            // Every non-blank time step contributes to the score, repeated or not.
            probSum += probs_seq.getFloat(i, index);
            probCount++;
            // Only the first step of a run of identical indices contributes a character.
            boolean startsRun = i == 0 || maxIndexList[i - 1] != index;
            if (startsRun) {
                text.append(vocabulary.get((int) index));
            }
        }

        float score = 0;
        if (probCount > 0) {
            score = (probSum / probCount) * 100.0f;
        }
        return Pair.of(score, text.toString());
    }
}

View File

@ -1,87 +0,0 @@
package me.aias.example.utils;
import org.jtransforms.fft.DoubleFFT_1D;
/**
 * A Fast Fourier Transform wrapper for jTransforms to provide similar functionality to numpy.fft
 * functions used by the Blowhole Python implementation.
 */
public class FFT {
    /**
     * Computes the fast Fourier transform of a real signal.
     *
     * <p>The transform is computed in place: {@code raw} is overwritten with the packed output
     * of {@code DoubleFFT_1D.realForward} and the same array is returned.
     *
     * @param raw the raw signal; overwritten with the transform
     * @return {@code raw}, now holding the packed transform
     */
    public static double[] fft(double[] raw) {
        DoubleFFT_1D transform = new DoubleFFT_1D(raw.length);
        transform.realForward(raw);
        return raw;
    }

    /**
     * Unpacks the physical layout produced by {@code realForward} into separate real and
     * imaginary parts for bins {@code 0 .. n/2}.
     * See jTransform documentation for more information.
     * http://incanter.org/docs/parallelcolt/api/edu/emory/mathcs/jtransforms/fft/DoubleFFT_1D.html#realForward(double[])
     *
     * <p>Packed layout for an input {@code a} of length n:
     * <ul>
     *   <li>n even: {@code a[2k] = Re[k]} for {@code 0 <= k < n/2}, {@code a[2k+1] = Im[k]} for
     *       {@code 0 < k < n/2}, and {@code a[1] = Re[n/2]};
     *   <li>n odd: {@code a[2k] = Re[k]} for {@code 0 <= k < (n+1)/2}, {@code a[2k+1] = Im[k]}
     *       for {@code 0 < k < (n-1)/2}, and {@code a[1] = Im[(n-1)/2]}.
     * </ul>
     *
     * @param fft the packed fast Fourier transform
     * @return {@code result[0]} holds the real parts and {@code result[1]} the imaginary parts,
     *     each of length {@code n / 2 + 1}; two empty arrays when {@code fft} is empty
     */
    public static float[][] rfft(double[] fft) {
        int n = fft.length;
        if (n == 0) {
            return new float[2][0];
        }
        int half = n / 2; // index of the last bin; equals (n - 1) / 2 when n is odd
        boolean even = n % 2 == 0;
        float[][] result = new float[2][half + 1];

        // Real parts stored at even positions; for odd n this includes the last bin.
        int packedReals = even ? half : half + 1;
        for (int k = 0; k < packedReals; k++) {
            result[0][k] = (float) fft[2 * k];
        }
        // Imaginary parts of the interior bins; Im[0] is always 0 for a real signal.
        for (int k = 1; k < half; k++) {
            result[1][k] = (float) fft[2 * k + 1];
        }
        // Slot a[1] carries the value that does not fit the regular interleaving.
        if (even) {
            result[0][half] = (float) fft[1];
        } else if (n > 1) {
            result[1][half] = (float) fft[1];
        }
        return result;
    }

    /**
     * Computes the magnitude of each complex bin.
     *
     * @param complex {@code complex[0]} holds the real parts, {@code complex[1]} the imaginary
     *     parts
     * @return the magnitudes, one per bin
     */
    public static float[] abs(float[][] complex) {
        float[] re = complex[0]; // the real part of the fast Fourier transform
        float[] im = complex[1]; // the imaginary part of the fast Fourier transform
        float[] abs = new float[re.length];
        for (int i = 0; i < re.length; i++) {
            abs[i] = (float) Math.hypot(re[i], im[i]);
        }
        return abs;
    }

    /**
     * Returns the Discrete Fourier Transform sample frequencies.
     * See numpy.fft.rfftfreq for more information.
     *
     * @param n Window length
     * @param d Sample spacing
     * @return Array of length {@code n / 2 + 1} containing the sample frequencies
     *     {@code i / (n * d)}
     */
    public static double[] rfftfreq(int n, double d) {
        double val = 1.0 / (n * d);
        int N = n / 2 + 1;
        double[] results = new double[N];
        for (int i = 0; i < N; i++) {
            results[i] = i * val;
        }
        return results;
    }
}

View File

@ -1,49 +0,0 @@
package me.aias.example.utils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.DataType;
import java.io.ByteArrayOutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Tool for audio preprocessing: normalizes audio features with stored statistics.
 *
 * @author Calvin <179209347@qq.com>
 */
public class FeatureNormalizer {
    static float eps = 1e-20f; // added to the standard value to provide numerical stability

    /**
     * Calculates the normalized value of audio features using mean and standard values:
     * {@code (features - mean) / (std + eps)}.
     *
     * <p>The statistics file is decoded with {@code NDList.decode}; its first array is used as
     * the mean and its second as the standard value. Both are converted to FLOAT32.
     *
     * @param manager manager used to create the decoded arrays
     * @param npzDataPath file path of the mean and standard values
     * @param features audio features that need to be normalized
     * @return the normalized features
     * @throws Exception if the statistics file cannot be read
     */
    public static NDArray apply(NDManager manager, String npzDataPath, NDArray features)
            throws Exception {
        byte[] data = Files.readAllBytes(Paths.get(npzDataPath));
        // Decode once; re-encoding and decoding the list again only reproduces the same arrays.
        NDList stats = NDList.decode(manager, data);
        NDArray mean = stats.get(0).toType(DataType.FLOAT32, false);
        NDArray std = stats.get(1).toType(DataType.FLOAT32, false);
        // (features - self._mean) / (self._std + eps)
        return features.sub(mean).div(std.add(eps));
    }
}

View File

@ -1,148 +0,0 @@
package me.aias.example.utils;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
/**
* 固定长度队列
* Fixed-length queue
*/
public class FixedQueue<E> implements Queue<E> {
// 队列长度
// Length of the queue
private int size;
Queue<E> queue = new LinkedList<E>();
public FixedQueue(int size) {
this.size = size;
}
/**
* 入队
* Enqueue
*
* @param e
*/
@Override
public boolean offer(E e) {
if (queue.size() >= size) {
// 如果超出长度,入队时,先出队
queue.poll();
}
return queue.offer(e);
}
/**
* 出队
* Dequeue
*
* @return
*/
@Override
public E poll() {
return queue.poll();
}
/**
* 获取队列
* Get queue
*
* @return
*/
public Queue<E> getQueue() {
return queue;
}
/**
* 获取限制大小
* Get limit size
*
* @return
*/
public int getSize() {
return size;
}
@Override
public boolean add(E e) {
return queue.add(e);
}
@Override
public E element() {
return queue.element();
}
@Override
public E peek() {
return queue.peek();
}
@Override
public boolean isEmpty() {
return queue.size() == 0 ? true : false;
}
@Override
public int size() {
return queue.size();
}
@Override
public E remove() {
return queue.remove();
}
@Override
public boolean addAll(Collection<? extends E> c) {
return queue.addAll(c);
}
@Override
public void clear() {
queue.clear();
}
@Override
public boolean contains(Object o) {
return queue.contains(o);
}
@Override
public boolean containsAll(Collection<?> c) {
return queue.containsAll(c);
}
@Override
public Iterator<E> iterator() {
return queue.iterator();
}
@Override
public boolean remove(Object o) {
return queue.remove(o);
}
@Override
public boolean removeAll(Collection<?> c) {
return queue.removeAll(c);
}
@Override
public boolean retainAll(Collection<?> c) {
return queue.retainAll(c);
}
@Override
public Object[] toArray() {
return queue.toArray();
}
@Override
public <T> T[] toArray(T[] a) {
return queue.toArray(a);
}
}

View File

@ -1,39 +0,0 @@
package me.aias.example.utils;
/**
 * Helpers that assemble integers from individual bytes given in little-endian order
 * (first argument is the least significant byte).
 *
 * @author Calvin
 * @email 179209347@qq.com
 */
public class IntegerConversion {
    /** Combines two bytes into a signed 16-bit value (the sign comes from {@code b2}). */
    public static int convertTwoBytesToInt1(byte b1, byte b2) {
        int high = b2 << 8;
        int low = Byte.toUnsignedInt(b1);
        return high | low;
    }

    /** Combines four bytes into a signed 32-bit value. */
    public static int convertFourBytesToInt1(byte b1, byte b2, byte b3, byte b4) {
        int value = b4 << 24;
        value |= Byte.toUnsignedInt(b3) << 16;
        value |= Byte.toUnsignedInt(b2) << 8;
        value |= Byte.toUnsignedInt(b1);
        return value;
    }

    /** Combines two bytes into an unsigned 16-bit value (0..65535). */
    public static int convertTwoBytesToInt2(byte b1, byte b2) {
        int high = Byte.toUnsignedInt(b2) << 8;
        return high | Byte.toUnsignedInt(b1);
    }

    /** Combines four bytes into an unsigned 32-bit value, returned as a long. */
    public static long convertFourBytesToInt2(byte b1, byte b2, byte b3, byte b4) {
        long value = Byte.toUnsignedLong(b4) << 24;
        value |= Byte.toUnsignedInt(b3) << 16;
        value |= Byte.toUnsignedInt(b2) << 8;
        value |= Byte.toUnsignedInt(b1);
        return value;
    }

    /** Prints the bytes 0xfe, 0xff decoded as a signed 16-bit sample scaled by 1 / 2^15. */
    public static void main(String[] args) {
        byte low = (byte) 0xfe;
        byte high = (byte) 0xff;
        float unit = 1.0f / (float) (1 << ((8 * 2) - 1));
        float s = convertTwoBytesToInt1(low, high) * unit;
        System.out.print(s);
    }
}

View File

@ -1,252 +0,0 @@
package me.aias.example.utils;
import org.tritonus.share.sampled.AudioFileTypes;
import org.tritonus.share.sampled.Encodings;
import javax.sound.sampled.*;
import java.io.*;
import java.util.*;
/**
 * Sound format conversion utility class.
 *
 * @author Calvin
 * @email 179209347@qq.com
 */
public class SoundUtils {
    // Audio type constants
    public static final AudioType MP3 = new AudioType("MPEG1L3", "MP3", "mp3");
    public static final AudioType WAV = new AudioType("ULAW", "WAVE", "wav");
    public static final AudioType WAV_PCM_SIGNED = new AudioType("PCM_SIGNED", "WAVE", "wav");

    private SoundUtils() {}

    /** Converts a byte array of sound data to the given audio type, also returned as a byte array. */
    public static byte[] convertAsByteArray(byte[] source, AudioType targetType) {
        try {
            System.out.print("Converting byte array to AudioInputStream...");
            AudioInputStream ais = toStream(source, targetType);
            System.out.println("done.");
            System.out.print("Converting stream to new audio format...");
            ais = convertAsStream(ais, targetType);
            System.out.println("done.");
            System.out.print("Converting new stream to byte array...");
            byte[] target = toByteArray(ais, targetType);
            System.out.println("done.");
            return target;
        } catch (IOException | UnsupportedAudioFileException ex) {
            throw new RuntimeException("Exception during audio conversion", ex);
        }
    }

    /**
     * Converts a file of sound data to the given audio type, returned as a byte array.
     *
     * <p>The streams opened on the file are closed before returning, so the file handle is not
     * leaked.
     */
    public static byte[] convertAsByteArray(File file, AudioType targetType) {
        try (AudioInputStream source = AudioSystem.getAudioInputStream(file);
                AudioInputStream converted = convertAsStream(source, targetType)) {
            return toByteArray(converted, targetType);
        } catch (IOException | UnsupportedAudioFileException ex) {
            throw new RuntimeException("Exception during audio conversion", ex);
        }
    }

    /**
     * Converts an InputStream of sound data to the given audio type, returned as a byte array.
     *
     * <p>The supplied stream is left open; closing it is the caller's responsibility.
     */
    public static byte[] convertAsByteArray(InputStream is, AudioType targetType) {
        try {
            AudioInputStream ais = AudioSystem.getAudioInputStream(is);
            ais = convertAsStream(ais, targetType);
            return toByteArray(ais, targetType);
        } catch (IOException | UnsupportedAudioFileException ex) {
            throw new RuntimeException("Exception during audio conversion", ex);
        }
    }

    /**
     * Converts an AudioInputStream to the indicated audio type, also returned as an
     * AudioInputStream.
     *
     * <p>When a direct conversion is not supported, the source is first converted to 16-bit
     * signed little-endian PCM (same sample rate and channel count) if that is supported.
     */
    public static AudioInputStream convertAsStream(
            AudioInputStream sourceStream, AudioType targetType) {
        AudioFormat.Encoding targetEncoding = targetType.getEncoding();
        AudioFormat sourceFormat = sourceStream.getFormat();
        AudioInputStream targetStream = null;
        if (!AudioSystem.isConversionSupported(targetEncoding, sourceFormat)) {
            // Direct conversion not possible, trying with intermediate PCM format
            AudioFormat intermediateFormat =
                    new AudioFormat(
                            AudioFormat.Encoding.PCM_SIGNED,
                            sourceFormat.getSampleRate(),
                            16,
                            sourceFormat.getChannels(),
                            2 * sourceFormat.getChannels(), // frameSize
                            sourceFormat.getSampleRate(),
                            false);
            if (AudioSystem.isConversionSupported(intermediateFormat, sourceFormat)) {
                // Intermediate conversion is supported
                sourceStream = AudioSystem.getAudioInputStream(intermediateFormat, sourceStream);
            }
        }
        targetStream = AudioSystem.getAudioInputStream(targetEncoding, sourceStream);
        if (targetStream == null) {
            throw new RuntimeException("Audio conversion not supported");
        }
        return targetStream;
    }

    /** Converts a byte array to an AudioInputStream with the same audio format. */
    private static AudioInputStream toStream(byte[] bytes, AudioType targetType)
            throws IOException, UnsupportedAudioFileException {
        ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
        return AudioSystem.getAudioInputStream(bais);
    }

    /** Writes an AudioInputStream, in the target type's file format, into a byte array. */
    private static byte[] toByteArray(AudioInputStream ais, AudioType targetType)
            throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        AudioSystem.write(ais, targetType.getFileFormat(), baos);
        return baos.toByteArray();
    }

    /** Append a wav file to another wav file. Failures are printed, not rethrown. */
    public static void appendStream(String wavFile1, String wavFile2, String destinationFile) {
        try (AudioInputStream clip1 = AudioSystem.getAudioInputStream(new File(wavFile1));
                AudioInputStream clip2 = AudioSystem.getAudioInputStream(new File(wavFile2));
                AudioInputStream appendedFiles =
                        new AudioInputStream(
                                new SequenceInputStream(clip1, clip2),
                                clip1.getFormat(),
                                clip1.getFrameLength() + clip2.getFrameLength())) {
            AudioSystem.write(appendedFiles, AudioFileFormat.Type.WAVE, new File(destinationFile));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Get the sample rate of an audio file.
     *
     * @return samples per second as reported by the file's audio format
     */
    public static float getSampleRate(File sourceFile) throws Exception {
        try (AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(sourceFile)) {
            AudioFormat format = audioInputStream.getFormat();
            // Sample rate, not frame rate: the two agree for PCM but can differ for
            // compressed encodings.
            return format.getSampleRate();
        }
    }

    /**
     * Get a wav file time length (seconds), computed as file size / (frame size * frame rate).
     */
    public static float getWavLengthSeconds(File sourceFile) throws Exception {
        try (AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(sourceFile)) {
            AudioFormat format = audioInputStream.getFormat();
            long audioFileLength = sourceFile.length();
            int frameSize = format.getFrameSize();
            float frameRate = format.getFrameRate();
            return (audioFileLength / (frameSize * frameRate));
        }
    }

    /**
     * Generate Frames: splits the bytes into consecutive frames of
     * {@code sampleRate * frameDurationMs / 1000 * 2} bytes each (2 bytes per sample).
     *
     * <p>A frame is emitted only while {@code offset + frameSize < bytes.length}; trailing bytes
     * that do not satisfy this are dropped.
     *
     * @throws IllegalArgumentException if the computed frame size is not positive
     */
    public static List<byte[]> frameGenerator(byte[] bytes, int frameDurationMs, float sampleRate) {
        int n = (int) (sampleRate * (frameDurationMs / 1000.0) * 2);
        if (n <= 0) {
            // A zero-sized frame would never advance the offset and loop forever.
            throw new IllegalArgumentException(
                    "Frame size must be positive, got " + n + " bytes");
        }
        List<byte[]> list = new ArrayList<>();
        int length = bytes.length;
        for (int offset = 0; offset + n < length; offset += n) {
            list.add(Arrays.copyOfRange(bytes, offset, offset + n));
        }
        return list;
    }

    /** Create chop from a wav file. Failures are printed, not rethrown. */
    public static void createChop(
            File sourceFile, File destinationFile, int startSecond, int secondsToCopy) {
        try (AudioInputStream inputStream = AudioSystem.getAudioInputStream(sourceFile)) {
            AudioFileFormat fileFormat = AudioSystem.getAudioFileFormat(sourceFile);
            AudioFormat format = fileFormat.getFormat();
            int bytesPerSecond = format.getFrameSize() * (int) format.getFrameRate();
            inputStream.skip(startSecond * bytesPerSecond);
            // NOTE(review): the "/ 4" makes this a quarter of secondsToCopy * frameRate - confirm.
            long framesOfAudioToCopy = secondsToCopy * (int) format.getFrameRate() / 4;
            try (AudioInputStream shortenedStream =
                    new AudioInputStream(inputStream, format, framesOfAudioToCopy)) {
                AudioSystem.write(shortenedStream, fileFormat.getType(), destinationFile);
            }
        } catch (Exception e) {
            System.out.println(e.toString());
        }
    }

    /**
     * Saves audio samples to a WAV file as 16-bit signed little-endian PCM.
     *
     * <p>Each float is truncated to an int and its low 16 bits are written; no scaling is
     * applied.
     *
     * @param buffer samples to write
     * @param sampleRate sample rate; 22050 is used when 0
     * @param audioChannels number of channels; 1 is used when 0
     * @param outs destination file
     * @throws Exception if the file cannot be written
     */
    public static void toWavFile(float[] buffer, float sampleRate, int audioChannels, File outs)
            throws Exception {
        if (sampleRate == 0.0) {
            sampleRate = 22050;
        }
        if (audioChannels == 0) {
            audioChannels = 1;
        }
        final byte[] byteBuffer = new byte[buffer.length * 2];
        for (int s = 0; s < buffer.length; s++) {
            final int x = (int) (buffer[s]); // * 32767.0
            byteBuffer[2 * s] = (byte) x;
            byteBuffer[2 * s + 1] = (byte) (x >>> 8);
        }
        AudioFormat format = new AudioFormat(sampleRate, 16, audioChannels, true, false);
        // AudioInputStream expects its length in sample frames; one frame holds one sample
        // per channel.
        long frameCount = buffer.length / audioChannels;
        try (ByteArrayInputStream bais = new ByteArrayInputStream(byteBuffer);
                AudioInputStream audioInputStream =
                        new AudioInputStream(bais, format, frameCount)) {
            AudioSystem.write(audioInputStream, AudioFileFormat.Type.WAVE, outs);
        }
    }

    /** Class representing an audio type, encapsulating an encoding and a file format. */
    public static class AudioType {
        private String encodingName;
        private String typeName;
        private String extension;

        public AudioType(String encodingName, String typeName, String extension) {
            this.encodingName = encodingName;
            this.typeName = typeName;
            this.extension = extension;
        }

        /** Looks up the encoding by name through Tritonus' {@code Encodings}. */
        public AudioFormat.Encoding getEncoding() {
            return Encodings.getEncoding(encodingName);
        }

        /** Looks up the file type by name and extension through Tritonus' {@code AudioFileTypes}. */
        public AudioFileFormat.Type getFileFormat() {
            return AudioFileTypes.getType(typeName, extension);
        }
    }
}

View File

@ -1,32 +0,0 @@
package me.aias.example.utils;
import ai.djl.Device;
import ai.djl.ndarray.NDArray;
import ai.djl.repository.zoo.Criteria;
import ai.djl.training.util.ProgressBar;
import org.apache.commons.lang3.tuple.Pair;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Supplies the DJL {@code Criteria} used to load the speech recognition model.
 *
 * @author Calvin
 * @email 179209347@qq.com
 */
public class SpeechRecognition {
    public SpeechRecognition() {}

    /**
     * Builds the model-loading criteria: NDArray input, Pair output, model archive at
     * {@code models/deep_speech.zip}, {@code AudioTranslator} as translator and the
     * PaddlePaddle engine, with a progress bar.
     *
     * @return the configured criteria
     */
    public Criteria<NDArray, Pair> criteria() {
        Path modelArchive = Paths.get("models/deep_speech.zip");
        return Criteria.builder()
                .setTypes(NDArray.class, Pair.class)
                .optModelPath(modelArchive)
                .optTranslator(new AudioTranslator())
                .optEngine("PaddlePaddle") // Use PaddlePaddle engine
                .optProgress(new ProgressBar())
                .build();
    }
}

View File

@ -1,17 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Log4j2 configuration: every logger below writes to the single console appender. -->
<Configuration status="INFO">
<Appenders>
<!-- Console appender on standard output; each line is "[LEVEL] - message". -->
<Console name="console" target="SYSTEM_OUT">
<PatternLayout
pattern="[%-5level] - %msg%n"/>
</Console>
</Appenders>
<Loggers>
<!-- Root logger: level info, routed to the console appender. -->
<Root level="info" additivity="false">
<AppenderRef ref="console"/>
</Root>
<!-- Logger for the "me.calvin" namespace; its level comes from the me.calvin.logging.level system property and falls back to info. -->
<!-- NOTE(review): the Java sources declare package me.aias.example, so this logger name may never match; confirm the intended name. -->
<Logger name="me.calvin" level="${sys:me.calvin.logging.level:-info}" additivity="false">
<AppenderRef ref="console"/>
</Logger>
</Loggers>
</Configuration>

View File

@ -1,68 +0,0 @@
## NDArray Advanced Audio Toolbox
Current features include:
- Get the float array of an audio file
- Save an audio file
- Create a silent audio segment with given duration and sample rate
- Append a silent segment to an audio sample
- Concatenate any number of speech segments together
- Calculate root mean square energy of audio in decibels
- Normalize audio to have desired effective value in decibels
- Compute linear spectrogram using fast Fourier transform
- Hanning window
- Extract mel frequency spectral features from wav
## Running Example - AudioExample
```text
...
NDManager manager = NDManager.newBaseManager(Device.cpu());
float[] floatArray = AudioArrayUtils.floatData("src/test/resources/test.wav");
// Get the float array of the audio
logger.info("Float array of audio: {}", Arrays.toString(floatArray));
NDArray samples = manager.create(floatArray);
float rmsDb = AudioUtils.rmsDb(samples);
// Calculate root mean square energy of audio in decibels
logger.info("Root mean square energy of audio: {}", rmsDb);
// Normalize audio to -20 dB in preparation for feature extraction
float target_dB = -20f;
samples = AudioUtils.normalize(samples, target_dB);
System.out.println("Normalized audio: " + samples.toDebugString(1000000000, 1000, 1000, 1000));
// Stride (step) between successive frames, in milliseconds
float stride_ms = 10f;
// Window size used to generate each frame, in milliseconds
float window_ms = 20f;
samples = AudioUtils.linearSpecgram(manager, samples, stride_ms, window_ms);
logger.info("Linear spectrogram using fast Fourier transform: {}", samples.toDebugString(1000000000, 1000, 10, 1000));
...
```
Command line output:
```text
...
[INFO ] - Root mean square energy of audio: -28.989937
[INFO ] - Normalized audio: ND: (134240) cpu() float32
[ 3.09278257e-03, 3.26460390e-03, 1.71821259e-04, ... 133240 more]
[INFO ] - Linear spectrogram using fast Fourier transform: ND: (161, 838) cpu() float32
[[-15.4571, -16.4412, -16.7098, -20.372 , -23.9935, -15.8598, -17.1589, -15.5935, ..., -14.3427],
[-15.948 , -16.8391, -16.8302, -17.8034, -19.115 , -15.8378, -19.4812, -15.7247, ..., -14.7543],
[-20.8405, -18.5733, -19.4289, -19.1861, -19.4255, -18.1996, -18.0149, -18.977 , ..., -17.5405],
[-19.1938, -21.0139, -21.07 , -20.2931, -20.23 , -22.3037, -20.1103, -21.3521, ..., -20.2267],
[-19.7823, -21.2425, -21.5705, -19.9856, -21.6053, -20.9323, -22.4014, -21.5406, ..., -20.1177],
[-20.0329, -23.9688, -20.718 , -20.9419, -23.5446, -21.1718, -22.1597, -20.9377, ..., -21.3833],
[-19.3693, -21.0484, -21.1794, -20.765 , -20.6318, -20.5121, -21.7306, -20.6366, ..., -21.5107],
[-18.6552, -20.0077, -20.6954, -20.5476, -19.7953, -21.1081, -22.0988, -20.7157, ..., -20.6352],
[-18.9167, -19.3219, -20.1954, -24.7476, -20.7662, -20.3794, -24.4699, -22.1381, ..., -22.1803],
[-19.3055, -19.4264, -20.4486, -22.8299, -21.0847, -23.5101, -20.4897, -19.7943, ..., -22.8922],
... 151 more]
...
```

View File

@ -1,77 +0,0 @@
## NDArray 高级音频工具箱
当前功能包括:
- 获取音频文件的float数组
- 保存音频文件
- 创建给定持续时间和采样率的静音音频段
- 在这个音频样本上加一段静音
- 将任意数量的语音片段连接在一起
- 生成以分贝为单位的音频均方根能量
- 将音频归一化,使其具有所需的有效值(以分贝为单位)
- 用快速傅里叶变换计算线性谱图
- Hanning窗
- 从wav提取mel频谱特征值
## 运行例子 - AudioExample
```text
...
NDManager manager = NDManager.newBaseManager(Device.cpu());
float[] floatArray = AudioArrayUtils.floatData("src/test/resources/test.wav");
//音频的float数组
logger.info("音频的float数组: {}", Arrays.toString(floatArray));
NDArray samples = manager.create(floatArray);
float rmsDb = AudioUtils.rmsDb(samples);
//返回以分贝为单位的音频均方根能量
logger.info("音频均方根能量: {}", rmsDb);
//提取特征前将音频归一化至-20 dB(以分贝为单位)
float target_dB = -20f;
samples = AudioUtils.normalize(samples, target_dB);
System.out.println("音频归一化: " + samples.toDebugString(1000000000, 1000, 1000, 1000));
//生成帧的跨步大小(以毫秒为单位)
float stride_ms = 10f;
//用于生成帧的窗口大小(毫秒)
float window_ms = 20f;
samples = AudioUtils.linearSpecgram(manager, samples, stride_ms, window_ms);
logger.info("快速傅里叶变换计算线性谱图: {}", samples.toDebugString(1000000000, 1000, 10, 1000));
...
```
命令行查看输出:
```text
...
[INFO ] - 音频均方根能量: -28.989937
[INFO ] - 音频归一化: ND: (134240) cpu() float32
[ 3.09278257e-03, 3.26460390e-03, 1.71821259e-04, ... 133240 more]
[INFO ] - 快速傅里叶变换计算线性谱图: ND: (161, 838) cpu() float32
[[-15.4571, -16.4412, -16.7098, -20.372 , -23.9935, -15.8598, -17.1589, -15.5935, ..., -14.3427],
[-15.948 , -16.8391, -16.8302, -17.8034, -19.115 , -15.8378, -19.4812, -15.7247, ..., -14.7543],
[-20.8405, -18.5733, -19.4289, -19.1861, -19.4255, -18.1996, -18.0149, -18.977 , ..., -17.5405],
[-19.1938, -21.0139, -21.07 , -20.2931, -20.23 , -22.3037, -20.1103, -21.3521, ..., -20.2267],
[-19.7823, -21.2425, -21.5705, -19.9856, -21.6053, -20.9323, -22.4014, -21.5406, ..., -20.1177],
[-20.0329, -23.9688, -20.718 , -20.9419, -23.5446, -21.1718, -22.1597, -20.9377, ..., -21.3833],
[-19.3693, -21.0484, -21.1794, -20.765 , -20.6318, -20.5121, -21.7306, -20.6366, ..., -21.5107],
[-18.6552, -20.0077, -20.6954, -20.5476, -19.7953, -21.1081, -22.0988, -20.7157, ..., -20.6352],
[-18.9167, -19.3219, -20.1954, -24.7476, -20.7662, -20.3794, -24.4699, -22.1381, ..., -22.1803],
[-19.3055, -19.4264, -20.4486, -22.8299, -21.0847, -23.5101, -20.4897, -19.7943, ..., -22.8922],
... 151 more]
...
```
### 帮助
引擎定制化配置,可以提升首次运行的引擎下载速度,解决外网无法访问或者带宽过低的问题。
[引擎定制化配置](http://aias.top/engine_cpu.html)
### 官网:
[官网链接](http://www.aias.top/)
### Git地址
[Github链接](https://github.com/mymagicpower/AIAS)
[Gitee链接](https://gitee.com/mymagicpower/AIAS)

View File

@ -1,222 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="CheckStyle-IDEA-Module">
<option name="configuration">
<map />
</option>
</component>
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="aias-jieba-lib-0.1.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-slf4j-impl:2.12.1" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.25" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.12.1" level="project" />
<orderEntry type="library" scope="RUNTIME" name="Maven: org.apache.logging.log4j:log4j-core:2.12.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.12.0" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:21.0" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacv-platform:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacv:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:videoinput:0.200-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas-platform:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp-platform:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:android-arm:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:android-arm64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:android-x86:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:android-x86_64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:ios-arm64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:ios-x86_64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:linux-armhf:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:linux-arm64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:linux-ppc64le:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:linux-x86:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:linux-x86_64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:macosx-x86_64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:windows-x86:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:windows-x86_64:1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:android-arm:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:android-arm64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:android-x86:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:android-x86_64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:ios-arm64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:ios-x86_64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:linux-x86:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:linux-x86_64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:linux-armhf:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:linux-arm64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:linux-ppc64le:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:macosx-x86_64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:windows-x86:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:openblas:windows-x86_64:0.3.10-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv-platform:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:android-arm:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:android-arm64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:android-x86:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:android-x86_64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:ios-arm64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:ios-x86_64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:linux-x86:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:linux-x86_64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:linux-armhf:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:linux-arm64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:linux-ppc64le:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:macosx-x86_64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:windows-x86:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:opencv:windows-x86_64:4.4.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg-platform:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:android-arm:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:android-arm64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:android-x86:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:android-x86_64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:linux-x86:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:linux-x86_64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:linux-armhf:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:linux-arm64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:linux-ppc64le:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:macosx-x86_64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:windows-x86:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:ffmpeg:windows-x86_64:4.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture-platform:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:linux-x86:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:linux-x86_64:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:linux-armhf:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:linux-arm64:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:windows-x86:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flycapture:windows-x86_64:2.13.3.31-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394-platform:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:linux-x86:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:linux-x86_64:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:linux-armhf:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:linux-arm64:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:linux-ppc64le:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:macosx-x86_64:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:windows-x86:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libdc1394:windows-x86_64:2.2.6-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect-platform:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:linux-x86:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:linux-x86_64:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:linux-armhf:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:linux-arm64:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:linux-ppc64le:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:macosx-x86_64:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:windows-x86:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect:windows-x86_64:0.5.7-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2-platform:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2:linux-x86:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2:linux-x86_64:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2:macosx-x86_64:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:libfreenect2:windows-x86_64:0.2.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense-platform:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:linux-x86:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:linux-x86_64:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:macosx-x86_64:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:windows-x86:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense:windows-x86_64:1.12.4-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2-platform:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:linux-x86:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:linux-x86_64:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:macosx-x86_64:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:windows-x86:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:librealsense2:windows-x86_64:2.29.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:videoinput-platform:0.200-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:videoinput:windows-x86:0.200-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:videoinput:windows-x86_64:0.200-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus-platform:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:android-arm:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:android-arm64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:android-x86:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:android-x86_64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:linux-x86:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:linux-x86_64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:linux-armhf:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:linux-arm64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:linux-ppc64le:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:macosx-x86_64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:windows-x86:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:artoolkitplus:windows-x86_64:2.3.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark-platform:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:android-arm:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:android-arm64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:android-x86:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:android-x86_64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:linux-x86:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:linux-x86_64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:linux-armhf:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:linux-arm64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:linux-ppc64le:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:macosx-x86_64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:windows-x86:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:flandmark:windows-x86_64:1.07-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica-platform:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:android-arm:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:android-arm64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:android-x86:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:android-x86_64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:linux-x86:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:linux-x86_64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:linux-armhf:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:linux-arm64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:linux-ppc64le:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:macosx-x86_64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:windows-x86:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:leptonica:windows-x86_64:1.80.0-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract-platform:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:android-arm:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:android-arm64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:android-x86:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:android-x86_64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:linux-x86:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:linux-x86_64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:linux-armhf:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:linux-arm64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:linux-ppc64le:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:macosx-x86_64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:windows-x86:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: org.bytedeco:tesseract:windows-x86_64:4.1.1-1.5.4" level="project" />
<orderEntry type="library" name="Maven: ai.djl:api:0.12.0" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.8.7" level="project" />
<orderEntry type="library" name="Maven: net.java.dev.jna:jna:5.8.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.20" level="project" />
<orderEntry type="library" name="Maven: ai.djl:basicdataset:0.12.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-csv:1.8" level="project" />
<orderEntry type="library" name="Maven: ai.djl:model-zoo:0.12.0" level="project" />
<orderEntry type="library" name="Maven: ai.djl.mxnet:mxnet-model-zoo:0.12.0" level="project" />
<orderEntry type="library" name="Maven: ai.djl.mxnet:mxnet-engine:0.12.0" level="project" />
<orderEntry type="library" scope="RUNTIME" name="Maven: ai.djl.mxnet:mxnet-native-auto:1.8.0" level="project" />
<orderEntry type="library" name="Maven: com.github.wendykierp:JTransforms:3.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.5" level="project" />
<orderEntry type="library" name="Maven: pl.edu.icm:JLargeArrays:1.5" level="project" />
<orderEntry type="module-library">
<library name="Maven: jlibrosa:jlibrosa:1.1.8-SNAPSHOT">
<CLASSES>
<root url="jar://$MODULE_DIR$/lib/jlibrosa-1.1.8-SNAPSHOT.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
</component>
</module>

View File

@ -1,114 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>aias</groupId>
<artifactId>ndarray-audio-sdk</artifactId>
<version>0.23.0</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<djl.version>0.23.0</djl.version>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
<version>3.8.1</version>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.17.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>21.0</version>
</dependency>
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>javacv-platform</artifactId>
<version>1.5.7</version>
</dependency>
<!-- 服务器端推理引擎 -->
<dependency>
<groupId>ai.djl</groupId>
<artifactId>api</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>ai.djl</groupId>
<artifactId>basicdataset</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>ai.djl</groupId>
<artifactId>model-zoo</artifactId>
<version>${djl.version}</version>
</dependency>
<!-- Pytorch -->
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-model-zoo</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-engine</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>com.github.wendykierp</groupId>
<artifactId>JTransforms</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>jlibrosa</groupId>
<artifactId>jlibrosa</artifactId>
<version>1.1.8-SNAPSHOT</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/jlibrosa-1.1.8-SNAPSHOT.jar</systemPath>
</dependency>
</dependencies>
</project>

View File

@ -1,51 +0,0 @@
package me.aias.example;
import ai.djl.Device;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDManager;
import me.aias.example.util.AudioArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
/**
 * Command-line demo of the audio pre-processing steps: load a WAV file as a float array,
 * report its RMS energy, normalize it to a target level and compute a linear spectrogram.
 *
 * @author Calvin
 * @email 179209347@qq.com
 **/
public class AudioExample {
    private static final Logger logger = LoggerFactory.getLogger(AudioExample.class);

    /**
     * Runs the demo against {@code src/test/resources/test.wav}.
     *
     * @param args unused
     * @throws Exception if the audio file cannot be decoded or any processing step fails
     */
    public static void main(String[] args) throws Exception {
        // The manager owns native memory for every NDArray created below, so it is
        // closed deterministically once the demo finishes.
        try (NDManager manager = NDManager.newBaseManager(Device.cpu())) {
            float[] floatArray = AudioArrayUtils.audioSegment("src/test/resources/test.wav").samples;
            // Raw decoded samples as a float array.
            logger.info("Audio float array: {}", Arrays.toString(floatArray));

            NDArray samples = manager.create(floatArray);
            // Root-mean-square energy of the audio, in decibels.
            float rmsDb = me.aias.example.utils.AudioUtils.rmsDb(samples);
            logger.info("root-mean-square energy in decibels: {}", rmsDb);

            // Normalize the audio to -20 dB before feature extraction.
            float target_dB = -20f;
            samples = me.aias.example.utils.AudioUtils.normalize(samples, target_dB);
            System.out.println("Normalize audio: " + samples.toDebugString(1000000000, 1000, 1000, 1000));

            // Stride between consecutive frames, in milliseconds.
            float stride_ms = 10f;
            // Window size used for each frame, in milliseconds.
            float window_ms = 20f;
            samples = me.aias.example.utils.AudioUtils.linearSpecgram(manager, samples, stride_ms, window_ms);
            logger.info("Calculate linear spectrogram: {}", samples.toDebugString(1000000000, 1000, 10, 1000));
        }
    }
}

View File

@ -1,241 +0,0 @@
package me.aias.example.util;
import org.bytedeco.javacv.*;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.ShortBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Helpers for decoding audio files into sample arrays / frame lists with JavaCV's FFmpeg
 * bindings, and for writing samples back to disk.
 *
 * @author Calvin
 */
public class AudioArrayUtils {

    /** Maps a signed 16-bit PCM value to a float in [-1, 1): 1 / 2^15. */
    private static final float PCM16_SCALE = 1.0f / (1 << ((8 * 2) - 1));

    /** Initial capacity (in samples) of the growable decode buffer. */
    private static final int INITIAL_CAPACITY = 1 << 14;

    public static void main(String[] args) throws FrameGrabber.Exception {
        System.out.println(
                Arrays.toString(AudioArrayUtils.audioSegment("src/test/resources/test.wav").samples));
    }

    /** Decoded samples together with the sample rate and channel count reported by the grabber. */
    public static final class AudioSegment {
        public final float[] samples;
        public final Integer sampleRate;
        public final Integer audioChannels;

        public AudioSegment(float[] samples, Integer sampleRate, Integer audioChannels) {
            this.samples = samples;
            this.sampleRate = sampleRate;
            this.audioChannels = audioChannels;
        }
    }

    /** Sample buffers of a single grabbed frame plus that frame's sample rate and channel count. */
    public static final class FrameData {
        public final Buffer[] samples;
        public final Integer sampleRate;
        public final Integer audioChannels;

        public FrameData(Buffer[] samples, Integer sampleRate, Integer audioChannels) {
            this.samples = samples;
            this.sampleRate = sampleRate;
            this.audioChannels = audioChannels;
        }
    }

    /**
     * Decodes an audio file into a float array scaled to [-1, 1).
     *
     * <p>Only the first sample buffer of each frame is read, and it is cast to
     * {@link ShortBuffer}, i.e. the grabber is expected to deliver 16-bit integer samples.
     * Frames that carry no audio samples are skipped.
     *
     * @param path path of the audio file
     * @return the decoded samples with the grabber's sample rate and channel count
     * @throws FrameGrabber.Exception if the file cannot be opened or decoded; failures are
     *     propagated instead of being printed and turned into an empty result
     */
    public static AudioSegment audioSegment(String path) throws FrameGrabber.Exception {
        try (FFmpegFrameGrabber audioGrabber = new FFmpegFrameGrabber(path)) {
            audioGrabber.start();
            int sampleRate = audioGrabber.getSampleRate();
            int audioChannels = audioGrabber.getAudioChannels();

            // Primitive growable buffer: avoids boxing one Float object per sample.
            float[] decoded = new float[INITIAL_CAPACITY];
            int count = 0;

            Frame frame;
            while ((frame = audioGrabber.grabFrame()) != null) {
                Buffer[] buffers = frame.samples;
                if (buffers == null || buffers.length == 0) {
                    continue; // not an audio frame
                }
                // Read through a duplicate so the frame's own position is left untouched.
                ShortBuffer pcm = ((ShortBuffer) buffers[0]).duplicate();
                int incoming = pcm.remaining();
                if (count + incoming > decoded.length) {
                    decoded = Arrays.copyOf(decoded, Math.max(decoded.length * 2, count + incoming));
                }
                for (int i = 0; i < incoming; i++) {
                    decoded[count++] = pcm.get() * PCM16_SCALE;
                }
            }
            return new AudioSegment(Arrays.copyOf(decoded, count), sampleRate, audioChannels);
        }
    }

    /**
     * Copies the remaining content of a ShortBuffer without disturbing the source.
     *
     * @param source buffer to copy from; its position and limit are not modified
     * @param target buffer to copy into, or {@code null} to allocate one of the right size
     * @return the target buffer, flipped so it is ready for reading
     */
    private static ShortBuffer deepCopy(ShortBuffer source, ShortBuffer target) {
        if (null == target) {
            target = ShortBuffer.allocate(source.remaining());
        }
        // duplicate() shares content but has independent position/limit.
        target.put(source.duplicate());
        target.flip();
        return target;
    }

    /**
     * Copies the remaining content of a ByteBuffer without disturbing the source.
     *
     * @param source buffer to copy from; its position and limit are not modified
     * @param target buffer to copy into, or {@code null} to allocate one of the right size
     * @return the target buffer, flipped so it is ready for reading
     */
    private static ByteBuffer deepCopy(ByteBuffer source, ByteBuffer target) {
        if (null == target) {
            target = ByteBuffer.allocate(source.remaining());
        }
        target.put(source.duplicate());
        target.flip();
        return target;
    }

    /**
     * Reads every audio frame of a file into a list of {@link FrameData}.
     *
     * <p>Each frame's sample buffers are deep-copied and the copies are stored in the result
     * (previously the copies were discarded and every entry held {@code null} buffers).
     * Buffers are cast to {@link ShortBuffer}, i.e. 16-bit samples are expected.
     * Frames that carry no audio samples are skipped.
     *
     * @param path path of the audio file
     * @return one FrameData per audio frame, in file order
     * @throws FrameGrabber.Exception if the file cannot be opened or decoded
     */
    public static List<FrameData> frameData(String path) throws FrameGrabber.Exception {
        List<FrameData> audioData = new ArrayList<>();
        try (FFmpegFrameGrabber audioGrabber = new FFmpegFrameGrabber(path)) {
            audioGrabber.start();
            Frame frame;
            while ((frame = audioGrabber.grabFrame()) != null) {
                Buffer[] buffers = frame.samples;
                if (buffers == null) {
                    continue; // not an audio frame
                }
                // NOTE(review): copying presumably guards against the grabber reusing its
                // buffers on the next grabFrame() call - confirm against the JavaCV docs.
                Buffer[] copiedBuffers = new Buffer[buffers.length];
                for (int i = 0; i < buffers.length; i++) {
                    copiedBuffers[i] = deepCopy((ShortBuffer) buffers[i], (ShortBuffer) null);
                }
                audioData.add(new FrameData(copiedBuffers, frame.sampleRate, frame.audioChannels));
            }
        }
        return audioData;
    }

    /**
     * Writes samples to a 16-bit little-endian signed PCM WAV file.
     *
     * <p>Each float is truncated to an int and its low 16 bits are written; no scaling is
     * applied. Callers must therefore pass values already in the 16-bit PCM range - the
     * [-1, 1) output of {@link #audioSegment(String)} has to be rescaled first.
     *
     * @param buffer samples to write (interleaved when there is more than one channel)
     * @param sampleRate sample rate in Hz; 0 selects 22050
     * @param audioChannels channel count; 0 selects 1
     * @param outs destination file
     * @throws Exception if the file cannot be written
     */
    public static void toWavFile(float[] buffer, float sampleRate, int audioChannels, File outs)
            throws Exception {
        if (sampleRate == 0.0) {
            sampleRate = 22050;
        }
        if (audioChannels == 0) {
            audioChannels = 1;
        }

        final byte[] byteBuffer = new byte[buffer.length * 2];
        for (int i = 0; i < buffer.length; i++) {
            final int x = (int) (buffer[i]); // * 32767.0
            byteBuffer[2 * i] = (byte) x;             // low byte first (little-endian)
            byteBuffer[2 * i + 1] = (byte) (x >>> 8); // high byte
        }

        AudioFormat format = new AudioFormat(sampleRate, 16, audioChannels, true, false);
        // AudioInputStream expects its length in sample frames (one sample per channel),
        // not in individual samples.
        long frameCount = buffer.length / audioChannels;
        try (ByteArrayInputStream bais = new ByteArrayInputStream(byteBuffer);
                AudioInputStream audioInputStream = new AudioInputStream(bais, format, frameCount)) {
            AudioSystem.write(audioInputStream, AudioFileFormat.Type.WAVE, outs);
        }
    }

    /**
     * Records a list of frames to an audio file through FFmpeg.
     *
     * <p>The recorder is now started before recording; previously every {@code record} call
     * failed on the unstarted recorder and the failure was swallowed, so nothing was written.
     * An empty list writes nothing.
     *
     * @param audioData frames to write, in order
     * @param path destination file path
     * @param audioChannels channel count of the output
     * @throws IllegalStateException if the recorder cannot be started or a frame cannot be written
     */
    public void toWavFile(List<FrameData> audioData, String path, int audioChannels) {
        if (audioData.isEmpty()) {
            return;
        }
        try (FFmpegFrameRecorder recorder = new FFmpegFrameRecorder(path, audioChannels)) {
            // Record at the source sample rate when it is known.
            Integer sourceRate = audioData.get(0).sampleRate;
            if (sourceRate != null && sourceRate > 0) {
                recorder.setSampleRate(sourceRate);
            }
            recorder.start();
            for (FrameData frameData : audioData) {
                Frame frame = new Frame();
                frame.sampleRate = frameData.sampleRate;
                frame.audioChannels = frameData.audioChannels;
                frame.samples = frameData.samples;
                recorder.record(frame);
            }
        } catch (FrameRecorder.Exception e) {
            // The signature has no checked exception, so surface the failure unchecked
            // rather than silently dropping it.
            throw new IllegalStateException("Failed to write audio to " + path, e);
        }
    }
}

View File

@ -1,253 +0,0 @@
package me.aias.example.utils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDArrays;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.Shape;
import com.jlibrosa.audio.JLibrosa;
import me.aias.example.util.FFT;
/**
 * Audio pre-processing helpers: silence generation and padding, concatenation,
 * RMS normalization, Hann windowing and spectrogram feature extraction.
 *
 * @author Calvin
 * @email 179209347@qq.com
 **/
public class AudioUtils {
    /** Mel spectrogram hop; converted from milliseconds to samples in melSpecgram. */
    static int mel_window_step = 10;
    /** Upper bound, in decibels, of the gain that normalize() applies. */
    static float max_gain_db = 300.0f;
    /** Sample rate in Hz used by the spectrogram methods. */
    static int sample_rate = 16000;
    /** Offset added to the power spectrum before the logarithm so log(0) cannot occur. */
    static float eps = 1e-14f;

    /**
     * Creates a silent (all-zero) audio segment.
     *
     * @param manager    manager that allocates the array
     * @param duration   length of the segment in seconds
     * @param sampleRate sample rate in Hz
     * @return array of {@code duration * sampleRate} zeros
     */
    public static NDArray makeSilence(NDManager manager, long duration, int sampleRate) {
        return manager.zeros(new Shape(duration * sampleRate));
    }

    /**
     * Adds a silent segment to an audio sample.
     *
     * @param manager   manager that allocates the padding
     * @param wav       audio samples
     * @param padLength number of zero samples per padded side
     * @param sides     padding position:
     *                  'beginning' - add the silent segment to the beginning
     *                  'end' - add the silent segment to the end
     *                  'both' - add silent segments to both sides
     * @return the padded audio
     * @throws Exception if {@code sides} is none of the values above
     */
    public static NDArray padSilence(NDManager manager, NDArray wav, long padLength, String sides) throws Exception {
        NDArray pad = manager.zeros(new Shape(padLength));
        if (sides.equals("beginning")) {
            wav = pad.concat(wav);
        } else if (sides.equals("end")) {
            wav = wav.concat(pad);
        } else if (sides.equals("both")) {
            wav = pad.concat(wav);
            wav = wav.concat(pad);
        } else {
            throw new Exception("Unknown value for the sides " + sides);
        }
        return wav;
    }

    /**
     * Concatenates any number of speech segments, in list order.
     *
     * @param segments input speech segments to concatenate; must contain at least one
     * @return the joined audio
     */
    public static NDArray concatenate(NDList segments) {
        NDArray joined = segments.get(0);
        for (int i = 1; i < segments.size(); i++) {
            joined = joined.concat(segments.get(i));
        }
        return joined;
    }

    /**
     * Root mean square energy in decibels, computed as {@code 10 * log10(mean(samples^2))}.
     *
     * @param samples audio samples
     * @return RMS energy in dB
     */
    public static float rmsDb(NDArray samples) {
        NDArray meanSquare = samples.pow(2).mean();
        return meanSquare.log10().mul(10).toFloatArray()[0];
    }

    /**
     * Normalizes audio to a target RMS value in decibels. The target should be
     * less than 0.0 as 0.0 is full-scale audio. The applied gain is capped at
     * {@code max_gain_db}.
     *
     * @param samples   audio samples
     * @param target_db target RMS value in decibels
     * @return the rescaled samples
     */
    public static NDArray normalize(NDArray samples, float target_db) {
        float gain = Math.min(target_db - rmsDb(samples), max_gain_db);
        // Convert the gain in decibels to a linear amplitude factor.
        float factor = (float) Math.pow(10f, gain / 20f);
        return samples.mul(factor);
    }

    /**
     * Calculates a log power spectrogram using the fast Fourier transform.
     *
     * <p>The signal is cut into overlapping windows, each window is weighted with a
     * Hann window and transformed, and the squared magnitudes are scaled by the window
     * energy times the sample rate. Bins above half the sample rate are dropped, and
     * the natural logarithm of {@code value + eps} is returned.
     *
     * @param manager   manager that allocates the intermediate and result arrays
     * @param samples   audio samples; read through {@code toFloatArray()}
     * @param stride_ms hop between consecutive windows in milliseconds
     * @param window_ms window length in milliseconds
     * @return spectrogram with one row per frequency bin and one column per window
     * @throws IllegalArgumentException if the stride or window is shorter than one
     *                                  sample, or the signal is shorter than one window
     */
    public static NDArray linearSpecgram(NDManager manager, NDArray samples, float stride_ms, float window_ms) {
        int strideSize = (int) (0.001 * sample_rate * stride_ms);
        int windowSize = (int) (0.001 * sample_rate * window_ms);
        if (strideSize <= 0 || windowSize <= 0) {
            throw new IllegalArgumentException("stride_ms and window_ms must each span at least one sample");
        }
        if (samples.size() < windowSize) {
            throw new IllegalArgumentException("samples is shorter than one analysis window");
        }

        // Drop trailing samples that do not fill a whole stride.
        long truncateSize = (samples.size() - windowSize) % strideSize;
        long len = samples.size() - truncateSize;
        samples = samples.get(":" + len);

        int cols = ((int) samples.size() - windowSize) / strideSize + 1;
        float[] signal = samples.toFloatArray();
        float[] weighting = hanningWindow(windowSize);

        // Window the signal and transform one column (window) at a time.
        NDList fftList = new NDList();
        for (int col = 0; col < cols; col++) {
            double[] frame = new double[windowSize];
            int offset = col * strideSize;
            for (int row = 0; row < windowSize; row++) {
                // The product is rounded to float before widening to double.
                float weighted = signal[row + offset] * weighting[row];
                frame[row] = weighted;
            }
            float[][] complex = FFT.rfft(FFT.fft(frame));
            fftList.add(manager.create(FFT.abs(complex)));
        }

        // Rows are frequency bins after the transpose; square magnitudes to get power.
        NDArray power = NDArrays.stack(fftList).transpose();
        power = power.pow(2);

        NDArray weightingArray = manager.create(weighting).pow(2);
        NDArray scale = weightingArray.sum().mul(sample_rate);

        // The first and last bins are scaled once, all bins in between twice.
        NDArray middle = power.get("1:-1,:").mul(2).div(scale);
        NDArray head = power.get("0,:").div(scale).reshape(1, power.getShape().get(1));
        NDArray tail = power.get("-1,:").div(scale).reshape(1, power.getShape().get(1));
        power = NDArrays.concat(new NDList(head, middle, tail), 0);

        // Keep only the bins whose frequency does not exceed half the sample rate.
        // Floating-point division keeps the bin width exact when windowSize does not
        // divide sample_rate.
        long bins = power.getShape().get(0);
        float binWidth = (float) sample_rate / windowSize;
        float nyquist = sample_rate / 2.0f;
        int ind = 0;
        for (int i = 0; i < bins; i++) {
            if (i * binWidth <= nyquist) {
                ind = i;
            } else {
                break;
            }
        }
        ind = ind + 1;

        power = power.get(":" + ind + ",:").add(eps);
        return power.log();
    }

    /**
     * Weight of the Hann window at position {@code n} of a window with {@code size} points.
     * A single-point window has weight 1, which also avoids dividing by zero.
     */
    private static double hannWeight(int n, int size) {
        if (size == 1) {
            return 1.0;
        }
        return 0.5 * (1 - Math.cos((2 * Math.PI * n) / (size - 1)));
    }

    /**
     * Hanning window.
     * The Hanning window is a taper formed by using a weighted cosine.
     *
     * @param size number of points in the window
     * @return the window weights
     */
    public static float[] hanningWindow(int size) {
        float[] data = new float[size];
        for (int n = 0; n < size; n++) {
            data[n] = (float) hannWeight(n, size);
        }
        return data;
    }

    /**
     * Applies a Hanning window to the given samples in place.
     * The Hanning window is a taper formed by using a weighted cosine.
     *
     * @param recordedData samples to taper; the array itself is modified
     * @return the same array instance, after windowing
     */
    public static float[] hanningWindow(float[] recordedData) {
        // Start at 0: the first weight of the window is 0, so the first sample must
        // be tapered as well.
        for (int n = 0; n < recordedData.length; n++) {
            recordedData[n] *= hannWeight(n, recordedData.length);
        }
        return recordedData;
    }

    /**
     * Extracts Mel-frequency spectrogram features from wav samples.
     *
     * @param samples audio samples
     * @param n_fft   FFT size, e.g. 1024
     * @param n_mels  number of Mel bands, e.g. 40
     * @return the Mel spectrogram produced by JLibrosa
     */
    public static float[][] melSpecgram(NDArray samples, int n_fft, int n_mels) {
        JLibrosa librosa = new JLibrosa();
        // Hop length in samples: mel_window_step milliseconds at sample_rate.
        int hopLength = sample_rate * mel_window_step / 1000;
        return librosa.generateMelSpectroGram(samples.toFloatArray(), sample_rate, n_fft, n_mels, hopLength);
    }
}

View File

@ -1,91 +0,0 @@
package me.aias.example.util;
import org.jtransforms.fft.DoubleFFT_1D;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * A Fast Fourier Transform wrapper for jTransforms to provide similar functionality to numpy.fft
 * functions used by the Blowhole Python implementation.
 */
public class FFT {
    /**
     * Computes the fast Fourier transform of a real signal.
     *
     * <p>The transform is performed in place: {@code raw} is overwritten with the packed
     * result and the same array instance is returned. Use {@link #rfft(double[])} to
     * unpack it into real and imaginary parts.
     *
     * @param raw the raw signal; overwritten with the transform
     * @return {@code raw}, now holding the packed transform
     */
    public static double[] fft(double[] raw) {
        DoubleFFT_1D transform = new DoubleFFT_1D(raw.length);
        transform.realForward(raw);
        return raw;
    }

    /**
     * Unpacks the physical layout produced by {@code DoubleFFT_1D.realForward} into
     * separate real and imaginary parts.
     * See jTransform documentation for more information.
     * http://incanter.org/docs/parallelcolt/api/edu/emory/mathcs/jtransforms/fft/DoubleFFT_1D.html#realForward(double[])
     *
     * <p>Packed layout of an array {@code a} of length {@code n}:
     * <ul>
     *   <li>{@code a[2k]} is Re[k]; {@code a[2k+1]} is Im[k] for the inner bins;</li>
     *   <li>even {@code n}: {@code a[1]} is Re[n/2];</li>
     *   <li>odd {@code n}: {@code a[1]} is Im[(n-1)/2] and {@code a[n-1]} is Re[(n-1)/2].</li>
     * </ul>
     * Im[0] is always 0, and for even {@code n} so is Im[n/2].
     *
     * @param fft the packed fast Fourier transform
     * @return {@code result[0]} holds the real parts and {@code result[1]} the imaginary
     *         parts of the {@code n / 2 + 1} non-negative frequency bins
     * @throws IllegalArgumentException if {@code fft} is empty
     */
    public static float[][] rfft(double[] fft) {
        int n = fft.length;
        if (n == 0) {
            throw new IllegalArgumentException("fft must not be empty");
        }
        int half = n / 2;
        float[][] result = new float[2][half + 1];
        float[] re = result[0];
        float[] im = result[1];

        for (int k = 0; k < half; k++) {
            re[k] = (float) fft[2 * k];
            if (k > 0) {
                // a[1] does not hold Im[0]; it is handled below.
                im[k] = (float) fft[2 * k + 1];
            }
        }

        if (n % 2 == 0) {
            // n is even: a[1] carries the real part of the last bin.
            re[half] = (float) fft[1];
        } else {
            // n is odd: the last bin has its real part at a[n-1] and its imaginary part at a[1].
            re[half] = (float) fft[n - 1];
            if (n > 1) {
                im[half] = (float) fft[1];
            }
        }
        return result;
    }

    /**
     * Computes the magnitude of each complex value.
     *
     * @param complex {@code complex[0]} holds the real parts, {@code complex[1]} the imaginary parts
     * @return the magnitudes, one per real part
     */
    public static float[] abs(float[][] complex) {
        float[] re = complex[0];
        float[] im = complex[1];
        float[] magnitude = new float[re.length];
        for (int i = 0; i < re.length; i++) {
            magnitude[i] = (float) Math.hypot(re[i], im[i]);
        }
        return magnitude;
    }

    /**
     * Returns the Discrete Fourier Transform sample frequencies.
     * See numpy.fft.rfftfreq for more information.
     *
     * @param n Window length
     * @param d Sample spacing
     * @return Array of length {@code n / 2 + 1} containing the sample frequencies
     */
    public static double[] rfftfreq(int n, double d) {
        double step = 1.0 / (n * d);
        int count = n / 2 + 1;
        double[] results = new double[count];
        for (int i = 0; i < count; i++) {
            results[i] = i * step;
        }
        return results;
    }
}

View File

@ -1,17 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logging configuration (appears to be Log4j 2 format): every logger declared here writes to one console appender. -->
<Configuration status="INFO">
<Appenders>
<!-- Console appender writing to standard output. -->
<Console name="console" target="SYSTEM_OUT">
<!-- Line format "[LEVEL] - message": level left-aligned and padded to 5 characters, then the message and a newline. -->
<PatternLayout
pattern="[%-5level] - %msg%n"/>
</Console>
</Appenders>
<Loggers>
<!-- Default logger: info level, console output. -->
<Root level="info" additivity="false">
<AppenderRef ref="console"/>
</Root>
<!-- Logger for the "me.calvin" name. Its level is taken from the me.calvin.logging.level system property, with "info" as the fallback. -->
<!-- NOTE(review): the Java sources next to this file declare packages under me.aias, which this logger name would not match - confirm whether "me.calvin" is still intended. -->
<Logger name="me.calvin" level="${sys:me.calvin.logging.level:-info}" additivity="false">
<AppenderRef ref="console"/>
</Logger>
</Loggers>
</Configuration>