Fangjun Kuang
Committed by GitHub

Add Java/Kotlin API and Android support for ten-vad (#2389)

正在显示 36 个修改的文件 包含 396 行增加47 行删除
... ... @@ -237,11 +237,20 @@ jobs:
rm *.wav
rm -rf sherpa-onnx-*
- name: Run java test (VAD remove silence)
- name: Run java test (ten-vad remove silence)
shell: bash
run: |
cd ./java-api-examples
./run-vad-remove-slience.sh
./run-ten-vad-remove-silence.sh
rm *.onnx
ls -lh *.wav
rm *.wav
- name: Run java test (silero-vad remove silence)
shell: bash
run: |
cd ./java-api-examples
./run-vad-remove-silence.sh
rm *.onnx
ls -lh *.wav
rm *.wav
... ...
... ... @@ -15,7 +15,7 @@ func main() {
config := sherpa.VadModelConfig{}
// Please download silero_vad.onnx from
// https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
config.SileroVad.Model = "./silero_vad.onnx"
config.SileroVad.Threshold = 0.5
... ...
... ... @@ -3,7 +3,7 @@
set -ex
if [ ! -f ./silero_vad.onnx ]; then
curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi
if [ ! -f ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.int8.onnx ]; then
... ...
... ... @@ -15,7 +15,7 @@ func main() {
config := sherpa.VadModelConfig{}
// Please download silero_vad.onnx from
// https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
config.SileroVad.Model = "./silero_vad.onnx"
config.SileroVad.Threshold = 0.5
... ...
... ... @@ -3,7 +3,7 @@
set -ex
if [ ! -f ./silero_vad.onnx ]; then
curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi
if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then
... ...
... ... @@ -89,7 +89,7 @@ func createVad() *sherpa.VoiceActivityDetector {
config := sherpa.VadModelConfig{}
// Please download silero_vad.onnx from
// https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
config.SileroVad.Model = "./silero_vad.onnx"
config.SileroVad.Threshold = 0.5
... ...
... ... @@ -11,7 +11,7 @@ if [ ! -f ./sr-data/enroll/fangjun-sr-1.wav ]; then
fi
if [ ! -f ./silero_vad.onnx ]; then
curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi
go mod tidy
... ...
... ... @@ -15,7 +15,7 @@ func main() {
config := sherpa.VadModelConfig{}
// Please download silero_vad.onnx from
// https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
config.SileroVad.Model = "./silero_vad.onnx"
config.SileroVad.Threshold = 0.5
... ...
... ... @@ -3,7 +3,7 @@
set -ex
if [ ! -f ./silero_vad.onnx ]; then
curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi
if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
... ...
... ... @@ -113,6 +113,7 @@ The punctuation model supports both English and Chinese.
```bash
./run-vad-remove-silence.sh
./run-ten-vad-remove-silence.sh
```
## VAD + Non-streaming Dolphin CTC for speech recognition
... ...
// Copyright 2025 Xiaomi Corporation
// This file shows how to use a ten-vad model to remove silences from
// a wave file.
import com.k2fsa.sherpa.onnx.*;
import java.util.ArrayList;
import java.util.Arrays;
/**
 * Example program: removes silence from a wave file with a ten-vad model.
 *
 * <p>The input file is fed to the VAD one window at a time, every speech segment reported by the
 * VAD is collected, and the concatenation of all segments is written to a new wave file.
 */
public class TenVadRemoveSilence {
  // Sample rate passed to the VAD configuration and used when writing the output file.
  private static final int SAMPLE_RATE = 16000;

  /**
   * Runs the example.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    // please download ./ten-vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./ten-vad.onnx";
    TenVadModelConfig tenVad =
        TenVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(256)
            .setMaxSpeechDuration(5.0f)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setTenVadModelConfig(tenVad)
            .setSampleRate(SAMPLE_RATE)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    Vad vad = new Vad(config);
    try {
      // You can download the test file from
      // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
      String testWaveFilename = "./lei-jun-test.wav";
      WaveReader reader = new WaveReader(testWaveFilename);

      // Fetch the samples once instead of calling getSamples() on every iteration.
      float[] waveform = reader.getSamples();

      int windowSize = tenVad.getWindowSize();
      // Integer division: a trailing chunk shorter than one window is not fed to the VAD.
      int numIter = waveform.length / windowSize;

      ArrayList<float[]> segments = new ArrayList<>();
      for (int i = 0; i != numIter; ++i) {
        int start = i * windowSize;
        int end = start + windowSize;
        vad.acceptWaveform(Arrays.copyOfRange(waveform, start, end));
        if (vad.isSpeechDetected()) {
          drainSegments(vad, segments);
        }
      }

      // Flush the detector, then collect whatever segments are still queued.
      vad.flush();
      drainSegments(vad, segments);

      // get total number of samples
      int n = 0;
      for (float[] s : segments) {
        n += s.length;
      }

      // Concatenate all speech segments into one contiguous buffer.
      float[] allSamples = new float[n];
      int offset = 0;
      for (float[] s : segments) {
        System.arraycopy(s, 0, allSamples, offset, s.length);
        offset += s.length;
      }

      String outFilename = "lei-jun-test-no-silence.wav";
      WaveWriter.write(outFilename, allSamples, SAMPLE_RATE);
      System.out.printf("Saved to %s\n", outFilename);
    } finally {
      // Release the detector even if reading, processing or writing fails.
      vad.release();
    }
  }

  /** Moves every segment currently queued in {@code vad} to the end of {@code out}. */
  private static void drainSegments(Vad vad, ArrayList<float[]> out) {
    while (!vad.empty()) {
      // if you want to get the starting time of this segment, you can use
      /* float startTime = vad.front().getStart() / 16000.0f; */
      out.add(vad.front().getSamples());
      vad.pop();
    }
  }
}
... ...
#!/usr/bin/env bash
#
# Builds the JNI library and the Java API jar if they are missing, downloads
# the ten-vad model and a test wave file if they are missing, and then runs
# the TenVadRemoveSilence.java example.

set -ex

# Build the JNI shared library unless a .dylib or .so build already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it does not exist yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download the ten-vad model if it is not already present.
if [ ! -f ./ten-vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

# Download the test wave file if it is not already present.
if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

# Quote the library path so that a checkout directory containing whitespace
# is still passed to java as a single argument.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./TenVadRemoveSilence.java
... ...
... ... @@ -10,7 +10,7 @@ from a microphone.
Usage:
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
./python-api-examples/simulate-streaming-sense-voice-microphone.py \
--silero-vad-model=./silero_vad.onnx \
... ...
... ... @@ -18,12 +18,12 @@ Note that `zh` means Chinese, while `en` means English.
(2) Download the VAD model
Please visit
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx
For instance,
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
(3) Run this script
... ...
... ... @@ -40,12 +40,12 @@ Note that `zh` means Chinese, while `en` means English.
(3) Download the VAD model
Please visit
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx
For instance,
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
(4) Please refer to ./generate-subtitles.py
to download a non-streaming ASR model.
... ...
... ... @@ -38,12 +38,12 @@ Note that `zh` means Chinese, while `en` means English.
(3) Download the VAD model
Please visit
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx
For instance,
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
(4) Please refer to ./generate-subtitles.py
to download a non-streaming ASR model.
... ...
... ... @@ -36,12 +36,12 @@ Note that `zh` means Chinese, while `en` means English.
(3) Download the VAD model
Please visit
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx
For instance,
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
(4) Run this script
... ...
... ... @@ -55,7 +55,7 @@ def main():
if not Path(args.silero_vad_model).is_file():
raise RuntimeError(
f"{args.silero_vad_model} does not exist. Please download it from "
"https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx"
"https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx"
)
device_name = args.device_name
... ...
... ... @@ -38,7 +38,7 @@ def main():
if not Path(args.silero_vad_model).is_file():
raise RuntimeError(
f"{args.silero_vad_model} does not exist. Please download it from "
"https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx"
"https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx"
)
mic_sample_rate = 16000
... ...
... ... @@ -14,12 +14,12 @@ python3 ./vad-remove-non-speech-segments-alsa.py \
--silero-vad-model silero_vad.onnx
Please visit
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx
For instance,
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
"""
import argparse
... ...
... ... @@ -13,12 +13,11 @@ python3 ./vad-remove-non-speech-segments-from-file.py \
output.wav
Please visit
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx
For instance,
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
"""
import argparse
... ...
... ... @@ -11,12 +11,12 @@ python3 ./vad-remove-non-speech-segments.py \
--silero-vad-model silero_vad.onnx
Please visit
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx
For instance,
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
"""
import argparse
... ...
... ... @@ -70,12 +70,13 @@ to install sherpa-onnx and to download non-streaming pre-trained models
used in this file.
Please visit
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx
For instance,
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
"""
import argparse
import sys
... ...
... ... @@ -32,11 +32,12 @@ log "====================x86===================="
mkdir -p apks
log "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx"
log "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx"
# Download the model
pushd ./android/SherpaOnnxVad/app/src/main/assets/
wget -c https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget -c https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
popd
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
... ... @@ -67,4 +68,47 @@ done
rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx
# Now for ten-vad
git checkout .
pushd android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"type = 0/type = 1/" ./MainActivity.kt
git diff
popd
log "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx"
# Download the model
pushd ./android/SherpaOnnxVad/app/src/main/assets/
wget -c https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
popd
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
log "------------------------------------------------------------"
log "build apk for $arch"
log "------------------------------------------------------------"
src_arch=$arch
if [ $arch == "armeabi-v7a" ]; then
src_arch=armv7-eabi
elif [ $arch == "x86_64" ]; then
src_arch=x86-64
fi
ls -lh ./build-android-$src_arch/install/lib/*.so
cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/
pushd ./android/SherpaOnnxVad
sed -i.bak s/2048/9012/g ./gradle.properties
git diff ./gradle.properties
./gradlew assembleRelease
popd
mv android/SherpaOnnxVad/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-ten_vad.apk
ls -lh apks
rm -v ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/*.so
done
rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx
ls -lh apks/
... ...
... ... @@ -201,11 +201,11 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl {
int32_t num_trailing_blanks = r.num_trailing_blanks;
// assume subsampling_factor is 4
// assume frameshift is 0.01 second
float trailing_slience = num_trailing_blanks * 4 * 0.01;
float trailing_silence = num_trailing_blanks * 4 * 0.01;
// it resets automatically after detecting 1.5 seconds of silence
float threshold = 1.5;
if (trailing_slience > threshold) {
if (trailing_silence > threshold) {
Reset(s);
}
}
... ...
... ... @@ -29,10 +29,10 @@ This program shows how to use a streaming VAD with non-streaming ASR in
sherpa-onnx.
Please download silero_vad.onnx from
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
For instance, use
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
Please refer to ./sherpa-onnx-microphone-offline.cc
to download models for offline ASR.
... ...
... ... @@ -30,10 +30,10 @@ This program shows how to use VAD in sherpa-onnx.
device_name
Please download silero_vad.onnx from
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
For instance, use
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
The device name specifies which microphone to use in case there are several
on your system. You can use
... ...
... ... @@ -45,10 +45,10 @@ This program shows how to use a streaming VAD with non-streaming ASR in
sherpa-onnx.
Please download silero_vad.onnx from
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
For instance, use
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
Please refer to ./sherpa-onnx-microphone-offline.cc
to download models for offline ASR.
... ...
... ... @@ -49,10 +49,10 @@ This program shows how to use VAD in sherpa-onnx.
--vad-num-threads=1
Please download silero_vad.onnx from
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
For instance, use
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
)usage";
sherpa_onnx::ParseOptions po(kUsageMessage);
... ...
... ... @@ -23,10 +23,10 @@ to remove silences from a file.
/path/to/output.wav
Please download silero_vad.onnx from
https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
For instance, use
wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
input.wav should be 16kHz.
)usage";
... ...
... ... @@ -74,6 +74,7 @@ java_files += SpeakerEmbeddingExtractorConfig.java
java_files += SpeakerEmbeddingExtractor.java
java_files += SpeakerEmbeddingManager.java
java_files += TenVadModelConfig.java
java_files += SileroVadModelConfig.java
java_files += VadModelConfig.java
java_files += SpeechSegment.java
... ...
// Copyright 2025 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;
/**
 * Immutable settings for the ten-vad model.
 *
 * <p>Create instances with {@link #builder()} (or {@code new Builder()}), chain the setters and
 * finish with {@link Builder#build()}.
 *
 * <p>NOTE: the JNI code resolves the private fields of this class by name ({@code model},
 * {@code threshold}, {@code minSilenceDuration}, {@code minSpeechDuration}, {@code windowSize},
 * {@code maxSpeechDuration}), so these names must stay as they are.
 */
public class TenVadModelConfig {
  private final String model;
  private final float threshold;
  private final float minSilenceDuration;
  private final float minSpeechDuration;
  private final int windowSize;
  private final float maxSpeechDuration;

  /** Copies every value out of the given builder. */
  private TenVadModelConfig(Builder b) {
    model = b.model;
    threshold = b.threshold;
    minSilenceDuration = b.minSilenceDuration;
    minSpeechDuration = b.minSpeechDuration;
    windowSize = b.windowSize;
    maxSpeechDuration = b.maxSpeechDuration;
  }

  /** Returns a fresh builder pre-populated with the default values. */
  public static Builder builder() {
    return new Builder();
  }

  /** Returns the configured model value (an empty string by default). */
  public String getModel() {
    return this.model;
  }

  /** Returns the configured threshold (0.5 by default). */
  public float getThreshold() {
    return this.threshold;
  }

  /** Returns the configured minimum silence duration (0.25 by default). */
  public float getMinSilenceDuration() {
    return this.minSilenceDuration;
  }

  /** Returns the configured minimum speech duration (0.25 by default). */
  public float getMinSpeechDuration() {
    return this.minSpeechDuration;
  }

  /** Returns the configured window size (256 by default). */
  public int getWindowSize() {
    return this.windowSize;
  }

  /** Returns the configured maximum speech duration (5.0 by default). */
  public float getMaxSpeechDuration() {
    return this.maxSpeechDuration;
  }

  /**
   * Fluent builder for {@link TenVadModelConfig}. Every setter stores its argument and returns
   * this builder so that calls can be chained.
   */
  public static class Builder {
    private String model = "";
    private float threshold = 0.5f;
    private float minSilenceDuration = 0.25f;
    private float minSpeechDuration = 0.25f;
    private int windowSize = 256;
    private float maxSpeechDuration = 5.0f;

    /** Sets the model value. */
    public Builder setModel(String model) {
      this.model = model;
      return this;
    }

    /** Sets the threshold. */
    public Builder setThreshold(float threshold) {
      this.threshold = threshold;
      return this;
    }

    /** Sets the minimum silence duration. */
    public Builder setMinSilenceDuration(float minSilenceDuration) {
      this.minSilenceDuration = minSilenceDuration;
      return this;
    }

    /** Sets the minimum speech duration. */
    public Builder setMinSpeechDuration(float minSpeechDuration) {
      this.minSpeechDuration = minSpeechDuration;
      return this;
    }

    /** Sets the window size. */
    public Builder setWindowSize(int windowSize) {
      this.windowSize = windowSize;
      return this;
    }

    /** Sets the maximum speech duration. */
    public Builder setMaxSpeechDuration(float maxSpeechDuration) {
      this.maxSpeechDuration = maxSpeechDuration;
      return this;
    }

    /** Creates a configuration holding the values currently stored in this builder. */
    public TenVadModelConfig build() {
      return new TenVadModelConfig(this);
    }
  }
}
... ...
... ... @@ -4,6 +4,7 @@ package com.k2fsa.sherpa.onnx;
public class VadModelConfig {
private final SileroVadModelConfig sileroVadModelConfig;
private final TenVadModelConfig tenVadModelConfig;
private final int sampleRate;
private final int numThreads;
private final boolean debug;
... ... @@ -11,6 +12,7 @@ public class VadModelConfig {
private VadModelConfig(Builder builder) {
this.sileroVadModelConfig = builder.sileroVadModelConfig;
this.tenVadModelConfig = builder.tenVadModelConfig;
this.sampleRate = builder.sampleRate;
this.numThreads = builder.numThreads;
this.debug = builder.debug;
... ... @@ -25,6 +27,10 @@ public class VadModelConfig {
return sileroVadModelConfig;
}
public TenVadModelConfig getTenVadModelConfig() {
return tenVadModelConfig;
}
public int getSampleRate() {
return sampleRate;
}
... ... @@ -43,6 +49,7 @@ public class VadModelConfig {
public static class Builder {
private SileroVadModelConfig sileroVadModelConfig = new SileroVadModelConfig.Builder().build();
private TenVadModelConfig tenVadModelConfig = new TenVadModelConfig.Builder().build();
private int sampleRate = 16000;
private int numThreads = 1;
private boolean debug = true;
... ... @@ -57,6 +64,11 @@ public class VadModelConfig {
return this;
}
public Builder setTenVadModelConfig(TenVadModelConfig tenVadModelConfig) {
this.tenVadModelConfig = tenVadModelConfig;
return this;
}
public Builder setSampleRate(int sampleRate) {
this.sampleRate = sampleRate;
return this;
... ...
... ... @@ -44,6 +44,33 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) {
ans.silero_vad.max_speech_duration =
env->GetFloatField(silero_vad_config, fid);
// ten-vad
fid = env->GetFieldID(cls, "tenVadModelConfig",
"Lcom/k2fsa/sherpa/onnx/TenVadModelConfig;");
jobject ten_vad_config = env->GetObjectField(config, fid);
jclass ten_vad_config_cls = env->GetObjectClass(ten_vad_config);
fid = env->GetFieldID(ten_vad_config_cls, "model", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(ten_vad_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.ten_vad.model = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(ten_vad_config_cls, "threshold", "F");
ans.ten_vad.threshold = env->GetFloatField(ten_vad_config, fid);
fid = env->GetFieldID(ten_vad_config_cls, "minSilenceDuration", "F");
ans.ten_vad.min_silence_duration = env->GetFloatField(ten_vad_config, fid);
fid = env->GetFieldID(ten_vad_config_cls, "minSpeechDuration", "F");
ans.ten_vad.min_speech_duration = env->GetFloatField(ten_vad_config, fid);
fid = env->GetFieldID(ten_vad_config_cls, "windowSize", "I");
ans.ten_vad.window_size = env->GetIntField(ten_vad_config, fid);
fid = env->GetFieldID(ten_vad_config_cls, "maxSpeechDuration", "F");
ans.ten_vad.max_speech_duration = env->GetFloatField(ten_vad_config, fid);
fid = env->GetFieldID(cls, "sampleRate", "I");
ans.sample_rate = env->GetIntField(config, fid);
... ... @@ -179,8 +206,9 @@ JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_Vad_isSpeechDetected(
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(
JNIEnv *env, jobject /*obj*/, jlong ptr) {
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(JNIEnv *env,
jobject /*obj*/,
jlong ptr) {
SafeJNI(env, "Vad_reset", [&] {
if (!ValidatePointer(env, ptr, "Vad_reset",
"VoiceActivityDetector pointer is null.")) {
... ...
... ... @@ -12,8 +12,18 @@ data class SileroVadModelConfig(
var maxSpeechDuration: Float = 5.0F,
)
/**
 * Settings for the ten-vad model.
 *
 * Every property is mutable and has a default, so an instance can be created
 * with no arguments and adjusted afterwards. The property list has the same
 * shape as [SileroVadModelConfig] declared above.
 *
 * NOTE(review): the durations are presumably in seconds and the window size
 * in samples - confirm against the native implementation.
 */
data class TenVadModelConfig(
var model: String = "",
var threshold: Float = 0.5F,
var minSilenceDuration: Float = 0.25F,
var minSpeechDuration: Float = 0.25F,
var windowSize: Int = 256,
var maxSpeechDuration: Float = 5.0F,
)
data class VadModelConfig(
var sileroVadModelConfig: SileroVadModelConfig = SileroVadModelConfig(),
var tenVadModelConfig: TenVadModelConfig = TenVadModelConfig(),
var sampleRate: Int = 16000,
var numThreads: Int = 1,
var provider: String = "cpu",
... ... @@ -91,10 +101,14 @@ class Vad(
}
// Please visit
// https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
// to download silero_vad.onnx
// and put it inside the assets/
// directory
//
// For ten-vad, please use
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
//
fun getVadModelConfig(type: Int): VadModelConfig? {
when (type) {
0 -> {
... ... @@ -111,6 +125,21 @@ fun getVadModelConfig(type: Int): VadModelConfig? {
provider = "cpu",
)
}
1 -> {
return VadModelConfig(
tenVadModelConfig = TenVadModelConfig(
model = "ten-vad.onnx",
threshold = 0.5F,
minSilenceDuration = 0.25F,
minSpeechDuration = 0.25F,
windowSize = 256,
),
sampleRate = 16000,
numThreads = 1,
provider = "cpu",
)
}
}
return null
}
... ...