Committed by
GitHub
Add Java/Kotlin API and Android support for ten-vad (#2389)
正在显示
36 个修改的文件
包含
396 行增加
和
47 行删除
| @@ -237,11 +237,20 @@ jobs: | @@ -237,11 +237,20 @@ jobs: | ||
| 237 | rm *.wav | 237 | rm *.wav |
| 238 | rm -rf sherpa-onnx-* | 238 | rm -rf sherpa-onnx-* |
| 239 | 239 | ||
| 240 | - - name: Run java test (VAD remove silence) | 240 | + - name: Run java test (ten-vad remove silence) |
| 241 | shell: bash | 241 | shell: bash |
| 242 | run: | | 242 | run: | |
| 243 | cd ./java-api-examples | 243 | cd ./java-api-examples |
| 244 | - ./run-vad-remove-slience.sh | 244 | + ./run-ten-vad-remove-silence.sh |
| 245 | + rm *.onnx | ||
| 246 | + ls -lh *.wav | ||
| 247 | + rm *.wav | ||
| 248 | + | ||
| 249 | + - name: Run java test (silero-vad remove silence) | ||
| 250 | + shell: bash | ||
| 251 | + run: | | ||
| 252 | + cd ./java-api-examples | ||
| 253 | + ./run-vad-remove-silence.sh | ||
| 245 | rm *.onnx | 254 | rm *.onnx |
| 246 | ls -lh *.wav | 255 | ls -lh *.wav |
| 247 | rm *.wav | 256 | rm *.wav |
| @@ -15,7 +15,7 @@ func main() { | @@ -15,7 +15,7 @@ func main() { | ||
| 15 | config := sherpa.VadModelConfig{} | 15 | config := sherpa.VadModelConfig{} |
| 16 | 16 | ||
| 17 | // Please download silero_vad.onnx from | 17 | // Please download silero_vad.onnx from |
| 18 | - // https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 18 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 19 | 19 | ||
| 20 | config.SileroVad.Model = "./silero_vad.onnx" | 20 | config.SileroVad.Model = "./silero_vad.onnx" |
| 21 | config.SileroVad.Threshold = 0.5 | 21 | config.SileroVad.Threshold = 0.5 |
| @@ -3,7 +3,7 @@ | @@ -3,7 +3,7 @@ | ||
| 3 | set -ex | 3 | set -ex |
| 4 | 4 | ||
| 5 | if [ ! -f ./silero_vad.onnx ]; then | 5 | if [ ! -f ./silero_vad.onnx ]; then |
| 6 | - curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 7 | fi | 7 | fi |
| 8 | 8 | ||
| 9 | if [ ! -f ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.int8.onnx ]; then | 9 | if [ ! -f ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.int8.onnx ]; then |
| @@ -15,7 +15,7 @@ func main() { | @@ -15,7 +15,7 @@ func main() { | ||
| 15 | config := sherpa.VadModelConfig{} | 15 | config := sherpa.VadModelConfig{} |
| 16 | 16 | ||
| 17 | // Please download silero_vad.onnx from | 17 | // Please download silero_vad.onnx from |
| 18 | - // https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 18 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 19 | 19 | ||
| 20 | config.SileroVad.Model = "./silero_vad.onnx" | 20 | config.SileroVad.Model = "./silero_vad.onnx" |
| 21 | config.SileroVad.Threshold = 0.5 | 21 | config.SileroVad.Threshold = 0.5 |
| @@ -3,7 +3,7 @@ | @@ -3,7 +3,7 @@ | ||
| 3 | set -ex | 3 | set -ex |
| 4 | 4 | ||
| 5 | if [ ! -f ./silero_vad.onnx ]; then | 5 | if [ ! -f ./silero_vad.onnx ]; then |
| 6 | - curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 7 | fi | 7 | fi |
| 8 | 8 | ||
| 9 | if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then | 9 | if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then |
| @@ -89,7 +89,7 @@ func createVad() *sherpa.VoiceActivityDetector { | @@ -89,7 +89,7 @@ func createVad() *sherpa.VoiceActivityDetector { | ||
| 89 | config := sherpa.VadModelConfig{} | 89 | config := sherpa.VadModelConfig{} |
| 90 | 90 | ||
| 91 | // Please download silero_vad.onnx from | 91 | // Please download silero_vad.onnx from |
| 92 | - // https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 92 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 93 | 93 | ||
| 94 | config.SileroVad.Model = "./silero_vad.onnx" | 94 | config.SileroVad.Model = "./silero_vad.onnx" |
| 95 | config.SileroVad.Threshold = 0.5 | 95 | config.SileroVad.Threshold = 0.5 |
| @@ -11,7 +11,7 @@ if [ ! -f ./sr-data/enroll/fangjun-sr-1.wav ]; then | @@ -11,7 +11,7 @@ if [ ! -f ./sr-data/enroll/fangjun-sr-1.wav ]; then | ||
| 11 | fi | 11 | fi |
| 12 | 12 | ||
| 13 | if [ ! -f ./silero_vad.onnx ]; then | 13 | if [ ! -f ./silero_vad.onnx ]; then |
| 14 | - curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 14 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 15 | fi | 15 | fi |
| 16 | 16 | ||
| 17 | go mod tidy | 17 | go mod tidy |
| @@ -15,7 +15,7 @@ func main() { | @@ -15,7 +15,7 @@ func main() { | ||
| 15 | config := sherpa.VadModelConfig{} | 15 | config := sherpa.VadModelConfig{} |
| 16 | 16 | ||
| 17 | // Please download silero_vad.onnx from | 17 | // Please download silero_vad.onnx from |
| 18 | - // https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 18 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 19 | 19 | ||
| 20 | config.SileroVad.Model = "./silero_vad.onnx" | 20 | config.SileroVad.Model = "./silero_vad.onnx" |
| 21 | config.SileroVad.Threshold = 0.5 | 21 | config.SileroVad.Threshold = 0.5 |
| @@ -3,7 +3,7 @@ | @@ -3,7 +3,7 @@ | ||
| 3 | set -ex | 3 | set -ex |
| 4 | 4 | ||
| 5 | if [ ! -f ./silero_vad.onnx ]; then | 5 | if [ ! -f ./silero_vad.onnx ]; then |
| 6 | - curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 7 | fi | 7 | fi |
| 8 | 8 | ||
| 9 | if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then | 9 | if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then |
| @@ -113,6 +113,7 @@ The punctuation model supports both English and Chinese. | @@ -113,6 +113,7 @@ The punctuation model supports both English and Chinese. | ||
| 113 | 113 | ||
| 114 | ```bash | 114 | ```bash |
| 115 | - ./run-vad-remove-slience.sh | 115 | + ./run-vad-remove-silence.sh |
| 116 | +./run-ten-vad-remove-silence.sh | ||
| 116 | ``` | 117 | ``` |
| 117 | 118 | ||
| 118 | ## VAD + Non-streaming Dolphin CTC for speech recognition | 119 | ## VAD + Non-streaming Dolphin CTC for speech recognition |
java-api-examples/TenVadRemoveSilence.java
0 → 100644
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a ten-vad model to remove silences from | ||
| 4 | +// a wave file. | ||
| 5 | + | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | +import java.util.ArrayList; | ||
| 8 | +import java.util.Arrays; | ||
| 9 | + | ||
| 10 | +public class TenVadRemoveSilence { | ||
| 11 | + public static void main(String[] args) { | ||
| 12 | + // please download ./ten-vad.onnx from | ||
| 13 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 14 | + String model = "./ten-vad.onnx"; | ||
| 15 | + TenVadModelConfig tenVad = | ||
| 16 | + TenVadModelConfig.builder() | ||
| 17 | + .setModel(model) | ||
| 18 | + .setThreshold(0.5f) | ||
| 19 | + .setMinSilenceDuration(0.25f) | ||
| 20 | + .setMinSpeechDuration(0.5f) | ||
| 21 | + .setWindowSize(256) | ||
| 22 | + .setMaxSpeechDuration(5.0f) | ||
| 23 | + .build(); | ||
| 24 | + | ||
| 25 | + VadModelConfig config = | ||
| 26 | + VadModelConfig.builder() | ||
| 27 | + .setTenVadModelConfig(tenVad) | ||
| 28 | + .setSampleRate(16000) | ||
| 29 | + .setNumThreads(1) | ||
| 30 | + .setDebug(true) | ||
| 31 | + .setProvider("cpu") | ||
| 32 | + .build(); | ||
| 33 | + | ||
| 34 | + Vad vad = new Vad(config); | ||
| 35 | + | ||
| 36 | + // You can download the test file from | ||
| 37 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 38 | + String testWaveFilename = "./lei-jun-test.wav"; | ||
| 39 | + WaveReader reader = new WaveReader(testWaveFilename); | ||
| 40 | + | ||
| 41 | + int numSamples = reader.getSamples().length; | ||
| 42 | + int windowSize = tenVad.getWindowSize(); | ||
| 43 | + int numIter = numSamples / windowSize; | ||
| 44 | + | ||
| 45 | + ArrayList<float[]> segments = new ArrayList<float[]>(); | ||
| 46 | + | ||
| 47 | + for (int i = 0; i != numIter; ++i) { | ||
| 48 | + int start = i * windowSize; | ||
| 49 | + int end = start + windowSize; | ||
| 50 | + float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end); | ||
| 51 | + vad.acceptWaveform(samples); | ||
| 52 | + if (vad.isSpeechDetected()) { | ||
| 53 | + while (!vad.empty()) { | ||
| 54 | + | ||
| 55 | + // if you want to get the starting time of this segment, you can use | ||
| 56 | + /* float startTime = vad.front().getStart() / 16000.0f; */ | ||
| 57 | + | ||
| 58 | + segments.add(vad.front().getSamples()); | ||
| 59 | + vad.pop(); | ||
| 60 | + } | ||
| 61 | + } | ||
| 62 | + } | ||
| 63 | + | ||
| 64 | + vad.flush(); | ||
| 65 | + while (!vad.empty()) { | ||
| 66 | + | ||
| 67 | + // if you want to get the starting time of this segment, you can use | ||
| 68 | + /* float startTime = vad.front().getStart() / 16000.0f; */ | ||
| 69 | + | ||
| 70 | + segments.add(vad.front().getSamples()); | ||
| 71 | + vad.pop(); | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + // get total number of samples | ||
| 75 | + int n = 0; | ||
| 76 | + for (float[] s : segments) { | ||
| 77 | + n += s.length; | ||
| 78 | + } | ||
| 79 | + | ||
| 80 | + float[] allSamples = new float[n]; | ||
| 81 | + int i = 0; | ||
| 82 | + for (float[] s : segments) { | ||
| 83 | + System.arraycopy(s, 0, allSamples, i, s.length); | ||
| 84 | + i += s.length; | ||
| 85 | + } | ||
| 86 | + | ||
| 87 | + String outFilename = "lei-jun-test-no-silence.wav"; | ||
| 88 | + WaveWriter.write(outFilename, allSamples, 16000); | ||
| 89 | + System.out.printf("Saved to %s\n", outFilename); | ||
| 90 | + | ||
| 91 | + vad.release(); | ||
| 92 | + } | ||
| 93 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 6 | + mkdir -p ../build | ||
| 7 | + pushd ../build | ||
| 8 | + cmake \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 11 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 12 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 14 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 15 | + .. | ||
| 16 | + | ||
| 17 | + make -j4 | ||
| 18 | + ls -lh lib | ||
| 19 | + popd | ||
| 20 | +fi | ||
| 21 | + | ||
| 22 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 23 | + pushd ../sherpa-onnx/java-api | ||
| 24 | + make | ||
| 25 | + popd | ||
| 26 | +fi | ||
| 27 | + | ||
| 28 | +if [ ! -f ./ten-vad.onnx ]; then | ||
| 29 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx | ||
| 30 | +fi | ||
| 31 | + | ||
| 32 | +if [ ! -f ./lei-jun-test.wav ]; then | ||
| 33 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav | ||
| 34 | +fi | ||
| 35 | + | ||
| 36 | +java \ | ||
| 37 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 38 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 39 | + ./TenVadRemoveSilence.java |
| @@ -10,7 +10,7 @@ from a microphone. | @@ -10,7 +10,7 @@ from a microphone. | ||
| 10 | Usage: | 10 | Usage: |
| 11 | 11 | ||
| 12 | 12 | ||
| 13 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 13 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 14 | 14 | ||
| 15 | ./python-api-examples/simulate-streaming-sense-voice-microphone.py \ | 15 | ./python-api-examples/simulate-streaming-sense-voice-microphone.py \ |
| 16 | --silero-vad-model=./silero_vad.onnx \ | 16 | --silero-vad-model=./silero_vad.onnx \ |
| @@ -18,12 +18,12 @@ Note that `zh` means Chinese, while `en` means English. | @@ -18,12 +18,12 @@ Note that `zh` means Chinese, while `en` means English. | ||
| 18 | 18 | ||
| 19 | (2) Download the VAD model | 19 | (2) Download the VAD model |
| 20 | Please visit | 20 | Please visit |
| 21 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 21 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 22 | to download silero_vad.onnx | 22 | to download silero_vad.onnx |
| 23 | 23 | ||
| 24 | For instance, | 24 | For instance, |
| 25 | 25 | ||
| 26 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 26 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 27 | 27 | ||
| 28 | (3) Run this script | 28 | (3) Run this script |
| 29 | 29 |
| @@ -40,12 +40,12 @@ Note that `zh` means Chinese, while `en` means English. | @@ -40,12 +40,12 @@ Note that `zh` means Chinese, while `en` means English. | ||
| 40 | 40 | ||
| 41 | (3) Download the VAD model | 41 | (3) Download the VAD model |
| 42 | Please visit | 42 | Please visit |
| 43 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 43 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 44 | to download silero_vad.onnx | 44 | to download silero_vad.onnx |
| 45 | 45 | ||
| 46 | For instance, | 46 | For instance, |
| 47 | 47 | ||
| 48 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 48 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 49 | 49 | ||
| 50 | (4) Please refer to ./generate-subtitles.py | 50 | (4) Please refer to ./generate-subtitles.py |
| 51 | to download a non-streaming ASR model. | 51 | to download a non-streaming ASR model. |
| @@ -38,12 +38,12 @@ Note that `zh` means Chinese, while `en` means English. | @@ -38,12 +38,12 @@ Note that `zh` means Chinese, while `en` means English. | ||
| 38 | 38 | ||
| 39 | (3) Download the VAD model | 39 | (3) Download the VAD model |
| 40 | Please visit | 40 | Please visit |
| 41 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 41 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 42 | to download silero_vad.onnx | 42 | to download silero_vad.onnx |
| 43 | 43 | ||
| 44 | For instance, | 44 | For instance, |
| 45 | 45 | ||
| 46 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 46 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 47 | 47 | ||
| 48 | (4) Please refer to ./generate-subtitles.py | 48 | (4) Please refer to ./generate-subtitles.py |
| 49 | to download a non-streaming ASR model. | 49 | to download a non-streaming ASR model. |
| @@ -36,12 +36,12 @@ Note that `zh` means Chinese, while `en` means English. | @@ -36,12 +36,12 @@ Note that `zh` means Chinese, while `en` means English. | ||
| 36 | 36 | ||
| 37 | (3) Download the VAD model | 37 | (3) Download the VAD model |
| 38 | Please visit | 38 | Please visit |
| 39 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 39 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 40 | to download silero_vad.onnx | 40 | to download silero_vad.onnx |
| 41 | 41 | ||
| 42 | For instance, | 42 | For instance, |
| 43 | 43 | ||
| 44 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 44 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 45 | 45 | ||
| 46 | (4) Run this script | 46 | (4) Run this script |
| 47 | 47 |
| @@ -55,7 +55,7 @@ def main(): | @@ -55,7 +55,7 @@ def main(): | ||
| 55 | if not Path(args.silero_vad_model).is_file(): | 55 | if not Path(args.silero_vad_model).is_file(): |
| 56 | raise RuntimeError( | 56 | raise RuntimeError( |
| 57 | f"{args.silero_vad_model} does not exist. Please download it from " | 57 | f"{args.silero_vad_model} does not exist. Please download it from " |
| 58 | - "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx" | 58 | + "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx" |
| 59 | ) | 59 | ) |
| 60 | 60 | ||
| 61 | device_name = args.device_name | 61 | device_name = args.device_name |
| @@ -38,7 +38,7 @@ def main(): | @@ -38,7 +38,7 @@ def main(): | ||
| 38 | if not Path(args.silero_vad_model).is_file(): | 38 | if not Path(args.silero_vad_model).is_file(): |
| 39 | raise RuntimeError( | 39 | raise RuntimeError( |
| 40 | f"{args.silero_vad_model} does not exist. Please download it from " | 40 | f"{args.silero_vad_model} does not exist. Please download it from " |
| 41 | - "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx" | 41 | + "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx" |
| 42 | ) | 42 | ) |
| 43 | 43 | ||
| 44 | mic_sample_rate = 16000 | 44 | mic_sample_rate = 16000 |
| @@ -14,12 +14,12 @@ python3 ./vad-remove-non-speech-segments-alsa.py \ | @@ -14,12 +14,12 @@ python3 ./vad-remove-non-speech-segments-alsa.py \ | ||
| 14 | --silero-vad-model silero_vad.onnx | 14 | --silero-vad-model silero_vad.onnx |
| 15 | 15 | ||
| 16 | Please visit | 16 | Please visit |
| 17 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 17 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 18 | to download silero_vad.onnx | 18 | to download silero_vad.onnx |
| 19 | 19 | ||
| 20 | For instance, | 20 | For instance, |
| 21 | 21 | ||
| 22 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 22 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 23 | """ | 23 | """ |
| 24 | 24 | ||
| 25 | import argparse | 25 | import argparse |
| @@ -13,12 +13,11 @@ python3 ./vad-remove-non-speech-segments-from-file.py \ | @@ -13,12 +13,11 @@ python3 ./vad-remove-non-speech-segments-from-file.py \ | ||
| 13 | output.wav | 13 | output.wav |
| 14 | 14 | ||
| 15 | Please visit | 15 | Please visit |
| 16 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 16 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 17 | to download silero_vad.onnx | 17 | to download silero_vad.onnx |
| 18 | 18 | ||
| 19 | For instance, | 19 | For instance, |
| 20 | - | ||
| 21 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 20 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 22 | """ | 21 | """ |
| 23 | 22 | ||
| 24 | import argparse | 23 | import argparse |
| @@ -11,12 +11,12 @@ python3 ./vad-remove-non-speech-segments.py \ | @@ -11,12 +11,12 @@ python3 ./vad-remove-non-speech-segments.py \ | ||
| 11 | --silero-vad-model silero_vad.onnx | 11 | --silero-vad-model silero_vad.onnx |
| 12 | 12 | ||
| 13 | Please visit | 13 | Please visit |
| 14 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 14 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 15 | to download silero_vad.onnx | 15 | to download silero_vad.onnx |
| 16 | 16 | ||
| 17 | For instance, | 17 | For instance, |
| 18 | 18 | ||
| 19 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 19 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 20 | """ | 20 | """ |
| 21 | 21 | ||
| 22 | import argparse | 22 | import argparse |
| @@ -70,12 +70,13 @@ to install sherpa-onnx and to download non-streaming pre-trained models | @@ -70,12 +70,13 @@ to install sherpa-onnx and to download non-streaming pre-trained models | ||
| 70 | used in this file. | 70 | used in this file. |
| 71 | 71 | ||
| 72 | Please visit | 72 | Please visit |
| 73 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 73 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 74 | to download silero_vad.onnx | 74 | to download silero_vad.onnx |
| 75 | 75 | ||
| 76 | For instance, | 76 | For instance, |
| 77 | 77 | ||
| 78 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 78 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 79 | + | ||
| 79 | """ | 80 | """ |
| 80 | import argparse | 81 | import argparse |
| 81 | import sys | 82 | import sys |
| @@ -32,11 +32,12 @@ log "====================x86====================" | @@ -32,11 +32,12 @@ log "====================x86====================" | ||
| 32 | 32 | ||
| 33 | mkdir -p apks | 33 | mkdir -p apks |
| 34 | 34 | ||
| 35 | -log "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx" | 35 | +log "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx" |
| 36 | + | ||
| 36 | 37 | ||
| 37 | # Download the model | 38 | # Download the model |
| 38 | pushd ./android/SherpaOnnxVad/app/src/main/assets/ | 39 | pushd ./android/SherpaOnnxVad/app/src/main/assets/ |
| 39 | -wget -c https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 40 | +wget -c https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 40 | popd | 41 | popd |
| 41 | 42 | ||
| 42 | for arch in arm64-v8a armeabi-v7a x86_64 x86; do | 43 | for arch in arm64-v8a armeabi-v7a x86_64 x86; do |
| @@ -67,4 +68,47 @@ done | @@ -67,4 +68,47 @@ done | ||
| 67 | 68 | ||
| 68 | rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx | 69 | rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx |
| 69 | 70 | ||
| 71 | + | ||
| 72 | +# Now for ten-vad | ||
| 73 | +git checkout . | ||
| 74 | +pushd android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx | ||
| 75 | +sed -i.bak s/"type = 0/type = 1/" ./MainActivity.kt | ||
| 76 | +git diff | ||
| 77 | +popd | ||
| 78 | + | ||
| 79 | +log "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx" | ||
| 80 | + | ||
| 81 | +# Download the model | ||
| 82 | +pushd ./android/SherpaOnnxVad/app/src/main/assets/ | ||
| 83 | +wget -c https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx | ||
| 84 | +popd | ||
| 85 | + | ||
| 86 | +for arch in arm64-v8a armeabi-v7a x86_64 x86; do | ||
| 87 | + log "------------------------------------------------------------" | ||
| 88 | + log "build apk for $arch" | ||
| 89 | + log "------------------------------------------------------------" | ||
| 90 | + src_arch=$arch | ||
| 91 | + if [ $arch == "armeabi-v7a" ]; then | ||
| 92 | + src_arch=armv7-eabi | ||
| 93 | + elif [ $arch == "x86_64" ]; then | ||
| 94 | + src_arch=x86-64 | ||
| 95 | + fi | ||
| 96 | + | ||
| 97 | + ls -lh ./build-android-$src_arch/install/lib/*.so | ||
| 98 | + | ||
| 99 | + cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/ | ||
| 100 | + | ||
| 101 | + pushd ./android/SherpaOnnxVad | ||
| 102 | + sed -i.bak s/2048/9012/g ./gradle.properties | ||
| 103 | + git diff ./gradle.properties | ||
| 104 | + ./gradlew assembleRelease | ||
| 105 | + popd | ||
| 106 | + | ||
| 107 | + mv android/SherpaOnnxVad/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-ten_vad.apk | ||
| 108 | + ls -lh apks | ||
| 109 | + rm -v ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/*.so | ||
| 110 | +done | ||
| 111 | + | ||
| 112 | +rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx | ||
| 113 | + | ||
| 70 | ls -lh apks/ | 114 | ls -lh apks/ |
| @@ -201,11 +201,11 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl { | @@ -201,11 +201,11 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl { | ||
| 201 | int32_t num_trailing_blanks = r.num_trailing_blanks; | 201 | int32_t num_trailing_blanks = r.num_trailing_blanks; |
| 202 | // assume subsampling_factor is 4 | 202 | // assume subsampling_factor is 4 |
| 203 | // assume frameshift is 0.01 second | 203 | // assume frameshift is 0.01 second |
| 204 | - float trailing_slience = num_trailing_blanks * 4 * 0.01; | 204 | + float trailing_silence = num_trailing_blanks * 4 * 0.01; |
| 205 | 205 | ||
| 206 | // it resets automatically after detecting 1.5 seconds of silence | 206 | // it resets automatically after detecting 1.5 seconds of silence |
| 207 | float threshold = 1.5; | 207 | float threshold = 1.5; |
| 208 | - if (trailing_slience > threshold) { | 208 | + if (trailing_silence > threshold) { |
| 209 | Reset(s); | 209 | Reset(s); |
| 210 | } | 210 | } |
| 211 | } | 211 | } |
| @@ -29,10 +29,10 @@ This program shows how to use a streaming VAD with non-streaming ASR in | @@ -29,10 +29,10 @@ This program shows how to use a streaming VAD with non-streaming ASR in | ||
| 29 | sherpa-onnx. | 29 | sherpa-onnx. |
| 30 | 30 | ||
| 31 | Please download silero_vad.onnx from | 31 | Please download silero_vad.onnx from |
| 32 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 32 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 33 | 33 | ||
| 34 | For instance, use | 34 | For instance, use |
| 35 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 35 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 36 | 36 | ||
| 37 | Please refer to ./sherpa-onnx-microphone-offline.cc | 37 | Please refer to ./sherpa-onnx-microphone-offline.cc |
| 38 | to download models for offline ASR. | 38 | to download models for offline ASR. |
| @@ -30,10 +30,10 @@ This program shows how to use VAD in sherpa-onnx. | @@ -30,10 +30,10 @@ This program shows how to use VAD in sherpa-onnx. | ||
| 30 | device_name | 30 | device_name |
| 31 | 31 | ||
| 32 | Please download silero_vad.onnx from | 32 | Please download silero_vad.onnx from |
| 33 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 33 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 34 | 34 | ||
| 35 | For instance, use | 35 | For instance, use |
| 36 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 36 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 37 | 37 | ||
| 38 | The device name specifies which microphone to use in case there are several | 38 | The device name specifies which microphone to use in case there are several |
| 39 | on your system. You can use | 39 | on your system. You can use |
| @@ -45,10 +45,10 @@ This program shows how to use a streaming VAD with non-streaming ASR in | @@ -45,10 +45,10 @@ This program shows how to use a streaming VAD with non-streaming ASR in | ||
| 45 | sherpa-onnx. | 45 | sherpa-onnx. |
| 46 | 46 | ||
| 47 | Please download silero_vad.onnx from | 47 | Please download silero_vad.onnx from |
| 48 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 48 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 49 | 49 | ||
| 50 | For instance, use | 50 | For instance, use |
| 51 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 51 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 52 | 52 | ||
| 53 | Please refer to ./sherpa-onnx-microphone-offline.cc | 53 | Please refer to ./sherpa-onnx-microphone-offline.cc |
| 54 | to download models for offline ASR. | 54 | to download models for offline ASR. |
| @@ -49,10 +49,10 @@ This program shows how to use VAD in sherpa-onnx. | @@ -49,10 +49,10 @@ This program shows how to use VAD in sherpa-onnx. | ||
| 49 | --vad-num-threads=1 | 49 | --vad-num-threads=1 |
| 50 | 50 | ||
| 51 | Please download silero_vad.onnx from | 51 | Please download silero_vad.onnx from |
| 52 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 52 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 53 | 53 | ||
| 54 | For instance, use | 54 | For instance, use |
| 55 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 55 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 56 | )usage"; | 56 | )usage"; |
| 57 | 57 | ||
| 58 | sherpa_onnx::ParseOptions po(kUsageMessage); | 58 | sherpa_onnx::ParseOptions po(kUsageMessage); |
| @@ -23,10 +23,10 @@ to remove silences from a file. | @@ -23,10 +23,10 @@ to remove silences from a file. | ||
| 23 | /path/to/output.wav | 23 | /path/to/output.wav |
| 24 | 24 | ||
| 25 | Please download silero_vad.onnx from | 25 | Please download silero_vad.onnx from |
| 26 | -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 26 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 27 | 27 | ||
| 28 | For instance, use | 28 | For instance, use |
| 29 | -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 29 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 30 | 30 | ||
| 31 | input.wav should be 16kHz. | 31 | input.wav should be 16kHz. |
| 32 | )usage"; | 32 | )usage"; |
| @@ -74,6 +74,7 @@ java_files += SpeakerEmbeddingExtractorConfig.java | @@ -74,6 +74,7 @@ java_files += SpeakerEmbeddingExtractorConfig.java | ||
| 74 | java_files += SpeakerEmbeddingExtractor.java | 74 | java_files += SpeakerEmbeddingExtractor.java |
| 75 | java_files += SpeakerEmbeddingManager.java | 75 | java_files += SpeakerEmbeddingManager.java |
| 76 | 76 | ||
| 77 | +java_files += TenVadModelConfig.java | ||
| 77 | java_files += SileroVadModelConfig.java | 78 | java_files += SileroVadModelConfig.java |
| 78 | java_files += VadModelConfig.java | 79 | java_files += VadModelConfig.java |
| 79 | java_files += SpeechSegment.java | 80 | java_files += SpeechSegment.java |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class TenVadModelConfig { | ||
| 6 | + private final String model; | ||
| 7 | + private final float threshold; | ||
| 8 | + private final float minSilenceDuration; | ||
| 9 | + private final float minSpeechDuration; | ||
| 10 | + private final int windowSize; | ||
| 11 | + private final float maxSpeechDuration; | ||
| 12 | + | ||
| 13 | + private TenVadModelConfig(Builder builder) { | ||
| 14 | + this.model = builder.model; | ||
| 15 | + this.threshold = builder.threshold; | ||
| 16 | + this.minSilenceDuration = builder.minSilenceDuration; | ||
| 17 | + this.minSpeechDuration = builder.minSpeechDuration; | ||
| 18 | + this.windowSize = builder.windowSize; | ||
| 19 | + this.maxSpeechDuration = builder.maxSpeechDuration; | ||
| 20 | + } | ||
| 21 | + | ||
| 22 | + public static Builder builder() { | ||
| 23 | + return new Builder(); | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + public String getModel() { | ||
| 27 | + return model; | ||
| 28 | + } | ||
| 29 | + | ||
| 30 | + public float getThreshold() { | ||
| 31 | + return threshold; | ||
| 32 | + } | ||
| 33 | + | ||
| 34 | + public float getMinSilenceDuration() { | ||
| 35 | + return minSilenceDuration; | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + public float getMinSpeechDuration() { | ||
| 39 | + return minSpeechDuration; | ||
| 40 | + } | ||
| 41 | + | ||
| 42 | + public int getWindowSize() { | ||
| 43 | + return windowSize; | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + public float getMaxSpeechDuration() { | ||
| 47 | + return maxSpeechDuration; | ||
| 48 | + } | ||
| 49 | + | ||
| 50 | + public static class Builder { | ||
| 51 | + private String model = ""; | ||
| 52 | + private float threshold = 0.5f; | ||
| 53 | + private float minSilenceDuration = 0.25f; | ||
| 54 | + private float minSpeechDuration = 0.25f; | ||
| 55 | + private int windowSize = 256; | ||
| 56 | + private float maxSpeechDuration = 5.0f; | ||
| 57 | + | ||
| 58 | + public TenVadModelConfig build() { | ||
| 59 | + return new TenVadModelConfig(this); | ||
| 60 | + } | ||
| 61 | + | ||
| 62 | + | ||
| 63 | + public Builder setModel(String model) { | ||
| 64 | + this.model = model; | ||
| 65 | + return this; | ||
| 66 | + } | ||
| 67 | + | ||
| 68 | + public Builder setThreshold(float threshold) { | ||
| 69 | + this.threshold = threshold; | ||
| 70 | + return this; | ||
| 71 | + } | ||
| 72 | + | ||
| 73 | + public Builder setMinSilenceDuration(float minSilenceDuration) { | ||
| 74 | + this.minSilenceDuration = minSilenceDuration; | ||
| 75 | + return this; | ||
| 76 | + } | ||
| 77 | + | ||
| 78 | + public Builder setMinSpeechDuration(float minSpeechDuration) { | ||
| 79 | + this.minSpeechDuration = minSpeechDuration; | ||
| 80 | + return this; | ||
| 81 | + } | ||
| 82 | + | ||
| 83 | + public Builder setWindowSize(int windowSize) { | ||
| 84 | + this.windowSize = windowSize; | ||
| 85 | + return this; | ||
| 86 | + } | ||
| 87 | + | ||
| 88 | + public Builder setMaxSpeechDuration(float maxSpeechDuration) { | ||
| 89 | + this.maxSpeechDuration = maxSpeechDuration; | ||
| 90 | + return this; | ||
| 91 | + } | ||
| 92 | + } | ||
| 93 | +} |
| @@ -4,6 +4,7 @@ package com.k2fsa.sherpa.onnx; | @@ -4,6 +4,7 @@ package com.k2fsa.sherpa.onnx; | ||
| 4 | 4 | ||
| 5 | public class VadModelConfig { | 5 | public class VadModelConfig { |
| 6 | private final SileroVadModelConfig sileroVadModelConfig; | 6 | private final SileroVadModelConfig sileroVadModelConfig; |
| 7 | + private final TenVadModelConfig tenVadModelConfig; | ||
| 7 | private final int sampleRate; | 8 | private final int sampleRate; |
| 8 | private final int numThreads; | 9 | private final int numThreads; |
| 9 | private final boolean debug; | 10 | private final boolean debug; |
| @@ -11,6 +12,7 @@ public class VadModelConfig { | @@ -11,6 +12,7 @@ public class VadModelConfig { | ||
| 11 | 12 | ||
| 12 | private VadModelConfig(Builder builder) { | 13 | private VadModelConfig(Builder builder) { |
| 13 | this.sileroVadModelConfig = builder.sileroVadModelConfig; | 14 | this.sileroVadModelConfig = builder.sileroVadModelConfig; |
| 15 | + this.tenVadModelConfig = builder.tenVadModelConfig; | ||
| 14 | this.sampleRate = builder.sampleRate; | 16 | this.sampleRate = builder.sampleRate; |
| 15 | this.numThreads = builder.numThreads; | 17 | this.numThreads = builder.numThreads; |
| 16 | this.debug = builder.debug; | 18 | this.debug = builder.debug; |
| @@ -25,6 +27,10 @@ public class VadModelConfig { | @@ -25,6 +27,10 @@ public class VadModelConfig { | ||
| 25 | return sileroVadModelConfig; | 27 | return sileroVadModelConfig; |
| 26 | } | 28 | } |
| 27 | 29 | ||
| 30 | + public TenVadModelConfig getTenVadModelConfig() { | ||
| 31 | + return tenVadModelConfig; | ||
| 32 | + } | ||
| 33 | + | ||
| 28 | public int getSampleRate() { | 34 | public int getSampleRate() { |
| 29 | return sampleRate; | 35 | return sampleRate; |
| 30 | } | 36 | } |
| @@ -43,6 +49,7 @@ public class VadModelConfig { | @@ -43,6 +49,7 @@ public class VadModelConfig { | ||
| 43 | 49 | ||
| 44 | public static class Builder { | 50 | public static class Builder { |
| 45 | private SileroVadModelConfig sileroVadModelConfig = new SileroVadModelConfig.Builder().build(); | 51 | private SileroVadModelConfig sileroVadModelConfig = new SileroVadModelConfig.Builder().build(); |
| 52 | + private TenVadModelConfig tenVadModelConfig = new TenVadModelConfig.Builder().build(); | ||
| 46 | private int sampleRate = 16000; | 53 | private int sampleRate = 16000; |
| 47 | private int numThreads = 1; | 54 | private int numThreads = 1; |
| 48 | private boolean debug = true; | 55 | private boolean debug = true; |
| @@ -57,6 +64,11 @@ public class VadModelConfig { | @@ -57,6 +64,11 @@ public class VadModelConfig { | ||
| 57 | return this; | 64 | return this; |
| 58 | } | 65 | } |
| 59 | 66 | ||
| 67 | + public Builder setTenVadModelConfig(TenVadModelConfig tenVadModelConfig) { | ||
| 68 | + this.tenVadModelConfig = tenVadModelConfig; | ||
| 69 | + return this; | ||
| 70 | + } | ||
| 71 | + | ||
| 60 | public Builder setSampleRate(int sampleRate) { | 72 | public Builder setSampleRate(int sampleRate) { |
| 61 | this.sampleRate = sampleRate; | 73 | this.sampleRate = sampleRate; |
| 62 | return this; | 74 | return this; |
| @@ -44,6 +44,33 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) { | @@ -44,6 +44,33 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) { | ||
| 44 | ans.silero_vad.max_speech_duration = | 44 | ans.silero_vad.max_speech_duration = |
| 45 | env->GetFloatField(silero_vad_config, fid); | 45 | env->GetFloatField(silero_vad_config, fid); |
| 46 | 46 | ||
| 47 | + // ten-vad | ||
| 48 | + fid = env->GetFieldID(cls, "tenVadModelConfig", | ||
| 49 | + "Lcom/k2fsa/sherpa/onnx/TenVadModelConfig;"); | ||
| 50 | + jobject ten_vad_config = env->GetObjectField(config, fid); | ||
| 51 | + jclass ten_vad_config_cls = env->GetObjectClass(ten_vad_config); | ||
| 52 | + | ||
| 53 | + fid = env->GetFieldID(ten_vad_config_cls, "model", "Ljava/lang/String;"); | ||
| 54 | + s = (jstring)env->GetObjectField(ten_vad_config, fid); | ||
| 55 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 56 | + ans.ten_vad.model = p; | ||
| 57 | + env->ReleaseStringUTFChars(s, p); | ||
| 58 | + | ||
| 59 | + fid = env->GetFieldID(ten_vad_config_cls, "threshold", "F"); | ||
| 60 | + ans.ten_vad.threshold = env->GetFloatField(ten_vad_config, fid); | ||
| 61 | + | ||
| 62 | + fid = env->GetFieldID(ten_vad_config_cls, "minSilenceDuration", "F"); | ||
| 63 | + ans.ten_vad.min_silence_duration = env->GetFloatField(ten_vad_config, fid); | ||
| 64 | + | ||
| 65 | + fid = env->GetFieldID(ten_vad_config_cls, "minSpeechDuration", "F"); | ||
| 66 | + ans.ten_vad.min_speech_duration = env->GetFloatField(ten_vad_config, fid); | ||
| 67 | + | ||
| 68 | + fid = env->GetFieldID(ten_vad_config_cls, "windowSize", "I"); | ||
| 69 | + ans.ten_vad.window_size = env->GetIntField(ten_vad_config, fid); | ||
| 70 | + | ||
| 71 | + fid = env->GetFieldID(ten_vad_config_cls, "maxSpeechDuration", "F"); | ||
| 72 | + ans.ten_vad.max_speech_duration = env->GetFloatField(ten_vad_config, fid); | ||
| 73 | + | ||
| 47 | fid = env->GetFieldID(cls, "sampleRate", "I"); | 74 | fid = env->GetFieldID(cls, "sampleRate", "I"); |
| 48 | ans.sample_rate = env->GetIntField(config, fid); | 75 | ans.sample_rate = env->GetIntField(config, fid); |
| 49 | 76 | ||
| @@ -179,8 +206,9 @@ JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_Vad_isSpeechDetected( | @@ -179,8 +206,9 @@ JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_Vad_isSpeechDetected( | ||
| 179 | } | 206 | } |
| 180 | 207 | ||
| 181 | SHERPA_ONNX_EXTERN_C | 208 | SHERPA_ONNX_EXTERN_C |
| 182 | -JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset( | ||
| 183 | - JNIEnv *env, jobject /*obj*/, jlong ptr) { | 209 | +JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(JNIEnv *env, |
| 210 | + jobject /*obj*/, | ||
| 211 | + jlong ptr) { | ||
| 184 | SafeJNI(env, "Vad_reset", [&] { | 212 | SafeJNI(env, "Vad_reset", [&] { |
| 185 | if (!ValidatePointer(env, ptr, "Vad_reset", | 213 | if (!ValidatePointer(env, ptr, "Vad_reset", |
| 186 | "VoiceActivityDetector pointer is null.")) { | 214 | "VoiceActivityDetector pointer is null.")) { |
| @@ -12,8 +12,18 @@ data class SileroVadModelConfig( | @@ -12,8 +12,18 @@ data class SileroVadModelConfig( | ||
| 12 | var maxSpeechDuration: Float = 5.0F, | 12 | var maxSpeechDuration: Float = 5.0F, |
| 13 | ) | 13 | ) |
| 14 | 14 | ||
| 15 | +data class TenVadModelConfig( | ||
| 16 | + var model: String = "", | ||
| 17 | + var threshold: Float = 0.5F, | ||
| 18 | + var minSilenceDuration: Float = 0.25F, | ||
| 19 | + var minSpeechDuration: Float = 0.25F, | ||
| 20 | + var windowSize: Int = 256, | ||
| 21 | + var maxSpeechDuration: Float = 5.0F, | ||
| 22 | +) | ||
| 23 | + | ||
| 15 | data class VadModelConfig( | 24 | data class VadModelConfig( |
| 16 | var sileroVadModelConfig: SileroVadModelConfig = SileroVadModelConfig(), | 25 | var sileroVadModelConfig: SileroVadModelConfig = SileroVadModelConfig(), |
| 26 | + var tenVadModelConfig: TenVadModelConfig = TenVadModelConfig(), | ||
| 17 | var sampleRate: Int = 16000, | 27 | var sampleRate: Int = 16000, |
| 18 | var numThreads: Int = 1, | 28 | var numThreads: Int = 1, |
| 19 | var provider: String = "cpu", | 29 | var provider: String = "cpu", |
| @@ -91,10 +101,14 @@ class Vad( | @@ -91,10 +101,14 @@ class Vad( | ||
| 91 | } | 101 | } |
| 92 | 102 | ||
| 93 | // Please visit | 103 | // Please visit |
| 94 | -// https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx | 104 | +// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 95 | // to download silero_vad.onnx | 105 | // to download silero_vad.onnx |
| 96 | // and put it inside the assets/ | 106 | // and put it inside the assets/ |
| 97 | // directory | 107 | // directory |
| 108 | +// | ||
| 109 | +// For ten-vad, please use | ||
| 110 | +// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx | ||
| 111 | +// | ||
| 98 | fun getVadModelConfig(type: Int): VadModelConfig? { | 112 | fun getVadModelConfig(type: Int): VadModelConfig? { |
| 99 | when (type) { | 113 | when (type) { |
| 100 | 0 -> { | 114 | 0 -> { |
| @@ -111,6 +125,21 @@ fun getVadModelConfig(type: Int): VadModelConfig? { | @@ -111,6 +125,21 @@ fun getVadModelConfig(type: Int): VadModelConfig? { | ||
| 111 | provider = "cpu", | 125 | provider = "cpu", |
| 112 | ) | 126 | ) |
| 113 | } | 127 | } |
| 128 | + | ||
| 129 | + 1 -> { | ||
| 130 | + return VadModelConfig( | ||
| 131 | + tenVadModelConfig = TenVadModelConfig( | ||
| 132 | + model = "ten-vad.onnx", | ||
| 133 | + threshold = 0.5F, | ||
| 134 | + minSilenceDuration = 0.25F, | ||
| 135 | + minSpeechDuration = 0.25F, | ||
| 136 | + windowSize = 256, | ||
| 137 | + ), | ||
| 138 | + sampleRate = 16000, | ||
| 139 | + numThreads = 1, | ||
| 140 | + provider = "cpu", | ||
| 141 | + ) | ||
| 142 | + } | ||
| 114 | } | 143 | } |
| 115 | return null | 144 | return null |
| 116 | } | 145 | } |
-
请 注册 或 登录 后发表评论