Fangjun Kuang
Committed by GitHub

Add Java/Kotlin API and Android support for ten-vad (#2389)

正在显示 36 个修改的文件 包含 396 行增加47 行删除
@@ -237,11 +237,20 @@ jobs: @@ -237,11 +237,20 @@ jobs:
237 rm *.wav 237 rm *.wav
238 rm -rf sherpa-onnx-* 238 rm -rf sherpa-onnx-*
239 239
240 - - name: Run java test (VAD remove silence) 240 + - name: Run java test (ten-vad remove silence)
241 shell: bash 241 shell: bash
242 run: | 242 run: |
243 cd ./java-api-examples 243 cd ./java-api-examples
244 - ./run-vad-remove-slience.sh 244 + ./run-ten-vad-remove-silence.sh
  245 + rm *.onnx
  246 + ls -lh *.wav
  247 + rm *.wav
  248 +
  249 + - name: Run java test (silero-vad remove silence)
  250 + shell: bash
  251 + run: |
  252 + cd ./java-api-examples
  253 + ./run-vad-remove-silence.sh
245 rm *.onnx 254 rm *.onnx
246 ls -lh *.wav 255 ls -lh *.wav
247 rm *.wav 256 rm *.wav
@@ -15,7 +15,7 @@ func main() { @@ -15,7 +15,7 @@ func main() {
15 config := sherpa.VadModelConfig{} 15 config := sherpa.VadModelConfig{}
16 16
17 // Please download silero_vad.onnx from 17 // Please download silero_vad.onnx from
18 - // https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 18 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
19 19
20 config.SileroVad.Model = "./silero_vad.onnx" 20 config.SileroVad.Model = "./silero_vad.onnx"
21 config.SileroVad.Threshold = 0.5 21 config.SileroVad.Threshold = 0.5
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 set -ex 3 set -ex
4 4
5 if [ ! -f ./silero_vad.onnx ]; then 5 if [ ! -f ./silero_vad.onnx ]; then
6 - curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
7 fi 7 fi
8 8
9 if [ ! -f ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.int8.onnx ]; then 9 if [ ! -f ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.int8.onnx ]; then
@@ -15,7 +15,7 @@ func main() { @@ -15,7 +15,7 @@ func main() {
15 config := sherpa.VadModelConfig{} 15 config := sherpa.VadModelConfig{}
16 16
17 // Please download silero_vad.onnx from 17 // Please download silero_vad.onnx from
18 - // https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 18 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
19 19
20 config.SileroVad.Model = "./silero_vad.onnx" 20 config.SileroVad.Model = "./silero_vad.onnx"
21 config.SileroVad.Threshold = 0.5 21 config.SileroVad.Threshold = 0.5
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 set -ex 3 set -ex
4 4
5 if [ ! -f ./silero_vad.onnx ]; then 5 if [ ! -f ./silero_vad.onnx ]; then
6 - curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
7 fi 7 fi
8 8
9 if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then 9 if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then
@@ -89,7 +89,7 @@ func createVad() *sherpa.VoiceActivityDetector { @@ -89,7 +89,7 @@ func createVad() *sherpa.VoiceActivityDetector {
89 config := sherpa.VadModelConfig{} 89 config := sherpa.VadModelConfig{}
90 90
91 // Please download silero_vad.onnx from 91 // Please download silero_vad.onnx from
92 - // https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 92 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
93 93
94 config.SileroVad.Model = "./silero_vad.onnx" 94 config.SileroVad.Model = "./silero_vad.onnx"
95 config.SileroVad.Threshold = 0.5 95 config.SileroVad.Threshold = 0.5
@@ -11,7 +11,7 @@ if [ ! -f ./sr-data/enroll/fangjun-sr-1.wav ]; then @@ -11,7 +11,7 @@ if [ ! -f ./sr-data/enroll/fangjun-sr-1.wav ]; then
11 fi 11 fi
12 12
13 if [ ! -f ./silero_vad.onnx ]; then 13 if [ ! -f ./silero_vad.onnx ]; then
14 - curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 14 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
15 fi 15 fi
16 16
17 go mod tidy 17 go mod tidy
@@ -15,7 +15,7 @@ func main() { @@ -15,7 +15,7 @@ func main() {
15 config := sherpa.VadModelConfig{} 15 config := sherpa.VadModelConfig{}
16 16
17 // Please download silero_vad.onnx from 17 // Please download silero_vad.onnx from
18 - // https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 18 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
19 19
20 config.SileroVad.Model = "./silero_vad.onnx" 20 config.SileroVad.Model = "./silero_vad.onnx"
21 config.SileroVad.Threshold = 0.5 21 config.SileroVad.Threshold = 0.5
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 set -ex 3 set -ex
4 4
5 if [ ! -f ./silero_vad.onnx ]; then 5 if [ ! -f ./silero_vad.onnx ]; then
6 - curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
7 fi 7 fi
8 8
9 if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then 9 if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
@@ -113,6 +113,7 @@ The punctuation model supports both English and Chinese. @@ -113,6 +113,7 @@ The punctuation model supports both English and Chinese.
113 113
114 ```bash 114 ```bash
115 ./run-vad-remove-silence.sh 115 ./run-vad-remove-silence.sh
  116 +./run-ten-vad-remove-silence.sh
116 ``` 117 ```
117 118
118 ## VAD + Non-streaming Dolphin CTC for speech recognition 119 ## VAD + Non-streaming Dolphin CTC for speech recognition
  1 +// Copyright 2025 Xiaomi Corporation
  2 +
  3 +// This file shows how to use a ten-vad model to remove silences from
  4 +// a wave file.
  5 +
  6 +import com.k2fsa.sherpa.onnx.*;
  7 +import java.util.ArrayList;
  8 +import java.util.Arrays;
  9 +
  10 +public class TenVadRemoveSilence {
  11 + public static void main(String[] args) {
  12 + // please download ./ten-vad.onnx from
  13 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  14 + String model = "./ten-vad.onnx";
  15 + TenVadModelConfig tenVad =
  16 + TenVadModelConfig.builder()
  17 + .setModel(model)
  18 + .setThreshold(0.5f)
  19 + .setMinSilenceDuration(0.25f)
  20 + .setMinSpeechDuration(0.5f)
  21 + .setWindowSize(256)
  22 + .setMaxSpeechDuration(5.0f)
  23 + .build();
  24 +
  25 + VadModelConfig config =
  26 + VadModelConfig.builder()
  27 + .setTenVadModelConfig(tenVad)
  28 + .setSampleRate(16000)
  29 + .setNumThreads(1)
  30 + .setDebug(true)
  31 + .setProvider("cpu")
  32 + .build();
  33 +
  34 + Vad vad = new Vad(config);
  35 +
  36 + // You can download the test file from
  37 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  38 + String testWaveFilename = "./lei-jun-test.wav";
  39 + WaveReader reader = new WaveReader(testWaveFilename);
  40 +
  41 + int numSamples = reader.getSamples().length;
  42 + int windowSize = tenVad.getWindowSize();
  43 + int numIter = numSamples / windowSize;
  44 +
  45 + ArrayList<float[]> segments = new ArrayList<float[]>();
  46 +
  47 + for (int i = 0; i != numIter; ++i) {
  48 + int start = i * windowSize;
  49 + int end = start + windowSize;
  50 + float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end);
  51 + vad.acceptWaveform(samples);
  52 + if (vad.isSpeechDetected()) {
  53 + while (!vad.empty()) {
  54 +
  55 + // if you want to get the starting time of this segment, you can use
  56 + /* float startTime = vad.front().getStart() / 16000.0f; */
  57 +
  58 + segments.add(vad.front().getSamples());
  59 + vad.pop();
  60 + }
  61 + }
  62 + }
  63 +
  64 + vad.flush();
  65 + while (!vad.empty()) {
  66 +
  67 + // if you want to get the starting time of this segment, you can use
  68 + /* float startTime = vad.front().getStart() / 16000.0f; */
  69 +
  70 + segments.add(vad.front().getSamples());
  71 + vad.pop();
  72 + }
  73 +
  74 + // get total number of samples
  75 + int n = 0;
  76 + for (float[] s : segments) {
  77 + n += s.length;
  78 + }
  79 +
  80 + float[] allSamples = new float[n];
  81 + int i = 0;
  82 + for (float[] s : segments) {
  83 + System.arraycopy(s, 0, allSamples, i, s.length);
  84 + i += s.length;
  85 + }
  86 +
  87 + String outFilename = "lei-jun-test-no-silence.wav";
  88 + WaveWriter.write(outFilename, allSamples, 16000);
  89 + System.out.printf("Saved to %s\n", outFilename);
  90 +
  91 + vad.release();
  92 + }
  93 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  6 + mkdir -p ../build
  7 + pushd ../build
  8 + cmake \
  9 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  10 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  11 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  12 + -DBUILD_SHARED_LIBS=ON \
  13 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  14 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  15 + ..
  16 +
  17 + make -j4
  18 + ls -lh lib
  19 + popd
  20 +fi
  21 +
  22 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  23 + pushd ../sherpa-onnx/java-api
  24 + make
  25 + popd
  26 +fi
  27 +
  28 +if [ ! -f ./ten-vad.onnx ]; then
  29 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
  30 +fi
  31 +
  32 +if [ ! -f ./lei-jun-test.wav ]; then
  33 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
  34 +fi
  35 +
  36 +java \
  37 + -Djava.library.path=$PWD/../build/lib \
  38 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  39 + ./TenVadRemoveSilence.java
@@ -10,7 +10,7 @@ from a microphone. @@ -10,7 +10,7 @@ from a microphone.
10 Usage: 10 Usage:
11 11
12 12
13 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 13 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
14 14
15 ./python-api-examples/simulate-streaming-sense-voice-microphone.py \ 15 ./python-api-examples/simulate-streaming-sense-voice-microphone.py \
16 --silero-vad-model=./silero_vad.onnx \ 16 --silero-vad-model=./silero_vad.onnx \
@@ -18,12 +18,12 @@ Note that `zh` means Chinese, while `en` means English. @@ -18,12 +18,12 @@ Note that `zh` means Chinese, while `en` means English.
18 18
19 (2) Download the VAD model 19 (2) Download the VAD model
20 Please visit 20 Please visit
21 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 21 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
22 to download silero_vad.onnx 22 to download silero_vad.onnx
23 23
24 For instance, 24 For instance,
25 25
26 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 26 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
27 27
28 (3) Run this script 28 (3) Run this script
29 29
@@ -40,12 +40,12 @@ Note that `zh` means Chinese, while `en` means English. @@ -40,12 +40,12 @@ Note that `zh` means Chinese, while `en` means English.
40 40
41 (3) Download the VAD model 41 (3) Download the VAD model
42 Please visit 42 Please visit
43 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 43 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
44 to download silero_vad.onnx 44 to download silero_vad.onnx
45 45
46 For instance, 46 For instance,
47 47
48 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 48 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
49 49
50 (4) Please refer to ./generate-subtitles.py 50 (4) Please refer to ./generate-subtitles.py
51 to download a non-streaming ASR model. 51 to download a non-streaming ASR model.
@@ -38,12 +38,12 @@ Note that `zh` means Chinese, while `en` means English. @@ -38,12 +38,12 @@ Note that `zh` means Chinese, while `en` means English.
38 38
39 (3) Download the VAD model 39 (3) Download the VAD model
40 Please visit 40 Please visit
41 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 41 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
42 to download silero_vad.onnx 42 to download silero_vad.onnx
43 43
44 For instance, 44 For instance,
45 45
46 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 46 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
47 47
48 (4) Please refer to ./generate-subtitles.py 48 (4) Please refer to ./generate-subtitles.py
49 to download a non-streaming ASR model. 49 to download a non-streaming ASR model.
@@ -36,12 +36,12 @@ Note that `zh` means Chinese, while `en` means English. @@ -36,12 +36,12 @@ Note that `zh` means Chinese, while `en` means English.
36 36
37 (3) Download the VAD model 37 (3) Download the VAD model
38 Please visit 38 Please visit
39 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 39 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
40 to download silero_vad.onnx 40 to download silero_vad.onnx
41 41
42 For instance, 42 For instance,
43 43
44 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 44 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
45 45
46 (4) Run this script 46 (4) Run this script
47 47
@@ -55,7 +55,7 @@ def main(): @@ -55,7 +55,7 @@ def main():
55 if not Path(args.silero_vad_model).is_file(): 55 if not Path(args.silero_vad_model).is_file():
56 raise RuntimeError( 56 raise RuntimeError(
57 f"{args.silero_vad_model} does not exist. Please download it from " 57 f"{args.silero_vad_model} does not exist. Please download it from "
58 - "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx" 58 + "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx"
59 ) 59 )
60 60
61 device_name = args.device_name 61 device_name = args.device_name
@@ -38,7 +38,7 @@ def main(): @@ -38,7 +38,7 @@ def main():
38 if not Path(args.silero_vad_model).is_file(): 38 if not Path(args.silero_vad_model).is_file():
39 raise RuntimeError( 39 raise RuntimeError(
40 f"{args.silero_vad_model} does not exist. Please download it from " 40 f"{args.silero_vad_model} does not exist. Please download it from "
41 - "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx" 41 + "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx"
42 ) 42 )
43 43
44 mic_sample_rate = 16000 44 mic_sample_rate = 16000
@@ -14,12 +14,12 @@ python3 ./vad-remove-non-speech-segments-alsa.py \ @@ -14,12 +14,12 @@ python3 ./vad-remove-non-speech-segments-alsa.py \
14 --silero-vad-model silero_vad.onnx 14 --silero-vad-model silero_vad.onnx
15 15
16 Please visit 16 Please visit
17 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 17 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
18 to download silero_vad.onnx 18 to download silero_vad.onnx
19 19
20 For instance, 20 For instance,
21 21
22 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 22 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
23 """ 23 """
24 24
25 import argparse 25 import argparse
@@ -13,12 +13,11 @@ python3 ./vad-remove-non-speech-segments-from-file.py \ @@ -13,12 +13,11 @@ python3 ./vad-remove-non-speech-segments-from-file.py \
13 output.wav 13 output.wav
14 14
15 Please visit 15 Please visit
16 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 16 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
17 to download silero_vad.onnx 17 to download silero_vad.onnx
18 18
19 For instance, 19 For instance,
20 -  
21 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 20 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
22 """ 21 """
23 22
24 import argparse 23 import argparse
@@ -11,12 +11,12 @@ python3 ./vad-remove-non-speech-segments.py \ @@ -11,12 +11,12 @@ python3 ./vad-remove-non-speech-segments.py \
11 --silero-vad-model silero_vad.onnx 11 --silero-vad-model silero_vad.onnx
12 12
13 Please visit 13 Please visit
14 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 14 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
15 to download silero_vad.onnx 15 to download silero_vad.onnx
16 16
17 For instance, 17 For instance,
18 18
19 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 19 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
20 """ 20 """
21 21
22 import argparse 22 import argparse
@@ -70,12 +70,13 @@ to install sherpa-onnx and to download non-streaming pre-trained models @@ -70,12 +70,13 @@ to install sherpa-onnx and to download non-streaming pre-trained models
70 used in this file. 70 used in this file.
71 71
72 Please visit 72 Please visit
73 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 73 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
74 to download silero_vad.onnx 74 to download silero_vad.onnx
75 75
76 For instance, 76 For instance,
77 77
78 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 78 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  79 +
79 """ 80 """
80 import argparse 81 import argparse
81 import sys 82 import sys
@@ -32,11 +32,12 @@ log "====================x86====================" @@ -32,11 +32,12 @@ log "====================x86===================="
32 32
33 mkdir -p apks 33 mkdir -p apks
34 34
35 -log "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx" 35 +log "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx"
  36 +
36 37
37 # Download the model 38 # Download the model
38 pushd ./android/SherpaOnnxVad/app/src/main/assets/ 39 pushd ./android/SherpaOnnxVad/app/src/main/assets/
39 -wget -c https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 40 +wget -c https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
40 popd 41 popd
41 42
42 for arch in arm64-v8a armeabi-v7a x86_64 x86; do 43 for arch in arm64-v8a armeabi-v7a x86_64 x86; do
@@ -67,4 +68,47 @@ done @@ -67,4 +68,47 @@ done
67 68
68 rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx 69 rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx
69 70
  71 +
  72 +# Now for ten-vad
  73 +git checkout .
  74 +pushd android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx
  75 +sed -i.bak s/"type = 0/type = 1/" ./MainActivity.kt
  76 +git diff
  77 +popd
  78 +
  79 +log "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx"
  80 +
  81 +# Download the model
  82 +pushd ./android/SherpaOnnxVad/app/src/main/assets/
  83 +wget -c https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
  84 +popd
  85 +
  86 +for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  87 + log "------------------------------------------------------------"
  88 + log "build apk for $arch"
  89 + log "------------------------------------------------------------"
  90 + src_arch=$arch
  91 + if [ $arch == "armeabi-v7a" ]; then
  92 + src_arch=armv7-eabi
  93 + elif [ $arch == "x86_64" ]; then
  94 + src_arch=x86-64
  95 + fi
  96 +
  97 + ls -lh ./build-android-$src_arch/install/lib/*.so
  98 +
  99 + cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/
  100 +
  101 + pushd ./android/SherpaOnnxVad
  102 + sed -i.bak s/2048/9012/g ./gradle.properties
  103 + git diff ./gradle.properties
  104 + ./gradlew assembleRelease
  105 + popd
  106 +
  107 + mv android/SherpaOnnxVad/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-ten_vad.apk
  108 + ls -lh apks
  109 + rm -v ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/*.so
  110 +done
  111 +
  112 +rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx
  113 +
70 ls -lh apks/ 114 ls -lh apks/
@@ -201,11 +201,11 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl { @@ -201,11 +201,11 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl {
201 int32_t num_trailing_blanks = r.num_trailing_blanks; 201 int32_t num_trailing_blanks = r.num_trailing_blanks;
202 // assume subsampling_factor is 4 202 // assume subsampling_factor is 4
203 // assume frameshift is 0.01 second 203 // assume frameshift is 0.01 second
204 - float trailing_slience = num_trailing_blanks * 4 * 0.01; 204 + float trailing_silence = num_trailing_blanks * 4 * 0.01;
205 205
206 // it resets automatically after detecting 1.5 seconds of silence 206 // it resets automatically after detecting 1.5 seconds of silence
207 float threshold = 1.5; 207 float threshold = 1.5;
208 - if (trailing_slience > threshold) { 208 + if (trailing_silence > threshold) {
209 Reset(s); 209 Reset(s);
210 } 210 }
211 } 211 }
@@ -29,10 +29,10 @@ This program shows how to use a streaming VAD with non-streaming ASR in @@ -29,10 +29,10 @@ This program shows how to use a streaming VAD with non-streaming ASR in
29 sherpa-onnx. 29 sherpa-onnx.
30 30
31 Please download silero_vad.onnx from 31 Please download silero_vad.onnx from
32 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 32 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
33 33
34 For instance, use 34 For instance, use
35 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 35 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
36 36
37 Please refer to ./sherpa-onnx-microphone-offline.cc 37 Please refer to ./sherpa-onnx-microphone-offline.cc
38 to download models for offline ASR. 38 to download models for offline ASR.
@@ -30,10 +30,10 @@ This program shows how to use VAD in sherpa-onnx. @@ -30,10 +30,10 @@ This program shows how to use VAD in sherpa-onnx.
30 device_name 30 device_name
31 31
32 Please download silero_vad.onnx from 32 Please download silero_vad.onnx from
33 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 33 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
34 34
35 For instance, use 35 For instance, use
36 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 36 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
37 37
38 The device name specifies which microphone to use in case there are several 38 The device name specifies which microphone to use in case there are several
39 on your system. You can use 39 on your system. You can use
@@ -45,10 +45,10 @@ This program shows how to use a streaming VAD with non-streaming ASR in @@ -45,10 +45,10 @@ This program shows how to use a streaming VAD with non-streaming ASR in
45 sherpa-onnx. 45 sherpa-onnx.
46 46
47 Please download silero_vad.onnx from 47 Please download silero_vad.onnx from
48 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 48 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
49 49
50 For instance, use 50 For instance, use
51 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 51 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
52 52
53 Please refer to ./sherpa-onnx-microphone-offline.cc 53 Please refer to ./sherpa-onnx-microphone-offline.cc
54 to download models for offline ASR. 54 to download models for offline ASR.
@@ -49,10 +49,10 @@ This program shows how to use VAD in sherpa-onnx. @@ -49,10 +49,10 @@ This program shows how to use VAD in sherpa-onnx.
49 --vad-num-threads=1 49 --vad-num-threads=1
50 50
51 Please download silero_vad.onnx from 51 Please download silero_vad.onnx from
52 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 52 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
53 53
54 For instance, use 54 For instance, use
55 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 55 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
56 )usage"; 56 )usage";
57 57
58 sherpa_onnx::ParseOptions po(kUsageMessage); 58 sherpa_onnx::ParseOptions po(kUsageMessage);
@@ -23,10 +23,10 @@ to remove silences from a file. @@ -23,10 +23,10 @@ to remove silences from a file.
23 /path/to/output.wav 23 /path/to/output.wav
24 24
25 Please download silero_vad.onnx from 25 Please download silero_vad.onnx from
26 -https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 26 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
27 27
28 For instance, use 28 For instance, use
29 -wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 29 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
30 30
31 input.wav should be 16kHz. 31 input.wav should be 16kHz.
32 )usage"; 32 )usage";
@@ -74,6 +74,7 @@ java_files += SpeakerEmbeddingExtractorConfig.java @@ -74,6 +74,7 @@ java_files += SpeakerEmbeddingExtractorConfig.java
74 java_files += SpeakerEmbeddingExtractor.java 74 java_files += SpeakerEmbeddingExtractor.java
75 java_files += SpeakerEmbeddingManager.java 75 java_files += SpeakerEmbeddingManager.java
76 76
  77 +java_files += TenVadModelConfig.java
77 java_files += SileroVadModelConfig.java 78 java_files += SileroVadModelConfig.java
78 java_files += VadModelConfig.java 79 java_files += VadModelConfig.java
79 java_files += SpeechSegment.java 80 java_files += SpeechSegment.java
  1 +// Copyright 2025 Xiaomi Corporation
  2 +
  3 +package com.k2fsa.sherpa.onnx;
  4 +
  5 +public class TenVadModelConfig {
  6 + private final String model;
  7 + private final float threshold;
  8 + private final float minSilenceDuration;
  9 + private final float minSpeechDuration;
  10 + private final int windowSize;
  11 + private final float maxSpeechDuration;
  12 +
  13 + private TenVadModelConfig(Builder builder) {
  14 + this.model = builder.model;
  15 + this.threshold = builder.threshold;
  16 + this.minSilenceDuration = builder.minSilenceDuration;
  17 + this.minSpeechDuration = builder.minSpeechDuration;
  18 + this.windowSize = builder.windowSize;
  19 + this.maxSpeechDuration = builder.maxSpeechDuration;
  20 + }
  21 +
  22 + public static Builder builder() {
  23 + return new Builder();
  24 + }
  25 +
  26 + public String getModel() {
  27 + return model;
  28 + }
  29 +
  30 + public float getThreshold() {
  31 + return threshold;
  32 + }
  33 +
  34 + public float getMinSilenceDuration() {
  35 + return minSilenceDuration;
  36 + }
  37 +
  38 + public float getMinSpeechDuration() {
  39 + return minSpeechDuration;
  40 + }
  41 +
  42 + public int getWindowSize() {
  43 + return windowSize;
  44 + }
  45 +
  46 + public float getMaxSpeechDuration() {
  47 + return maxSpeechDuration;
  48 + }
  49 +
  50 + public static class Builder {
  51 + private String model = "";
  52 + private float threshold = 0.5f;
  53 + private float minSilenceDuration = 0.25f;
  54 + private float minSpeechDuration = 0.25f;
  55 + private int windowSize = 256;
  56 + private float maxSpeechDuration = 5.0f;
  57 +
  58 + public TenVadModelConfig build() {
  59 + return new TenVadModelConfig(this);
  60 + }
  61 +
  62 +
  63 + public Builder setModel(String model) {
  64 + this.model = model;
  65 + return this;
  66 + }
  67 +
  68 + public Builder setThreshold(float threshold) {
  69 + this.threshold = threshold;
  70 + return this;
  71 + }
  72 +
  73 + public Builder setMinSilenceDuration(float minSilenceDuration) {
  74 + this.minSilenceDuration = minSilenceDuration;
  75 + return this;
  76 + }
  77 +
  78 + public Builder setMinSpeechDuration(float minSpeechDuration) {
  79 + this.minSpeechDuration = minSpeechDuration;
  80 + return this;
  81 + }
  82 +
  83 + public Builder setWindowSize(int windowSize) {
  84 + this.windowSize = windowSize;
  85 + return this;
  86 + }
  87 +
  88 + public Builder setMaxSpeechDuration(float maxSpeechDuration) {
  89 + this.maxSpeechDuration = maxSpeechDuration;
  90 + return this;
  91 + }
  92 + }
  93 +}
@@ -4,6 +4,7 @@ package com.k2fsa.sherpa.onnx; @@ -4,6 +4,7 @@ package com.k2fsa.sherpa.onnx;
4 4
5 public class VadModelConfig { 5 public class VadModelConfig {
6 private final SileroVadModelConfig sileroVadModelConfig; 6 private final SileroVadModelConfig sileroVadModelConfig;
  7 + private final TenVadModelConfig tenVadModelConfig;
7 private final int sampleRate; 8 private final int sampleRate;
8 private final int numThreads; 9 private final int numThreads;
9 private final boolean debug; 10 private final boolean debug;
@@ -11,6 +12,7 @@ public class VadModelConfig { @@ -11,6 +12,7 @@ public class VadModelConfig {
11 12
12 private VadModelConfig(Builder builder) { 13 private VadModelConfig(Builder builder) {
13 this.sileroVadModelConfig = builder.sileroVadModelConfig; 14 this.sileroVadModelConfig = builder.sileroVadModelConfig;
  15 + this.tenVadModelConfig = builder.tenVadModelConfig;
14 this.sampleRate = builder.sampleRate; 16 this.sampleRate = builder.sampleRate;
15 this.numThreads = builder.numThreads; 17 this.numThreads = builder.numThreads;
16 this.debug = builder.debug; 18 this.debug = builder.debug;
@@ -25,6 +27,10 @@ public class VadModelConfig { @@ -25,6 +27,10 @@ public class VadModelConfig {
25 return sileroVadModelConfig; 27 return sileroVadModelConfig;
26 } 28 }
27 29
  30 + public TenVadModelConfig getTenVadModelConfig() {
  31 + return tenVadModelConfig;
  32 + }
  33 +
28 public int getSampleRate() { 34 public int getSampleRate() {
29 return sampleRate; 35 return sampleRate;
30 } 36 }
@@ -43,6 +49,7 @@ public class VadModelConfig { @@ -43,6 +49,7 @@ public class VadModelConfig {
43 49
44 public static class Builder { 50 public static class Builder {
45 private SileroVadModelConfig sileroVadModelConfig = new SileroVadModelConfig.Builder().build(); 51 private SileroVadModelConfig sileroVadModelConfig = new SileroVadModelConfig.Builder().build();
  52 + private TenVadModelConfig tenVadModelConfig = new TenVadModelConfig.Builder().build();
46 private int sampleRate = 16000; 53 private int sampleRate = 16000;
47 private int numThreads = 1; 54 private int numThreads = 1;
48 private boolean debug = true; 55 private boolean debug = true;
@@ -57,6 +64,11 @@ public class VadModelConfig { @@ -57,6 +64,11 @@ public class VadModelConfig {
57 return this; 64 return this;
58 } 65 }
59 66
  67 + public Builder setTenVadModelConfig(TenVadModelConfig tenVadModelConfig) {
  68 + this.tenVadModelConfig = tenVadModelConfig;
  69 + return this;
  70 + }
  71 +
60 public Builder setSampleRate(int sampleRate) { 72 public Builder setSampleRate(int sampleRate) {
61 this.sampleRate = sampleRate; 73 this.sampleRate = sampleRate;
62 return this; 74 return this;
@@ -44,6 +44,33 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) { @@ -44,6 +44,33 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) {
44 ans.silero_vad.max_speech_duration = 44 ans.silero_vad.max_speech_duration =
45 env->GetFloatField(silero_vad_config, fid); 45 env->GetFloatField(silero_vad_config, fid);
46 46
  47 + // ten-vad
  48 + fid = env->GetFieldID(cls, "tenVadModelConfig",
  49 + "Lcom/k2fsa/sherpa/onnx/TenVadModelConfig;");
  50 + jobject ten_vad_config = env->GetObjectField(config, fid);
  51 + jclass ten_vad_config_cls = env->GetObjectClass(ten_vad_config);
  52 +
  53 + fid = env->GetFieldID(ten_vad_config_cls, "model", "Ljava/lang/String;");
  54 + s = (jstring)env->GetObjectField(ten_vad_config, fid);
  55 + p = env->GetStringUTFChars(s, nullptr);
  56 + ans.ten_vad.model = p;
  57 + env->ReleaseStringUTFChars(s, p);
  58 +
  59 + fid = env->GetFieldID(ten_vad_config_cls, "threshold", "F");
  60 + ans.ten_vad.threshold = env->GetFloatField(ten_vad_config, fid);
  61 +
  62 + fid = env->GetFieldID(ten_vad_config_cls, "minSilenceDuration", "F");
  63 + ans.ten_vad.min_silence_duration = env->GetFloatField(ten_vad_config, fid);
  64 +
  65 + fid = env->GetFieldID(ten_vad_config_cls, "minSpeechDuration", "F");
  66 + ans.ten_vad.min_speech_duration = env->GetFloatField(ten_vad_config, fid);
  67 +
  68 + fid = env->GetFieldID(ten_vad_config_cls, "windowSize", "I");
  69 + ans.ten_vad.window_size = env->GetIntField(ten_vad_config, fid);
  70 +
  71 + fid = env->GetFieldID(ten_vad_config_cls, "maxSpeechDuration", "F");
  72 + ans.ten_vad.max_speech_duration = env->GetFloatField(ten_vad_config, fid);
  73 +
47 fid = env->GetFieldID(cls, "sampleRate", "I"); 74 fid = env->GetFieldID(cls, "sampleRate", "I");
48 ans.sample_rate = env->GetIntField(config, fid); 75 ans.sample_rate = env->GetIntField(config, fid);
49 76
@@ -179,8 +206,9 @@ JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_Vad_isSpeechDetected( @@ -179,8 +206,9 @@ JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_Vad_isSpeechDetected(
179 } 206 }
180 207
181 SHERPA_ONNX_EXTERN_C 208 SHERPA_ONNX_EXTERN_C
182 -JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(  
183 - JNIEnv *env, jobject /*obj*/, jlong ptr) { 209 +JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(JNIEnv *env,
  210 + jobject /*obj*/,
  211 + jlong ptr) {
184 SafeJNI(env, "Vad_reset", [&] { 212 SafeJNI(env, "Vad_reset", [&] {
185 if (!ValidatePointer(env, ptr, "Vad_reset", 213 if (!ValidatePointer(env, ptr, "Vad_reset",
186 "VoiceActivityDetector pointer is null.")) { 214 "VoiceActivityDetector pointer is null.")) {
@@ -12,8 +12,18 @@ data class SileroVadModelConfig( @@ -12,8 +12,18 @@ data class SileroVadModelConfig(
12 var maxSpeechDuration: Float = 5.0F, 12 var maxSpeechDuration: Float = 5.0F,
13 ) 13 )
14 14
  15 +data class TenVadModelConfig(
  16 + var model: String = "",
  17 + var threshold: Float = 0.5F,
  18 + var minSilenceDuration: Float = 0.25F,
  19 + var minSpeechDuration: Float = 0.25F,
  20 + var windowSize: Int = 256,
  21 + var maxSpeechDuration: Float = 5.0F,
  22 +)
  23 +
15 data class VadModelConfig( 24 data class VadModelConfig(
16 var sileroVadModelConfig: SileroVadModelConfig = SileroVadModelConfig(), 25 var sileroVadModelConfig: SileroVadModelConfig = SileroVadModelConfig(),
  26 + var tenVadModelConfig: TenVadModelConfig = TenVadModelConfig(),
17 var sampleRate: Int = 16000, 27 var sampleRate: Int = 16000,
18 var numThreads: Int = 1, 28 var numThreads: Int = 1,
19 var provider: String = "cpu", 29 var provider: String = "cpu",
@@ -91,10 +101,14 @@ class Vad( @@ -91,10 +101,14 @@ class Vad(
91 } 101 }
92 102
93 // Please visit 103 // Please visit
94 -// https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx 104 +// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
95 // to download silero_vad.onnx 105 // to download silero_vad.onnx
96 // and put it inside the assets/ 106 // and put it inside the assets/
97 // directory 107 // directory
  108 +//
  109 +// For ten-vad, please use
  110 +// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
  111 +//
98 fun getVadModelConfig(type: Int): VadModelConfig? { 112 fun getVadModelConfig(type: Int): VadModelConfig? {
99 when (type) { 113 when (type) {
100 0 -> { 114 0 -> {
@@ -111,6 +125,21 @@ fun getVadModelConfig(type: Int): VadModelConfig? { @@ -111,6 +125,21 @@ fun getVadModelConfig(type: Int): VadModelConfig? {
111 provider = "cpu", 125 provider = "cpu",
112 ) 126 )
113 } 127 }
  128 +
  129 + 1 -> {
  130 + return VadModelConfig(
  131 + tenVadModelConfig = TenVadModelConfig(
  132 + model = "ten-vad.onnx",
  133 + threshold = 0.5F,
  134 + minSilenceDuration = 0.25F,
  135 + minSpeechDuration = 0.25F,
  136 + windowSize = 256,
  137 + ),
  138 + sampleRate = 16000,
  139 + numThreads = 1,
  140 + provider = "cpu",
  141 + )
  142 + }
114 } 143 }
115 return null 144 return null
116 } 145 }