Add C API for ten-vad (#2379)

Fangjun Kuang · GitHub
Commit ceb1bc5ec367b80de97cd678b779cfe70abb85c4 ceb1bc5e 1 parent da9f3033
.github/workflows/c-api.yaml
c-api-examples/vad-moonshine-c-api.c
c-api-examples/vad-sense-voice-c-api.c
c-api-examples/vad-whisper-c-api.c
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
sherpa-onnx/c-api/cxx-api.cc
sherpa-onnx/c-api/cxx-api.h
sherpa-onnx/csrc/ten-vad-model.cc
--- a/.github/workflows/c-api.yaml
查看文件 @ceb1bc5
+++ b/.github/workflows/c-api.yaml
查看文件 @ceb1bc5
@@ -376,7 +376,7 @@ jobs:
           name: matcha-tts-${{ matrix.os }}
           path: ./generated-matcha-*.wav
-      - name: Test vad + Whisper tiny.en
+      - name: Test silero-vad + Whisper tiny.en
         shell: bash
         run: |
           gcc -o vad-whisper-c-api ./c-api-examples/vad-whisper-c-api.c \
@@ -403,7 +403,34 @@ jobs:
           rm -rf *.onnx
           rm *.wav
-      - name: Test vad + Moonshine
+      - name: Test ten-vad + Whisper tiny.en
+        shell: bash
+        run: |
+          gcc -o vad-whisper-c-api ./c-api-examples/vad-whisper-c-api.c \
+            -I ./build/install/include \
+            -L ./build/install/lib/ \
+            -l sherpa-onnx-c-api \
+            -l onnxruntime
+
+          # Now download models
+          #
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+          tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+          rm sherpa-onnx-whisper-tiny.en.tar.bz2
+
+          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+          ./vad-whisper-c-api
+
+          rm -rf sherpa-onnx-*
+          rm -rf *.onnx
+          rm *.wav
+
+      - name: Test silero-vad + Moonshine
         shell: bash
         run: |
           gcc -o vad-moonshine-c-api ./c-api-examples/vad-moonshine-c-api.c \
@@ -430,6 +457,33 @@ jobs:
           rm -rf *.onnx
           rm *.wav
+      - name: Test ten-vad + Moonshine
+        shell: bash
+        run: |
+          gcc -o vad-moonshine-c-api ./c-api-examples/vad-moonshine-c-api.c \
+            -I ./build/install/include \
+            -L ./build/install/lib/ \
+            -l sherpa-onnx-c-api \
+            -l onnxruntime
+
+          # Now download models
+          #
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+          tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+          rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+          ./vad-moonshine-c-api
+
+          rm -rf sherpa-onnx-*
+          rm -rf *.onnx
+          rm *.wav
+
       - name: Test Moonshine
         shell: bash
         run: |
@@ -466,7 +520,7 @@ jobs:
           ./run.sh
           rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
-      - name: Test vad + sense-voice
+      - name: Test silero-vad + sense-voice
         shell: bash
         run: |
           gcc -o vad-sense-voice-c-api ./c-api-examples/vad-sense-voice-c-api.c \
@@ -505,6 +559,45 @@ jobs:
           rm -rf *.onnx
           rm *.wav
+      - name: Test ten-vad + sense-voice
+        shell: bash
+        run: |
+          gcc -o vad-sense-voice-c-api ./c-api-examples/vad-sense-voice-c-api.c \
+            -I ./build/install/include \
+            -L ./build/install/lib/ \
+            -l sherpa-onnx-c-api \
+            -l onnxruntime
+
+          ls -lh vad-sense-voice-c-api
+
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
+            ldd ./vad-sense-voice-c-api
+            echo "----"
+            readelf -d ./vad-sense-voice-c-api
+          fi
+
+          # Now download models
+          #
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
+
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+          tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+          rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+
+          ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
+          echo "---"
+          ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs
+
+          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+          ./vad-sense-voice-c-api
+
+          rm -rf sherpa-onnx-sense-voice-*
+          rm -rf *.onnx
+          rm *.wav
+
       - name: Test sense-voice
         shell: bash
         run: |
--- a/c-api-examples/vad-moonshine-c-api.c
查看文件 @ceb1bc5
+++ b/c-api-examples/vad-moonshine-c-api.c
查看文件 @ceb1bc5
@@ -6,7 +6,12 @@
 // This file demonstrates how to use VAD + Moonshine with sherpa-onnx's C API.
 // clang-format off
 //
-// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+// To use silero-vad:
+//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+//
+// To use ten-vad:
+//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
+//
 // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
 //
 // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
@@ -23,7 +28,27 @@
 int32_t main() {
   const char *wav_filename = "./Obama.wav";
-  const char *vad_filename = "./silero_vad.onnx";
+  if (!SherpaOnnxFileExists(wav_filename)) {
+    fprintf(stderr, "Please download %s\n", wav_filename);
+    return -1;
+  }
+
+  const char *vad_filename;
+  int32_t use_silero_vad = 0;
+  int32_t use_ten_vad = 0;
+
+  if (SherpaOnnxFileExists("./silero_vad.onnx")) {
+    printf("Use silero-vad\n");
+    vad_filename = "./silero_vad.onnx";
+    use_silero_vad = 1;
+  } else if (SherpaOnnxFileExists("./ten-vad.onnx")) {
+    printf("Use ten-vad\n");
+    vad_filename = "./ten-vad.onnx";
+    use_ten_vad = 1;
+  } else {
+    fprintf(stderr, "Please provide either silero_vad.onnx or ten-vad.onnx\n");
+    return -1;
+  }
   const char *preprocessor =
       "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
@@ -76,12 +101,22 @@ int32_t main() {
   SherpaOnnxVadModelConfig vadConfig;
   memset(&vadConfig, 0, sizeof(vadConfig));
-  vadConfig.silero_vad.model = vad_filename;
-  vadConfig.silero_vad.threshold = 0.5;
-  vadConfig.silero_vad.min_silence_duration = 0.5;
-  vadConfig.silero_vad.min_speech_duration = 0.5;
-  vadConfig.silero_vad.max_speech_duration = 10;
-  vadConfig.silero_vad.window_size = 512;
+  if (use_silero_vad) {
+    vadConfig.silero_vad.model = vad_filename;
+    vadConfig.silero_vad.threshold = 0.25;
+    vadConfig.silero_vad.min_silence_duration = 0.5;
+    vadConfig.silero_vad.min_speech_duration = 0.5;
+    vadConfig.silero_vad.max_speech_duration = 10;
+    vadConfig.silero_vad.window_size = 512;
+  } else if (use_ten_vad) {
+    vadConfig.ten_vad.model = vad_filename;
+    vadConfig.ten_vad.threshold = 0.25;
+    vadConfig.ten_vad.min_silence_duration = 0.5;
+    vadConfig.ten_vad.min_speech_duration = 0.5;
+    vadConfig.ten_vad.max_speech_duration = 10;
+    vadConfig.ten_vad.window_size = 256;
+  }
+
   vadConfig.sample_rate = 16000;
   vadConfig.num_threads = 1;
   vadConfig.debug = 1;
@@ -96,7 +131,9 @@ int32_t main() {
     return -1;
   }
-  int32_t window_size = vadConfig.silero_vad.window_size;
+  int32_t window_size = use_silero_vad ? vadConfig.silero_vad.window_size
+                                       : vadConfig.ten_vad.window_size;
+
   int32_t i = 0;
   int is_eof = 0;
--- a/c-api-examples/vad-sense-voice-c-api.c
查看文件 @ceb1bc5
+++ b/c-api-examples/vad-sense-voice-c-api.c
查看文件 @ceb1bc5
@@ -6,7 +6,12 @@
 // This file demonstrates how to use VAD + SenseVoice with sherpa-onnx's C API.
 // clang-format off
 //
-// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+// To use silero-vad:
+//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+//
+// To use ten-vad:
+//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
+//
 // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
 //
 // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
@@ -23,7 +28,28 @@
 int32_t main() {
   const char *wav_filename = "./lei-jun-test.wav";
-  const char *vad_filename = "./silero_vad.onnx";
+  if (!SherpaOnnxFileExists(wav_filename)) {
+    fprintf(stderr, "Please download %s\n", wav_filename);
+    return -1;
+  }
+
+  const char *vad_filename;
+  int32_t use_silero_vad = 0;
+  int32_t use_ten_vad = 0;
+
+  if (SherpaOnnxFileExists("./silero_vad.onnx")) {
+    printf("Use silero-vad\n");
+    vad_filename = "./silero_vad.onnx";
+    use_silero_vad = 1;
+  } else if (SherpaOnnxFileExists("./ten-vad.onnx")) {
+    printf("Use ten-vad\n");
+    vad_filename = "./ten-vad.onnx";
+    use_ten_vad = 1;
+  } else {
+    fprintf(stderr, "Please provide either silero_vad.onnx or ten-vad.onnx\n");
+    return -1;
+  }
+
   const char *model_filename =
       "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
   const char *tokens_filename =
@@ -77,12 +103,23 @@ int32_t main() {
   SherpaOnnxVadModelConfig vadConfig;
   memset(&vadConfig, 0, sizeof(vadConfig));
-  vadConfig.silero_vad.model = vad_filename;
-  vadConfig.silero_vad.threshold = 0.5;
-  vadConfig.silero_vad.min_silence_duration = 0.5;
-  vadConfig.silero_vad.min_speech_duration = 0.5;
-  vadConfig.silero_vad.max_speech_duration = 5;
-  vadConfig.silero_vad.window_size = 512;
+
+  if (use_silero_vad) {
+    vadConfig.silero_vad.model = vad_filename;
+    vadConfig.silero_vad.threshold = 0.25;
+    vadConfig.silero_vad.min_silence_duration = 0.5;
+    vadConfig.silero_vad.min_speech_duration = 0.5;
+    vadConfig.silero_vad.max_speech_duration = 10;
+    vadConfig.silero_vad.window_size = 512;
+  } else if (use_ten_vad) {
+    vadConfig.ten_vad.model = vad_filename;
+    vadConfig.ten_vad.threshold = 0.25;
+    vadConfig.ten_vad.min_silence_duration = 0.5;
+    vadConfig.ten_vad.min_speech_duration = 0.5;
+    vadConfig.ten_vad.max_speech_duration = 10;
+    vadConfig.ten_vad.window_size = 256;
+  }
+
   vadConfig.sample_rate = 16000;
   vadConfig.num_threads = 1;
   vadConfig.debug = 1;
@@ -97,7 +134,8 @@ int32_t main() {
     return -1;
   }
-  int32_t window_size = vadConfig.silero_vad.window_size;
+  int32_t window_size = use_silero_vad ? vadConfig.silero_vad.window_size
+                                       : vadConfig.ten_vad.window_size;
   int32_t i = 0;
   int is_eof = 0;
--- a/c-api-examples/vad-whisper-c-api.c
查看文件 @ceb1bc5
+++ b/c-api-examples/vad-whisper-c-api.c
查看文件 @ceb1bc5
@@ -8,7 +8,12 @@
 //
 // clang-format off
 //
-// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+// To use silero-vad:
+//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+//
+// To use ten-vad:
+//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
+//
 // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
 //
 // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
@@ -25,7 +30,28 @@
 int32_t main() {
   const char *wav_filename = "./Obama.wav";
-  const char *vad_filename = "./silero_vad.onnx";
+
+  if (!SherpaOnnxFileExists(wav_filename)) {
+    fprintf(stderr, "Please download %s\n", wav_filename);
+    return -1;
+  }
+
+  const char *vad_filename;
+  int32_t use_silero_vad = 0;
+  int32_t use_ten_vad = 0;
+
+  if (SherpaOnnxFileExists("./silero_vad.onnx")) {
+    printf("Use silero-vad\n");
+    vad_filename = "./silero_vad.onnx";
+    use_silero_vad = 1;
+  } else if (SherpaOnnxFileExists("./ten-vad.onnx")) {
+    printf("Use ten-vad\n");
+    vad_filename = "./ten-vad.onnx";
+    use_ten_vad = 1;
+  } else {
+    fprintf(stderr, "Please provide either silero_vad.onnx or ten-vad.onnx\n");
+    return -1;
+  }
   const char *encoder = "sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
   const char *decoder = "sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
@@ -74,12 +100,23 @@ int32_t main() {
   SherpaOnnxVadModelConfig vadConfig;
   memset(&vadConfig, 0, sizeof(vadConfig));
-  vadConfig.silero_vad.model = vad_filename;
-  vadConfig.silero_vad.threshold = 0.5;
-  vadConfig.silero_vad.min_silence_duration = 0.5;
-  vadConfig.silero_vad.min_speech_duration = 0.5;
-  vadConfig.silero_vad.max_speech_duration = 10;
-  vadConfig.silero_vad.window_size = 512;
+
+  if (use_silero_vad) {
+    vadConfig.silero_vad.model = vad_filename;
+    vadConfig.silero_vad.threshold = 0.25;
+    vadConfig.silero_vad.min_silence_duration = 0.5;
+    vadConfig.silero_vad.min_speech_duration = 0.5;
+    vadConfig.silero_vad.max_speech_duration = 10;
+    vadConfig.silero_vad.window_size = 512;
+  } else if (use_ten_vad) {
+    vadConfig.ten_vad.model = vad_filename;
+    vadConfig.ten_vad.threshold = 0.25;
+    vadConfig.ten_vad.min_silence_duration = 0.5;
+    vadConfig.ten_vad.min_speech_duration = 0.5;
+    vadConfig.ten_vad.max_speech_duration = 10;
+    vadConfig.ten_vad.window_size = 256;
+  }
+
   vadConfig.sample_rate = 16000;
   vadConfig.num_threads = 1;
   vadConfig.debug = 1;
@@ -94,7 +131,8 @@ int32_t main() {
     return -1;
   }
-  int32_t window_size = vadConfig.silero_vad.window_size;
+  int32_t window_size = use_silero_vad ? vadConfig.silero_vad.window_size
+                                       : vadConfig.ten_vad.window_size;
   int32_t i = 0;
   int is_eof = 0;
--- a/sherpa-onnx/c-api/c-api.cc
查看文件 @ceb1bc5
+++ b/sherpa-onnx/c-api/c-api.cc
查看文件 @ceb1bc5
@@ -1033,6 +1033,21 @@ sherpa_onnx::VadModelConfig GetVadModelConfig(
   vad_config.silero_vad.max_speech_duration =
       SHERPA_ONNX_OR(config->silero_vad.max_speech_duration, 20);
+  vad_config.ten_vad.model = SHERPA_ONNX_OR(config->ten_vad.model, "");
+  vad_config.ten_vad.threshold = SHERPA_ONNX_OR(config->ten_vad.threshold, 0.5);
+
+  vad_config.ten_vad.min_silence_duration =
+      SHERPA_ONNX_OR(config->ten_vad.min_silence_duration, 0.5);
+
+  vad_config.ten_vad.min_speech_duration =
+      SHERPA_ONNX_OR(config->ten_vad.min_speech_duration, 0.25);
+
+  vad_config.ten_vad.window_size =
+      SHERPA_ONNX_OR(config->ten_vad.window_size, 256);
+
+  vad_config.ten_vad.max_speech_duration =
+      SHERPA_ONNX_OR(config->ten_vad.max_speech_duration, 20);
+
   vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000);
   vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
   vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
--- a/sherpa-onnx/c-api/c-api.h
查看文件 @ceb1bc5
+++ b/sherpa-onnx/c-api/c-api.h
查看文件 @ceb1bc5
@@ -71,6 +71,9 @@ SHERPA_ONNX_API const char *SherpaOnnxGetGitSha1();
 // Example return value: "Fri Jun 20 11:22:52 2025"
 SHERPA_ONNX_API const char *SherpaOnnxGetGitDate();
+// Return 1 if the given file exists; return 0 otherwise.
+SHERPA_ONNX_API int32_t SherpaOnnxFileExists(const char *filename);
+
 /// Please refer to
 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
 /// to download pre-trained models. That is, you can find encoder-xxx.onnx
@@ -845,6 +848,30 @@ SHERPA_ONNX_API typedef struct SherpaOnnxSileroVadModelConfig {
   float max_speech_duration;
 } SherpaOnnxSileroVadModelConfig;
+SHERPA_ONNX_API typedef struct SherpaOnnxTenVadModelConfig {
+  // Path to the ten-vad model
+  const char *model;
+
+  // Threshold to classify a segment as speech.
+  //
+  // If the predicted probability of a segment is larger than this
+  // value, then it is classified as speech.
+  float threshold;
+
+  // Minimum silence duration, in seconds
+  float min_silence_duration;
+
+  // Minimum speech duration, in seconds
+  float min_speech_duration;
+
+  int32_t window_size;  // presumably in samples (TODO confirm)
+
+  // If a speech segment is longer than this value, then we increase
+  // the threshold to 0.9. After finishing detecting the segment,
+  // the threshold value is reset to its original value.
+  float max_speech_duration;
+} SherpaOnnxTenVadModelConfig;
+
 SHERPA_ONNX_API typedef struct SherpaOnnxVadModelConfig {
   SherpaOnnxSileroVadModelConfig silero_vad;
@@ -852,6 +879,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxVadModelConfig {
   int32_t num_threads;
   const char *provider;
   int32_t debug;
+  SherpaOnnxTenVadModelConfig ten_vad;
 } SherpaOnnxVadModelConfig;
 SHERPA_ONNX_API typedef struct SherpaOnnxCircularBuffer
@@ -1567,9 +1595,6 @@ SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate(
 SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
     const SherpaOnnxLinearResampler *p);
-// Return 1 if the file exists; return 0 if the file does not exist.
-SHERPA_ONNX_API int32_t SherpaOnnxFileExists(const char *filename);
-
 // =========================================================================
 // For offline speaker diarization (i.e., non-streaming speaker diarization)
 // =========================================================================
--- a/sherpa-onnx/c-api/cxx-api.cc
查看文件 @ceb1bc5
+++ b/sherpa-onnx/c-api/cxx-api.cc
查看文件 @ceb1bc5
@@ -655,6 +655,13 @@ VoiceActivityDetector VoiceActivityDetector::Create(
   c.silero_vad.window_size = config.silero_vad.window_size;
   c.silero_vad.max_speech_duration = config.silero_vad.max_speech_duration;
+  c.ten_vad.model = config.ten_vad.model.c_str();
+  c.ten_vad.threshold = config.ten_vad.threshold;
+  c.ten_vad.min_silence_duration = config.ten_vad.min_silence_duration;
+  c.ten_vad.min_speech_duration = config.ten_vad.min_speech_duration;
+  c.ten_vad.window_size = config.ten_vad.window_size;
+  c.ten_vad.max_speech_duration = config.ten_vad.max_speech_duration;
+
   c.sample_rate = config.sample_rate;
   c.num_threads = config.num_threads;
   c.provider = config.provider.c_str();
@@ -758,4 +765,8 @@ std::string GetGitSha1() { return SherpaOnnxGetGitSha1(); }
 std::string GetGitDate() { return SherpaOnnxGetGitDate(); }
+bool FileExists(const std::string &filename) {
+  return SherpaOnnxFileExists(filename.c_str());  // C API yields 1/0 -> bool
+}
+
 }  // namespace sherpa_onnx::cxx
--- a/sherpa-onnx/c-api/cxx-api.h
查看文件 @ceb1bc5
+++ b/sherpa-onnx/c-api/cxx-api.h
查看文件 @ceb1bc5
@@ -552,8 +552,18 @@ struct SileroVadModelConfig {
   float max_speech_duration = 20;
 };
+struct TenVadModelConfig {  // C++ counterpart of SherpaOnnxTenVadModelConfig
+  std::string model;  // path to the ten-vad model
+  float threshold = 0.5;  // speech if predicted probability > threshold
+  float min_silence_duration = 0.5;  // in seconds
+  float min_speech_duration = 0.25;  // in seconds
+  int32_t window_size = 256;  // presumably in samples (TODO confirm)
+  float max_speech_duration = 20;  // see SherpaOnnxTenVadModelConfig
+};
+
 struct VadModelConfig {
   SileroVadModelConfig silero_vad;
+  TenVadModelConfig ten_vad;
   int32_t sample_rate = 16000;
   int32_t num_threads = 1;
@@ -642,6 +652,7 @@ class SHERPA_ONNX_API LinearResampler
 std::string GetVersionStr();
 std::string GetGitSha1();
 std::string GetGitDate();
+bool FileExists(const std::string &filename);
 }  // namespace sherpa_onnx::cxx
--- a/sherpa-onnx/csrc/ten-vad-model.cc
查看文件 @ceb1bc5
+++ b/sherpa-onnx/csrc/ten-vad-model.cc
查看文件 @ceb1bc5
@@ -321,7 +321,7 @@ class TenVadModel::Impl {
   static void LogMel(const float *in, int32_t n, float *out) {
     for (int32_t i = 0; i != n; ++i) {
       // 20.79441541679836 is log(32768*32768)
-      out[i] = logf(in[i] + 1e-10) - 20.79441541679836f;
+      out[i] = logf(in[i] + 1e-10f) - 20.79441541679836f;
     }
   }