Fangjun Kuang
Committed by GitHub

Add C# API for ten-vad (#2385)

1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
2 2
  3 +set -ex
  4 +
3 cd dotnet-examples/ 5 cd dotnet-examples/
4 6
5 cd ./version-test 7 cd ./version-test
6 ./run.sh 8 ./run.sh
7 ls -lh 9 ls -lh
8 10
  11 +cd ../vad-non-streaming-asr-paraformer
  12 +./run-ten-vad.sh
  13 +rm -fv *.onnx
  14 +
  15 +./run.sh
  16 +rm -fv *.onnx
  17 +
9 cd ../non-streaming-canary-decode-files 18 cd ../non-streaming-canary-decode-files
10 ./run.sh 19 ./run.sh
11 ls -lh 20 ls -lh
@@ -106,9 +115,6 @@ rm -rf sherpa-onnx-* @@ -106,9 +115,6 @@ rm -rf sherpa-onnx-*
106 ./run-paraformer.sh 115 ./run-paraformer.sh
107 rm -rf sherpa-onnx-* 116 rm -rf sherpa-onnx-*
108 117
109 -cd ../vad-non-streaming-asr-paraformer  
110 -./run.sh  
111 -  
112 cd ../offline-punctuation 118 cd ../offline-punctuation
113 ./run.sh 119 ./run.sh
114 rm -rf sherpa-onnx-* 120 rm -rf sherpa-onnx-*
@@ -102,6 +102,7 @@ jobs: @@ -102,6 +102,7 @@ jobs:
102 df -h 102 df -h
103 103
104 - name: Free space 104 - name: Free space
  105 + if: false
105 shell: bash 106 shell: bash
106 run: | 107 run: |
107 df -h 108 df -h
@@ -109,6 +110,7 @@ jobs: @@ -109,6 +110,7 @@ jobs:
109 df -h 110 df -h
110 111
111 - name: Free more space 112 - name: Free more space
  113 + if: false
112 shell: bash 114 shell: bash
113 run: | 115 run: |
114 # https://github.com/orgs/community/discussions/25678 116 # https://github.com/orgs/community/discussions/25678
@@ -120,6 +122,7 @@ jobs: @@ -120,6 +122,7 @@ jobs:
120 sudo rm -rf "$AGENT_TOOLSDIRECTORY" 122 sudo rm -rf "$AGENT_TOOLSDIRECTORY"
121 123
122 - name: Free Disk Space (Ubuntu) 124 - name: Free Disk Space (Ubuntu)
  125 + if: false
123 uses: jlumbroso/free-disk-space@main 126 uses: jlumbroso/free-disk-space@main
124 with: 127 with:
125 # this might remove tools that are actually needed, 128 # this might remove tools that are actually needed,
@@ -136,6 +139,7 @@ jobs: @@ -136,6 +139,7 @@ jobs:
136 swap-storage: true 139 swap-storage: true
137 140
138 - name: Check space 141 - name: Check space
  142 + if: false
139 shell: bash 143 shell: bash
140 run: | 144 run: |
141 df -h 145 df -h
1 // Copyright (c) 2024 Xiaomi Corporation 1 // Copyright (c) 2024 Xiaomi Corporation
2 // 2 //
3 -// This file shows how to use a silero_vad model with a non-streaming Paraformer  
4 -// for speech recognition. 3 +// This file shows how to use a silero_vad model or ten-vad model
  4 +// with a non-streaming Paraformer for speech recognition.
5 using SherpaOnnx; 5 using SherpaOnnx;
  6 +using System.IO;
  7 +
6 8
7 class VadNonStreamingAsrParaformer 9 class VadNonStreamingAsrParaformer
8 { 10 {
@@ -17,7 +19,31 @@ class VadNonStreamingAsrParaformer @@ -17,7 +19,31 @@ class VadNonStreamingAsrParaformer
17 var recognizer = new OfflineRecognizer(config); 19 var recognizer = new OfflineRecognizer(config);
18 20
19 var vadModelConfig = new VadModelConfig(); 21 var vadModelConfig = new VadModelConfig();
20 - vadModelConfig.SileroVad.Model = "./silero_vad.onnx"; 22 + if (File.Exists("./silero_vad.onnx"))
  23 + {
  24 + Console.WriteLine("Use silero-vad");
  25 + vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
  26 + vadModelConfig.SileroVad.Threshold = 0.3F;
  27 + vadModelConfig.SileroVad.MinSilenceDuration = 0.5F;
  28 + vadModelConfig.SileroVad.MinSpeechDuration = 0.25F;
  29 + vadModelConfig.SileroVad.MaxSpeechDuration = 5.0F;
  30 + vadModelConfig.SileroVad.WindowSize = 512;
  31 + }
  32 + else if (File.Exists("./ten-vad.onnx"))
  33 + {
  34 + Console.WriteLine("Use ten-vad");
  35 + vadModelConfig.TenVad.Model = "./ten-vad.onnx";
  36 + vadModelConfig.TenVad.Threshold = 0.3F;
  37 + vadModelConfig.TenVad.MinSilenceDuration = 0.5F;
  38 + vadModelConfig.TenVad.MinSpeechDuration = 0.25F;
  39 + vadModelConfig.TenVad.MaxSpeechDuration = 5.0F;
  40 + vadModelConfig.TenVad.WindowSize = 256;
  41 + }
  42 + else
  43 + {
  44 + Console.WriteLine("Please download ./silero_vad.onnx or ./ten-vad.onnx");
  45 + return;
  46 + }
21 vadModelConfig.Debug = 0; 47 vadModelConfig.Debug = 0;
22 48
23 var vad = new VoiceActivityDetector(vadModelConfig, 60); 49 var vad = new VoiceActivityDetector(vadModelConfig, 60);
@@ -27,6 +53,12 @@ class VadNonStreamingAsrParaformer @@ -27,6 +53,12 @@ class VadNonStreamingAsrParaformer
27 53
28 int numSamples = reader.Samples.Length; 54 int numSamples = reader.Samples.Length;
29 int windowSize = vadModelConfig.SileroVad.WindowSize; 55 int windowSize = vadModelConfig.SileroVad.WindowSize;
  56 +
  57 + if (vadModelConfig.TenVad.Model != "")
  58 + {
  59 + windowSize = vadModelConfig.TenVad.WindowSize;
  60 + }
  61 +
30 int sampleRate = vadModelConfig.SampleRate; 62 int sampleRate = vadModelConfig.SampleRate;
31 int numIter = numSamples / windowSize; 63 int numIter = numSamples / windowSize;
32 64
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -f ./ten-vad.onnx ]; then
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
  7 +fi
  8 +
  9 +if [ ! -f ./lei-jun-test.wav ]; then
  10 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
  11 +fi
  12 +
  13 +if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  14 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  15 +
  16 + tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  17 + rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  18 +fi
  19 +
  20 +dotnet run
  1 +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +using System.Runtime.InteropServices;
  4 +
  5 +namespace SherpaOnnx
  6 +{
  7 + [StructLayout(LayoutKind.Sequential)] // fields are laid out in declaration order; NOTE(review): presumably mirrors a native struct, so do not reorder fields without checking the native side
  8 + public struct TenVadModelConfig // configuration for the ten-vad model; stored in VadModelConfig as its TenVad field
  9 + {
  10 + public TenVadModelConfig() // sets every field to its default value
  11 + {
  12 + Model = ""; // empty by default; the example treats a non-empty Model as "ten-vad is in use"
  13 + Threshold = 0.5F;
  14 + MinSilenceDuration = 0.5F;
  15 + MinSpeechDuration = 0.25F;
  16 + WindowSize = 256;
  17 + MaxSpeechDuration = 5.0F;
  18 + }
  19 +
  20 + [MarshalAs(UnmanagedType.LPStr)] // passed to native code as a pointer to a null-terminated 8-bit character string
  21 + public string Model; // path to the ten-vad model file, e.g. "./ten-vad.onnx"
  22 +
  23 + public float Threshold; // default 0.5; NOTE(review): presumably the speech-probability threshold, confirm
  24 +
  25 + public float MinSilenceDuration; // default 0.5; presumably in seconds, confirm
  26 +
  27 + public float MinSpeechDuration; // default 0.25; presumably in seconds, confirm
  28 +
  29 + public int WindowSize; // default 256; the example divides the sample count by this value, so it is a number of samples
  30 +
  31 + public float MaxSpeechDuration; // default 5.0; presumably in seconds, confirm
  32 + }
  33 +}
@@ -14,6 +14,7 @@ namespace SherpaOnnx @@ -14,6 +14,7 @@ namespace SherpaOnnx
14 NumThreads = 1; 14 NumThreads = 1;
15 Provider = "cpu"; 15 Provider = "cpu";
16 Debug = 0; 16 Debug = 0;
  17 + TenVad = new TenVadModelConfig();
17 } 18 }
18 19
19 public SileroVadModelConfig SileroVad; 20 public SileroVadModelConfig SileroVad;
@@ -26,6 +27,8 @@ namespace SherpaOnnx @@ -26,6 +27,8 @@ namespace SherpaOnnx
26 public string Provider; 27 public string Provider;
27 28
28 public int Debug; 29 public int Debug;
  30 +
  31 + public TenVadModelConfig TenVad;
29 } 32 }
30 } 33 }
31 34