Add Flush to VAD so that the last segment can be detected. (#1099)

Fangjun Kuang · GitHub
Commit c2cc9dec5866e00a1464554f382668f2887de70c c2cc9dec 1 parent 3e4307e2
.github/workflows/dot-net.yaml
CHANGELOG.md
CMakeLists.txt
dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart
dart-api-examples/non-streaming-asr/pubspec.yaml
dart-api-examples/streaming-asr/pubspec.yaml
dart-api-examples/tts/pubspec.yaml
dart-api-examples/vad/bin/vad.dart
dart-api-examples/vad/pubspec.yaml
dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
flutter-examples/streaming_asr/pubspec.yaml
flutter-examples/tts/pubspec.yaml
flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
flutter/sherpa_onnx/lib/src/vad.dart
flutter/sherpa_onnx/pubspec.yaml
flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
java-api-examples/VadNonStreamingParaformer.java
java-api-examples/VadRemoveSilence.java
nodejs-addon-examples/package.json
--- a/.github/workflows/dot-net.yaml
查看文件 @c2cc9de
+++ b/.github/workflows/dot-net.yaml
查看文件 @c2cc9de
@@ -52,11 +52,6 @@ jobs:
           cmake --build . --target install --config Release
           rm -rf install/pkgconfig
-      - uses: actions/upload-artifact@v4
-        with:
-          name: windows-${{ matrix.arch }}
-          path: ./build/install/lib/
-
       - name: Create tar file
         shell: bash
         run: |
@@ -72,6 +67,11 @@ jobs:
           ls -lh *.tar.bz2
           mv *.tar.bz2 ../
+      - uses: actions/upload-artifact@v4
+        with:
+          name: windows-${{ matrix.arch }}
+          path: ./*.tar.bz2
+
       # https://huggingface.co/docs/hub/spaces-github-actions
       - name: Publish to huggingface
         if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
@@ -88,7 +88,9 @@ jobs:
             rm -rf huggingface
             export GIT_CLONE_PROTECTION_ACTIVE=false
-            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+            export GIT_LFS_SKIP_SMUDGE=1
+
+            git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
             cd huggingface
             mkdir -p windows-for-dotnet
--- a/CHANGELOG.md
查看文件 @c2cc9de
+++ b/CHANGELOG.md
查看文件 @c2cc9de
+## 1.10.12
+
+* Add Flush to VAD so that the last speech segment can be detected. See also
+  https://github.com/k2-fsa/sherpa-onnx/discussions/1077#discussioncomment-9979740
+
 ## 1.10.11
 * Support the iOS platform for Flutter.
--- a/CMakeLists.txt
查看文件 @c2cc9de
+++ b/CMakeLists.txt
查看文件 @c2cc9de
@@ -10,8 +10,8 @@ project(sherpa-onnx)
 # Remember to update
 # ./nodejs-addon-examples
 # ./dart-api-examples/
-# ./sherpa-onnx/flutter/CHANGELOG.md
-set(SHERPA_ONNX_VERSION "1.10.11")
+# ./CHANGELOG.md
+set(SHERPA_ONNX_VERSION "1.10.12")
 # Disable warning about
 #
--- a/dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart
查看文件 @c2cc9de
+++ b/dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart
查看文件 @c2cc9de
@@ -93,6 +93,28 @@ void main(List<String> arguments) async {
     }
   }
+  vad.flush();
+  while (!vad.isEmpty()) {
+    final stream = recognizer.createStream();
+    final segment = vad.front();
+    stream.acceptWaveform(
+        samples: segment.samples, sampleRate: waveData.sampleRate);
+    recognizer.decode(stream);
+
+    final result = recognizer.getResult(stream);
+
+    final startTime = segment.start * 1.0 / waveData.sampleRate;
+    final duration = segment.samples.length * 1.0 / waveData.sampleRate;
+    final stopTime = startTime + duration;
+    if (result.text != '') {
+      print(
+          '${startTime.toStringAsPrecision(4)} -- ${stopTime.toStringAsPrecision(4)}: ${result.text}');
+    }
+
+    stream.free();
+    vad.pop();
+  }
+
   vad.free();
   recognizer.free();
 }
--- a/dart-api-examples/non-streaming-asr/pubspec.yaml
查看文件 @c2cc9de
+++ b/dart-api-examples/non-streaming-asr/pubspec.yaml
查看文件 @c2cc9de
@@ -10,7 +10,7 @@ environment:
 # Add regular dependencies here.
 dependencies:
-  sherpa_onnx: ^1.10.11
+  sherpa_onnx: ^1.10.12
   path: ^1.9.0
   args: ^2.5.0
--- a/dart-api-examples/streaming-asr/pubspec.yaml
查看文件 @c2cc9de
+++ b/dart-api-examples/streaming-asr/pubspec.yaml
查看文件 @c2cc9de
@@ -11,7 +11,7 @@ environment:
 # Add regular dependencies here.
 dependencies:
-  sherpa_onnx: ^1.10.11
+  sherpa_onnx: ^1.10.12
   path: ^1.9.0
   args: ^2.5.0
--- a/dart-api-examples/tts/pubspec.yaml
查看文件 @c2cc9de
+++ b/dart-api-examples/tts/pubspec.yaml
查看文件 @c2cc9de
@@ -8,7 +8,7 @@ environment:
 # Add regular dependencies here.
 dependencies:
-  sherpa_onnx: ^1.10.11
+  sherpa_onnx: ^1.10.12
   path: ^1.9.0
   args: ^2.5.0
--- a/dart-api-examples/vad/bin/vad.dart
查看文件 @c2cc9de
+++ b/dart-api-examples/vad/bin/vad.dart
查看文件 @c2cc9de
@@ -65,6 +65,12 @@ void main(List<String> arguments) async {
     }
   }
+  vad.flush();
+  while (!vad.isEmpty()) {
+    allSamples.add(vad.front().samples);
+    vad.pop();
+  }
+
   vad.free();
   final s = Float32List.fromList(allSamples.expand((x) => x).toList());
--- a/dart-api-examples/vad/pubspec.yaml
查看文件 @c2cc9de
+++ b/dart-api-examples/vad/pubspec.yaml
查看文件 @c2cc9de
@@ -9,7 +9,7 @@ environment:
   sdk: ^3.4.0
 dependencies:
-  sherpa_onnx: ^1.10.11
+  sherpa_onnx: ^1.10.12
   path: ^1.9.0
   args: ^2.5.0
--- a/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
查看文件 @c2cc9de
+++ b/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
查看文件 @c2cc9de
@@ -57,6 +57,26 @@ class VadNonStreamingAsrParaformer
         }
       }
     }
+
+    vad.Flush();
+
+    while (!vad.IsEmpty()) {
+      SpeechSegment segment = vad.Front();
+      float startTime = segment.Start / (float)sampleRate;
+      float duration = segment.Samples.Length / (float)sampleRate;
+
+      OfflineStream stream = recognizer.CreateStream();
+      stream.AcceptWaveform(sampleRate, segment.Samples);
+      recognizer.Decode(stream);
+      String text = stream.Result.Text;
+
+      if (!String.IsNullOrEmpty(text)) {
+        Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
+            String.Format("{0:0.00}", startTime+duration), text);
+      }
+
+      vad.Pop();
+    }
   }
 }
--- a/flutter-examples/streaming_asr/pubspec.yaml
查看文件 @c2cc9de
+++ b/flutter-examples/streaming_asr/pubspec.yaml
查看文件 @c2cc9de
@@ -5,7 +5,7 @@ description: >
 publish_to: 'none'
-version: 1.10.11
+version: 1.10.12
 topics:
   - speech-recognition
@@ -30,7 +30,7 @@ dependencies:
   record: ^5.1.0
   url_launcher: ^6.2.6
-  sherpa_onnx: ^1.10.11
+  sherpa_onnx: ^1.10.12
   # sherpa_onnx:
     # path: ../../flutter/sherpa_onnx
--- a/flutter-examples/tts/pubspec.yaml
查看文件 @c2cc9de
+++ b/flutter-examples/tts/pubspec.yaml
查看文件 @c2cc9de
@@ -17,7 +17,7 @@ dependencies:
   cupertino_icons: ^1.0.6
   path_provider: ^2.1.3
   path: ^1.9.0
-  sherpa_onnx: ^1.10.11
+  sherpa_onnx: ^1.10.12
   url_launcher: ^6.2.6
   audioplayers: ^5.0.0
--- a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
查看文件 @c2cc9de
+++ b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
查看文件 @c2cc9de
@@ -491,6 +491,12 @@ typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function(
 typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
     Pointer<SherpaOnnxVoiceActivityDetector>);
+typedef SherpaOnnxVoiceActivityDetectorFlushNative = Void Function(
+    Pointer<SherpaOnnxVoiceActivityDetector>);
+
+typedef SherpaOnnxVoiceActivityDetectorFlush = void Function(
+    Pointer<SherpaOnnxVoiceActivityDetector>);
+
 typedef SherpaOnnxVoiceActivityDetectorFrontNative
     = Pointer<SherpaOnnxSpeechSegment> Function(
         Pointer<SherpaOnnxVoiceActivityDetector>);
@@ -779,6 +785,8 @@ class SherpaOnnxBindings {
   static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;
+  static SherpaOnnxVoiceActivityDetectorFlush? voiceActivityDetectorFlush;
+
   static SherpaOnnxCreateCircularBuffer? createCircularBuffer;
   static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;
@@ -1036,6 +1044,11 @@ class SherpaOnnxBindings {
             'SherpaOnnxVoiceActivityDetectorReset')
         .asFunction();
+    voiceActivityDetectorFlush ??= dynamicLibrary
+        .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFlushNative>>(
+            'SherpaOnnxVoiceActivityDetectorFlush')
+        .asFunction();
+
     createCircularBuffer ??= dynamicLibrary
         .lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
             'SherpaOnnxCreateCircularBuffer')
--- a/flutter/sherpa_onnx/lib/src/vad.dart
查看文件 @c2cc9de
+++ b/flutter/sherpa_onnx/lib/src/vad.dart
查看文件 @c2cc9de
@@ -207,6 +207,10 @@ class VoiceActivityDetector {
     SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
   }
+  void flush() {  // Call at the end of the input so that the last speech segment can be detected.
+    SherpaOnnxBindings.voiceActivityDetectorFlush?.call(ptr);  // null-aware call: does nothing while the binding is still null
+  }
+
   Pointer<SherpaOnnxVoiceActivityDetector> ptr;
   final VadModelConfig config;
 }
--- a/flutter/sherpa_onnx/pubspec.yaml
查看文件 @c2cc9de
+++ b/flutter/sherpa_onnx/pubspec.yaml
查看文件 @c2cc9de
@@ -17,7 +17,7 @@ topics:
   - voice-activity-detection
 # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
-version: 1.10.11
+version: 1.10.12
 homepage: https://github.com/k2-fsa/sherpa-onnx
@@ -30,19 +30,19 @@ dependencies:
   flutter:
     sdk: flutter
-  sherpa_onnx_android: ^1.10.11
+  sherpa_onnx_android: ^1.10.12
     # path: ../sherpa_onnx_android
-  sherpa_onnx_macos: ^1.10.11
+  sherpa_onnx_macos: ^1.10.12
     # path: ../sherpa_onnx_macos
-  sherpa_onnx_linux: ^1.10.11
+  sherpa_onnx_linux: ^1.10.12
     # path: ../sherpa_onnx_linux
     #
-  sherpa_onnx_windows: ^1.10.11
+  sherpa_onnx_windows: ^1.10.12
     # path: ../sherpa_onnx_windows
-  sherpa_onnx_ios: ^1.10.11
+  sherpa_onnx_ios: ^1.10.12
   # sherpa_onnx_ios:
     # path: ../sherpa_onnx_ios
--- a/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
查看文件 @c2cc9de
+++ b/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
查看文件 @c2cc9de
@@ -7,7 +7,7 @@
 # https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
 Pod::Spec.new do |s|
   s.name             = 'sherpa_onnx_ios'
-  s.version          = '1.10.11'
+  s.version          = '1.10.12'
   s.summary          = 'A new Flutter FFI plugin project.'
   s.description      = <<-DESC
 A new Flutter FFI plugin project.
--- a/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
查看文件 @c2cc9de
+++ b/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
查看文件 @c2cc9de
@@ -4,7 +4,7 @@
 #
 Pod::Spec.new do |s|
   s.name             = 'sherpa_onnx_macos'
-  s.version          = '1.10.11'
+  s.version          = '1.10.12'
   s.summary          = 'sherpa-onnx Flutter FFI plugin project.'
   s.description      = <<-DESC
 sherpa-onnx Flutter FFI plugin project.
--- a/java-api-examples/VadNonStreamingParaformer.java
查看文件 @c2cc9de
+++ b/java-api-examples/VadNonStreamingParaformer.java
查看文件 @c2cc9de
@@ -98,6 +98,25 @@ public class VadNonStreamingParaformer {
       }
     }
+    vad.flush();
+    while (!vad.empty()) {
+      SpeechSegment segment = vad.front();
+      float startTime = segment.getStart() / 16000.0f;
+      float duration = segment.getSamples().length / 16000.0f;
+
+      OfflineStream stream = recognizer.createStream();
+      stream.acceptWaveform(segment.getSamples(), 16000);
+      recognizer.decode(stream);
+      String text = recognizer.getResult(stream).getText();
+      stream.release();
+
+      if (!text.isEmpty()) {
+        System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
+      }
+
+      vad.pop();
+    }
+
     vad.release();
     recognizer.release();
   }
--- a/java-api-examples/VadRemoveSilence.java
查看文件 @c2cc9de
+++ b/java-api-examples/VadRemoveSilence.java
查看文件 @c2cc9de
@@ -59,6 +59,16 @@ public class VadRemoveSilence {
       }
     }
+    vad.flush();
+    while (!vad.empty()) {
+
+      // if you want to get the starting time of this segment, you can use
+      /* float startTime = vad.front().getStart() / 16000.0f; */
+
+      segments.add(vad.front().getSamples());
+      vad.pop();
+    }
+
     // get total number of samples
     int n = 0;
     for (float[] s : segments) {
--- a/nodejs-addon-examples/package.json
查看文件 @c2cc9de
+++ b/nodejs-addon-examples/package.json
查看文件 @c2cc9de
 {
   "dependencies": {
-    "sherpa-onnx-node": "^1.10.6"
+    "sherpa-onnx-node": "^1.10.12"
   }
 }
--- a/python-api-examples/vad-remove-non-speech-segments-from-file.py
查看文件 @c2cc9de
+++ b/python-api-examples/vad-remove-non-speech-segments-from-file.py
查看文件 @c2cc9de
@@ -105,6 +105,12 @@ def main():
             speech_samples.extend(vad.front.samples)
             vad.pop()
+    vad.flush()
+
+    while not vad.empty():
+        speech_samples.extend(vad.front.samples)
+        vad.pop()
+
     speech_samples = np.array(speech_samples, dtype=np.float32)
     sf.write(args.output, speech_samples, samplerate=sample_rate)
--- a/scripts/dart/sherpa-onnx-pubspec.yaml
查看文件 @c2cc9de
+++ b/scripts/dart/sherpa-onnx-pubspec.yaml
查看文件 @c2cc9de
@@ -17,7 +17,7 @@ topics:
   - voice-activity-detection
 # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
-version: 1.10.6
+version: 1.10.12
 homepage: https://github.com/k2-fsa/sherpa-onnx
--- a/scripts/dotnet/VoiceActivityDetector.cs
查看文件 @c2cc9de
+++ b/scripts/dotnet/VoiceActivityDetector.cs
查看文件 @c2cc9de
@@ -53,6 +53,11 @@ namespace SherpaOnnx
             SherpaOnnxVoiceActivityDetectorReset(_handle.Handle);
         }
+        public void Flush()
+        {
+            SherpaOnnxVoiceActivityDetectorFlush(_handle.Handle);
+        }
+
         public void Dispose()
         {
             Cleanup();
@@ -106,5 +111,7 @@ namespace SherpaOnnx
         [DllImport(Dll.Filename)]
         private static extern void SherpaOnnxVoiceActivityDetectorReset(IntPtr handle);
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxVoiceActivityDetectorFlush(IntPtr handle);
     }
 }
--- a/scripts/go/sherpa_onnx.go
查看文件 @c2cc9de
+++ b/scripts/go/sherpa_onnx.go
查看文件 @c2cc9de
@@ -856,6 +856,10 @@ func (vad *VoiceActivityDetector) Reset() {
 	C.SherpaOnnxVoiceActivityDetectorReset(vad.impl)
 }
+func (vad *VoiceActivityDetector) Flush() {
+	C.SherpaOnnxVoiceActivityDetectorFlush(vad.impl)
+}
+
 // Spoken language identification
 type SpokenLanguageIdentificationWhisperConfig struct {
--- a/scripts/node-addon-api/lib/vad.js
查看文件 @c2cc9de
+++ b/scripts/node-addon-api/lib/vad.js
查看文件 @c2cc9de
@@ -29,7 +29,7 @@ class CircularBuffer {
   }
   reset() {
-    return addon.circularBufferReset(this.handle);
+    addon.circularBufferReset(this.handle);
   }
 }
@@ -79,7 +79,11 @@ config = {
   }
   reset() {
-    return addon.VoiceActivityDetectorResetWrapper(this.handle);
+    addon.VoiceActivityDetectorResetWrapper(this.handle);
+  }
+
+  flush() {
+    addon.VoiceActivityDetectorFlushWrapper(this.handle);
   }
 }
--- a/scripts/node-addon-api/src/vad.cc
查看文件 @c2cc9de
+++ b/scripts/node-addon-api/src/vad.cc
查看文件 @c2cc9de
@@ -590,6 +590,31 @@ static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) {
   SherpaOnnxVoiceActivityDetectorReset(vad);
 }
+static void VoiceActivityDetectorFlushWrapper(const Napi::CallbackInfo &info) {  // N-API entry point: validates its single argument, then flushes the VAD it points to
+  Napi::Env env = info.Env();
+
+  if (info.Length() != 1) {  // exactly one argument (the VAD handle) is accepted
+    std::ostringstream os;
+    os << "Expect only 1 argument. Given: " << info.Length();
+
+    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();  // reported to JS as a TypeError
+
+    return;
+  }
+
+  if (!info[0].IsExternal()) {  // the handle must be a Napi::External value
+    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
+        .ThrowAsJavaScriptException();
+
+    return;
+  }
+
+  SherpaOnnxVoiceActivityDetector *vad =
+      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();  // unwrap the raw C-API pointer held by the External
+
+  SherpaOnnxVoiceActivityDetectorFlush(vad);  // delegate to the C API; nothing is returned to JS
+}
+
 void InitVad(Napi::Env env, Napi::Object exports) {
   exports.Set(Napi::String::New(env, "createCircularBuffer"),
               Napi::Function::New(env, CreateCircularBufferWrapper));
@@ -636,4 +661,7 @@ void InitVad(Napi::Env env, Napi::Object exports) {
   exports.Set(Napi::String::New(env, "voiceActivityDetectorReset"),
               Napi::Function::New(env, VoiceActivityDetectorResetWrapper));
+
+  exports.Set(Napi::String::New(env, "voiceActivityDetectorFlush"),
+              Napi::Function::New(env, VoiceActivityDetectorFlushWrapper));
 }
--- a/sherpa-onnx/c-api/c-api.cc
查看文件 @c2cc9de
+++ b/sherpa-onnx/c-api/c-api.cc
查看文件 @c2cc9de
@@ -876,6 +876,10 @@ void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) {
   p->impl->Reset();
 }
+void SherpaOnnxVoiceActivityDetectorFlush(SherpaOnnxVoiceActivityDetector *p) {
+  p->impl->Flush();
+}
+
 #if SHERPA_ONNX_ENABLE_TTS == 1
 struct SherpaOnnxOfflineTts {
   std::unique_ptr<sherpa_onnx::OfflineTts> impl;
--- a/sherpa-onnx/c-api/c-api.h
查看文件 @c2cc9de
+++ b/sherpa-onnx/c-api/c-api.h
查看文件 @c2cc9de
@@ -815,6 +815,9 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
     SherpaOnnxVoiceActivityDetector *p);
+SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorFlush(
+    SherpaOnnxVoiceActivityDetector *p);
+
 // ============================================================
 // For offline Text-to-Speech (i.e., non-streaming TTS)
 // ============================================================
--- a/sherpa-onnx/csrc/voice-activity-detector.cc
查看文件 @c2cc9de
+++ b/sherpa-onnx/csrc/voice-activity-detector.cc
查看文件 @c2cc9de
@@ -118,6 +118,29 @@ class VoiceActivityDetector::Impl {
     start_ = -1;
   }
+  void Flush() {  // Push the speech segment that is still in progress (if any) onto segments_.
+    if (start_ == -1 || buffer_.Size() == 0) {  // start_ == -1 means no speech is in progress; an empty buffer has nothing to emit
+      return;
+    }
+
+    int32_t end = buffer_.Tail() - model_->MinSilenceDurationSamples();  // segment end = buffer tail minus the model's minimum-silence length (in samples)
+    if (end <= start_) {  // no samples would be left in the segment
+      return;
+    }
+
+    std::vector<float> s = buffer_.Get(start_, end - start_);  // presumably Get(start, n): the end - start_ samples beginning at start_ -- confirm against CircularBuffer
+
+    SpeechSegment segment;
+
+    segment.start = start_;
+    segment.samples = std::move(s);
+
+    segments_.push(std::move(segment));  // the segment becomes available to callers through the segments_ queue
+
+    buffer_.Pop(end - buffer_.Head());  // presumably drops the buffered samples in front of `end` -- confirm against CircularBuffer::Pop
+    start_ = -1;  // return to the "no speech in progress" state
+  }
+
   bool IsSpeechDetected() const { return start_ != -1; }
   const VadModelConfig &GetConfig() const { return config_; }
@@ -164,7 +187,9 @@ const SpeechSegment &VoiceActivityDetector::Front() const {
   return impl_->Front();
 }
-void VoiceActivityDetector::Reset() { impl_->Reset(); }
+void VoiceActivityDetector::Reset() const { impl_->Reset(); }
+
+void VoiceActivityDetector::Flush() const { impl_->Flush(); }
 bool VoiceActivityDetector::IsSpeechDetected() const {
   return impl_->IsSpeechDetected();
--- a/sherpa-onnx/csrc/voice-activity-detector.h
查看文件 @c2cc9de
+++ b/sherpa-onnx/csrc/voice-activity-detector.h
查看文件 @c2cc9de
@@ -41,7 +41,11 @@ class VoiceActivityDetector {
   bool IsSpeechDetected() const;
-  void Reset();
+  void Reset() const;
+
+  // At the end of the utterance, you can invoke this method so that
+  // the last speech segment can be detected.
+  void Flush() const;
   const VadModelConfig &GetConfig() const;
--- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/Vad.java
查看文件 @c2cc9de
+++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/Vad.java
查看文件 @c2cc9de
@@ -46,6 +46,10 @@ public class Vad {
         reset(this.ptr);
     }
+    public void flush() {
+        flush(this.ptr);
+    }
+
     public SpeechSegment front() {
         Object[] arr = front(this.ptr);
         int start = (int) arr[0];
@@ -75,4 +79,6 @@ public class Vad {
     private native boolean isSpeechDetected(long ptr);
     private native void reset(long ptr);
+
+    private native void flush(long ptr);
 }
--- a/sherpa-onnx/jni/voice-activity-detector.cc
查看文件 @c2cc9de
+++ b/sherpa-onnx/jni/voice-activity-detector.cc
查看文件 @c2cc9de
@@ -173,3 +173,11 @@ JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(JNIEnv * /*env*/,
   auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
   model->Reset();
 }
+
+SHERPA_ONNX_EXTERN_C
+JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_flush(JNIEnv * /*env*/,
+                                                            jobject /*obj*/,
+                                                            jlong ptr) {
+  auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
+  model->Flush();
+}
--- a/sherpa-onnx/kotlin-api/Vad.kt
查看文件 @c2cc9de
+++ b/sherpa-onnx/kotlin-api/Vad.kt
查看文件 @c2cc9de
@@ -52,6 +52,8 @@ class Vad(
     fun reset() = reset(ptr)
+    fun flush() = flush(ptr)
+
     private external fun delete(ptr: Long)
     private external fun newFromAsset(
@@ -70,6 +72,7 @@ class Vad(
     private external fun front(ptr: Long): Array<Any>
     private external fun isSpeechDetected(ptr: Long): Boolean
     private external fun reset(ptr: Long)
+    private external fun flush(ptr: Long)
     companion object {
         init {
--- a/sherpa-onnx/python/csrc/voice-activity-detector.cc
查看文件 @c2cc9de
+++ b/sherpa-onnx/python/csrc/voice-activity-detector.cc
查看文件 @c2cc9de
@@ -38,6 +38,7 @@ void PybindVoiceActivityDetector(py::module *m) {
       .def("is_speech_detected", &PyClass::IsSpeechDetected,
            py::call_guard<py::gil_scoped_release>())
       .def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>())
+      .def("flush", &PyClass::Flush, py::call_guard<py::gil_scoped_release>())
       .def_property_readonly("front", &PyClass::Front);
 }
--- a/swift-api-examples/SherpaOnnx.swift
查看文件 @c2cc9de
+++ b/swift-api-examples/SherpaOnnx.swift
查看文件 @c2cc9de
@@ -633,6 +633,10 @@ class SherpaOnnxVoiceActivityDetectorWrapper {
   func reset() {
     SherpaOnnxVoiceActivityDetectorReset(vad)
   }
+
+  func flush() {
+    SherpaOnnxVoiceActivityDetectorFlush(vad)
+  }
 }
 // offline tts