Fangjun Kuang
Committed by GitHub

Add Flush to VAD so that the last segment can be detected. (#1099)

@@ -52,11 +52,6 @@ jobs: @@ -52,11 +52,6 @@ jobs:
52 cmake --build . --target install --config Release 52 cmake --build . --target install --config Release
53 rm -rf install/pkgconfig 53 rm -rf install/pkgconfig
54 54
55 - - uses: actions/upload-artifact@v4  
56 - with:  
57 - name: windows-${{ matrix.arch }}  
58 - path: ./build/install/lib/  
59 -  
60 - name: Create tar file 55 - name: Create tar file
61 shell: bash 56 shell: bash
62 run: | 57 run: |
@@ -72,6 +67,11 @@ jobs: @@ -72,6 +67,11 @@ jobs:
72 ls -lh *.tar.bz2 67 ls -lh *.tar.bz2
73 mv *.tar.bz2 ../ 68 mv *.tar.bz2 ../
74 69
  70 + - uses: actions/upload-artifact@v4
  71 + with:
  72 + name: windows-${{ matrix.arch }}
  73 + path: ./*.tar.bz2
  74 +
75 # https://huggingface.co/docs/hub/spaces-github-actions 75 # https://huggingface.co/docs/hub/spaces-github-actions
76 - name: Publish to huggingface 76 - name: Publish to huggingface
77 if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') 77 if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
@@ -88,7 +88,9 @@ jobs: @@ -88,7 +88,9 @@ jobs:
88 88
89 rm -rf huggingface 89 rm -rf huggingface
90 export GIT_CLONE_PROTECTION_ACTIVE=false 90 export GIT_CLONE_PROTECTION_ACTIVE=false
91 - GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface 91 + export GIT_LFS_SKIP_SMUDGE=1
  92 +
  93 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
92 94
93 cd huggingface 95 cd huggingface
94 mkdir -p windows-for-dotnet 96 mkdir -p windows-for-dotnet
  1 +## 1.10.12
  2 +
  3 +* Add Flush to VAD so that the last speech segment can be detected. See also
  4 + https://github.com/k2-fsa/sherpa-onnx/discussions/1077#discussioncomment-9979740
  5 +
1 ## 1.10.11 6 ## 1.10.11
2 7
3 * Support the iOS platform for Flutter. 8 * Support the iOS platform for Flutter.
@@ -10,8 +10,8 @@ project(sherpa-onnx) @@ -10,8 +10,8 @@ project(sherpa-onnx)
10 # Remember to update 10 # Remember to update
11 # ./nodejs-addon-examples 11 # ./nodejs-addon-examples
12 # ./dart-api-examples/ 12 # ./dart-api-examples/
13 -# ./sherpa-onnx/flutter/CHANGELOG.md  
14 -set(SHERPA_ONNX_VERSION "1.10.11") 13 +# ./CHANGELOG.md
  14 +set(SHERPA_ONNX_VERSION "1.10.12")
15 15
16 # Disable warning about 16 # Disable warning about
17 # 17 #
@@ -93,6 +93,28 @@ void main(List<String> arguments) async { @@ -93,6 +93,28 @@ void main(List<String> arguments) async {
93 } 93 }
94 } 94 }
95 95
  96 + vad.flush();
  97 + while (!vad.isEmpty()) {
  98 + final stream = recognizer.createStream();
  99 + final segment = vad.front();
  100 + stream.acceptWaveform(
  101 + samples: segment.samples, sampleRate: waveData.sampleRate);
  102 + recognizer.decode(stream);
  103 +
  104 + final result = recognizer.getResult(stream);
  105 +
  106 + final startTime = segment.start * 1.0 / waveData.sampleRate;
  107 + final duration = segment.samples.length * 1.0 / waveData.sampleRate;
  108 + final stopTime = startTime + duration;
  109 + if (result.text != '') {
  110 + print(
  111 + '${startTime.toStringAsPrecision(4)} -- ${stopTime.toStringAsPrecision(4)}: ${result.text}');
  112 + }
  113 +
  114 + stream.free();
  115 + vad.pop();
  116 + }
  117 +
96 vad.free(); 118 vad.free();
97 recognizer.free(); 119 recognizer.free();
98 } 120 }
@@ -10,7 +10,7 @@ environment: @@ -10,7 +10,7 @@ environment:
10 10
11 # Add regular dependencies here. 11 # Add regular dependencies here.
12 dependencies: 12 dependencies:
13 - sherpa_onnx: ^1.10.11 13 + sherpa_onnx: ^1.10.12
14 path: ^1.9.0 14 path: ^1.9.0
15 args: ^2.5.0 15 args: ^2.5.0
16 16
@@ -11,7 +11,7 @@ environment: @@ -11,7 +11,7 @@ environment:
11 11
12 # Add regular dependencies here. 12 # Add regular dependencies here.
13 dependencies: 13 dependencies:
14 - sherpa_onnx: ^1.10.11 14 + sherpa_onnx: ^1.10.12
15 path: ^1.9.0 15 path: ^1.9.0
16 args: ^2.5.0 16 args: ^2.5.0
17 17
@@ -8,7 +8,7 @@ environment: @@ -8,7 +8,7 @@ environment:
8 8
9 # Add regular dependencies here. 9 # Add regular dependencies here.
10 dependencies: 10 dependencies:
11 - sherpa_onnx: ^1.10.11 11 + sherpa_onnx: ^1.10.12
12 path: ^1.9.0 12 path: ^1.9.0
13 args: ^2.5.0 13 args: ^2.5.0
14 14
@@ -65,6 +65,12 @@ void main(List<String> arguments) async { @@ -65,6 +65,12 @@ void main(List<String> arguments) async {
65 } 65 }
66 } 66 }
67 67
  68 + vad.flush();
  69 + while (!vad.isEmpty()) {
  70 + allSamples.add(vad.front().samples);
  71 + vad.pop();
  72 + }
  73 +
68 vad.free(); 74 vad.free();
69 75
70 final s = Float32List.fromList(allSamples.expand((x) => x).toList()); 76 final s = Float32List.fromList(allSamples.expand((x) => x).toList());
@@ -9,7 +9,7 @@ environment: @@ -9,7 +9,7 @@ environment:
9 sdk: ^3.4.0 9 sdk: ^3.4.0
10 10
11 dependencies: 11 dependencies:
12 - sherpa_onnx: ^1.10.11 12 + sherpa_onnx: ^1.10.12
13 path: ^1.9.0 13 path: ^1.9.0
14 args: ^2.5.0 14 args: ^2.5.0
15 15
@@ -57,6 +57,26 @@ class VadNonStreamingAsrParaformer @@ -57,6 +57,26 @@ class VadNonStreamingAsrParaformer
57 } 57 }
58 } 58 }
59 } 59 }
  60 +
  61 + vad.Flush();
  62 +
  63 + while (!vad.IsEmpty()) {
  64 + SpeechSegment segment = vad.Front();
  65 + float startTime = segment.Start / (float)sampleRate;
  66 + float duration = segment.Samples.Length / (float)sampleRate;
  67 +
  68 + OfflineStream stream = recognizer.CreateStream();
  69 + stream.AcceptWaveform(sampleRate, segment.Samples);
  70 + recognizer.Decode(stream);
  71 + String text = stream.Result.Text;
  72 +
  73 + if (!String.IsNullOrEmpty(text)) {
  74 + Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
  75 + String.Format("{0:0.00}", startTime+duration), text);
  76 + }
  77 +
  78 + vad.Pop();
  79 + }
60 } 80 }
61 } 81 }
62 82
@@ -5,7 +5,7 @@ description: > @@ -5,7 +5,7 @@ description: >
5 5
6 publish_to: 'none' 6 publish_to: 'none'
7 7
8 -version: 1.10.11 8 +version: 1.10.12
9 9
10 topics: 10 topics:
11 - speech-recognition 11 - speech-recognition
@@ -30,7 +30,7 @@ dependencies: @@ -30,7 +30,7 @@ dependencies:
30 record: ^5.1.0 30 record: ^5.1.0
31 url_launcher: ^6.2.6 31 url_launcher: ^6.2.6
32 32
33 - sherpa_onnx: ^1.10.11 33 + sherpa_onnx: ^1.10.12
34 # sherpa_onnx: 34 # sherpa_onnx:
35 # path: ../../flutter/sherpa_onnx 35 # path: ../../flutter/sherpa_onnx
36 36
@@ -17,7 +17,7 @@ dependencies: @@ -17,7 +17,7 @@ dependencies:
17 cupertino_icons: ^1.0.6 17 cupertino_icons: ^1.0.6
18 path_provider: ^2.1.3 18 path_provider: ^2.1.3
19 path: ^1.9.0 19 path: ^1.9.0
20 - sherpa_onnx: ^1.10.11 20 + sherpa_onnx: ^1.10.12
21 url_launcher: ^6.2.6 21 url_launcher: ^6.2.6
22 audioplayers: ^5.0.0 22 audioplayers: ^5.0.0
23 23
@@ -491,6 +491,12 @@ typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function( @@ -491,6 +491,12 @@ typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function(
491 typedef SherpaOnnxVoiceActivityDetectorReset = void Function( 491 typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
492 Pointer<SherpaOnnxVoiceActivityDetector>); 492 Pointer<SherpaOnnxVoiceActivityDetector>);
493 493
  494 +typedef SherpaOnnxVoiceActivityDetectorFlushNative = Void Function(
  495 + Pointer<SherpaOnnxVoiceActivityDetector>);
  496 +
  497 +typedef SherpaOnnxVoiceActivityDetectorFlush = void Function(
  498 + Pointer<SherpaOnnxVoiceActivityDetector>);
  499 +
494 typedef SherpaOnnxVoiceActivityDetectorFrontNative 500 typedef SherpaOnnxVoiceActivityDetectorFrontNative
495 = Pointer<SherpaOnnxSpeechSegment> Function( 501 = Pointer<SherpaOnnxSpeechSegment> Function(
496 Pointer<SherpaOnnxVoiceActivityDetector>); 502 Pointer<SherpaOnnxVoiceActivityDetector>);
@@ -779,6 +785,8 @@ class SherpaOnnxBindings { @@ -779,6 +785,8 @@ class SherpaOnnxBindings {
779 785
780 static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset; 786 static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;
781 787
  788 + static SherpaOnnxVoiceActivityDetectorFlush? voiceActivityDetectorFlush;
  789 +
782 static SherpaOnnxCreateCircularBuffer? createCircularBuffer; 790 static SherpaOnnxCreateCircularBuffer? createCircularBuffer;
783 791
784 static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer; 792 static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;
@@ -1036,6 +1044,11 @@ class SherpaOnnxBindings { @@ -1036,6 +1044,11 @@ class SherpaOnnxBindings {
1036 'SherpaOnnxVoiceActivityDetectorReset') 1044 'SherpaOnnxVoiceActivityDetectorReset')
1037 .asFunction(); 1045 .asFunction();
1038 1046
  1047 + voiceActivityDetectorFlush ??= dynamicLibrary
  1048 + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFlushNative>>(
  1049 + 'SherpaOnnxVoiceActivityDetectorFlush')
  1050 + .asFunction();
  1051 +
1039 createCircularBuffer ??= dynamicLibrary 1052 createCircularBuffer ??= dynamicLibrary
1040 .lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>( 1053 .lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
1041 'SherpaOnnxCreateCircularBuffer') 1054 'SherpaOnnxCreateCircularBuffer')
@@ -207,6 +207,10 @@ class VoiceActivityDetector { @@ -207,6 +207,10 @@ class VoiceActivityDetector {
207 SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr); 207 SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
208 } 208 }
209 209
  210 + void flush() {
  211 + SherpaOnnxBindings.voiceActivityDetectorFlush?.call(ptr);
  212 + }
  213 +
210 Pointer<SherpaOnnxVoiceActivityDetector> ptr; 214 Pointer<SherpaOnnxVoiceActivityDetector> ptr;
211 final VadModelConfig config; 215 final VadModelConfig config;
212 } 216 }
@@ -17,7 +17,7 @@ topics: @@ -17,7 +17,7 @@ topics:
17 - voice-activity-detection 17 - voice-activity-detection
18 18
19 # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec 19 # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
20 -version: 1.10.11 20 +version: 1.10.12
21 21
22 homepage: https://github.com/k2-fsa/sherpa-onnx 22 homepage: https://github.com/k2-fsa/sherpa-onnx
23 23
@@ -30,19 +30,19 @@ dependencies: @@ -30,19 +30,19 @@ dependencies:
30 flutter: 30 flutter:
31 sdk: flutter 31 sdk: flutter
32 32
33 - sherpa_onnx_android: ^1.10.11 33 + sherpa_onnx_android: ^1.10.12
34 # path: ../sherpa_onnx_android 34 # path: ../sherpa_onnx_android
35 35
36 - sherpa_onnx_macos: ^1.10.11 36 + sherpa_onnx_macos: ^1.10.12
37 # path: ../sherpa_onnx_macos 37 # path: ../sherpa_onnx_macos
38 38
39 - sherpa_onnx_linux: ^1.10.11 39 + sherpa_onnx_linux: ^1.10.12
40 # path: ../sherpa_onnx_linux 40 # path: ../sherpa_onnx_linux
41 # 41 #
42 - sherpa_onnx_windows: ^1.10.11 42 + sherpa_onnx_windows: ^1.10.12
43 # path: ../sherpa_onnx_windows 43 # path: ../sherpa_onnx_windows
44 44
45 - sherpa_onnx_ios: ^1.10.11 45 + sherpa_onnx_ios: ^1.10.12
46 # sherpa_onnx_ios: 46 # sherpa_onnx_ios:
47 # path: ../sherpa_onnx_ios 47 # path: ../sherpa_onnx_ios
48 48
@@ -7,7 +7,7 @@ @@ -7,7 +7,7 @@
7 # https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c 7 # https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
8 Pod::Spec.new do |s| 8 Pod::Spec.new do |s|
9 s.name = 'sherpa_onnx_ios' 9 s.name = 'sherpa_onnx_ios'
10 - s.version = '1.10.11' 10 + s.version = '1.10.12'
11 s.summary = 'A new Flutter FFI plugin project.' 11 s.summary = 'A new Flutter FFI plugin project.'
12 s.description = <<-DESC 12 s.description = <<-DESC
13 A new Flutter FFI plugin project. 13 A new Flutter FFI plugin project.
@@ -4,7 +4,7 @@ @@ -4,7 +4,7 @@
4 # 4 #
5 Pod::Spec.new do |s| 5 Pod::Spec.new do |s|
6 s.name = 'sherpa_onnx_macos' 6 s.name = 'sherpa_onnx_macos'
7 - s.version = '1.10.11' 7 + s.version = '1.10.12'
8 s.summary = 'sherpa-onnx Flutter FFI plugin project.' 8 s.summary = 'sherpa-onnx Flutter FFI plugin project.'
9 s.description = <<-DESC 9 s.description = <<-DESC
10 sherpa-onnx Flutter FFI plugin project. 10 sherpa-onnx Flutter FFI plugin project.
@@ -98,6 +98,25 @@ public class VadNonStreamingParaformer { @@ -98,6 +98,25 @@ public class VadNonStreamingParaformer {
98 } 98 }
99 } 99 }
100 100
  101 + vad.flush();
  102 + while (!vad.empty()) {
  103 + SpeechSegment segment = vad.front();
  104 + float startTime = segment.getStart() / 16000.0f;
  105 + float duration = segment.getSamples().length / 16000.0f;
  106 +
  107 + OfflineStream stream = recognizer.createStream();
  108 + stream.acceptWaveform(segment.getSamples(), 16000);
  109 + recognizer.decode(stream);
  110 + String text = recognizer.getResult(stream).getText();
  111 + stream.release();
  112 +
  113 + if (!text.isEmpty()) {
  114 + System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
  115 + }
  116 +
  117 + vad.pop();
  118 + }
  119 +
101 vad.release(); 120 vad.release();
102 recognizer.release(); 121 recognizer.release();
103 } 122 }
@@ -59,6 +59,16 @@ public class VadRemoveSilence { @@ -59,6 +59,16 @@ public class VadRemoveSilence {
59 } 59 }
60 } 60 }
61 61
  62 + vad.flush();
  63 + while (!vad.empty()) {
  64 +
  65 + // if you want to get the starting time of this segment, you can use
  66 + /* float startTime = vad.front().getStart() / 16000.0f; */
  67 +
  68 + segments.add(vad.front().getSamples());
  69 + vad.pop();
  70 + }
  71 +
62 // get total number of samples 72 // get total number of samples
63 int n = 0; 73 int n = 0;
64 for (float[] s : segments) { 74 for (float[] s : segments) {
1 { 1 {
2 "dependencies": { 2 "dependencies": {
3 - "sherpa-onnx-node": "^1.10.6" 3 + "sherpa-onnx-node": "^1.10.12"
4 } 4 }
5 } 5 }
@@ -105,6 +105,12 @@ def main(): @@ -105,6 +105,12 @@ def main():
105 speech_samples.extend(vad.front.samples) 105 speech_samples.extend(vad.front.samples)
106 vad.pop() 106 vad.pop()
107 107
  108 + vad.flush()
  109 +
  110 + while not vad.empty():
  111 + speech_samples.extend(vad.front.samples)
  112 + vad.pop()
  113 +
108 speech_samples = np.array(speech_samples, dtype=np.float32) 114 speech_samples = np.array(speech_samples, dtype=np.float32)
109 115
110 sf.write(args.output, speech_samples, samplerate=sample_rate) 116 sf.write(args.output, speech_samples, samplerate=sample_rate)
@@ -17,7 +17,7 @@ topics: @@ -17,7 +17,7 @@ topics:
17 - voice-activity-detection 17 - voice-activity-detection
18 18
19 # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec 19 # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
20 -version: 1.10.6 20 +version: 1.10.12
21 21
22 homepage: https://github.com/k2-fsa/sherpa-onnx 22 homepage: https://github.com/k2-fsa/sherpa-onnx
23 23
@@ -53,6 +53,11 @@ namespace SherpaOnnx @@ -53,6 +53,11 @@ namespace SherpaOnnx
53 SherpaOnnxVoiceActivityDetectorReset(_handle.Handle); 53 SherpaOnnxVoiceActivityDetectorReset(_handle.Handle);
54 } 54 }
55 55
  56 + public void Flush()
  57 + {
  58 + SherpaOnnxVoiceActivityDetectorFlush(_handle.Handle);
  59 + }
  60 +
56 public void Dispose() 61 public void Dispose()
57 { 62 {
58 Cleanup(); 63 Cleanup();
@@ -106,5 +111,7 @@ namespace SherpaOnnx @@ -106,5 +111,7 @@ namespace SherpaOnnx
106 [DllImport(Dll.Filename)] 111 [DllImport(Dll.Filename)]
107 private static extern void SherpaOnnxVoiceActivityDetectorReset(IntPtr handle); 112 private static extern void SherpaOnnxVoiceActivityDetectorReset(IntPtr handle);
108 113
  114 + [DllImport(Dll.Filename)]
  115 + private static extern void SherpaOnnxVoiceActivityDetectorFlush(IntPtr handle);
109 } 116 }
110 } 117 }
@@ -856,6 +856,10 @@ func (vad *VoiceActivityDetector) Reset() { @@ -856,6 +856,10 @@ func (vad *VoiceActivityDetector) Reset() {
856 C.SherpaOnnxVoiceActivityDetectorReset(vad.impl) 856 C.SherpaOnnxVoiceActivityDetectorReset(vad.impl)
857 } 857 }
858 858
  859 +func (vad *VoiceActivityDetector) Flush() {
  860 + C.SherpaOnnxVoiceActivityDetectorFlush(vad.impl)
  861 +}
  862 +
859 // Spoken language identification 863 // Spoken language identification
860 864
861 type SpokenLanguageIdentificationWhisperConfig struct { 865 type SpokenLanguageIdentificationWhisperConfig struct {
@@ -29,7 +29,7 @@ class CircularBuffer { @@ -29,7 +29,7 @@ class CircularBuffer {
29 } 29 }
30 30
31 reset() { 31 reset() {
32 - return addon.circularBufferReset(this.handle); 32 + addon.circularBufferReset(this.handle);
33 } 33 }
34 } 34 }
35 35
@@ -79,7 +79,11 @@ config = { @@ -79,7 +79,11 @@ config = {
79 } 79 }
80 80
81 reset() { 81 reset() {
82 - return addon.VoiceActivityDetectorResetWrapper(this.handle); 82 + addon.VoiceActivityDetectorResetWrapper(this.handle);
  83 + }
  84 +
  85 + flush() {
  86 + addon.VoiceActivityDetectorFlushWrapper(this.handle);
83 } 87 }
84 } 88 }
85 89
@@ -590,6 +590,31 @@ static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) { @@ -590,6 +590,31 @@ static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) {
590 SherpaOnnxVoiceActivityDetectorReset(vad); 590 SherpaOnnxVoiceActivityDetectorReset(vad);
591 } 591 }
592 592
  593 +static void VoiceActivityDetectorFlushWrapper(const Napi::CallbackInfo &info) {
  594 + Napi::Env env = info.Env();
  595 +
  596 + if (info.Length() != 1) {
  597 + std::ostringstream os;
  598 + os << "Expect only 1 argument. Given: " << info.Length();
  599 +
  600 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  601 +
  602 + return;
  603 + }
  604 +
  605 + if (!info[0].IsExternal()) {
  606 + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
  607 + .ThrowAsJavaScriptException();
  608 +
  609 + return;
  610 + }
  611 +
  612 + SherpaOnnxVoiceActivityDetector *vad =
  613 + info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
  614 +
  615 + SherpaOnnxVoiceActivityDetectorFlush(vad);
  616 +}
  617 +
593 void InitVad(Napi::Env env, Napi::Object exports) { 618 void InitVad(Napi::Env env, Napi::Object exports) {
594 exports.Set(Napi::String::New(env, "createCircularBuffer"), 619 exports.Set(Napi::String::New(env, "createCircularBuffer"),
595 Napi::Function::New(env, CreateCircularBufferWrapper)); 620 Napi::Function::New(env, CreateCircularBufferWrapper));
@@ -636,4 +661,7 @@ void InitVad(Napi::Env env, Napi::Object exports) { @@ -636,4 +661,7 @@ void InitVad(Napi::Env env, Napi::Object exports) {
636 661
637 exports.Set(Napi::String::New(env, "voiceActivityDetectorReset"), 662 exports.Set(Napi::String::New(env, "voiceActivityDetectorReset"),
638 Napi::Function::New(env, VoiceActivityDetectorResetWrapper)); 663 Napi::Function::New(env, VoiceActivityDetectorResetWrapper));
  664 +
  665 + exports.Set(Napi::String::New(env, "voiceActivityDetectorFlush"),
  666 + Napi::Function::New(env, VoiceActivityDetectorFlushWrapper));
639 } 667 }
@@ -876,6 +876,10 @@ void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) { @@ -876,6 +876,10 @@ void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) {
876 p->impl->Reset(); 876 p->impl->Reset();
877 } 877 }
878 878
  879 +void SherpaOnnxVoiceActivityDetectorFlush(SherpaOnnxVoiceActivityDetector *p) {
  880 + p->impl->Flush();
  881 +}
  882 +
879 #if SHERPA_ONNX_ENABLE_TTS == 1 883 #if SHERPA_ONNX_ENABLE_TTS == 1
880 struct SherpaOnnxOfflineTts { 884 struct SherpaOnnxOfflineTts {
881 std::unique_ptr<sherpa_onnx::OfflineTts> impl; 885 std::unique_ptr<sherpa_onnx::OfflineTts> impl;
@@ -815,6 +815,9 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment( @@ -815,6 +815,9 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
815 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset( 815 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
816 SherpaOnnxVoiceActivityDetector *p); 816 SherpaOnnxVoiceActivityDetector *p);
817 817
  818 +SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorFlush(
  819 + SherpaOnnxVoiceActivityDetector *p);
  820 +
818 // ============================================================ 821 // ============================================================
819 // For offline Text-to-Speech (i.e., non-streaming TTS) 822 // For offline Text-to-Speech (i.e., non-streaming TTS)
820 // ============================================================ 823 // ============================================================
@@ -118,6 +118,29 @@ class VoiceActivityDetector::Impl { @@ -118,6 +118,29 @@ class VoiceActivityDetector::Impl {
118 start_ = -1; 118 start_ = -1;
119 } 119 }
120 120
  121 + void Flush() {
  122 + if (start_ == -1 || buffer_.Size() == 0) {
  123 + return;
  124 + }
  125 +
  126 + int32_t end = buffer_.Tail() - model_->MinSilenceDurationSamples();
  127 + if (end <= start_) {
  128 + return;
  129 + }
  130 +
  131 + std::vector<float> s = buffer_.Get(start_, end - start_);
  132 +
  133 + SpeechSegment segment;
  134 +
  135 + segment.start = start_;
  136 + segment.samples = std::move(s);
  137 +
  138 + segments_.push(std::move(segment));
  139 +
  140 + buffer_.Pop(end - buffer_.Head());
  141 + start_ = -1;
  142 + }
  143 +
121 bool IsSpeechDetected() const { return start_ != -1; } 144 bool IsSpeechDetected() const { return start_ != -1; }
122 145
123 const VadModelConfig &GetConfig() const { return config_; } 146 const VadModelConfig &GetConfig() const { return config_; }
@@ -164,7 +187,9 @@ const SpeechSegment &VoiceActivityDetector::Front() const { @@ -164,7 +187,9 @@ const SpeechSegment &VoiceActivityDetector::Front() const {
164 return impl_->Front(); 187 return impl_->Front();
165 } 188 }
166 189
167 -void VoiceActivityDetector::Reset() { impl_->Reset(); } 190 +void VoiceActivityDetector::Reset() const { impl_->Reset(); }
  191 +
  192 +void VoiceActivityDetector::Flush() const { impl_->Flush(); }
168 193
169 bool VoiceActivityDetector::IsSpeechDetected() const { 194 bool VoiceActivityDetector::IsSpeechDetected() const {
170 return impl_->IsSpeechDetected(); 195 return impl_->IsSpeechDetected();
@@ -41,7 +41,11 @@ class VoiceActivityDetector { @@ -41,7 +41,11 @@ class VoiceActivityDetector {
41 41
42 bool IsSpeechDetected() const; 42 bool IsSpeechDetected() const;
43 43
44 - void Reset(); 44 + void Reset() const;
  45 +
  46 + // At the end of the utterance, you can invoke this method so that
  47 + // the last speech segment can be detected.
  48 + void Flush() const;
45 49
46 const VadModelConfig &GetConfig() const; 50 const VadModelConfig &GetConfig() const;
47 51
@@ -46,6 +46,10 @@ public class Vad { @@ -46,6 +46,10 @@ public class Vad {
46 reset(this.ptr); 46 reset(this.ptr);
47 } 47 }
48 48
  49 + public void flush() {
  50 + flush(this.ptr);
  51 + }
  52 +
49 public SpeechSegment front() { 53 public SpeechSegment front() {
50 Object[] arr = front(this.ptr); 54 Object[] arr = front(this.ptr);
51 int start = (int) arr[0]; 55 int start = (int) arr[0];
@@ -75,4 +79,6 @@ public class Vad { @@ -75,4 +79,6 @@ public class Vad {
75 private native boolean isSpeechDetected(long ptr); 79 private native boolean isSpeechDetected(long ptr);
76 80
77 private native void reset(long ptr); 81 private native void reset(long ptr);
  82 +
  83 + private native void flush(long ptr);
78 } 84 }
@@ -173,3 +173,11 @@ JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(JNIEnv * /*env*/, @@ -173,3 +173,11 @@ JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(JNIEnv * /*env*/,
173 auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr); 173 auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
174 model->Reset(); 174 model->Reset();
175 } 175 }
  176 +
  177 +SHERPA_ONNX_EXTERN_C
  178 +JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_flush(JNIEnv * /*env*/,
  179 + jobject /*obj*/,
  180 + jlong ptr) {
  181 + auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
  182 + model->Flush();
  183 +}
@@ -52,6 +52,8 @@ class Vad( @@ -52,6 +52,8 @@ class Vad(
52 52
53 fun reset() = reset(ptr) 53 fun reset() = reset(ptr)
54 54
  55 + fun flush() = flush(ptr)
  56 +
55 private external fun delete(ptr: Long) 57 private external fun delete(ptr: Long)
56 58
57 private external fun newFromAsset( 59 private external fun newFromAsset(
@@ -70,6 +72,7 @@ class Vad( @@ -70,6 +72,7 @@ class Vad(
70 private external fun front(ptr: Long): Array<Any> 72 private external fun front(ptr: Long): Array<Any>
71 private external fun isSpeechDetected(ptr: Long): Boolean 73 private external fun isSpeechDetected(ptr: Long): Boolean
72 private external fun reset(ptr: Long) 74 private external fun reset(ptr: Long)
  75 + private external fun flush(ptr: Long)
73 76
74 companion object { 77 companion object {
75 init { 78 init {
@@ -38,6 +38,7 @@ void PybindVoiceActivityDetector(py::module *m) { @@ -38,6 +38,7 @@ void PybindVoiceActivityDetector(py::module *m) {
38 .def("is_speech_detected", &PyClass::IsSpeechDetected, 38 .def("is_speech_detected", &PyClass::IsSpeechDetected,
39 py::call_guard<py::gil_scoped_release>()) 39 py::call_guard<py::gil_scoped_release>())
40 .def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>()) 40 .def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>())
  41 + .def("flush", &PyClass::Flush, py::call_guard<py::gil_scoped_release>())
41 .def_property_readonly("front", &PyClass::Front); 42 .def_property_readonly("front", &PyClass::Front);
42 } 43 }
43 44
@@ -633,6 +633,10 @@ class SherpaOnnxVoiceActivityDetectorWrapper { @@ -633,6 +633,10 @@ class SherpaOnnxVoiceActivityDetectorWrapper {
633 func reset() { 633 func reset() {
634 SherpaOnnxVoiceActivityDetectorReset(vad) 634 SherpaOnnxVoiceActivityDetectorReset(vad)
635 } 635 }
  636 +
  637 + func flush() {
  638 + SherpaOnnxVoiceActivityDetectorFlush(vad)
  639 + }
636 } 640 }
637 641
638 // offline tts 642 // offline tts