正在显示
11 个修改的文件
包含
598 行增加
和
14 行删除
| @@ -123,6 +123,7 @@ jobs: | @@ -123,6 +123,7 @@ jobs: | ||
| 123 | pushd example/assets | 123 | pushd example/assets |
| 124 | 124 | ||
| 125 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx | 125 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx |
| 126 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 126 | git clone https://github.com/csukuangfj/sr-data | 127 | git clone https://github.com/csukuangfj/sr-data |
| 127 | 128 | ||
| 128 | rm -rf sr-data/.git | 129 | rm -rf sr-data/.git |
| @@ -6,6 +6,7 @@ | @@ -6,6 +6,7 @@ | ||
| 6 | # switch to this directory and run | 6 | # switch to this directory and run |
| 7 | 7 | ||
| 8 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx | 8 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx |
| 9 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 9 | git clone https://github.com/csukuangfj/sr-data | 10 | git clone https://github.com/csukuangfj/sr-data |
| 10 | 11 | ||
| 11 | rm -rf sr-data/.git | 12 | rm -rf sr-data/.git |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 2 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| 3 | import 'package:flutter/material.dart'; | 3 | import 'package:flutter/material.dart'; |
| 4 | + | ||
| 4 | import "./speaker_identification_test.dart"; | 5 | import "./speaker_identification_test.dart"; |
| 6 | +import "./vad_test.dart"; | ||
| 5 | 7 | ||
| 6 | void main() { | 8 | void main() { |
| 7 | runApp(const MyApp()); | 9 | runApp(const MyApp()); |
| @@ -51,6 +53,7 @@ class _MyHomePageState extends State<MyHomePage> { | @@ -51,6 +53,7 @@ class _MyHomePageState extends State<MyHomePage> { | ||
| 51 | if (_counter <= 10) { | 53 | if (_counter <= 10) { |
| 52 | sherpa_onnx.initBindings(); | 54 | sherpa_onnx.initBindings(); |
| 53 | await testSpeakerID(); | 55 | await testSpeakerID(); |
| 56 | + // await testVad(); | ||
| 54 | } | 57 | } |
| 55 | 58 | ||
| 56 | setState(() { | 59 | setState(() { |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:typed_data'; | ||
| 3 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 4 | +import './utils.dart'; | ||
| 5 | + | ||
/// Smoke-tests the VAD bindings exposed by `package:sherpa_onnx`.
///
/// Copies the bundled `assets/silero_vad.onnx` model with `copyAssetFile`,
/// creates and immediately frees a `VoiceActivityDetector`, and then checks
/// push/get/pop/reset on a `CircularBuffer` using a handful of samples.
///
/// The checks are written with `assert`, so they only run when assertions are
/// enabled (e.g. in debug builds).
Future<void> testVad() async {
  final src = 'assets/silero_vad.onnx';
  final modelPath = await copyAssetFile(src: src, dst: 'silero_vad.onnx');

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(model: modelPath);
  final config = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: config, bufferSizeInSeconds: 10);
  print('before vad.free(): ${vad.ptr}');
  vad.free();
  print('after vad.free(): ${vad.ptr}');

  final buffer = sherpa_onnx.CircularBuffer(capacity: 16000 * 2);
  try {
    final d = Float32List.fromList([0, 10, 20, 30]);
    buffer.push(d);
    assert(d.length == buffer.size, '${d.length} vs ${buffer.size}');

    final f = Float32List.fromList([-5, 100.25, 599]);
    buffer.push(f);

    assert(buffer.size == d.length + f.length);
    final g = buffer.get(startIndex: 0, n: 5);

    assert(g.length == 5);
    assert(g[0] == 0);
    assert(g[1] == 10);
    assert(g[2] == 20);
    assert(g[3] == 30);
    assert(g[4] == -5);

    // Reading must not consume anything.
    assert(buffer.size == d.length + f.length);

    buffer.pop(3);
    assert(buffer.size == d.length + f.length - 3);

    // After popping three samples the remaining ones start at `head`.
    final h = buffer.get(startIndex: buffer.head, n: 4);
    assert(h.length == 4);
    assert(h[0] == 30);
    assert(h[1] == -5);
    assert(h[2] == 100.25);
    assert(h[3] == 599);

    buffer.reset();

    assert(buffer.size == 0);
    assert(buffer.head == 0);
  } finally {
    // Release the native buffer even when one of the assertions above fails.
    print('before free: ${buffer.ptr}');
    buffer.free();
    print('after free: ${buffer.ptr}');
  }
}
| @@ -2,10 +2,11 @@ | @@ -2,10 +2,11 @@ | ||
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | import 'dart:ffi'; | 3 | import 'dart:ffi'; |
| 4 | 4 | ||
| 5 | -import 'src/sherpa_onnx_bindings.dart'; | ||
| 6 | -export 'src/speaker_identification.dart'; | ||
| 7 | export 'src/online_stream.dart'; | 5 | export 'src/online_stream.dart'; |
| 6 | +export 'src/speaker_identification.dart'; | ||
| 7 | +export 'src/vad.dart'; | ||
| 8 | export 'src/wave_reader.dart'; | 8 | export 'src/wave_reader.dart'; |
| 9 | +import 'src/sherpa_onnx_bindings.dart'; | ||
| 9 | 10 | ||
| 10 | final DynamicLibrary _dylib = () { | 11 | final DynamicLibrary _dylib = () { |
| 11 | if (Platform.isIOS) { | 12 | if (Platform.isIOS) { |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | -import 'dart:typed_data'; | ||
| 3 | import 'dart:ffi'; | 2 | import 'dart:ffi'; |
| 3 | +import 'dart:typed_data'; | ||
| 4 | import 'package:ffi/ffi.dart'; | 4 | import 'package:ffi/ffi.dart'; |
| 5 | -import "./sherpa_onnx_bindings.dart"; | 5 | + |
| 6 | +import './sherpa_onnx_bindings.dart'; | ||
| 6 | 7 | ||
| 7 | class OnlineStream { | 8 | class OnlineStream { |
| 8 | /// The user has to call OnlineStream.free() to avoid memory leak. | 9 | /// The user has to call OnlineStream.free() to avoid memory leak. |
| @@ -2,6 +2,47 @@ | @@ -2,6 +2,47 @@ | ||
| 2 | import 'dart:ffi'; | 2 | import 'dart:ffi'; |
| 3 | import 'package:ffi/ffi.dart'; | 3 | import 'package:ffi/ffi.dart'; |
| 4 | 4 | ||
/// FFI struct embedded by value as the `sileroVad` member of
/// [SherpaOnnxVadModelConfig].
///
/// NOTE(review): field order and native types presumably mirror the C struct
/// of the same name; confirm against the C header before reordering anything.
final class SherpaOnnxSileroVadModelConfig extends Struct {
  /// Native UTF-8 string holding the model path.
  external Pointer<Utf8> model;

  @Float()
  external double threshold;

  @Float()
  external double minSilenceDuration;

  @Float()
  external double minSpeechDuration;

  @Int32()
  external int windowSize;
}
| 20 | + | ||
/// FFI struct passed by pointer to the native
/// `SherpaOnnxCreateVoiceActivityDetector` function.
///
/// NOTE(review): field order and native types presumably mirror the C struct
/// of the same name; confirm against the C header before reordering anything.
final class SherpaOnnxVadModelConfig extends Struct {
  /// Nested by value, not by pointer.
  external SherpaOnnxSileroVadModelConfig sileroVad;

  @Int32()
  external int sampleRate;

  @Int32()
  external int numThreads;

  /// Native UTF-8 string naming the execution provider.
  external Pointer<Utf8> provider;

  /// Boolean flag stored as a 32-bit integer.
  @Int32()
  external int debug;
}
| 35 | + | ||
/// FFI struct returned by pointer from the native
/// `SherpaOnnxVoiceActivityDetectorFront` function.
final class SherpaOnnxSpeechSegment extends Struct {
  // NOTE(review): presumably the index of the segment's first sample in the
  // input stream; confirm against the native API.
  @Int32()
  external int start;

  /// Pointer to the first of [n] float samples.
  external Pointer<Float> samples;

  /// Number of floats reachable through [samples].
  @Int32()
  external int n;
}
| 45 | + | ||
| 5 | final class SherpaOnnxWave extends Struct { | 46 | final class SherpaOnnxWave extends Struct { |
| 6 | external Pointer<Float> samples; | 47 | external Pointer<Float> samples; |
| 7 | 48 | ||
| @@ -24,17 +65,136 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { | @@ -24,17 +65,136 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { | ||
| 24 | external Pointer<Utf8> provider; | 65 | external Pointer<Utf8> provider; |
| 25 | } | 66 | } |
| 26 | 67 | ||
| 68 | +final class SherpaOnnxCircularBuffer extends Opaque {} | ||
| 69 | + | ||
| 70 | +final class SherpaOnnxVoiceActivityDetector extends Opaque {} | ||
| 71 | + | ||
| 27 | final class SherpaOnnxOnlineStream extends Opaque {} | 72 | final class SherpaOnnxOnlineStream extends Opaque {} |
| 28 | 73 | ||
| 29 | final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} | 74 | final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} |
| 30 | 75 | ||
| 31 | final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} | 76 | final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} |
| 32 | 77 | ||
// Function signatures for the voice activity detector and circular buffer.
//
// Each `...Native` typedef spells the signature with dart:ffi native types and
// is the type used when looking the symbol up in the dynamic library. The
// typedef with the same name minus the `Native` suffix is the Dart-callable
// form stored in `SherpaOnnxBindings`.

// ---- Voice activity detector ----

typedef SherpaOnnxCreateVoiceActivityDetectorNative
    = Pointer<SherpaOnnxVoiceActivityDetector> Function(
        Pointer<SherpaOnnxVadModelConfig>, Float);

typedef SherpaOnnxCreateVoiceActivityDetector
    = Pointer<SherpaOnnxVoiceActivityDetector> Function(
        Pointer<SherpaOnnxVadModelConfig>, double);

typedef SherpaOnnxDestroyVoiceActivityDetectorNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxDestroyVoiceActivityDetector = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorAcceptWaveformNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>, Pointer<Float>, Int32);

typedef SherpaOnnxVoiceActivityDetectorAcceptWaveform = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>, Pointer<Float>, int);

typedef SherpaOnnxVoiceActivityDetectorEmptyNative = Int32 Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorEmpty = int Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorDetectedNative = Int32 Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorDetected = int Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorPopNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorPop = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorClearNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorClear = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorFrontNative
    = Pointer<SherpaOnnxSpeechSegment> Function(
        Pointer<SherpaOnnxVoiceActivityDetector>);

// Only pointer types appear in this signature, so the Dart-side type is the
// same as the native one.
typedef SherpaOnnxVoiceActivityDetectorFront
    = SherpaOnnxVoiceActivityDetectorFrontNative;

typedef SherpaOnnxDestroySpeechSegmentNative = Void Function(
    Pointer<SherpaOnnxSpeechSegment>);

typedef SherpaOnnxDestroySpeechSegment = void Function(
    Pointer<SherpaOnnxSpeechSegment>);

// ---- Circular buffer ----

typedef SherpaOnnxCreateCircularBufferNative = Pointer<SherpaOnnxCircularBuffer>
    Function(Int32);

typedef SherpaOnnxCreateCircularBuffer = Pointer<SherpaOnnxCircularBuffer>
    Function(int);

typedef SherpaOnnxDestroyCircularBufferNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxDestroyCircularBuffer = void Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferPushNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, Int32);

typedef SherpaOnnxCircularBufferPush = void Function(
    Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, int);

typedef SherpaOnnxCircularBufferGetNative = Pointer<Float> Function(
    Pointer<SherpaOnnxCircularBuffer>, Int32, Int32);

typedef SherpaOnnxCircularBufferGet = Pointer<Float> Function(
    Pointer<SherpaOnnxCircularBuffer>, int, int);

// Takes a float pointer rather than a buffer handle: it is used to release the
// array returned by the `Get` function above.
typedef SherpaOnnxCircularBufferFreeNative = Void Function(Pointer<Float>);

typedef SherpaOnnxCircularBufferFree = void Function(Pointer<Float>);

typedef SherpaOnnxCircularBufferPopNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>, Int32);

typedef SherpaOnnxCircularBufferPop = void Function(
    Pointer<SherpaOnnxCircularBuffer>, int);

typedef SherpaOnnxCircularBufferSizeNative = Int32 Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferSize = int Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferHeadNative = Int32 Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferHead = int Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferResetNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferReset = void Function(
    Pointer<SherpaOnnxCircularBuffer>);
| 192 | + | ||
| 33 | typedef SherpaOnnxCreateSpeakerEmbeddingManagerNative | 193 | typedef SherpaOnnxCreateSpeakerEmbeddingManagerNative |
| 34 | - = Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(Int32 dim); | 194 | + = Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(Int32); |
| 35 | 195 | ||
| 36 | typedef SherpaOnnxCreateSpeakerEmbeddingManager | 196 | typedef SherpaOnnxCreateSpeakerEmbeddingManager |
| 37 | - = Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(int dim); | 197 | + = Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(int); |
| 38 | 198 | ||
| 39 | typedef SherpaOnnxDestroySpeakerEmbeddingManagerNative = Void Function( | 199 | typedef SherpaOnnxDestroySpeakerEmbeddingManagerNative = Void Function( |
| 40 | Pointer<SherpaOnnxSpeakerEmbeddingManager>); | 200 | Pointer<SherpaOnnxSpeakerEmbeddingManager>); |
| @@ -190,6 +350,45 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); | @@ -190,6 +350,45 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); | ||
| 190 | typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); | 350 | typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); |
| 191 | 351 | ||
| 192 | class SherpaOnnxBindings { | 352 | class SherpaOnnxBindings { |
| 353 | + static SherpaOnnxCreateVoiceActivityDetector? createVoiceActivityDetector; | ||
| 354 | + | ||
| 355 | + static SherpaOnnxDestroyVoiceActivityDetector? destroyVoiceActivityDetector; | ||
| 356 | + | ||
| 357 | + static SherpaOnnxVoiceActivityDetectorAcceptWaveform? | ||
| 358 | + voiceActivityDetectorAcceptWaveform; | ||
| 359 | + | ||
| 360 | + static SherpaOnnxVoiceActivityDetectorEmpty? voiceActivityDetectorEmpty; | ||
| 361 | + | ||
| 362 | + static SherpaOnnxVoiceActivityDetectorDetected? voiceActivityDetectorDetected; | ||
| 363 | + | ||
| 364 | + static SherpaOnnxVoiceActivityDetectorPop? voiceActivityDetectorPop; | ||
| 365 | + | ||
| 366 | + static SherpaOnnxVoiceActivityDetectorClear? voiceActivityDetectorClear; | ||
| 367 | + | ||
| 368 | + static SherpaOnnxVoiceActivityDetectorFront? voiceActivityDetectorFront; | ||
| 369 | + | ||
| 370 | + static SherpaOnnxDestroySpeechSegment? destroySpeechSegment; | ||
| 371 | + | ||
| 372 | + static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset; | ||
| 373 | + | ||
| 374 | + static SherpaOnnxCreateCircularBuffer? createCircularBuffer; | ||
| 375 | + | ||
| 376 | + static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer; | ||
| 377 | + | ||
| 378 | + static SherpaOnnxCircularBufferPush? circularBufferPush; | ||
| 379 | + | ||
| 380 | + static SherpaOnnxCircularBufferGet? circularBufferGet; | ||
| 381 | + | ||
| 382 | + static SherpaOnnxCircularBufferFree? circularBufferFree; | ||
| 383 | + | ||
| 384 | + static SherpaOnnxCircularBufferPop? circularBufferPop; | ||
| 385 | + | ||
| 386 | + static SherpaOnnxCircularBufferSize? circularBufferSize; | ||
| 387 | + | ||
| 388 | + static SherpaOnnxCircularBufferHead? circularBufferHead; | ||
| 389 | + | ||
| 390 | + static SherpaOnnxCircularBufferReset? circularBufferReset; | ||
| 391 | + | ||
| 193 | static SherpaOnnxCreateSpeakerEmbeddingExtractor? | 392 | static SherpaOnnxCreateSpeakerEmbeddingExtractor? |
| 194 | createSpeakerEmbeddingExtractor; | 393 | createSpeakerEmbeddingExtractor; |
| 195 | 394 | ||
| @@ -252,8 +451,107 @@ class SherpaOnnxBindings { | @@ -252,8 +451,107 @@ class SherpaOnnxBindings { | ||
| 252 | static SherpaOnnxFreeWave? freeWave; | 451 | static SherpaOnnxFreeWave? freeWave; |
| 253 | 452 | ||
| 254 | static void init(DynamicLibrary dynamicLibrary) { | 453 | static void init(DynamicLibrary dynamicLibrary) { |
| 454 | + createVoiceActivityDetector ??= dynamicLibrary | ||
| 455 | + .lookup<NativeFunction<SherpaOnnxCreateVoiceActivityDetectorNative>>( | ||
| 456 | + 'SherpaOnnxCreateVoiceActivityDetector') | ||
| 457 | + .asFunction(); | ||
| 458 | + | ||
| 459 | + destroyVoiceActivityDetector ??= dynamicLibrary | ||
| 460 | + .lookup<NativeFunction<SherpaOnnxDestroyVoiceActivityDetectorNative>>( | ||
| 461 | + 'SherpaOnnxDestroyVoiceActivityDetector') | ||
| 462 | + .asFunction(); | ||
| 463 | + | ||
| 464 | + voiceActivityDetectorAcceptWaveform ??= dynamicLibrary | ||
| 465 | + .lookup< | ||
| 466 | + NativeFunction< | ||
| 467 | + SherpaOnnxVoiceActivityDetectorAcceptWaveformNative>>( | ||
| 468 | + 'SherpaOnnxVoiceActivityDetectorAcceptWaveform') | ||
| 469 | + .asFunction(); | ||
| 470 | + | ||
| 471 | + voiceActivityDetectorEmpty ??= dynamicLibrary | ||
| 472 | + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorEmptyNative>>( | ||
| 473 | + 'SherpaOnnxVoiceActivityDetectorEmpty') | ||
| 474 | + .asFunction(); | ||
| 475 | + | ||
| 476 | + voiceActivityDetectorDetected ??= dynamicLibrary | ||
| 477 | + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorDetectedNative>>( | ||
| 478 | + 'SherpaOnnxVoiceActivityDetectorDetected') | ||
| 479 | + .asFunction(); | ||
| 480 | + | ||
| 481 | + voiceActivityDetectorPop ??= dynamicLibrary | ||
| 482 | + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorPopNative>>( | ||
| 483 | + 'SherpaOnnxVoiceActivityDetectorPop') | ||
| 484 | + .asFunction(); | ||
| 485 | + | ||
| 486 | + voiceActivityDetectorClear ??= dynamicLibrary | ||
| 487 | + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorClearNative>>( | ||
| 488 | + 'SherpaOnnxVoiceActivityDetectorClear') | ||
| 489 | + .asFunction(); | ||
| 490 | + | ||
| 491 | + voiceActivityDetectorFront ??= dynamicLibrary | ||
| 492 | + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFrontNative>>( | ||
| 493 | + 'SherpaOnnxVoiceActivityDetectorFront') | ||
| 494 | + .asFunction(); | ||
| 495 | + | ||
| 496 | + destroySpeechSegment ??= dynamicLibrary | ||
| 497 | + .lookup<NativeFunction<SherpaOnnxDestroySpeechSegmentNative>>( | ||
| 498 | + 'SherpaOnnxDestroySpeechSegment') | ||
| 499 | + .asFunction(); | ||
| 500 | + | ||
| 501 | + voiceActivityDetectorReset ??= dynamicLibrary | ||
| 502 | + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorResetNative>>( | ||
| 503 | + 'SherpaOnnxVoiceActivityDetectorReset') | ||
| 504 | + .asFunction(); | ||
| 505 | + | ||
| 506 | + createCircularBuffer ??= dynamicLibrary | ||
| 507 | + .lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>( | ||
| 508 | + 'SherpaOnnxCreateCircularBuffer') | ||
| 509 | + .asFunction(); | ||
| 510 | + | ||
| 511 | + destroyCircularBuffer ??= dynamicLibrary | ||
| 512 | + .lookup<NativeFunction<SherpaOnnxDestroyCircularBufferNative>>( | ||
| 513 | + 'SherpaOnnxDestroyCircularBuffer') | ||
| 514 | + .asFunction(); | ||
| 515 | + | ||
| 516 | + circularBufferPush ??= dynamicLibrary | ||
| 517 | + .lookup<NativeFunction<SherpaOnnxCircularBufferPushNative>>( | ||
| 518 | + 'SherpaOnnxCircularBufferPush') | ||
| 519 | + .asFunction(); | ||
| 520 | + | ||
| 521 | + circularBufferGet ??= dynamicLibrary | ||
| 522 | + .lookup<NativeFunction<SherpaOnnxCircularBufferGetNative>>( | ||
| 523 | + 'SherpaOnnxCircularBufferGet') | ||
| 524 | + .asFunction(); | ||
| 525 | + | ||
| 526 | + circularBufferFree ??= dynamicLibrary | ||
| 527 | + .lookup<NativeFunction<SherpaOnnxCircularBufferFreeNative>>( | ||
| 528 | + 'SherpaOnnxCircularBufferFree') | ||
| 529 | + .asFunction(); | ||
| 530 | + | ||
| 531 | + circularBufferPop ??= dynamicLibrary | ||
| 532 | + .lookup<NativeFunction<SherpaOnnxCircularBufferPopNative>>( | ||
| 533 | + 'SherpaOnnxCircularBufferPop') | ||
| 534 | + .asFunction(); | ||
| 535 | + | ||
| 536 | + circularBufferSize ??= dynamicLibrary | ||
| 537 | + .lookup<NativeFunction<SherpaOnnxCircularBufferSizeNative>>( | ||
| 538 | + 'SherpaOnnxCircularBufferSize') | ||
| 539 | + .asFunction(); | ||
| 540 | + | ||
| 541 | + circularBufferHead ??= dynamicLibrary | ||
| 542 | + .lookup<NativeFunction<SherpaOnnxCircularBufferHeadNative>>( | ||
| 543 | + 'SherpaOnnxCircularBufferHead') | ||
| 544 | + .asFunction(); | ||
| 545 | + | ||
| 546 | + circularBufferReset ??= dynamicLibrary | ||
| 547 | + .lookup<NativeFunction<SherpaOnnxCircularBufferResetNative>>( | ||
| 548 | + 'SherpaOnnxCircularBufferReset') | ||
| 549 | + .asFunction(); | ||
| 550 | + | ||
| 255 | createSpeakerEmbeddingExtractor ??= dynamicLibrary | 551 | createSpeakerEmbeddingExtractor ??= dynamicLibrary |
| 256 | - .lookup<NativeFunction<SherpaOnnxCreateSpeakerEmbeddingExtractor>>( | 552 | + .lookup< |
| 553 | + NativeFunction< | ||
| 554 | + SherpaOnnxCreateSpeakerEmbeddingExtractorNative>>( | ||
| 257 | 'SherpaOnnxCreateSpeakerEmbeddingExtractor') | 555 | 'SherpaOnnxCreateSpeakerEmbeddingExtractor') |
| 258 | .asFunction(); | 556 | .asFunction(); |
| 259 | 557 |
| @@ -2,19 +2,20 @@ | @@ -2,19 +2,20 @@ | ||
| 2 | import 'dart:ffi'; | 2 | import 'dart:ffi'; |
| 3 | import 'dart:typed_data'; | 3 | import 'dart:typed_data'; |
| 4 | import 'package:ffi/ffi.dart'; | 4 | import 'package:ffi/ffi.dart'; |
| 5 | -import "./sherpa_onnx_bindings.dart"; | ||
| 6 | -import "./online_stream.dart"; | 5 | + |
| 6 | +import './online_stream.dart'; | ||
| 7 | +import './sherpa_onnx_bindings.dart'; | ||
| 7 | 8 | ||
| 8 | class SpeakerEmbeddingExtractorConfig { | 9 | class SpeakerEmbeddingExtractorConfig { |
| 9 | const SpeakerEmbeddingExtractorConfig( | 10 | const SpeakerEmbeddingExtractorConfig( |
| 10 | {required this.model, | 11 | {required this.model, |
| 11 | this.numThreads = 1, | 12 | this.numThreads = 1, |
| 12 | this.debug = true, | 13 | this.debug = true, |
| 13 | - this.provider = "cpu"}); | 14 | + this.provider = 'cpu'}); |
| 14 | 15 | ||
| 15 | @override | 16 | @override |
| 16 | String toString() { | 17 | String toString() { |
| 17 | - return "SpeakerEmbeddingExtractorConfig(model: $model, numThreads: $numThreads, debug: $debug, provider: $provider)"; | 18 | + return 'SpeakerEmbeddingExtractorConfig(model: $model, numThreads: $numThreads, debug: $debug, provider: $provider)'; |
| 18 | } | 19 | } |
| 19 | 20 | ||
| 20 | final String model; | 21 | final String model; |
| @@ -116,7 +117,7 @@ class SpeakerEmbeddingManager { | @@ -116,7 +117,7 @@ class SpeakerEmbeddingManager { | ||
| 116 | 117 | ||
| 117 | /// Return true if added successfully; return false otherwise | 118 | /// Return true if added successfully; return false otherwise |
| 118 | bool add({required String name, required Float32List embedding}) { | 119 | bool add({required String name, required Float32List embedding}) { |
| 119 | - assert(embedding.length == this.dim, "${embedding.length} vs ${this.dim}"); | 120 | + assert(embedding.length == this.dim, '${embedding.length} vs ${this.dim}'); |
| 120 | 121 | ||
| 121 | final Pointer<Utf8> namePtr = name.toNativeUtf8(); | 122 | final Pointer<Utf8> namePtr = name.toNativeUtf8(); |
| 122 | final int n = embedding.length; | 123 | final int n = embedding.length; |
| @@ -145,7 +146,7 @@ class SpeakerEmbeddingManager { | @@ -145,7 +146,7 @@ class SpeakerEmbeddingManager { | ||
| 145 | 146 | ||
| 146 | int offset = 0; | 147 | int offset = 0; |
| 147 | for (final e in embeddingList) { | 148 | for (final e in embeddingList) { |
| 148 | - assert(e.length == this.dim, "${e.length} vs ${this.dim}"); | 149 | + assert(e.length == this.dim, '${e.length} vs ${this.dim}'); |
| 149 | 150 | ||
| 150 | pList.setAll(offset, e); | 151 | pList.setAll(offset, e); |
| 151 | offset += this.dim; | 152 | offset += this.dim; |
sherpa-onnx/flutter/lib/src/vad.dart
0 → 100644
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:ffi'; | ||
| 3 | +import 'dart:typed_data'; | ||
| 4 | +import 'package:ffi/ffi.dart'; | ||
| 5 | + | ||
| 6 | +import './sherpa_onnx_bindings.dart'; | ||
| 7 | + | ||
/// Immutable settings for the Silero VAD model.
///
/// The values are copied field by field into the native config when a
/// `VoiceActivityDetector` is created.
class SileroVadModelConfig {
  /// Path of the model file handed to the native library.
  final String model;

  // NOTE(review): presumably the speech-probability cut-off; confirm against
  // the native documentation.
  final double threshold;

  // NOTE(review): the two durations are presumably in seconds; confirm.
  final double minSilenceDuration;
  final double minSpeechDuration;

  // NOTE(review): presumably a number of samples per inference window; confirm.
  final int windowSize;

  /// Creates a config. Every field has a default, so all arguments are
  /// optional.
  const SileroVadModelConfig({
    this.model = '',
    this.threshold = 0.5,
    this.minSilenceDuration = 0.5,
    this.minSpeechDuration = 0.25,
    this.windowSize = 512,
  });

  /// Returns a single-line dump of all fields, for logging.
  @override
  String toString() => 'SileroVadModelConfig('
      'model: $model, '
      'threshold: $threshold, '
      'minSilenceDuration: $minSilenceDuration, '
      'minSpeechDuration: $minSpeechDuration, '
      'windowSize: $windowSize)';
}
| 27 | + | ||
/// Immutable top-level settings used to create a `VoiceActivityDetector`.
///
/// All fields are final and the default [sileroVad] is a constant, so the
/// constructor is `const`, matching [SileroVadModelConfig]. Existing non-const
/// call sites keep working unchanged.
class VadModelConfig {
  /// Creates a config. Every argument is optional.
  const VadModelConfig(
      {this.sileroVad = const SileroVadModelConfig(),
      this.sampleRate = 16000,
      this.numThreads = 1,
      this.provider = 'cpu',
      this.debug = true});

  /// Returns a single-line dump of all fields, for logging.
  @override
  String toString() {
    return 'VadModelConfig(sileroVad: $sileroVad, sampleRate: $sampleRate, numThreads: $numThreads, provider: $provider, debug: $debug)';
  }

  /// Settings of the Silero VAD model.
  final SileroVadModelConfig sileroVad;

  // NOTE(review): presumably the sample rate, in Hz, of the audio that will be
  // fed to the detector; confirm against the native documentation.
  final int sampleRate;

  /// Value copied to the native config's `numThreads` field.
  final int numThreads;

  /// Execution provider name passed to the native library as a UTF-8 string.
  final String provider;

  /// Whether the native `debug` flag is set (passed as 1 or 0).
  final bool debug;
}
| 47 | + | ||
/// A speech segment as returned by `VoiceActivityDetector.front()`.
class SpeechSegment {
  /// The samples of the segment, held in Dart-owned memory.
  final Float32List samples;

  // NOTE(review): presumably the index of the segment's first sample in the
  // input stream; confirm against the native API.
  final int start;

  /// Creates a segment from its [samples] and [start] value.
  SpeechSegment({
    required this.samples,
    required this.start,
  });
}
| 53 | + | ||
/// Dart wrapper around a native circular buffer of float samples.
///
/// Every method forwards to the matching entry in [SherpaOnnxBindings]. When a
/// binding has not been looked up yet, the call is skipped and a neutral value
/// (`0`, an empty list, or `nullptr`) is used instead.
class CircularBuffer {
  CircularBuffer._({required this.ptr});

  /// Creates a native buffer with the given [capacity].
  ///
  /// The user has to invoke [free] on the returned instance to avoid a memory
  /// leak.
  factory CircularBuffer({required int capacity}) {
    assert(capacity > 0, 'capacity is $capacity');
    final p =
        SherpaOnnxBindings.createCircularBuffer?.call(capacity) ?? nullptr;

    return CircularBuffer._(ptr: p);
  }

  /// Destroys the native buffer and sets [ptr] to `nullptr`.
  ///
  /// Calling this again afterwards does nothing, so a null handle is never
  /// passed to native code.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.destroyCircularBuffer?.call(ptr);
    ptr = nullptr;
  }

  /// Appends [data] to the buffer.
  ///
  /// An empty [data] is a no-op: there is nothing to append, and a zero-byte
  /// native allocation may fail.
  void push(Float32List data) {
    final n = data.length;
    if (n == 0) {
      return;
    }

    // The native side needs the samples in native memory, so stage them in a
    // temporary allocation.
    final Pointer<Float> p = calloc<Float>(n);
    try {
      p.asTypedList(n).setAll(0, data);
      SherpaOnnxBindings.circularBufferPush?.call(ptr, p, n);
    } finally {
      // Always release the staging memory, even if the call above throws.
      calloc.free(p);
    }
  }

  /// Returns a copy of [n] samples starting at [startIndex].
  ///
  /// Returns an empty list when the native call yields `nullptr` (or when the
  /// binding is unavailable). Nothing is removed from the buffer.
  Float32List get({required int startIndex, required int n}) {
    final Pointer<Float> p =
        SherpaOnnxBindings.circularBufferGet?.call(ptr, startIndex, n) ??
            nullptr;

    if (p == nullptr) {
      return Float32List(0);
    }

    try {
      // Copy into Dart-owned memory before the native array is released.
      return Float32List.fromList(p.asTypedList(n));
    } finally {
      SherpaOnnxBindings.circularBufferFree?.call(p);
    }
  }

  /// Forwards to the native pop call with [n].
  void pop(int n) {
    SherpaOnnxBindings.circularBufferPop?.call(ptr, n);
  }

  /// Forwards to the native reset call.
  void reset() {
    SherpaOnnxBindings.circularBufferReset?.call(ptr);
  }

  /// The size reported by the native buffer, or 0 if the binding is missing.
  int get size => SherpaOnnxBindings.circularBufferSize?.call(ptr) ?? 0;

  /// The head reported by the native buffer, or 0 if the binding is missing.
  int get head => SherpaOnnxBindings.circularBufferHead?.call(ptr) ?? 0;

  /// Handle of the native buffer; `nullptr` after [free].
  Pointer<SherpaOnnxCircularBuffer> ptr;
}
| 114 | + | ||
/// Dart wrapper around a native voice activity detector.
///
/// Every method forwards to the matching entry in [SherpaOnnxBindings]. When a
/// binding has not been looked up yet, the call is skipped and a neutral value
/// (`false`, an empty segment, or `nullptr`) is used instead.
class VoiceActivityDetector {
  VoiceActivityDetector._({required this.ptr});

  /// Creates a native detector from [config].
  ///
  /// [bufferSizeInSeconds] is forwarded unchanged as the second argument of
  /// the native create function.
  ///
  /// The user has to invoke [free] on the returned instance to avoid a memory
  /// leak.
  factory VoiceActivityDetector(
      {required VadModelConfig config, required double bufferSizeInSeconds}) {
    final c = calloc<SherpaOnnxVadModelConfig>();
    final modelPtr = config.sileroVad.model.toNativeUtf8();
    final providerPtr = config.provider.toNativeUtf8();

    try {
      c.ref.sileroVad.model = modelPtr;
      c.ref.sileroVad.threshold = config.sileroVad.threshold;
      c.ref.sileroVad.minSilenceDuration = config.sileroVad.minSilenceDuration;
      c.ref.sileroVad.minSpeechDuration = config.sileroVad.minSpeechDuration;
      c.ref.sileroVad.windowSize = config.sileroVad.windowSize;

      c.ref.sampleRate = config.sampleRate;
      c.ref.numThreads = config.numThreads;
      c.ref.provider = providerPtr;

      // The native struct stores the flag as a 32-bit integer.
      c.ref.debug = config.debug ? 1 : 0;

      final ptr = SherpaOnnxBindings.createVoiceActivityDetector
              ?.call(c, bufferSizeInSeconds) ??
          nullptr;

      return VoiceActivityDetector._(ptr: ptr);
    } finally {
      // The temporary native config is released on every path, including when
      // the native call throws.
      calloc.free(providerPtr);
      calloc.free(modelPtr);
      calloc.free(c);
    }
  }

  /// Destroys the native detector and sets [ptr] to `nullptr`.
  ///
  /// Calling this again afterwards does nothing, so a null handle is never
  /// passed to native code.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.destroyVoiceActivityDetector?.call(ptr);
    ptr = nullptr;
  }

  /// Feeds [samples] to the detector.
  ///
  /// An empty [samples] is a no-op: there is nothing to feed, and a zero-byte
  /// native allocation may fail.
  void acceptWaveform(Float32List samples) {
    final n = samples.length;
    if (n == 0) {
      return;
    }

    // The native side needs the samples in native memory, so stage them in a
    // temporary allocation.
    final Pointer<Float> p = calloc<Float>(n);
    try {
      p.asTypedList(n).setAll(0, samples);
      SherpaOnnxBindings.voiceActivityDetectorAcceptWaveform?.call(ptr, p, n);
    } finally {
      // Always release the staging memory, even if the call above throws.
      calloc.free(p);
    }
  }

  /// Whether the native `Empty` call returns 1.
  ///
  /// Returns `false` when the binding is unavailable.
  bool isEmpty() {
    final int empty =
        SherpaOnnxBindings.voiceActivityDetectorEmpty?.call(ptr) ?? 0;

    return empty == 1;
  }

  /// Whether the native `Detected` call returns 1.
  ///
  /// Returns `false` when the binding is unavailable.
  bool isDetected() {
    final int detected =
        SherpaOnnxBindings.voiceActivityDetectorDetected?.call(ptr) ?? 0;

    return detected == 1;
  }

  /// Forwards to the native pop call.
  void pop() {
    SherpaOnnxBindings.voiceActivityDetectorPop?.call(ptr);
  }

  /// Forwards to the native clear call.
  void clear() {
    SherpaOnnxBindings.voiceActivityDetectorClear?.call(ptr);
  }

  /// Returns a Dart-owned copy of the segment from the native `Front` call.
  ///
  /// Returns a segment with no samples and `start == 0` when the native call
  /// yields `nullptr` (or when the binding is unavailable).
  ///
  /// NOTE(review): the native side presumably expects [isEmpty] to be false
  /// before this is called; confirm against the native API.
  SpeechSegment front() {
    final Pointer<SherpaOnnxSpeechSegment> segment =
        SherpaOnnxBindings.voiceActivityDetectorFront?.call(ptr) ?? nullptr;
    if (segment == nullptr) {
      return SpeechSegment(samples: Float32List(0), start: 0);
    }

    try {
      // Copy everything out before the native segment is destroyed.
      final view = segment.ref.samples.asTypedList(segment.ref.n);
      return SpeechSegment(
        samples: Float32List.fromList(view),
        start: segment.ref.start,
      );
    } finally {
      SherpaOnnxBindings.destroySpeechSegment?.call(segment);
    }
  }

  /// Forwards to the native reset call.
  void reset() {
    SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
  }

  /// Handle of the native detector; `nullptr` after [free].
  Pointer<SherpaOnnxVoiceActivityDetector> ptr;
}
| @@ -2,7 +2,8 @@ | @@ -2,7 +2,8 @@ | ||
| 2 | import 'dart:ffi'; | 2 | import 'dart:ffi'; |
| 3 | import 'dart:typed_data'; | 3 | import 'dart:typed_data'; |
| 4 | import 'package:ffi/ffi.dart'; | 4 | import 'package:ffi/ffi.dart'; |
| 5 | -import "./sherpa_onnx_bindings.dart"; | 5 | + |
| 6 | +import './sherpa_onnx_bindings.dart'; | ||
| 6 | 7 | ||
| 7 | class WaveData { | 8 | class WaveData { |
| 8 | WaveData({required this.samples, required this.sampleRate}); | 9 | WaveData({required this.samples, required this.sampleRate}); |
-
请 注册 或 登录 后发表评论