Fangjun Kuang
Committed by GitHub

Add Dart API for VAD (#904)

@@ -123,6 +123,7 @@ jobs: @@ -123,6 +123,7 @@ jobs:
123 pushd example/assets 123 pushd example/assets
124 124
125 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx 125 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
  126 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
126 git clone https://github.com/csukuangfj/sr-data 127 git clone https://github.com/csukuangfj/sr-data
127 128
128 rm -rf sr-data/.git 129 rm -rf sr-data/.git
@@ -143,3 +143,5 @@ xcuserdata/ @@ -143,3 +143,5 @@ xcuserdata/
143 ## Xcode 8 and earlier 143 ## Xcode 8 and earlier
144 *.xcscmblueprint 144 *.xcscmblueprint
145 *.xccheckout 145 *.xccheckout
  146 +
  147 +flutter_jank_metrics*.json
@@ -6,6 +6,7 @@ @@ -6,6 +6,7 @@
6 # switch to this directory and run 6 # switch to this directory and run
7 7
8 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx 8 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
  9 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
9 git clone https://github.com/csukuangfj/sr-data 10 git clone https://github.com/csukuangfj/sr-data
10 11
11 rm -rf sr-data/.git 12 rm -rf sr-data/.git
1 // Copyright (c) 2024 Xiaomi Corporation 1 // Copyright (c) 2024 Xiaomi Corporation
2 import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; 2 import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
3 import 'package:flutter/material.dart'; 3 import 'package:flutter/material.dart';
  4 +
4 import "./speaker_identification_test.dart"; 5 import "./speaker_identification_test.dart";
  6 +import "./vad_test.dart";
5 7
6 void main() { 8 void main() {
7 runApp(const MyApp()); 9 runApp(const MyApp());
@@ -51,6 +53,7 @@ class _MyHomePageState extends State<MyHomePage> { @@ -51,6 +53,7 @@ class _MyHomePageState extends State<MyHomePage> {
51 if (_counter <= 10) { 53 if (_counter <= 10) {
52 sherpa_onnx.initBindings(); 54 sherpa_onnx.initBindings();
53 await testSpeakerID(); 55 await testSpeakerID();
  56 + // await testVad();
54 } 57 }
55 58
56 setState(() { 59 setState(() {
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +import 'dart:typed_data';
  3 +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
  4 +import './utils.dart';
  5 +
  6 +Future<void> testVad() async {
  7 + final src = 'assets/silero_vad.onnx';
  8 + final modelPath = await copyAssetFile(src: src, dst: 'silero_vad.onnx');
  9 +
  10 + final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(model: modelPath);
  11 + final config = sherpa_onnx.VadModelConfig(
  12 + sileroVad: sileroVadConfig,
  13 + numThreads: 1,
  14 + debug: true,
  15 + );
  16 +
  17 + final vad = sherpa_onnx.VoiceActivityDetector(
  18 + config: config, bufferSizeInSeconds: 10);
  19 + print('before vad.free(): ${vad.ptr}');
  20 + vad.free();
  21 + print('after vad.free(): ${vad.ptr}');
  22 +
  23 + final buffer = sherpa_onnx.CircularBuffer(capacity: 16000 * 2);
  24 +
  25 + final d = Float32List.fromList([0, 10, 20, 30]);
  26 + buffer.push(d);
  27 + assert(d.length == buffer.size, '${d.length} vs ${buffer.size}');
  28 +
  29 + final f = Float32List.fromList([-5, 100.25, 599]);
  30 + buffer.push(f);
  31 +
  32 + assert(buffer.size == d.length + f.length);
  33 + final g = buffer.get(startIndex: 0, n: 5);
  34 +
  35 + assert(g.length == 5);
  36 + assert(g[0] == 0);
  37 + assert(g[1] == 10);
  38 + assert(g[2] == 20);
  39 + assert(g[3] == 30);
  40 + assert(g[4] == -5);
  41 +
  42 + assert(buffer.size == d.length + f.length);
  43 +
  44 + buffer.pop(3);
  45 + assert(buffer.size == d.length + f.length - 3);
  46 +
  47 + final h = buffer.get(startIndex: buffer.head, n: 4);
  48 + assert(h.length == 4);
  49 + assert(h[0] == 30);
  50 + assert(h[1] == -5);
  51 + assert(h[2] == 100.25);
  52 + assert(h[3] == 599);
  53 +
  54 + buffer.reset();
  55 +
  56 + assert(buffer.size == 0);
  57 + assert(buffer.head == 0);
  58 +
  59 + print('before free: ${buffer.ptr}');
  60 + buffer.free();
  61 + print('after free: ${buffer.ptr}');
  62 +}
@@ -2,10 +2,11 @@ @@ -2,10 +2,11 @@
2 import 'dart:io'; 2 import 'dart:io';
3 import 'dart:ffi'; 3 import 'dart:ffi';
4 4
5 -import 'src/sherpa_onnx_bindings.dart';  
6 -export 'src/speaker_identification.dart';  
7 export 'src/online_stream.dart'; 5 export 'src/online_stream.dart';
  6 +export 'src/speaker_identification.dart';
  7 +export 'src/vad.dart';
8 export 'src/wave_reader.dart'; 8 export 'src/wave_reader.dart';
  9 +import 'src/sherpa_onnx_bindings.dart';
9 10
10 final DynamicLibrary _dylib = () { 11 final DynamicLibrary _dylib = () {
11 if (Platform.isIOS) { 12 if (Platform.isIOS) {
1 // Copyright (c) 2024 Xiaomi Corporation 1 // Copyright (c) 2024 Xiaomi Corporation
2 -import 'dart:typed_data';  
3 import 'dart:ffi'; 2 import 'dart:ffi';
  3 +import 'dart:typed_data';
4 import 'package:ffi/ffi.dart'; 4 import 'package:ffi/ffi.dart';
5 -import "./sherpa_onnx_bindings.dart"; 5 +
  6 +import './sherpa_onnx_bindings.dart';
6 7
7 class OnlineStream { 8 class OnlineStream {
8 /// The user has to call OnlineStream.free() to avoid memory leak. 9 /// The user has to call OnlineStream.free() to avoid memory leak.
@@ -2,6 +2,47 @@ @@ -2,6 +2,47 @@
2 import 'dart:ffi'; 2 import 'dart:ffi';
3 import 'package:ffi/ffi.dart'; 3 import 'package:ffi/ffi.dart';
4 4
  5 +final class SherpaOnnxSileroVadModelConfig extends Struct {
  6 + external Pointer<Utf8> model;
  7 +
  8 + @Float()
  9 + external double threshold;
  10 +
  11 + @Float()
  12 + external double minSilenceDuration;
  13 +
  14 + @Float()
  15 + external double minSpeechDuration;
  16 +
  17 + @Int32()
  18 + external int windowSize;
  19 +}
  20 +
  21 +final class SherpaOnnxVadModelConfig extends Struct {
  22 + external SherpaOnnxSileroVadModelConfig sileroVad;
  23 +
  24 + @Int32()
  25 + external int sampleRate;
  26 +
  27 + @Int32()
  28 + external int numThreads;
  29 +
  30 + external Pointer<Utf8> provider;
  31 +
  32 + @Int32()
  33 + external int debug;
  34 +}
  35 +
  36 +final class SherpaOnnxSpeechSegment extends Struct {
  37 + @Int32()
  38 + external int start;
  39 +
  40 + external Pointer<Float> samples;
  41 +
  42 + @Int32()
  43 + external int n;
  44 +}
  45 +
5 final class SherpaOnnxWave extends Struct { 46 final class SherpaOnnxWave extends Struct {
6 external Pointer<Float> samples; 47 external Pointer<Float> samples;
7 48
@@ -24,17 +65,136 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { @@ -24,17 +65,136 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
24 external Pointer<Utf8> provider; 65 external Pointer<Utf8> provider;
25 } 66 }
26 67
  68 +final class SherpaOnnxCircularBuffer extends Opaque {}
  69 +
  70 +final class SherpaOnnxVoiceActivityDetector extends Opaque {}
  71 +
27 final class SherpaOnnxOnlineStream extends Opaque {} 72 final class SherpaOnnxOnlineStream extends Opaque {}
28 73
29 final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} 74 final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
30 75
31 final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} 76 final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
32 77
  78 +typedef SherpaOnnxCreateVoiceActivityDetectorNative
  79 + = Pointer<SherpaOnnxVoiceActivityDetector> Function(
  80 + Pointer<SherpaOnnxVadModelConfig>, Float);
  81 +
  82 +typedef SherpaOnnxCreateVoiceActivityDetector
  83 + = Pointer<SherpaOnnxVoiceActivityDetector> Function(
  84 + Pointer<SherpaOnnxVadModelConfig>, double);
  85 +
  86 +typedef SherpaOnnxDestroyVoiceActivityDetectorNative = Void Function(
  87 + Pointer<SherpaOnnxVoiceActivityDetector>);
  88 +
  89 +typedef SherpaOnnxDestroyVoiceActivityDetector = void Function(
  90 + Pointer<SherpaOnnxVoiceActivityDetector>);
  91 +
  92 +typedef SherpaOnnxVoiceActivityDetectorAcceptWaveformNative = Void Function(
  93 + Pointer<SherpaOnnxVoiceActivityDetector>, Pointer<Float>, Int32);
  94 +
  95 +typedef SherpaOnnxVoiceActivityDetectorAcceptWaveform = void Function(
  96 + Pointer<SherpaOnnxVoiceActivityDetector>, Pointer<Float>, int);
  97 +
  98 +typedef SherpaOnnxVoiceActivityDetectorEmptyNative = Int32 Function(
  99 + Pointer<SherpaOnnxVoiceActivityDetector>);
  100 +
  101 +typedef SherpaOnnxVoiceActivityDetectorEmpty = int Function(
  102 + Pointer<SherpaOnnxVoiceActivityDetector>);
  103 +
  104 +typedef SherpaOnnxVoiceActivityDetectorDetectedNative = Int32 Function(
  105 + Pointer<SherpaOnnxVoiceActivityDetector>);
  106 +
  107 +typedef SherpaOnnxVoiceActivityDetectorDetected = int Function(
  108 + Pointer<SherpaOnnxVoiceActivityDetector>);
  109 +
  110 +typedef SherpaOnnxVoiceActivityDetectorPopNative = Void Function(
  111 + Pointer<SherpaOnnxVoiceActivityDetector>);
  112 +
  113 +typedef SherpaOnnxVoiceActivityDetectorPop = void Function(
  114 + Pointer<SherpaOnnxVoiceActivityDetector>);
  115 +
  116 +typedef SherpaOnnxVoiceActivityDetectorClearNative = Void Function(
  117 + Pointer<SherpaOnnxVoiceActivityDetector>);
  118 +
  119 +typedef SherpaOnnxVoiceActivityDetectorClear = void Function(
  120 + Pointer<SherpaOnnxVoiceActivityDetector>);
  121 +
  122 +typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function(
  123 + Pointer<SherpaOnnxVoiceActivityDetector>);
  124 +
  125 +typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
  126 + Pointer<SherpaOnnxVoiceActivityDetector>);
  127 +
  128 +typedef SherpaOnnxVoiceActivityDetectorFrontNative
  129 + = Pointer<SherpaOnnxSpeechSegment> Function(
  130 + Pointer<SherpaOnnxVoiceActivityDetector>);
  131 +
  132 +typedef SherpaOnnxVoiceActivityDetectorFront
  133 + = SherpaOnnxVoiceActivityDetectorFrontNative;
  134 +
  135 +typedef SherpaOnnxDestroySpeechSegmentNative = Void Function(
  136 + Pointer<SherpaOnnxSpeechSegment>);
  137 +
  138 +typedef SherpaOnnxDestroySpeechSegment = void Function(
  139 + Pointer<SherpaOnnxSpeechSegment>);
  140 +
  141 +typedef SherpaOnnxCreateCircularBufferNative = Pointer<SherpaOnnxCircularBuffer>
  142 + Function(Int32);
  143 +
  144 +typedef SherpaOnnxCreateCircularBuffer = Pointer<SherpaOnnxCircularBuffer>
  145 + Function(int);
  146 +
  147 +typedef SherpaOnnxDestroyCircularBufferNative = Void Function(
  148 + Pointer<SherpaOnnxCircularBuffer>);
  149 +
  150 +typedef SherpaOnnxDestroyCircularBuffer = void Function(
  151 + Pointer<SherpaOnnxCircularBuffer>);
  152 +
  153 +typedef SherpaOnnxCircularBufferPushNative = Void Function(
  154 + Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, Int32);
  155 +
  156 +typedef SherpaOnnxCircularBufferPush = void Function(
  157 + Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, int);
  158 +
  159 +typedef SherpaOnnxCircularBufferGetNative = Pointer<Float> Function(
  160 + Pointer<SherpaOnnxCircularBuffer>, Int32, Int32);
  161 +
  162 +typedef SherpaOnnxCircularBufferGet = Pointer<Float> Function(
  163 + Pointer<SherpaOnnxCircularBuffer>, int, int);
  164 +
  165 +typedef SherpaOnnxCircularBufferFreeNative = Void Function(Pointer<Float>);
  166 +
  167 +typedef SherpaOnnxCircularBufferFree = void Function(Pointer<Float>);
  168 +
  169 +typedef SherpaOnnxCircularBufferPopNative = Void Function(
  170 + Pointer<SherpaOnnxCircularBuffer>, Int32);
  171 +
  172 +typedef SherpaOnnxCircularBufferPop = void Function(
  173 + Pointer<SherpaOnnxCircularBuffer>, int);
  174 +
  175 +typedef SherpaOnnxCircularBufferSizeNative = Int32 Function(
  176 + Pointer<SherpaOnnxCircularBuffer>);
  177 +
  178 +typedef SherpaOnnxCircularBufferSize = int Function(
  179 + Pointer<SherpaOnnxCircularBuffer>);
  180 +
  181 +typedef SherpaOnnxCircularBufferHeadNative = Int32 Function(
  182 + Pointer<SherpaOnnxCircularBuffer>);
  183 +
  184 +typedef SherpaOnnxCircularBufferHead = int Function(
  185 + Pointer<SherpaOnnxCircularBuffer>);
  186 +
  187 +typedef SherpaOnnxCircularBufferResetNative = Void Function(
  188 + Pointer<SherpaOnnxCircularBuffer>);
  189 +
  190 +typedef SherpaOnnxCircularBufferReset = void Function(
  191 + Pointer<SherpaOnnxCircularBuffer>);
  192 +
33 typedef SherpaOnnxCreateSpeakerEmbeddingManagerNative 193 typedef SherpaOnnxCreateSpeakerEmbeddingManagerNative
34 - = Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(Int32 dim); 194 + = Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(Int32);
35 195
36 typedef SherpaOnnxCreateSpeakerEmbeddingManager 196 typedef SherpaOnnxCreateSpeakerEmbeddingManager
37 - = Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(int dim); 197 + = Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(int);
38 198
39 typedef SherpaOnnxDestroySpeakerEmbeddingManagerNative = Void Function( 199 typedef SherpaOnnxDestroySpeakerEmbeddingManagerNative = Void Function(
40 Pointer<SherpaOnnxSpeakerEmbeddingManager>); 200 Pointer<SherpaOnnxSpeakerEmbeddingManager>);
@@ -190,6 +350,45 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); @@ -190,6 +350,45 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
190 typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); 350 typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
191 351
192 class SherpaOnnxBindings { 352 class SherpaOnnxBindings {
  353 + static SherpaOnnxCreateVoiceActivityDetector? createVoiceActivityDetector;
  354 +
  355 + static SherpaOnnxDestroyVoiceActivityDetector? destroyVoiceActivityDetector;
  356 +
  357 + static SherpaOnnxVoiceActivityDetectorAcceptWaveform?
  358 + voiceActivityDetectorAcceptWaveform;
  359 +
  360 + static SherpaOnnxVoiceActivityDetectorEmpty? voiceActivityDetectorEmpty;
  361 +
  362 + static SherpaOnnxVoiceActivityDetectorDetected? voiceActivityDetectorDetected;
  363 +
  364 + static SherpaOnnxVoiceActivityDetectorPop? voiceActivityDetectorPop;
  365 +
  366 + static SherpaOnnxVoiceActivityDetectorClear? voiceActivityDetectorClear;
  367 +
  368 + static SherpaOnnxVoiceActivityDetectorFront? voiceActivityDetectorFront;
  369 +
  370 + static SherpaOnnxDestroySpeechSegment? destroySpeechSegment;
  371 +
  372 + static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;
  373 +
  374 + static SherpaOnnxCreateCircularBuffer? createCircularBuffer;
  375 +
  376 + static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;
  377 +
  378 + static SherpaOnnxCircularBufferPush? circularBufferPush;
  379 +
  380 + static SherpaOnnxCircularBufferGet? circularBufferGet;
  381 +
  382 + static SherpaOnnxCircularBufferFree? circularBufferFree;
  383 +
  384 + static SherpaOnnxCircularBufferPop? circularBufferPop;
  385 +
  386 + static SherpaOnnxCircularBufferSize? circularBufferSize;
  387 +
  388 + static SherpaOnnxCircularBufferHead? circularBufferHead;
  389 +
  390 + static SherpaOnnxCircularBufferReset? circularBufferReset;
  391 +
193 static SherpaOnnxCreateSpeakerEmbeddingExtractor? 392 static SherpaOnnxCreateSpeakerEmbeddingExtractor?
194 createSpeakerEmbeddingExtractor; 393 createSpeakerEmbeddingExtractor;
195 394
@@ -252,8 +451,107 @@ class SherpaOnnxBindings { @@ -252,8 +451,107 @@ class SherpaOnnxBindings {
252 static SherpaOnnxFreeWave? freeWave; 451 static SherpaOnnxFreeWave? freeWave;
253 452
254 static void init(DynamicLibrary dynamicLibrary) { 453 static void init(DynamicLibrary dynamicLibrary) {
  454 + createVoiceActivityDetector ??= dynamicLibrary
  455 + .lookup<NativeFunction<SherpaOnnxCreateVoiceActivityDetectorNative>>(
  456 + 'SherpaOnnxCreateVoiceActivityDetector')
  457 + .asFunction();
  458 +
  459 + destroyVoiceActivityDetector ??= dynamicLibrary
  460 + .lookup<NativeFunction<SherpaOnnxDestroyVoiceActivityDetectorNative>>(
  461 + 'SherpaOnnxDestroyVoiceActivityDetector')
  462 + .asFunction();
  463 +
  464 + voiceActivityDetectorAcceptWaveform ??= dynamicLibrary
  465 + .lookup<
  466 + NativeFunction<
  467 + SherpaOnnxVoiceActivityDetectorAcceptWaveformNative>>(
  468 + 'SherpaOnnxVoiceActivityDetectorAcceptWaveform')
  469 + .asFunction();
  470 +
  471 + voiceActivityDetectorEmpty ??= dynamicLibrary
  472 + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorEmptyNative>>(
  473 + 'SherpaOnnxVoiceActivityDetectorEmpty')
  474 + .asFunction();
  475 +
  476 + voiceActivityDetectorDetected ??= dynamicLibrary
  477 + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorDetectedNative>>(
  478 + 'SherpaOnnxVoiceActivityDetectorDetected')
  479 + .asFunction();
  480 +
  481 + voiceActivityDetectorPop ??= dynamicLibrary
  482 + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorPopNative>>(
  483 + 'SherpaOnnxVoiceActivityDetectorPop')
  484 + .asFunction();
  485 +
  486 + voiceActivityDetectorClear ??= dynamicLibrary
  487 + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorClearNative>>(
  488 + 'SherpaOnnxVoiceActivityDetectorClear')
  489 + .asFunction();
  490 +
  491 + voiceActivityDetectorFront ??= dynamicLibrary
  492 + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFrontNative>>(
  493 + 'SherpaOnnxVoiceActivityDetectorFront')
  494 + .asFunction();
  495 +
  496 + destroySpeechSegment ??= dynamicLibrary
  497 + .lookup<NativeFunction<SherpaOnnxDestroySpeechSegmentNative>>(
  498 + 'SherpaOnnxDestroySpeechSegment')
  499 + .asFunction();
  500 +
  501 + voiceActivityDetectorReset ??= dynamicLibrary
  502 + .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorResetNative>>(
  503 + 'SherpaOnnxVoiceActivityDetectorReset')
  504 + .asFunction();
  505 +
  506 + createCircularBuffer ??= dynamicLibrary
  507 + .lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
  508 + 'SherpaOnnxCreateCircularBuffer')
  509 + .asFunction();
  510 +
  511 + destroyCircularBuffer ??= dynamicLibrary
  512 + .lookup<NativeFunction<SherpaOnnxDestroyCircularBufferNative>>(
  513 + 'SherpaOnnxDestroyCircularBuffer')
  514 + .asFunction();
  515 +
  516 + circularBufferPush ??= dynamicLibrary
  517 + .lookup<NativeFunction<SherpaOnnxCircularBufferPushNative>>(
  518 + 'SherpaOnnxCircularBufferPush')
  519 + .asFunction();
  520 +
  521 + circularBufferGet ??= dynamicLibrary
  522 + .lookup<NativeFunction<SherpaOnnxCircularBufferGetNative>>(
  523 + 'SherpaOnnxCircularBufferGet')
  524 + .asFunction();
  525 +
  526 + circularBufferFree ??= dynamicLibrary
  527 + .lookup<NativeFunction<SherpaOnnxCircularBufferFreeNative>>(
  528 + 'SherpaOnnxCircularBufferFree')
  529 + .asFunction();
  530 +
  531 + circularBufferPop ??= dynamicLibrary
  532 + .lookup<NativeFunction<SherpaOnnxCircularBufferPopNative>>(
  533 + 'SherpaOnnxCircularBufferPop')
  534 + .asFunction();
  535 +
  536 + circularBufferSize ??= dynamicLibrary
  537 + .lookup<NativeFunction<SherpaOnnxCircularBufferSizeNative>>(
  538 + 'SherpaOnnxCircularBufferSize')
  539 + .asFunction();
  540 +
  541 + circularBufferHead ??= dynamicLibrary
  542 + .lookup<NativeFunction<SherpaOnnxCircularBufferHeadNative>>(
  543 + 'SherpaOnnxCircularBufferHead')
  544 + .asFunction();
  545 +
  546 + circularBufferReset ??= dynamicLibrary
  547 + .lookup<NativeFunction<SherpaOnnxCircularBufferResetNative>>(
  548 + 'SherpaOnnxCircularBufferReset')
  549 + .asFunction();
  550 +
255 createSpeakerEmbeddingExtractor ??= dynamicLibrary 551 createSpeakerEmbeddingExtractor ??= dynamicLibrary
256 - .lookup<NativeFunction<SherpaOnnxCreateSpeakerEmbeddingExtractor>>( 552 + .lookup<
  553 + NativeFunction<
  554 + SherpaOnnxCreateSpeakerEmbeddingExtractorNative>>(
257 'SherpaOnnxCreateSpeakerEmbeddingExtractor') 555 'SherpaOnnxCreateSpeakerEmbeddingExtractor')
258 .asFunction(); 556 .asFunction();
259 557
@@ -2,19 +2,20 @@ @@ -2,19 +2,20 @@
2 import 'dart:ffi'; 2 import 'dart:ffi';
3 import 'dart:typed_data'; 3 import 'dart:typed_data';
4 import 'package:ffi/ffi.dart'; 4 import 'package:ffi/ffi.dart';
5 -import "./sherpa_onnx_bindings.dart";  
6 -import "./online_stream.dart"; 5 +
  6 +import './online_stream.dart';
  7 +import './sherpa_onnx_bindings.dart';
7 8
8 class SpeakerEmbeddingExtractorConfig { 9 class SpeakerEmbeddingExtractorConfig {
9 const SpeakerEmbeddingExtractorConfig( 10 const SpeakerEmbeddingExtractorConfig(
10 {required this.model, 11 {required this.model,
11 this.numThreads = 1, 12 this.numThreads = 1,
12 this.debug = true, 13 this.debug = true,
13 - this.provider = "cpu"}); 14 + this.provider = 'cpu'});
14 15
15 @override 16 @override
16 String toString() { 17 String toString() {
17 - return "SpeakerEmbeddingExtractorConfig(model: $model, numThreads: $numThreads, debug: $debug, provider: $provider)"; 18 + return 'SpeakerEmbeddingExtractorConfig(model: $model, numThreads: $numThreads, debug: $debug, provider: $provider)';
18 } 19 }
19 20
20 final String model; 21 final String model;
@@ -116,7 +117,7 @@ class SpeakerEmbeddingManager { @@ -116,7 +117,7 @@ class SpeakerEmbeddingManager {
116 117
117 /// Return true if added successfully; return false otherwise 118 /// Return true if added successfully; return false otherwise
118 bool add({required String name, required Float32List embedding}) { 119 bool add({required String name, required Float32List embedding}) {
119 - assert(embedding.length == this.dim, "${embedding.length} vs ${this.dim}"); 120 + assert(embedding.length == this.dim, '${embedding.length} vs ${this.dim}');
120 121
121 final Pointer<Utf8> namePtr = name.toNativeUtf8(); 122 final Pointer<Utf8> namePtr = name.toNativeUtf8();
122 final int n = embedding.length; 123 final int n = embedding.length;
@@ -145,7 +146,7 @@ class SpeakerEmbeddingManager { @@ -145,7 +146,7 @@ class SpeakerEmbeddingManager {
145 146
146 int offset = 0; 147 int offset = 0;
147 for (final e in embeddingList) { 148 for (final e in embeddingList) {
148 - assert(e.length == this.dim, "${e.length} vs ${this.dim}"); 149 + assert(e.length == this.dim, '${e.length} vs ${this.dim}');
149 150
150 pList.setAll(offset, e); 151 pList.setAll(offset, e);
151 offset += this.dim; 152 offset += this.dim;
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +import 'dart:ffi';
  3 +import 'dart:typed_data';
  4 +import 'package:ffi/ffi.dart';
  5 +
  6 +import './sherpa_onnx_bindings.dart';
  7 +
  8 +class SileroVadModelConfig {
  9 + const SileroVadModelConfig(
  10 + {this.model = '',
  11 + this.threshold = 0.5,
  12 + this.minSilenceDuration = 0.5,
  13 + this.minSpeechDuration = 0.25,
  14 + this.windowSize = 512});
  15 +
  16 + @override
  17 + String toString() {
  18 + return 'SileroVadModelConfig(model: $model, threshold: $threshold, minSilenceDuration: $minSilenceDuration, minSpeechDuration: $minSpeechDuration, windowSize: $windowSize)';
  19 + }
  20 +
  21 + final String model;
  22 + final double threshold;
  23 + final double minSilenceDuration;
  24 + final double minSpeechDuration;
  25 + final int windowSize;
  26 +}
  27 +
  28 +class VadModelConfig {
  29 + VadModelConfig(
  30 + {this.sileroVad = const SileroVadModelConfig(),
  31 + this.sampleRate = 16000,
  32 + this.numThreads = 1,
  33 + this.provider = 'cpu',
  34 + this.debug = true});
  35 +
  36 + @override
  37 + String toString() {
  38 + return 'VadModelConfig(sileroVad: $sileroVad, sampleRate: $sampleRate, numThreads: $numThreads, provider: $provider, debug: $debug)';
  39 + }
  40 +
  41 + final SileroVadModelConfig sileroVad;
  42 + final int sampleRate;
  43 + final int numThreads;
  44 + final String provider;
  45 + final bool debug;
  46 +}
  47 +
  48 +class SpeechSegment {
  49 + SpeechSegment({required this.samples, required this.start});
  50 + final Float32List samples;
  51 + final int start;
  52 +}
  53 +
  54 +class CircularBuffer {
  55 + CircularBuffer._({required this.ptr});
  56 +
  57 + /// The user has to invoke CircularBuffer.free() on the returned instance
  58 + /// to avoid memory leak.
  59 + factory CircularBuffer({required int capacity}) {
  60 + assert(capacity > 0, 'capacity is $capacity');
  61 + final p =
  62 + SherpaOnnxBindings.createCircularBuffer?.call(capacity) ?? nullptr;
  63 +
  64 + return CircularBuffer._(ptr: p);
  65 + }
  66 +
  67 + void free() {
  68 + SherpaOnnxBindings.destroyCircularBuffer?.call(ptr);
  69 + ptr = nullptr;
  70 + }
  71 +
  72 + void push(Float32List data) {
  73 + final n = data.length;
  74 + final Pointer<Float> p = calloc<Float>(n);
  75 +
  76 + final pList = p.asTypedList(n);
  77 + pList.setAll(0, data);
  78 +
  79 + SherpaOnnxBindings.circularBufferPush?.call(this.ptr, p, n);
  80 +
  81 + calloc.free(p);
  82 + }
  83 +
  84 + Float32List get({required int startIndex, required int n}) {
  85 + final Pointer<Float> p =
  86 + SherpaOnnxBindings.circularBufferGet?.call(this.ptr, startIndex, n) ??
  87 + nullptr;
  88 +
  89 + if (p == nullptr) {
  90 + return Float32List(0);
  91 + }
  92 +
  93 + final pList = p.asTypedList(n);
  94 + final Float32List ans = Float32List.fromList(pList);
  95 +
  96 + SherpaOnnxBindings.circularBufferFree?.call(p);
  97 +
  98 + return ans;
  99 + }
  100 +
  101 + void pop(int n) {
  102 + SherpaOnnxBindings.circularBufferPop?.call(this.ptr, n);
  103 + }
  104 +
  105 + void reset() {
  106 + SherpaOnnxBindings.circularBufferReset?.call(this.ptr);
  107 + }
  108 +
  109 + int get size => SherpaOnnxBindings.circularBufferSize?.call(this.ptr) ?? 0;
  110 + int get head => SherpaOnnxBindings.circularBufferHead?.call(this.ptr) ?? 0;
  111 +
  112 + Pointer<SherpaOnnxCircularBuffer> ptr;
  113 +}
  114 +
  115 +class VoiceActivityDetector {
  116 + VoiceActivityDetector._({required this.ptr});
  117 +
  118 + // The user has to invoke VoiceActivityDetector.free() to avoid memory leak.
  119 + factory VoiceActivityDetector(
  120 + {required VadModelConfig config, required double bufferSizeInSeconds}) {
  121 + final c = calloc<SherpaOnnxVadModelConfig>();
  122 +
  123 + final modelPtr = config.sileroVad.model.toNativeUtf8();
  124 + c.ref.sileroVad.model = modelPtr;
  125 +
  126 + c.ref.sileroVad.threshold = config.sileroVad.threshold;
  127 + c.ref.sileroVad.minSilenceDuration = config.sileroVad.minSilenceDuration;
  128 + c.ref.sileroVad.minSpeechDuration = config.sileroVad.minSpeechDuration;
  129 + c.ref.sileroVad.windowSize = config.sileroVad.windowSize;
  130 +
  131 + c.ref.sampleRate = config.sampleRate;
  132 + c.ref.numThreads = config.numThreads;
  133 +
  134 + final providerPtr = config.provider.toNativeUtf8();
  135 + c.ref.provider = providerPtr;
  136 +
  137 + c.ref.debug = config.debug ? 1 : 0;
  138 +
  139 + final ptr = SherpaOnnxBindings.createVoiceActivityDetector
  140 + ?.call(c, bufferSizeInSeconds) ??
  141 + nullptr;
  142 +
  143 + calloc.free(providerPtr);
  144 + calloc.free(modelPtr);
  145 + calloc.free(c);
  146 +
  147 + return VoiceActivityDetector._(ptr: ptr);
  148 + }
  149 +
  150 + void free() {
  151 + SherpaOnnxBindings.destroyVoiceActivityDetector?.call(ptr);
  152 + ptr = nullptr;
  153 + }
  154 +
  155 + void acceptWaveform(Float32List samples) {
  156 + final n = samples.length;
  157 + final Pointer<Float> p = calloc<Float>(n);
  158 +
  159 + final pList = p.asTypedList(n);
  160 + pList.setAll(0, samples);
  161 +
  162 + SherpaOnnxBindings.voiceActivityDetectorAcceptWaveform
  163 + ?.call(this.ptr, p, n);
  164 +
  165 + calloc.free(p);
  166 + }
  167 +
  168 + bool isEmpty() {
  169 + final int empty =
  170 + SherpaOnnxBindings.voiceActivityDetectorEmpty?.call(this.ptr) ?? 0;
  171 +
  172 + return empty == 1;
  173 + }
  174 +
  175 + bool isDetected() {
  176 + final int detected =
  177 + SherpaOnnxBindings.voiceActivityDetectorDetected?.call(this.ptr) ?? 0;
  178 +
  179 + return detected == 1;
  180 + }
  181 +
  182 + void pop() {
  183 + SherpaOnnxBindings.voiceActivityDetectorPop?.call(this.ptr);
  184 + }
  185 +
  186 + void clear() {
  187 + SherpaOnnxBindings.voiceActivityDetectorClear?.call(this.ptr);
  188 + }
  189 +
  190 + SpeechSegment front() {
  191 + final Pointer<SherpaOnnxSpeechSegment> segment =
  192 + SherpaOnnxBindings.voiceActivityDetectorFront?.call(this.ptr) ??
  193 + nullptr;
  194 + if (segment == nullptr) {
  195 + return SpeechSegment(samples: Float32List(0), start: 0);
  196 + }
  197 +
  198 + final sampleList = segment.ref.samples.asTypedList(segment.ref.n);
  199 + final start = segment.ref.start;
  200 +
  201 + final samples = Float32List.fromList(sampleList);
  202 +
  203 + SherpaOnnxBindings.destroySpeechSegment?.call(segment);
  204 +
  205 + return SpeechSegment(samples: samples, start: start);
  206 + }
  207 +
  208 + void reset() {
  209 + SherpaOnnxBindings.voiceActivityDetectorReset?.call(this.ptr);
  210 + }
  211 +
  212 + Pointer<SherpaOnnxVoiceActivityDetector> ptr;
  213 +}
@@ -2,7 +2,8 @@ @@ -2,7 +2,8 @@
2 import 'dart:ffi'; 2 import 'dart:ffi';
3 import 'dart:typed_data'; 3 import 'dart:typed_data';
4 import 'package:ffi/ffi.dart'; 4 import 'package:ffi/ffi.dart';
5 -import "./sherpa_onnx_bindings.dart"; 5 +
  6 +import './sherpa_onnx_bindings.dart';
6 7
7 class WaveData { 8 class WaveData {
8 WaveData({required this.samples, required this.sampleRate}); 9 WaveData({required this.samples, required this.sampleRate});