Fangjun Kuang
Committed by GitHub

Add node-addon-api for VAD (#864)

@@ -38,3 +38,12 @@ node ./test_asr_streaming_transducer.js @@ -38,3 +38,12 @@ node ./test_asr_streaming_transducer.js
38 38
39 node ./test_asr_streaming_transducer_microphone.js 39 node ./test_asr_streaming_transducer_microphone.js
40 ``` 40 ```
  41 +
  42 +# VAD
  43 +
  44 +```bash
  45 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  46 +
  47 +node ./test_vad_microphone.js
  48 +```
  49 +
@@ -36,10 +36,10 @@ console.log('Started') @@ -36,10 +36,10 @@ console.log('Started')
36 let start = performance.now(); 36 let start = performance.now();
37 const stream = recognizer.createStream(); 37 const stream = recognizer.createStream();
38 const wave = sherpa_onnx.readWave(waveFilename); 38 const wave = sherpa_onnx.readWave(waveFilename);
39 -stream.acceptWaveform(wave.samples, wave.sampleRate); 39 +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
40 40
41 const tailPadding = new Float32Array(wave.sampleRate * 0.4); 41 const tailPadding = new Float32Array(wave.sampleRate * 0.4);
42 -stream.acceptWaveform(tailPadding, wave.sampleRate); 42 +stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
43 43
44 while (recognizer.isReady(stream)) { 44 while (recognizer.isReady(stream)) {
45 recognizer.decode(stream); 45 recognizer.decode(stream);
@@ -60,7 +60,8 @@ const display = new sherpa_onnx.Display(50); @@ -60,7 +60,8 @@ const display = new sherpa_onnx.Display(50);
60 ai.on('data', data => { 60 ai.on('data', data => {
61 const samples = new Float32Array(data.buffer); 61 const samples = new Float32Array(data.buffer);
62 62
63 - stream.acceptWaveform(samples, recognizer.config.featConfig.sampleRate); 63 + stream.acceptWaveform(
  64 + {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});
64 65
65 while (recognizer.isReady(stream)) { 66 while (recognizer.isReady(stream)) {
66 recognizer.decode(stream); 67 recognizer.decode(stream);
  1 +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const portAudio = require('naudiodon2');
  4 +// console.log(portAudio.getDevices());
  5 +
  6 +const sherpa_onnx = require('sherpa-onnx-node');
  7 +
// Builds the voice activity detector used by this example.
//
// The silero VAD model is expected at ./silero_vad.onnx. Please download it
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// Returns a sherpa_onnx.Vad instance.
function createVad() {
  // Options specific to the silero VAD model.
  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    windowSize: 512,
  };

  const config = {
    sileroVad,
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  // Passed as the second constructor argument of Vad.
  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}
  28 +
// Declared with const so that the detector is not leaked as an implicit
// global (and so that the script also works in strict mode).
const vad = createVad();

// Staging buffer between the microphone callback and the detector. Its
// capacity is expressed in samples.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate,
  }
});

// True while the "Detected speech" message has already been printed for the
// speech that is currently in progress.
let printed = false;
// Running number of the next speech segment; also used in output filenames.
let index = 0;

ai.on('data', (data) => {
  const windowSize = vad.config.sileroVad.windowSize;

  // A Node.js Buffer may be a view into a larger ArrayBuffer, so honor its
  // byteOffset and byteLength instead of wrapping the whole backing store.
  const numSamples =
      Math.floor(data.byteLength / Float32Array.BYTES_PER_ELEMENT);
  buffer.push(new Float32Array(data.buffer, data.byteOffset, numSamples));

  // Feed the detector one window at a time. ">=" so that a buffer holding
  // exactly one full window is processed right away.
  while (buffer.size() >= windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);

    const isSpeech = vad.isDetected();
    if (isSpeech && !printed) {
      console.log(`${index}: Detected speech`);
      printed = true;
    }

    if (!isSpeech) {
      printed = false;
    }

    // Drain every completed speech segment and save each one to a wave file.
    while (!vad.isEmpty()) {
      const segment = vad.front();
      vad.pop();
      const filename = `${index}-${
          new Date()
              .toLocaleTimeString('en-US', {hour12: false})
              .split(' ')[0]}.wav`;
      const ok = sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});
      const duration = segment.samples.length / vad.config.sampleRate;
      console.log(`${index} End of speech. Duration: ${duration} seconds`);
      if (ok) {
        console.log(`Saved to ${filename}`);
      } else {
        // Do not claim success when the writer reports a failure.
        console.error(`Failed to save ${filename}`);
      }
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
});

ai.start();
console.log('Started! Please speak');
@@ -47,7 +47,7 @@ python3 ./python-api-examples/offline-tts-play.py \ @@ -47,7 +47,7 @@ python3 ./python-api-examples/offline-tts-play.py \
47 --vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \ 47 --vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
48 --vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \ 48 --vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
49 --vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \ 49 --vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
50 - --tts-rule-fsts='./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst' \ 50 + --tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
51 --vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \ 51 --vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \
52 --sid=2 \ 52 --sid=2 \
53 --output-filename=./test-2.wav \ 53 --output-filename=./test-2.wav \
@@ -48,7 +48,7 @@ python3 ./python-api-examples/offline-tts.py \ @@ -48,7 +48,7 @@ python3 ./python-api-examples/offline-tts.py \
48 --vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \ 48 --vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
49 --vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \ 49 --vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
50 --vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \ 50 --vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
51 - --tts-rule-fsts='./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst' \ 51 + --tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
52 --vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \ 52 --vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \
53 --sid=2 \ 53 --sid=2 \
54 --output-filename=./test-2.wav \ 54 --output-filename=./test-2.wav \
@@ -20,7 +20,9 @@ include_directories(${CMAKE_JS_INC}) @@ -20,7 +20,9 @@ include_directories(${CMAKE_JS_INC})
20 set(srcs 20 set(srcs
21 src/sherpa-onnx-node-addon-api.cc 21 src/sherpa-onnx-node-addon-api.cc
22 src/streaming-asr.cc 22 src/streaming-asr.cc
  23 + src/vad.cc
23 src/wave-reader.cc 24 src/wave-reader.cc
  25 + src/wave-writer.cc
24 ) 26 )
25 27
26 if(NOT DEFINED ENV{SHERPA_ONNX_INSTALL_DIR}) 28 if(NOT DEFINED ENV{SHERPA_ONNX_INSTALL_DIR})
1 const addon = require('./addon.js') 1 const addon = require('./addon.js')
2 const streaming_asr = require('./streaming-asr.js'); 2 const streaming_asr = require('./streaming-asr.js');
  3 +const vad = require('./vad.js');
3 4
4 module.exports = { 5 module.exports = {
5 OnlineRecognizer: streaming_asr.OnlineRecognizer, 6 OnlineRecognizer: streaming_asr.OnlineRecognizer,
6 readWave: addon.readWave, 7 readWave: addon.readWave,
  8 + writeWave: addon.writeWave,
7 Display: streaming_asr.Display, 9 Display: streaming_asr.Display,
  10 + Vad: vad.Vad,
  11 + CircularBuffer: vad.CircularBuffer,
8 } 12 }
@@ -15,10 +15,11 @@ class OnlineStream { @@ -15,10 +15,11 @@ class OnlineStream {
15 this.handle = handle; 15 this.handle = handle;
16 } 16 }
17 17
  18 + // obj is {samples: samples, sampleRate: sampleRate}
18 // samples is a float32 array containing samples in the range [-1, 1] 19 // samples is a float32 array containing samples in the range [-1, 1]
19 - acceptWaveform(samples, sampleRate) {  
20 - addon.acceptWaveformOnline(  
21 - this.handle, {samples: samples, sampleRate: sampleRate}) 20 + // sampleRate is a number
  21 + acceptWaveform(obj) {
  22 + addon.acceptWaveformOnline(this.handle, obj)
22 } 23 }
23 24
24 inputFinished() { 25 inputFinished() {
  1 +const addon = require('./addon.js');
  2 +
/**
 * Thin JavaScript wrapper around the circular buffer functions exported by
 * the native addon. Every method forwards to the addon together with the
 * native handle created in the constructor.
 */
class CircularBuffer {
  /**
   * @param capacity Forwarded unchanged to addon.createCircularBuffer().
   */
  constructor(capacity) {
    this.handle = addon.createCircularBuffer(capacity);
  }

  /**
   * Appends samples to the buffer.
   * @param samples A float32 array.
   */
  push(samples) {
    const {handle} = this;
    addon.circularBufferPush(handle, samples);
  }

  /**
   * @param startIndex Forwarded to the addon as the start position.
   * @param n Forwarded to the addon as the number of samples.
   * @returns A float32 array.
   */
  get(startIndex, n) {
    const {handle} = this;
    return addon.circularBufferGet(handle, startIndex, n);
  }

  /**
   * @param n Forwarded to addon.circularBufferPop().
   * @returns Whatever addon.circularBufferPop() returns.
   */
  pop(n) {
    const {handle} = this;
    return addon.circularBufferPop(handle, n);
  }

  /** @returns The value of addon.circularBufferSize() for this buffer. */
  size() {
    const {handle} = this;
    return addon.circularBufferSize(handle);
  }

  /** @returns The value of addon.circularBufferHead() for this buffer. */
  head() {
    const {handle} = this;
    return addon.circularBufferHead(handle);
  }

  /** @returns Whatever addon.circularBufferReset() returns. */
  reset() {
    const {handle} = this;
    return addon.circularBufferReset(handle);
  }
}
  34 +
/**
 * JavaScript wrapper around the voice activity detector functions exported by
 * the native addon.
 *
 * Example config:
 *
 *   {
 *     sileroVad: {
 *       model: "./silero_vad.onnx",
 *       threshold: 0.5,
 *     }
 *   }
 */
class Vad {
  /**
   * @param config Configuration object; it is passed to the addon and is also
   *     kept as `this.config` so callers can read it back.
   * @param bufferSizeInSeconds Forwarded to
   *     addon.createVoiceActivityDetector().
   */
  constructor(config, bufferSizeInSeconds) {
    this.handle =
        addon.createVoiceActivityDetector(config, bufferSizeInSeconds);
    this.config = config;
  }

  /**
   * Feeds samples to the detector.
   * @param samples A float32 array.
   */
  acceptWaveform(samples) {
    addon.voiceActivityDetectorAcceptWaveform(this.handle, samples);
  }

  /** @returns The value of addon.voiceActivityDetectorIsEmpty(). */
  isEmpty() {
    return addon.voiceActivityDetectorIsEmpty(this.handle);
  }

  /** @returns The value of addon.voiceActivityDetectorIsDetected(). */
  isDetected() {
    return addon.voiceActivityDetectorIsDetected(this.handle);
  }

  /** Forwards to addon.voiceActivityDetectorPop(). */
  pop() {
    addon.voiceActivityDetectorPop(this.handle);
  }

  /** Forwards to addon.voiceActivityDetectorClear(). */
  clear() {
    // The addon registers this function as "voiceActivityDetectorClear";
    // "VoiceActivityDetectorClearWrapper" is only the name of the C++ symbol
    // and is not a property of the addon object.
    addon.voiceActivityDetectorClear(this.handle);
  }

  /**
   * @returns An object of the form
   *
   *   {
   *     samples: a 1-d float32 array,
   *     start: an int32
   *   }
   */
  front() {
    return addon.voiceActivityDetectorFront(this.handle);
  }

  /** Forwards to addon.voiceActivityDetectorReset(). */
  reset() {
    // Registered by the addon as "voiceActivityDetectorReset" (see clear()).
    addon.voiceActivityDetectorReset(this.handle);
  }
}
  84 +
  85 +module.exports = {
  86 + Vad,
  87 + CircularBuffer,
  88 +}
@@ -5,10 +5,14 @@ @@ -5,10 +5,14 @@
5 5
6 void InitStreamingAsr(Napi::Env env, Napi::Object exports); 6 void InitStreamingAsr(Napi::Env env, Napi::Object exports);
7 void InitWaveReader(Napi::Env env, Napi::Object exports); 7 void InitWaveReader(Napi::Env env, Napi::Object exports);
  8 +void InitWaveWriter(Napi::Env env, Napi::Object exports);
  9 +void InitVad(Napi::Env env, Napi::Object exports);
8 10
9 Napi::Object Init(Napi::Env env, Napi::Object exports) { 11 Napi::Object Init(Napi::Env env, Napi::Object exports) {
10 InitStreamingAsr(env, exports); 12 InitStreamingAsr(env, exports);
11 InitWaveReader(env, exports); 13 InitWaveReader(env, exports);
  14 + InitWaveWriter(env, exports);
  15 + InitVad(env, exports);
12 16
13 return exports; 17 return exports;
14 } 18 }
@@ -125,8 +125,13 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { @@ -125,8 +125,13 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
125 config.provider = p; 125 config.provider = p;
126 } 126 }
127 127
128 - if (o.Has("debug") && o.Get("debug").IsNumber()) {  
129 - config.debug = o.Get("debug").As<Napi::Number>().Int32Value(); 128 + if (o.Has("debug") &&
  129 + (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
  130 + if (o.Get("debug").IsBoolean()) {
  131 + config.debug = o.Get("debug").As<Napi::Boolean>().Value();
  132 + } else {
  133 + config.debug = o.Get("debug").As<Napi::Number>().Int32Value();
  134 + }
130 } 135 }
131 136
132 if (o.Has("modelType") && o.Get("modelType").IsString()) { 137 if (o.Has("modelType") && o.Get("modelType").IsString()) {
  1 +// scripts/node-addon-api/src/vad.cc
  2 +//
  3 +// Copyright (c) 2024 Xiaomi Corporation
  4 +
  5 +#include <sstream>
  6 +
  7 +#include "napi.h" // NOLINT
  8 +#include "sherpa-onnx/c-api/c-api.h"
  9 +
  10 +static Napi::External<SherpaOnnxCircularBuffer> CreateCircularBufferWrapper(
  11 + const Napi::CallbackInfo &info) {
  12 + Napi::Env env = info.Env();
  13 + if (info.Length() != 1) {
  14 + std::ostringstream os;
  15 + os << "Expect only 1 argument. Given: " << info.Length();
  16 +
  17 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  18 +
  19 + return {};
  20 + }
  21 +
  22 + if (!info[0].IsNumber()) {
  23 + Napi::TypeError::New(env, "You should pass an integer as the argument.")
  24 + .ThrowAsJavaScriptException();
  25 +
  26 + return {};
  27 + }
  28 +
  29 + SherpaOnnxCircularBuffer *buf =
  30 + SherpaOnnxCreateCircularBuffer(info[0].As<Napi::Number>().Int32Value());
  31 +
  32 + return Napi::External<SherpaOnnxCircularBuffer>::New(
  33 + env, buf, [](Napi::Env env, SherpaOnnxCircularBuffer *p) {
  34 + SherpaOnnxDestroyCircularBuffer(p);
  35 + });
  36 +}
  37 +
  38 +static void CircularBufferPushWrapper(const Napi::CallbackInfo &info) {
  39 + Napi::Env env = info.Env();
  40 +
  41 + if (info.Length() != 2) {
  42 + std::ostringstream os;
  43 + os << "Expect only 2 arguments. Given: " << info.Length();
  44 +
  45 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  46 +
  47 + return;
  48 + }
  49 +
  50 + if (!info[0].IsExternal()) {
  51 + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
  52 + .ThrowAsJavaScriptException();
  53 +
  54 + return;
  55 + }
  56 +
  57 + SherpaOnnxCircularBuffer *buf =
  58 + info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
  59 +
  60 + if (!info[1].IsTypedArray()) {
  61 + Napi::TypeError::New(env, "Argument 1 should be a Float32Array.")
  62 + .ThrowAsJavaScriptException();
  63 +
  64 + return;
  65 + }
  66 +
  67 + Napi::Float32Array data = info[1].As<Napi::Float32Array>();
  68 + SherpaOnnxCircularBufferPush(buf, data.Data(), data.ElementLength());
  69 +}
  70 +
  71 +// see https://github.com/nodejs/node-addon-api/blob/main/doc/typed_array.md
  72 +// https://github.com/nodejs/node-addon-examples/blob/main/src/2-js-to-native-conversion/typed_array_to_native/node-addon-api/typed_array_to_native.cc
  73 +static Napi::Float32Array CircularBufferGetWrapper(
  74 + const Napi::CallbackInfo &info) {
  75 + Napi::Env env = info.Env();
  76 +
  77 + if (info.Length() != 3) {
  78 + std::ostringstream os;
  79 + os << "Expect only 3 arguments. Given: " << info.Length();
  80 +
  81 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  82 +
  83 + return {};
  84 + }
  85 +
  86 + if (!info[0].IsExternal()) {
  87 + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
  88 + .ThrowAsJavaScriptException();
  89 +
  90 + return {};
  91 + }
  92 +
  93 + SherpaOnnxCircularBuffer *buf =
  94 + info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
  95 +
  96 + if (!info[1].IsNumber()) {
  97 + Napi::TypeError::New(env, "Argument 1 should be an integer (startIndex).")
  98 + .ThrowAsJavaScriptException();
  99 +
  100 + return {};
  101 + }
  102 +
  103 + if (!info[2].IsNumber()) {
  104 + Napi::TypeError::New(env, "Argument 2 should be an integer (n).")
  105 + .ThrowAsJavaScriptException();
  106 +
  107 + return {};
  108 + }
  109 +
  110 + int32_t start_index = info[1].As<Napi::Number>().Int32Value();
  111 + int32_t n = info[2].As<Napi::Number>().Int32Value();
  112 +
  113 + const float *data = SherpaOnnxCircularBufferGet(buf, start_index, n);
  114 +
  115 + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
  116 + env, const_cast<float *>(data), sizeof(float) * n,
  117 + [](Napi::Env /*env*/, void *p) {
  118 + SherpaOnnxCircularBufferFree(reinterpret_cast<const float *>(p));
  119 + });
  120 +
  121 + Napi::Float32Array float32Array =
  122 + Napi::Float32Array::New(env, n, arrayBuffer, 0);
  123 +
  124 + return float32Array;
  125 +}
  126 +
  127 +static void CircularBufferPopWrapper(const Napi::CallbackInfo &info) {
  128 + Napi::Env env = info.Env();
  129 +
  130 + if (info.Length() != 2) {
  131 + std::ostringstream os;
  132 + os << "Expect only 2 arguments. Given: " << info.Length();
  133 +
  134 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  135 +
  136 + return;
  137 + }
  138 +
  139 + if (!info[0].IsExternal()) {
  140 + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
  141 + .ThrowAsJavaScriptException();
  142 +
  143 + return;
  144 + }
  145 +
  146 + SherpaOnnxCircularBuffer *buf =
  147 + info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
  148 +
  149 + if (!info[1].IsNumber()) {
  150 + Napi::TypeError::New(env, "Argument 1 should be an integer (n).")
  151 + .ThrowAsJavaScriptException();
  152 +
  153 + return;
  154 + }
  155 +
  156 + int32_t n = info[1].As<Napi::Number>().Int32Value();
  157 +
  158 + SherpaOnnxCircularBufferPop(buf, n);
  159 +}
  160 +
  161 +static Napi::Number CircularBufferSizeWrapper(const Napi::CallbackInfo &info) {
  162 + Napi::Env env = info.Env();
  163 +
  164 + if (info.Length() != 1) {
  165 + std::ostringstream os;
  166 + os << "Expect only 1 argument. Given: " << info.Length();
  167 +
  168 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  169 +
  170 + return {};
  171 + }
  172 +
  173 + if (!info[0].IsExternal()) {
  174 + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
  175 + .ThrowAsJavaScriptException();
  176 +
  177 + return {};
  178 + }
  179 +
  180 + SherpaOnnxCircularBuffer *buf =
  181 + info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
  182 +
  183 + int32_t size = SherpaOnnxCircularBufferSize(buf);
  184 +
  185 + return Napi::Number::New(env, size);
  186 +}
  187 +
  188 +static Napi::Number CircularBufferHeadWrapper(const Napi::CallbackInfo &info) {
  189 + Napi::Env env = info.Env();
  190 +
  191 + if (info.Length() != 1) {
  192 + std::ostringstream os;
  193 + os << "Expect only 1 argument. Given: " << info.Length();
  194 +
  195 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  196 +
  197 + return {};
  198 + }
  199 +
  200 + if (!info[0].IsExternal()) {
  201 + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
  202 + .ThrowAsJavaScriptException();
  203 +
  204 + return {};
  205 + }
  206 +
  207 + SherpaOnnxCircularBuffer *buf =
  208 + info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
  209 +
  210 + int32_t size = SherpaOnnxCircularBufferHead(buf);
  211 +
  212 + return Napi::Number::New(env, size);
  213 +}
  214 +
  215 +static void CircularBufferResetWrapper(const Napi::CallbackInfo &info) {
  216 + Napi::Env env = info.Env();
  217 +
  218 + if (info.Length() != 1) {
  219 + std::ostringstream os;
  220 + os << "Expect only 1 argument. Given: " << info.Length();
  221 +
  222 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  223 +
  224 + return;
  225 + }
  226 +
  227 + if (!info[0].IsExternal()) {
  228 + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
  229 + .ThrowAsJavaScriptException();
  230 +
  231 + return;
  232 + }
  233 +
  234 + SherpaOnnxCircularBuffer *buf =
  235 + info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
  236 +
  237 + SherpaOnnxCircularBufferReset(buf);
  238 +}
  239 +
  240 +static SherpaOnnxSileroVadModelConfig GetSileroVadConfig(
  241 + const Napi::Object &obj) {
  242 + SherpaOnnxSileroVadModelConfig c;
  243 + memset(&c, 0, sizeof(c));
  244 +
  245 + if (!obj.Has("sileroVad") || !obj.Get("sileroVad").IsObject()) {
  246 + return c;
  247 + }
  248 +
  249 + Napi::Object o = obj.Get("sileroVad").As<Napi::Object>();
  250 +
  251 + if (o.Has("model") && o.Get("model").IsString()) {
  252 + Napi::String model = o.Get("model").As<Napi::String>();
  253 + std::string s = model.Utf8Value();
  254 + char *p = new char[s.size() + 1];
  255 + std::copy(s.begin(), s.end(), p);
  256 + p[s.size()] = 0;
  257 +
  258 + c.model = p;
  259 + }
  260 +
  261 + if (o.Has("threshold") && o.Get("threshold").IsNumber()) {
  262 + c.threshold = o.Get("threshold").As<Napi::Number>().FloatValue();
  263 + }
  264 +
  265 + if (o.Has("minSilenceDuration") && o.Get("minSilenceDuration").IsNumber()) {
  266 + c.min_silence_duration =
  267 + o.Get("minSilenceDuration").As<Napi::Number>().FloatValue();
  268 + }
  269 +
  270 + if (o.Has("minSpeechDuration") && o.Get("minSpeechDuration").IsNumber()) {
  271 + c.min_speech_duration =
  272 + o.Get("minSpeechDuration").As<Napi::Number>().FloatValue();
  273 + }
  274 +
  275 + if (o.Has("windowSize") && o.Get("windowSize").IsNumber()) {
  276 + c.window_size = o.Get("windowSize").As<Napi::Number>().Int32Value();
  277 + }
  278 +
  279 + return c;
  280 +}
  281 +
  282 +static Napi::External<SherpaOnnxVoiceActivityDetector>
  283 +CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) {
  284 + Napi::Env env = info.Env();
  285 + if (info.Length() != 2) {
  286 + std::ostringstream os;
  287 + os << "Expect only 2 arguments. Given: " << info.Length();
  288 +
  289 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  290 +
  291 + return {};
  292 + }
  293 +
  294 + if (!info[0].IsObject()) {
  295 + Napi::TypeError::New(env,
  296 + "You should pass an object as the first argument.")
  297 + .ThrowAsJavaScriptException();
  298 +
  299 + return {};
  300 + }
  301 +
  302 + if (!info[1].IsNumber()) {
  303 + Napi::TypeError::New(env,
  304 + "You should pass an integer as the second argument.")
  305 + .ThrowAsJavaScriptException();
  306 +
  307 + return {};
  308 + }
  309 +
  310 + Napi::Object o = info[0].As<Napi::Object>();
  311 +
  312 + SherpaOnnxVadModelConfig c;
  313 + memset(&c, 0, sizeof(c));
  314 + c.silero_vad = GetSileroVadConfig(o);
  315 +
  316 + if (o.Has("sampleRate") && o.Get("sampleRate").IsNumber()) {
  317 + c.sample_rate = o.Get("sampleRate").As<Napi::Number>().Int32Value();
  318 + }
  319 +
  320 + if (o.Has("numThreads") && o.Get("numThreads").IsNumber()) {
  321 + c.num_threads = o.Get("numThreads").As<Napi::Number>().Int32Value();
  322 + }
  323 +
  324 + if (o.Has("provider") && o.Get("provider").IsString()) {
  325 + Napi::String provider = o.Get("provider").As<Napi::String>();
  326 + std::string s = provider.Utf8Value();
  327 + char *p = new char[s.size() + 1];
  328 + std::copy(s.begin(), s.end(), p);
  329 + p[s.size()] = 0;
  330 +
  331 + c.provider = p;
  332 + }
  333 +
  334 + if (o.Has("debug") &&
  335 + (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
  336 + if (o.Get("debug").IsBoolean()) {
  337 + c.debug = o.Get("debug").As<Napi::Boolean>().Value();
  338 + } else {
  339 + c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
  340 + }
  341 + }
  342 +
  343 + float buffer_size_in_seconds = info[1].As<Napi::Number>().FloatValue();
  344 +
  345 + SherpaOnnxVoiceActivityDetector *vad =
  346 + SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds);
  347 +
  348 + if (c.silero_vad.model) {
  349 + delete[] c.silero_vad.model;
  350 + }
  351 +
  352 + if (c.provider) {
  353 + delete[] c.provider;
  354 + }
  355 +
  356 + return Napi::External<SherpaOnnxVoiceActivityDetector>::New(
  357 + env, vad, [](Napi::Env env, SherpaOnnxVoiceActivityDetector *p) {
  358 + SherpaOnnxDestroyVoiceActivityDetector(p);
  359 + });
  360 +}
  361 +
  362 +static void VoiceActivityDetectorAcceptWaveformWrapper(
  363 + const Napi::CallbackInfo &info) {
  364 + Napi::Env env = info.Env();
  365 +
  366 + if (info.Length() != 2) {
  367 + std::ostringstream os;
  368 + os << "Expect only 2 arguments. Given: " << info.Length();
  369 +
  370 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  371 +
  372 + return;
  373 + }
  374 +
  375 + if (!info[0].IsExternal()) {
  376 + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
  377 + .ThrowAsJavaScriptException();
  378 +
  379 + return;
  380 + }
  381 +
  382 + SherpaOnnxVoiceActivityDetector *vad =
  383 + info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
  384 +
  385 + if (!info[1].IsTypedArray()) {
  386 + Napi::TypeError::New(
  387 + env, "Argument 1 should be a Float32Array containing samples")
  388 + .ThrowAsJavaScriptException();
  389 +
  390 + return;
  391 + }
  392 +
  393 + Napi::Float32Array samples = info[1].As<Napi::Float32Array>();
  394 +
  395 + SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, samples.Data(),
  396 + samples.ElementLength());
  397 +}
  398 +
  399 +static Napi::Boolean VoiceActivityDetectorEmptyWrapper(
  400 + const Napi::CallbackInfo &info) {
  401 + Napi::Env env = info.Env();
  402 +
  403 + if (info.Length() != 1) {
  404 + std::ostringstream os;
  405 + os << "Expect only 1 argument. Given: " << info.Length();
  406 +
  407 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  408 +
  409 + return {};
  410 + }
  411 +
  412 + if (!info[0].IsExternal()) {
  413 + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
  414 + .ThrowAsJavaScriptException();
  415 +
  416 + return {};
  417 + }
  418 +
  419 + SherpaOnnxVoiceActivityDetector *vad =
  420 + info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
  421 +
  422 + int32_t is_empty = SherpaOnnxVoiceActivityDetectorEmpty(vad);
  423 +
  424 + return Napi::Boolean::New(env, is_empty);
  425 +}
  426 +
  427 +static Napi::Boolean VoiceActivityDetectorDetectedWrapper(
  428 + const Napi::CallbackInfo &info) {
  429 + Napi::Env env = info.Env();
  430 +
  431 + if (info.Length() != 1) {
  432 + std::ostringstream os;
  433 + os << "Expect only 1 argument. Given: " << info.Length();
  434 +
  435 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  436 +
  437 + return {};
  438 + }
  439 +
  440 + if (!info[0].IsExternal()) {
  441 + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
  442 + .ThrowAsJavaScriptException();
  443 +
  444 + return {};
  445 + }
  446 +
  447 + SherpaOnnxVoiceActivityDetector *vad =
  448 + info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
  449 +
  450 + int32_t is_detected = SherpaOnnxVoiceActivityDetectorDetected(vad);
  451 +
  452 + return Napi::Boolean::New(env, is_detected);
  453 +}
  454 +
  455 +static void VoiceActivityDetectorPopWrapper(const Napi::CallbackInfo &info) {
  456 + Napi::Env env = info.Env();
  457 +
  458 + if (info.Length() != 1) {
  459 + std::ostringstream os;
  460 + os << "Expect only 1 argument. Given: " << info.Length();
  461 +
  462 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  463 +
  464 + return;
  465 + }
  466 +
  467 + if (!info[0].IsExternal()) {
  468 + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
  469 + .ThrowAsJavaScriptException();
  470 +
  471 + return;
  472 + }
  473 +
  474 + SherpaOnnxVoiceActivityDetector *vad =
  475 + info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
  476 +
  477 + SherpaOnnxVoiceActivityDetectorPop(vad);
  478 +}
  479 +
  480 +static void VoiceActivityDetectorClearWrapper(const Napi::CallbackInfo &info) {
  481 + Napi::Env env = info.Env();
  482 +
  483 + if (info.Length() != 1) {
  484 + std::ostringstream os;
  485 + os << "Expect only 1 argument. Given: " << info.Length();
  486 +
  487 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  488 +
  489 + return;
  490 + }
  491 +
  492 + if (!info[0].IsExternal()) {
  493 + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
  494 + .ThrowAsJavaScriptException();
  495 +
  496 + return;
  497 + }
  498 +
  499 + SherpaOnnxVoiceActivityDetector *vad =
  500 + info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
  501 +
  502 + SherpaOnnxVoiceActivityDetectorClear(vad);
  503 +}
  504 +
  505 +static Napi::Object VoiceActivityDetectorFrontWrapper(
  506 + const Napi::CallbackInfo &info) {
  507 + Napi::Env env = info.Env();
  508 +
  509 + if (info.Length() != 1) {
  510 + std::ostringstream os;
  511 + os << "Expect only 1 argument. Given: " << info.Length();
  512 +
  513 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  514 +
  515 + return {};
  516 + }
  517 +
  518 + if (!info[0].IsExternal()) {
  519 + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
  520 + .ThrowAsJavaScriptException();
  521 +
  522 + return {};
  523 + }
  524 +
  525 + SherpaOnnxVoiceActivityDetector *vad =
  526 + info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
  527 +
  528 + const SherpaOnnxSpeechSegment *segment =
  529 + SherpaOnnxVoiceActivityDetectorFront(vad);
  530 +
  531 + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
  532 + env, const_cast<float *>(segment->samples), sizeof(float) * segment->n,
  533 + [](Napi::Env /*env*/, void * /*data*/,
  534 + const SherpaOnnxSpeechSegment *hint) {
  535 + SherpaOnnxDestroySpeechSegment(hint);
  536 + },
  537 + segment);
  538 +
  539 + Napi::Float32Array float32Array =
  540 + Napi::Float32Array::New(env, segment->n, arrayBuffer, 0);
  541 +
  542 + Napi::Object obj = Napi::Object::New(env);
  543 + obj.Set(Napi::String::New(env, "start"), segment->start);
  544 + obj.Set(Napi::String::New(env, "samples"), float32Array);
  545 +
  546 + return obj;
  547 +}
  548 +
  549 +static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) {
  550 + Napi::Env env = info.Env();
  551 +
  552 + if (info.Length() != 1) {
  553 + std::ostringstream os;
  554 + os << "Expect only 1 argument. Given: " << info.Length();
  555 +
  556 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  557 +
  558 + return;
  559 + }
  560 +
  561 + if (!info[0].IsExternal()) {
  562 + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
  563 + .ThrowAsJavaScriptException();
  564 +
  565 + return;
  566 + }
  567 +
  568 + SherpaOnnxVoiceActivityDetector *vad =
  569 + info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
  570 +
  571 + SherpaOnnxVoiceActivityDetectorReset(vad);
  572 +}
  573 +
  574 +void InitVad(Napi::Env env, Napi::Object exports) {
  575 + exports.Set(Napi::String::New(env, "createCircularBuffer"),
  576 + Napi::Function::New(env, CreateCircularBufferWrapper));
  577 +
  578 + exports.Set(Napi::String::New(env, "circularBufferPush"),
  579 + Napi::Function::New(env, CircularBufferPushWrapper));
  580 +
  581 + exports.Set(Napi::String::New(env, "circularBufferGet"),
  582 + Napi::Function::New(env, CircularBufferGetWrapper));
  583 +
  584 + exports.Set(Napi::String::New(env, "circularBufferPop"),
  585 + Napi::Function::New(env, CircularBufferPopWrapper));
  586 +
  587 + exports.Set(Napi::String::New(env, "circularBufferSize"),
  588 + Napi::Function::New(env, CircularBufferSizeWrapper));
  589 +
  590 + exports.Set(Napi::String::New(env, "circularBufferHead"),
  591 + Napi::Function::New(env, CircularBufferHeadWrapper));
  592 +
  593 + exports.Set(Napi::String::New(env, "circularBufferReset"),
  594 + Napi::Function::New(env, CircularBufferResetWrapper));
  595 +
  596 + exports.Set(Napi::String::New(env, "createVoiceActivityDetector"),
  597 + Napi::Function::New(env, CreateVoiceActivityDetectorWrapper));
  598 +
  599 + exports.Set(
  600 + Napi::String::New(env, "voiceActivityDetectorAcceptWaveform"),
  601 + Napi::Function::New(env, VoiceActivityDetectorAcceptWaveformWrapper));
  602 +
  603 + exports.Set(Napi::String::New(env, "voiceActivityDetectorIsEmpty"),
  604 + Napi::Function::New(env, VoiceActivityDetectorEmptyWrapper));
  605 +
  606 + exports.Set(Napi::String::New(env, "voiceActivityDetectorIsDetected"),
  607 + Napi::Function::New(env, VoiceActivityDetectorDetectedWrapper));
  608 +
  609 + exports.Set(Napi::String::New(env, "voiceActivityDetectorPop"),
  610 + Napi::Function::New(env, VoiceActivityDetectorPopWrapper));
  611 +
  612 + exports.Set(Napi::String::New(env, "voiceActivityDetectorClear"),
  613 + Napi::Function::New(env, VoiceActivityDetectorClearWrapper));
  614 +
  615 + exports.Set(Napi::String::New(env, "voiceActivityDetectorFront"),
  616 + Napi::Function::New(env, VoiceActivityDetectorFrontWrapper));
  617 +
  618 + exports.Set(Napi::String::New(env, "voiceActivityDetectorReset"),
  619 + Napi::Function::New(env, VoiceActivityDetectorResetWrapper));
  620 +}
  1 +// scripts/node-addon-api/src/wave-writer.cc
  2 +//
  3 +// Copyright (c) 2024 Xiaomi Corporation
  4 +
  5 +#include <sstream>
  6 +
  7 +#include "napi.h" // NOLINT
  8 +#include "sherpa-onnx/c-api/c-api.h"
  9 +
  10 +// (filename, {samples: samples, sampleRate: sampleRate}
  11 +static Napi::Boolean WriteWaveWrapper(const Napi::CallbackInfo &info) {
  12 + Napi::Env env = info.Env();
  13 +
  14 + if (info.Length() != 2) {
  15 + std::ostringstream os;
  16 + os << "Expect only 2 argument2. Given: " << info.Length();
  17 +
  18 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  19 +
  20 + return {};
  21 + }
  22 +
  23 + if (!info[0].IsString()) {
  24 + Napi::TypeError::New(env, "Argument 0 should be a string")
  25 + .ThrowAsJavaScriptException();
  26 +
  27 + return {};
  28 + }
  29 +
  30 + if (!info[1].IsObject()) {
  31 + Napi::TypeError::New(env, "Argument 1 should be an object")
  32 + .ThrowAsJavaScriptException();
  33 +
  34 + return {};
  35 + }
  36 +
  37 + Napi::Object obj = info[1].As<Napi::Object>();
  38 +
  39 + if (!obj.Has("samples")) {
  40 + Napi::TypeError::New(env, "The argument object should have a field samples")
  41 + .ThrowAsJavaScriptException();
  42 +
  43 + return {};
  44 + }
  45 +
  46 + if (!obj.Get("samples").IsTypedArray()) {
  47 + Napi::TypeError::New(env, "The object['samples'] should be a typed array")
  48 + .ThrowAsJavaScriptException();
  49 +
  50 + return {};
  51 + }
  52 +
  53 + if (!obj.Has("sampleRate")) {
  54 + Napi::TypeError::New(env,
  55 + "The argument object should have a field sampleRate")
  56 + .ThrowAsJavaScriptException();
  57 +
  58 + return {};
  59 + }
  60 +
  61 + if (!obj.Get("sampleRate").IsNumber()) {
  62 + Napi::TypeError::New(env, "The object['samples'] should be a number")
  63 + .ThrowAsJavaScriptException();
  64 +
  65 + return {};
  66 + }
  67 +
  68 + Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
  69 + int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();
  70 +
  71 + int32_t ok =
  72 + SherpaOnnxWriteWave(samples.Data(), samples.ElementLength(), sample_rate,
  73 + info[0].As<Napi::String>().Utf8Value().c_str());
  74 +
  75 + return Napi::Boolean::New(env, ok);
  76 +}
  77 +
  78 +void InitWaveWriter(Napi::Env env, Napi::Object exports) {
  79 + exports.Set(Napi::String::New(env, "writeWave"),
  80 + Napi::Function::New(env, WriteWaveWrapper));
  81 +}