Fangjun Kuang
Committed by GitHub

Add node-addon-api for VAD (#864)

... ... @@ -38,3 +38,12 @@ node ./test_asr_streaming_transducer.js
node ./test_asr_streaming_transducer_microphone.js
```
# VAD
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
node ./test_vad_microphone.js
```
... ...
... ... @@ -36,10 +36,10 @@ console.log('Started')
let start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.samples, wave.sampleRate);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform(tailPadding, wave.sampleRate);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
while (recognizer.isReady(stream)) {
recognizer.decode(stream);
... ...
... ... @@ -60,7 +60,8 @@ const display = new sherpa_onnx.Display(50);
ai.on('data', data => {
const samples = new Float32Array(data.buffer);
stream.acceptWaveform(samples, recognizer.config.featConfig.sampleRate);
stream.acceptWaveform(
{sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});
while (recognizer.isReady(stream)) {
recognizer.decode(stream);
... ...
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());
const sherpa_onnx = require('sherpa-onnx-node');
/**
 * Build the voice activity detector used by this example.
 *
 * The silero VAD model is loaded from `./silero_vad.onnx` (relative to the
 * current working directory) and the detector is constructed with a
 * 60-second buffer size.
 *
 * @returns {object} A new `sherpa_onnx.Vad` instance.
 */
function createVad() {
  // The model file must be downloaded beforehand from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    windowSize: 512,
  };

  const vadConfig = {
    sileroVad,
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const maxBufferedSeconds = 60;
  return new sherpa_onnx.Vad(vadConfig, maxBufferedSeconds);
}
// The detector used by the microphone callback below. It is declared with
// `const` because a bare `vad = ...` assignment creates an implicit global
// and throws a ReferenceError in strict mode and in ES modules.
const vad = createVad();

// Microphone audio is staged in this circular buffer until at least one full
// VAD window is available.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate,
  },
});

// Set once "Detected speech" has been logged, and cleared again as soon as
// the detector reports no speech, so the message appears once per detection.
let printed = false;

// Number of segments saved so far; also used as the filename prefix.
let index = 0;

ai.on('data', (data) => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));

  // Feed the detector one window at a time.
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);

    if (vad.isDetected() && !printed) {
      console.log(`${index}: Detected speech`);
      printed = true;
    }

    if (!vad.isDetected()) {
      printed = false;
    }

    // Drain every finished speech segment and save each one to its own
    // wave file.
    while (!vad.isEmpty()) {
      const segment = vad.front();
      vad.pop();

      const filename = `${index}-${
          new Date()
              .toLocaleTimeString('en-US', {hour12: false})
              .split(' ')[0]}.wav`;
      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});

      const duration = segment.samples.length / vad.config.sampleRate;
      console.log(`${index} End of speech. Duration: ${duration} seconds`);
      console.log(`Saved to ${filename}`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
});

ai.start();
console.log('Started! Please speak');
... ...
... ... @@ -47,7 +47,7 @@ python3 ./python-api-examples/offline-tts-play.py \
--vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
--vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
--vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
--tts-rule-fsts='./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst' \
--tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
--vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \
--sid=2 \
--output-filename=./test-2.wav \
... ...
... ... @@ -48,7 +48,7 @@ python3 ./python-api-examples/offline-tts.py \
--vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
--vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
--vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
--tts-rule-fsts='./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst' \
--tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
--vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \
--sid=2 \
--output-filename=./test-2.wav \
... ...
... ... @@ -20,7 +20,9 @@ include_directories(${CMAKE_JS_INC})
set(srcs
src/sherpa-onnx-node-addon-api.cc
src/streaming-asr.cc
src/vad.cc
src/wave-reader.cc
src/wave-writer.cc
)
if(NOT DEFINED ENV{SHERPA_ONNX_INSTALL_DIR})
... ...
const addon = require('./addon.js')
const streaming_asr = require('./streaming-asr.js');
const vad = require('./vad.js');
module.exports = {
OnlineRecognizer: streaming_asr.OnlineRecognizer,
readWave: addon.readWave,
writeWave: addon.writeWave,
Display: streaming_asr.Display,
Vad: vad.Vad,
CircularBuffer: vad.CircularBuffer,
}
... ...
... ... @@ -15,10 +15,11 @@ class OnlineStream {
this.handle = handle;
}
// obj is {samples: samples, sampleRate: sampleRate}
// samples is a float32 array containing samples in the range [-1, 1]
acceptWaveform(samples, sampleRate) {
addon.acceptWaveformOnline(
this.handle, {samples: samples, sampleRate: sampleRate})
// sampleRate is a number
acceptWaveform(obj) {
addon.acceptWaveformOnline(this.handle, obj)
}
inputFinished() {
... ...
const addon = require('./addon.js');
/**
 * JavaScript wrapper around the circular-buffer functions exported by
 * `addon`. Every method forwards to the matching `addon.circularBuffer*`
 * function, passing the handle created in the constructor.
 */
class CircularBuffer {
  /**
   * @param {number} capacity Forwarded to `addon.createCircularBuffer`.
   */
  constructor(capacity) {
    const handle = addon.createCircularBuffer(capacity);
    this.handle = handle;
  }

  /**
   * Append samples to the buffer.
   * @param {Float32Array} samples A float32 array.
   */
  push(samples) {
    const {handle} = this;
    addon.circularBufferPush(handle, samples);
  }

  /**
   * @param {number} startIndex
   * @param {number} n
   * @returns {Float32Array} A float32 array.
   */
  get(startIndex, n) {
    const {handle} = this;
    const samples = addon.circularBufferGet(handle, startIndex, n);
    return samples;
  }

  /**
   * @param {number} n Forwarded to `addon.circularBufferPop`.
   */
  pop(n) {
    const {handle} = this;
    return addon.circularBufferPop(handle, n);
  }

  /** @returns {number} The value reported by `addon.circularBufferSize`. */
  size() {
    const {handle} = this;
    return addon.circularBufferSize(handle);
  }

  /** @returns {number} The value reported by `addon.circularBufferHead`. */
  head() {
    const {handle} = this;
    return addon.circularBufferHead(handle);
  }

  /** Forwards to `addon.circularBufferReset`. */
  reset() {
    const {handle} = this;
    return addon.circularBufferReset(handle);
  }
}
/**
 * JavaScript wrapper around the voice-activity-detector functions exported
 * by `addon`. Every method forwards to the matching
 * `addon.voiceActivityDetector*` function, passing the handle created in the
 * constructor.
 */
class Vad {
  /*
  config = {
    sileroVad: {
      model: "./silero_vad.onnx",
      threshold: 0.5,
    }
  }
  */
  /**
   * @param {object} config Detector configuration; see the example above.
   *     It is also kept on the instance as `this.config`.
   * @param {number} bufferSizeInSeconds Forwarded to
   *     `addon.createVoiceActivityDetector`.
   */
  constructor(config, bufferSizeInSeconds) {
    this.handle =
        addon.createVoiceActivityDetector(config, bufferSizeInSeconds);
    this.config = config;
  }

  /**
   * Feed samples to the detector.
   * @param {Float32Array} samples
   */
  acceptWaveform(samples) {
    addon.voiceActivityDetectorAcceptWaveform(this.handle, samples);
  }

  /** @returns {boolean} Result of `addon.voiceActivityDetectorIsEmpty`. */
  isEmpty() {
    return addon.voiceActivityDetectorIsEmpty(this.handle);
  }

  /** @returns {boolean} Result of `addon.voiceActivityDetectorIsDetected`. */
  isDetected() {
    return addon.voiceActivityDetectorIsDetected(this.handle);
  }

  /** Forwards to `addon.voiceActivityDetectorPop`. */
  pop() {
    addon.voiceActivityDetectorPop(this.handle);
  }

  /** Forwards to `addon.voiceActivityDetectorClear`. */
  clear() {
    // The addon registers this function as `voiceActivityDetectorClear`;
    // the C++ symbol name `VoiceActivityDetectorClearWrapper` is not exported.
    addon.voiceActivityDetectorClear(this.handle);
  }

  /*
  {
    samples: a 1-d float32 array,
    start: a int32
  }
   */
  /**
   * @returns {{samples: Float32Array, start: number}} The object returned by
   *     `addon.voiceActivityDetectorFront`.
   */
  front() {
    return addon.voiceActivityDetectorFront(this.handle);
  }

  /** Forwards to `addon.voiceActivityDetectorReset`. */
  reset() {
    // The addon registers this function as `voiceActivityDetectorReset`;
    // the C++ symbol name `VoiceActivityDetectorResetWrapper` is not exported.
    return addon.voiceActivityDetectorReset(this.handle);
  }
}
module.exports = {
Vad,
CircularBuffer,
}
... ...
... ... @@ -5,10 +5,14 @@
void InitStreamingAsr(Napi::Env env, Napi::Object exports);
void InitWaveReader(Napi::Env env, Napi::Object exports);
void InitWaveWriter(Napi::Env env, Napi::Object exports);
void InitVad(Napi::Env env, Napi::Object exports);
// Populates `exports` by running every registration function declared above,
// in declaration order, and returns the same object.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
  using Registrar = void (*)(Napi::Env, Napi::Object);

  const Registrar registrars[] = {InitStreamingAsr, InitWaveReader,
                                  InitWaveWriter, InitVad};

  for (Registrar reg : registrars) {
    reg(env, exports);
  }

  return exports;
}
... ...
... ... @@ -125,9 +125,14 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
config.provider = p;
}
if (o.Has("debug") && o.Get("debug").IsNumber()) {
if (o.Has("debug") &&
(o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
if (o.Get("debug").IsBoolean()) {
config.debug = o.Get("debug").As<Napi::Boolean>().Value();
} else {
config.debug = o.Get("debug").As<Napi::Number>().Int32Value();
}
}
if (o.Has("modelType") && o.Get("modelType").IsString()) {
Napi::String model_type = o.Get("modelType").As<Napi::String>();
... ...
// scripts/node-addon-api/src/vad.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <sstream>
#include "napi.h" // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
// Creates a native circular buffer and hands it to JavaScript as an external
// handle.
//
// JS arguments: (capacity), a number that is read as a 32-bit integer.
// Throws a TypeError (and returns an empty value) on a wrong argument count
// or a non-number argument. The finalizer attached to the external calls
// SherpaOnnxDestroyCircularBuffer on the native buffer.
static Napi::External<SherpaOnnxCircularBuffer> CreateCircularBufferWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsNumber()) {
    Napi::TypeError::New(env, "You should pass an integer as the argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const int32_t capacity = info[0].As<Napi::Number>().Int32Value();
  SherpaOnnxCircularBuffer *handle = SherpaOnnxCreateCircularBuffer(capacity);

  auto finalizer = [](Napi::Env /*env*/, SherpaOnnxCircularBuffer *ptr) {
    SherpaOnnxDestroyCircularBuffer(ptr);
  };

  return Napi::External<SherpaOnnxCircularBuffer>::New(env, handle, finalizer);
}
// Appends samples to a native circular buffer.
//
// JS arguments: (handle, samples)
//   handle  - external created by CreateCircularBufferWrapper
//   samples - Float32Array holding the samples to push
// Throws a TypeError on a wrong argument count or argument type.
static void CircularBufferPushWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  SherpaOnnxCircularBuffer *buf =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  // IsTypedArray() alone also accepts e.g. Uint8Array or Float64Array, whose
  // bytes would then be misread as floats; require float32 elements.
  if (!info[1].IsTypedArray() ||
      info[1].As<Napi::TypedArray>().TypedArrayType() != napi_float32_array) {
    Napi::TypeError::New(env, "Argument 1 should be a Float32Array.")
        .ThrowAsJavaScriptException();

    return;
  }

  Napi::Float32Array data = info[1].As<Napi::Float32Array>();

  SherpaOnnxCircularBufferPush(buf, data.Data(), data.ElementLength());
}
// see https://github.com/nodejs/node-addon-api/blob/main/doc/typed_array.md
// https://github.com/nodejs/node-addon-examples/blob/main/src/2-js-to-native-conversion/typed_array_to_native/node-addon-api/typed_array_to_native.cc
// Reads `n` samples starting at `startIndex` from a native circular buffer.
//
// JS arguments: (handle, startIndex, n)
//   handle     - external created by CreateCircularBufferWrapper
//   startIndex - number, read as a 32-bit integer
//   n          - number, read as a 32-bit integer
// Returns a Float32Array of length n whose ArrayBuffer wraps the pointer
// returned by SherpaOnnxCircularBufferGet; the ArrayBuffer finalizer passes
// that pointer to SherpaOnnxCircularBufferFree.
// Throws a TypeError on a wrong argument count or argument type.
static Napi::Float32Array CircularBufferGetWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 3) {
    std::ostringstream msg;
    msg << "Expect only 3 arguments. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  auto *circular_buffer =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  if (!info[1].IsNumber()) {
    Napi::TypeError::New(env, "Argument 1 should be an integer (startIndex).")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[2].IsNumber()) {
    Napi::TypeError::New(env, "Argument 2 should be an integer (n).")
        .ThrowAsJavaScriptException();

    return {};
  }

  const int32_t first = info[1].As<Napi::Number>().Int32Value();
  const int32_t count = info[2].As<Napi::Number>().Int32Value();

  const float *samples =
      SherpaOnnxCircularBufferGet(circular_buffer, first, count);

  auto release = [](Napi::Env /*env*/, void *ptr) {
    SherpaOnnxCircularBufferFree(reinterpret_cast<const float *>(ptr));
  };

  Napi::ArrayBuffer backing = Napi::ArrayBuffer::New(
      env, const_cast<float *>(samples), sizeof(float) * count, release);

  return Napi::Float32Array::New(env, count, backing, 0);
}
// Forwards to SherpaOnnxCircularBufferPop.
//
// JS arguments: (handle, n)
//   handle - external created by CreateCircularBufferWrapper
//   n      - number, read as a 32-bit integer
// Throws a TypeError on a wrong argument count or argument type.
static void CircularBufferPopWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 2) {
    std::ostringstream msg;
    msg << "Expect only 2 arguments. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  auto *circular_buffer =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  if (!info[1].IsNumber()) {
    Napi::TypeError::New(env, "Argument 1 should be an integer (n).")
        .ThrowAsJavaScriptException();

    return;
  }

  const int32_t count = info[1].As<Napi::Number>().Int32Value();
  SherpaOnnxCircularBufferPop(circular_buffer, count);
}
// Returns the value of SherpaOnnxCircularBufferSize as a JS number.
//
// JS arguments: (handle), the external created by
// CreateCircularBufferWrapper. Throws a TypeError on a wrong argument count
// or argument type.
static Napi::Number CircularBufferSizeWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  auto *circular_buffer =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  const int32_t num_samples = SherpaOnnxCircularBufferSize(circular_buffer);

  return Napi::Number::New(env, num_samples);
}
// Returns the value of SherpaOnnxCircularBufferHead as a JS number.
//
// JS arguments: (handle), the external created by
// CreateCircularBufferWrapper. Throws a TypeError on a wrong argument count
// or argument type.
static Napi::Number CircularBufferHeadWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  auto *circular_buffer =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  const int32_t head_index = SherpaOnnxCircularBufferHead(circular_buffer);

  return Napi::Number::New(env, head_index);
}
// Forwards to SherpaOnnxCircularBufferReset.
//
// JS arguments: (handle), the external created by
// CreateCircularBufferWrapper. Throws a TypeError on a wrong argument count
// or argument type.
static void CircularBufferResetWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  auto *circular_buffer =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  SherpaOnnxCircularBufferReset(circular_buffer);
}
// Builds a SherpaOnnxSileroVadModelConfig from the `sileroVad` property of
// `obj`.
//
// The result starts zero-initialized; a field is filled in only when the
// matching JS property exists and has the expected type (string for `model`,
// number for the rest). When `sileroVad` is missing or not an object, the
// zeroed struct is returned unchanged.
//
// `model` is copied into a buffer allocated with new[]; the caller owns it
// and must release it with delete[].
static SherpaOnnxSileroVadModelConfig GetSileroVadConfig(
    const Napi::Object &obj) {
  SherpaOnnxSileroVadModelConfig config;
  memset(&config, 0, sizeof(config));

  if (!(obj.Has("sileroVad") && obj.Get("sileroVad").IsObject())) {
    return config;
  }

  Napi::Object silero = obj.Get("sileroVad").As<Napi::Object>();

  // True when `key` is present on the sileroVad object and holds a number.
  auto has_number = [&silero](const char *key) {
    return silero.Has(key) && silero.Get(key).IsNumber();
  };

  if (silero.Has("model") && silero.Get("model").IsString()) {
    const std::string path = silero.Get("model").As<Napi::String>().Utf8Value();

    // NUL-terminated heap copy that outlives the temporary std::string.
    char *copy = new char[path.size() + 1];
    std::copy(path.begin(), path.end(), copy);
    copy[path.size()] = 0;

    config.model = copy;
  }

  if (has_number("threshold")) {
    config.threshold = silero.Get("threshold").As<Napi::Number>().FloatValue();
  }

  if (has_number("minSilenceDuration")) {
    config.min_silence_duration =
        silero.Get("minSilenceDuration").As<Napi::Number>().FloatValue();
  }

  if (has_number("minSpeechDuration")) {
    config.min_speech_duration =
        silero.Get("minSpeechDuration").As<Napi::Number>().FloatValue();
  }

  if (has_number("windowSize")) {
    config.window_size =
        silero.Get("windowSize").As<Napi::Number>().Int32Value();
  }

  return config;
}
// Creates a native voice activity detector and hands it to JavaScript as an
// external handle.
//
// JS arguments: (config, bufferSizeInSeconds)
//   config - object; the recognized properties are `sileroVad` (see
//            GetSileroVadConfig), `sampleRate`, `numThreads`, `provider`
//            and `debug` (number or boolean). Properties that are missing
//            or have the wrong type are left at zero.
//   bufferSizeInSeconds - number, read as a float
// Throws a TypeError on a wrong argument count or argument type, and an
// Error when the native detector could not be created. The finalizer
// attached to the external calls SherpaOnnxDestroyVoiceActivityDetector.
static Napi::External<SherpaOnnxVoiceActivityDetector>
CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env,
                         "You should pass an object as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsNumber()) {
    Napi::TypeError::New(env,
                         "You should pass an integer as the second argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxVadModelConfig c;
  memset(&c, 0, sizeof(c));
  c.silero_vad = GetSileroVadConfig(o);

  if (o.Has("sampleRate") && o.Get("sampleRate").IsNumber()) {
    c.sample_rate = o.Get("sampleRate").As<Napi::Number>().Int32Value();
  }

  if (o.Has("numThreads") && o.Get("numThreads").IsNumber()) {
    c.num_threads = o.Get("numThreads").As<Napi::Number>().Int32Value();
  }

  if (o.Has("provider") && o.Get("provider").IsString()) {
    Napi::String provider = o.Get("provider").As<Napi::String>();
    std::string s = provider.Utf8Value();

    // NUL-terminated heap copy; released below once the detector is created.
    char *p = new char[s.size() + 1];
    std::copy(s.begin(), s.end(), p);
    p[s.size()] = 0;

    c.provider = p;
  }

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

  float buffer_size_in_seconds = info[1].As<Napi::Number>().FloatValue();

  SherpaOnnxVoiceActivityDetector *vad =
      SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds);

  // The strings allocated while parsing the config are released here, before
  // any early return, so the error path below does not leak them.
  if (c.silero_vad.model) {
    delete[] c.silero_vad.model;
  }

  if (c.provider) {
    delete[] c.provider;
  }

  // Never wrap a null pointer: every other VAD wrapper dereferences the
  // handle without checking it.
  if (!vad) {
    Napi::Error::New(env,
                     "Failed to create the voice activity detector. Please "
                     "check your config.")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxVoiceActivityDetector>::New(
      env, vad, [](Napi::Env /*env*/, SherpaOnnxVoiceActivityDetector *p) {
        SherpaOnnxDestroyVoiceActivityDetector(p);
      });
}
// Feeds samples to a native voice activity detector.
//
// JS arguments: (handle, samples)
//   handle  - external created by CreateVoiceActivityDetectorWrapper
//   samples - Float32Array holding the samples
// Throws a TypeError on a wrong argument count or argument type.
static void VoiceActivityDetectorAcceptWaveformWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  SherpaOnnxVoiceActivityDetector *vad =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  // IsTypedArray() alone also accepts e.g. Int16Array or Float64Array, whose
  // bytes would then be misread as floats; require float32 elements.
  if (!info[1].IsTypedArray() ||
      info[1].As<Napi::TypedArray>().TypedArrayType() != napi_float32_array) {
    Napi::TypeError::New(
        env, "Argument 1 should be a Float32Array containing samples")
        .ThrowAsJavaScriptException();

    return;
  }

  Napi::Float32Array samples = info[1].As<Napi::Float32Array>();

  SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, samples.Data(),
                                                samples.ElementLength());
}
// Returns the value of SherpaOnnxVoiceActivityDetectorEmpty as a JS boolean.
//
// JS arguments: (handle), the external created by
// CreateVoiceActivityDetectorWrapper. Throws a TypeError on a wrong argument
// count or argument type.
static Napi::Boolean VoiceActivityDetectorEmptyWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  auto *detector =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  const int32_t empty_flag = SherpaOnnxVoiceActivityDetectorEmpty(detector);

  return Napi::Boolean::New(env, empty_flag);
}
// Returns the value of SherpaOnnxVoiceActivityDetectorDetected as a JS
// boolean.
//
// JS arguments: (handle), the external created by
// CreateVoiceActivityDetectorWrapper. Throws a TypeError on a wrong argument
// count or argument type.
static Napi::Boolean VoiceActivityDetectorDetectedWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  auto *detector =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  const int32_t detected_flag =
      SherpaOnnxVoiceActivityDetectorDetected(detector);

  return Napi::Boolean::New(env, detected_flag);
}
// Forwards to SherpaOnnxVoiceActivityDetectorPop.
//
// JS arguments: (handle), the external created by
// CreateVoiceActivityDetectorWrapper. Throws a TypeError on a wrong argument
// count or argument type.
static void VoiceActivityDetectorPopWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  auto *detector =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  SherpaOnnxVoiceActivityDetectorPop(detector);
}
// Forwards to SherpaOnnxVoiceActivityDetectorClear.
//
// JS arguments: (handle), the external created by
// CreateVoiceActivityDetectorWrapper. Throws a TypeError on a wrong argument
// count or argument type.
static void VoiceActivityDetectorClearWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  auto *detector =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  SherpaOnnxVoiceActivityDetectorClear(detector);
}
// Returns the speech segment reported by
// SherpaOnnxVoiceActivityDetectorFront.
//
// JS arguments: (handle), the external created by
// CreateVoiceActivityDetectorWrapper.
// Returns an object {start, samples}: `start` is segment->start and
// `samples` is a Float32Array of segment->n elements whose ArrayBuffer wraps
// segment->samples. The ArrayBuffer finalizer calls
// SherpaOnnxDestroySpeechSegment on the segment.
// Throws a TypeError on a wrong argument count or argument type, and an
// Error when the detector has no segment to return.
static Napi::Object VoiceActivityDetectorFrontWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxVoiceActivityDetector *vad =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  // Report an empty detector as a JS exception instead of asking the native
  // library for a segment that does not exist.
  if (SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
    Napi::Error::New(env,
                     "The VAD has no speech segment. Check isEmpty() before "
                     "calling front().")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeechSegment *segment =
      SherpaOnnxVoiceActivityDetectorFront(vad);

  // `segment` is dereferenced below, so a null result must not get through.
  if (!segment) {
    Napi::Error::New(env, "Failed to get the front speech segment.")
        .ThrowAsJavaScriptException();

    return {};
  }

  // The segment is passed as the finalizer hint so that the whole segment,
  // not just its sample array, is destroyed with the ArrayBuffer.
  Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
      env, const_cast<float *>(segment->samples), sizeof(float) * segment->n,
      [](Napi::Env /*env*/, void * /*data*/,
         const SherpaOnnxSpeechSegment *hint) {
        SherpaOnnxDestroySpeechSegment(hint);
      },
      segment);

  Napi::Float32Array float32Array =
      Napi::Float32Array::New(env, segment->n, arrayBuffer, 0);

  Napi::Object obj = Napi::Object::New(env);
  obj.Set(Napi::String::New(env, "start"), segment->start);
  obj.Set(Napi::String::New(env, "samples"), float32Array);

  return obj;
}
// Forwards to SherpaOnnxVoiceActivityDetectorReset.
//
// JS arguments: (handle), the external created by
// CreateVoiceActivityDetectorWrapper. Throws a TypeError on a wrong argument
// count or argument type.
static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;

    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  auto *detector =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  SherpaOnnxVoiceActivityDetectorReset(detector);
}
// Registers the circular-buffer and voice-activity-detector wrappers on
// `exports` under the JavaScript names listed below.
void InitVad(Napi::Env env, Napi::Object exports) {
  // Circular buffer.
  exports.Set("createCircularBuffer",
              Napi::Function::New(env, CreateCircularBufferWrapper));

  exports.Set("circularBufferPush",
              Napi::Function::New(env, CircularBufferPushWrapper));

  exports.Set("circularBufferGet",
              Napi::Function::New(env, CircularBufferGetWrapper));

  exports.Set("circularBufferPop",
              Napi::Function::New(env, CircularBufferPopWrapper));

  exports.Set("circularBufferSize",
              Napi::Function::New(env, CircularBufferSizeWrapper));

  exports.Set("circularBufferHead",
              Napi::Function::New(env, CircularBufferHeadWrapper));

  exports.Set("circularBufferReset",
              Napi::Function::New(env, CircularBufferResetWrapper));

  // Voice activity detector.
  exports.Set("createVoiceActivityDetector",
              Napi::Function::New(env, CreateVoiceActivityDetectorWrapper));

  exports.Set(
      "voiceActivityDetectorAcceptWaveform",
      Napi::Function::New(env, VoiceActivityDetectorAcceptWaveformWrapper));

  exports.Set("voiceActivityDetectorIsEmpty",
              Napi::Function::New(env, VoiceActivityDetectorEmptyWrapper));

  exports.Set("voiceActivityDetectorIsDetected",
              Napi::Function::New(env, VoiceActivityDetectorDetectedWrapper));

  exports.Set("voiceActivityDetectorPop",
              Napi::Function::New(env, VoiceActivityDetectorPopWrapper));

  exports.Set("voiceActivityDetectorClear",
              Napi::Function::New(env, VoiceActivityDetectorClearWrapper));

  exports.Set("voiceActivityDetectorFront",
              Napi::Function::New(env, VoiceActivityDetectorFrontWrapper));

  exports.Set("voiceActivityDetectorReset",
              Napi::Function::New(env, VoiceActivityDetectorResetWrapper));
}
... ...
// scripts/node-addon-api/src/wave-writer.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <sstream>
#include "napi.h" // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
// Writes samples to a wave file through SherpaOnnxWriteWave.
//
// JS arguments: (filename, {samples: samples, sampleRate: sampleRate})
//   filename   - string
//   samples    - Float32Array holding the samples
//   sampleRate - number, read as a 32-bit integer
// Returns the result of SherpaOnnxWriteWave converted to a JS boolean.
// Throws a TypeError on a wrong argument count, a wrong argument type, or a
// missing/mistyped field.
static Napi::Boolean WriteWaveWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsString()) {
    Napi::TypeError::New(env, "Argument 0 should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("samples")) {
    Napi::TypeError::New(env, "The argument object should have a field samples")
        .ThrowAsJavaScriptException();

    return {};
  }

  // IsTypedArray() alone also accepts e.g. Int16Array, whose bytes would then
  // be misread as floats; require float32 elements.
  if (!obj.Get("samples").IsTypedArray() ||
      obj.Get("samples").As<Napi::TypedArray>().TypedArrayType() !=
          napi_float32_array) {
    Napi::TypeError::New(env, "The object['samples'] should be a Float32Array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("sampleRate")) {
    Napi::TypeError::New(env,
                         "The argument object should have a field sampleRate")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("sampleRate").IsNumber()) {
    Napi::TypeError::New(env, "The object['sampleRate'] should be a number")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
  int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();

  int32_t ok =
      SherpaOnnxWriteWave(samples.Data(), samples.ElementLength(), sample_rate,
                          info[0].As<Napi::String>().Utf8Value().c_str());

  return Napi::Boolean::New(env, ok);
}
// Registers WriteWaveWrapper on `exports` under the JavaScript name
// `writeWave`.
void InitWaveWriter(Napi::Env env, Napi::Object exports) {
  exports.Set("writeWave", Napi::Function::New(env, WriteWaveWrapper));
}
... ...