Fangjun Kuang
Committed by GitHub

Add JavaScript (node-addon) API for speech enhancement GTCRN models (#1996)

@@ -10,6 +10,15 @@ arch=$(node -p "require('os').arch()") @@ -10,6 +10,15 @@ arch=$(node -p "require('os').arch()")
10 platform=$(node -p "require('os').platform()") 10 platform=$(node -p "require('os').platform()")
11 node_version=$(node -p "process.versions.node.split('.')[0]") 11 node_version=$(node -p "process.versions.node.split('.')[0]")
12 12
  13 +echo "----------non-streaming speech denoiser----------"
  14 +
  15 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
  16 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
  17 +
  18 +node ./test_offline_speech_enhancement_gtcrn.js
  19 +rm gtcrn_simple.onnx
  20 +ls -lh *.wav
  21 +
13 echo "----------non-streaming asr FireRedAsr----------" 22 echo "----------non-streaming asr FireRedAsr----------"
14 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 23 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
15 tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 24 tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
@@ -22,5 +22,5 @@ Hint: All of the following functions can be used in Flutter, even if some of the @@ -22,5 +22,5 @@ Hint: All of the following functions can be used in Flutter, even if some of the
22 |Speaker identification and verification| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-identification)| macOS, Windows, Linux| 22 |Speaker identification and verification| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-identification)| macOS, Windows, Linux|
23 |Audio tagging| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/audio-tagging)| macOS, Windows, Linux| 23 |Audio tagging| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/audio-tagging)| macOS, Windows, Linux|
24 |Keyword spotter| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/keyword-spotter)| macOS, Windows, Linux| 24 |Keyword spotter| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/keyword-spotter)| macOS, Windows, Linux|
25 -|Add punctuions| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/add-punctuations)| macOS, Windows, Linux| 25 +|Add punctuations| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/add-punctuations)| macOS, Windows, Linux|
26 |Speech enhancement/denoising| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speech-enhancement-gtcrn)| macOS, Windows, Linux| 26 |Speech enhancement/denoising| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speech-enhancement-gtcrn)| macOS, Windows, Linux|
@@ -44,6 +44,7 @@ add_library(sherpa_onnx SHARED @@ -44,6 +44,7 @@ add_library(sherpa_onnx SHARED
44 keyword-spotting.cc 44 keyword-spotting.cc
45 non-streaming-asr.cc 45 non-streaming-asr.cc
46 non-streaming-speaker-diarization.cc 46 non-streaming-speaker-diarization.cc
  47 + non-streaming-speech-denoiser.cc
47 non-streaming-tts.cc 48 non-streaming-tts.cc
48 punctuation.cc 49 punctuation.cc
49 sherpa-onnx-node-addon-api.cc 50 sherpa-onnx-node-addon-api.cc
@@ -121,7 +121,7 @@ AudioTaggingCreateOfflineStreamWrapper(const Napi::CallbackInfo &info) { @@ -121,7 +121,7 @@ AudioTaggingCreateOfflineStreamWrapper(const Napi::CallbackInfo &info) {
121 return {}; 121 return {};
122 } 122 }
123 123
124 - SherpaOnnxAudioTagging *at = 124 + const SherpaOnnxAudioTagging *at =
125 info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data(); 125 info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();
126 126
127 const SherpaOnnxOfflineStream *stream = 127 const SherpaOnnxOfflineStream *stream =
@@ -169,10 +169,10 @@ static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) { @@ -169,10 +169,10 @@ static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) {
169 return {}; 169 return {};
170 } 170 }
171 171
172 - SherpaOnnxAudioTagging *at = 172 + const SherpaOnnxAudioTagging *at =
173 info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data(); 173 info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();
174 174
175 - SherpaOnnxOfflineStream *stream = 175 + const SherpaOnnxOfflineStream *stream =
176 info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data(); 176 info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
177 177
178 int32_t top_k = info[2].As<Napi::Number>().Int32Value(); 178 int32_t top_k = info[2].As<Napi::Number>().Int32Value();
@@ -341,7 +341,7 @@ static Napi::External<SherpaOnnxOfflineStream> CreateOfflineStreamWrapper( @@ -341,7 +341,7 @@ static Napi::External<SherpaOnnxOfflineStream> CreateOfflineStreamWrapper(
341 return {}; 341 return {};
342 } 342 }
343 343
344 - SherpaOnnxOfflineRecognizer *recognizer = 344 + const SherpaOnnxOfflineRecognizer *recognizer =
345 info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data(); 345 info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();
346 346
347 const SherpaOnnxOfflineStream *stream = 347 const SherpaOnnxOfflineStream *stream =
@@ -373,7 +373,7 @@ static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) { @@ -373,7 +373,7 @@ static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) {
373 return; 373 return;
374 } 374 }
375 375
376 - SherpaOnnxOfflineStream *stream = 376 + const SherpaOnnxOfflineStream *stream =
377 info[0].As<Napi::External<SherpaOnnxOfflineStream>>().Data(); 377 info[0].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
378 378
379 if (!info[1].IsObject()) { 379 if (!info[1].IsObject()) {
@@ -454,10 +454,10 @@ static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) { @@ -454,10 +454,10 @@ static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) {
454 return; 454 return;
455 } 455 }
456 456
457 - SherpaOnnxOfflineRecognizer *recognizer = 457 + const SherpaOnnxOfflineRecognizer *recognizer =
458 info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data(); 458 info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();
459 459
460 - SherpaOnnxOfflineStream *stream = 460 + const SherpaOnnxOfflineStream *stream =
461 info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data(); 461 info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
462 462
463 SherpaOnnxDecodeOfflineStream(recognizer, stream); 463 SherpaOnnxDecodeOfflineStream(recognizer, stream);
@@ -482,7 +482,7 @@ static Napi::String GetOfflineStreamResultAsJsonWrapper( @@ -482,7 +482,7 @@ static Napi::String GetOfflineStreamResultAsJsonWrapper(
482 return {}; 482 return {};
483 } 483 }
484 484
485 - SherpaOnnxOfflineStream *stream = 485 + const SherpaOnnxOfflineStream *stream =
486 info[0].As<Napi::External<SherpaOnnxOfflineStream>>().Data(); 486 info[0].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
487 487
488 const char *json = SherpaOnnxGetOfflineStreamResultAsJson(stream); 488 const char *json = SherpaOnnxGetOfflineStreamResultAsJson(stream);
  1 +// scripts/node-addon-api/src/non-streaming-speech-denoiser.cc
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +#include <sstream>
  5 +
  6 +#include "macros.h" // NOLINT
  7 +#include "napi.h" // NOLINT
  8 +#include "sherpa-onnx/c-api/c-api.h"
  9 +
  10 +static SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig
  11 +GetOfflineSpeechDenoiserGtcrnModelConfig(Napi::Object obj) {
  12 + SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig c;
  13 + memset(&c, 0, sizeof(c));
  14 +
  15 + if (!obj.Has("gtcrn") || !obj.Get("gtcrn").IsObject()) {
  16 + return c;
  17 + }
  18 +
  19 + Napi::Object o = obj.Get("gtcrn").As<Napi::Object>();
  20 +
  21 + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  22 +
  23 + return c;
  24 +}
  25 +
  26 +static SherpaOnnxOfflineSpeechDenoiserModelConfig
  27 +GetOfflineSpeechDenoiserModelConfig(Napi::Object obj) {
  28 + SherpaOnnxOfflineSpeechDenoiserModelConfig c;
  29 + memset(&c, 0, sizeof(c));
  30 +
  31 + if (!obj.Has("model") || !obj.Get("model").IsObject()) {
  32 + return c;
  33 + }
  34 +
  35 + Napi::Object o = obj.Get("model").As<Napi::Object>();
  36 +
  37 + c.gtcrn = GetOfflineSpeechDenoiserGtcrnModelConfig(o);
  38 +
  39 + SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
  40 +
  41 + if (o.Has("debug") &&
  42 + (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
  43 + if (o.Get("debug").IsBoolean()) {
  44 + c.debug = o.Get("debug").As<Napi::Boolean>().Value();
  45 + } else {
  46 + c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
  47 + }
  48 + }
  49 +
  50 + SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);
  51 +
  52 + return c;
  53 +}
  54 +
  55 +static Napi::External<SherpaOnnxOfflineSpeechDenoiser>
  56 +CreateOfflineSpeechDenoiserWrapper(const Napi::CallbackInfo &info) {
  57 + Napi::Env env = info.Env();
  58 +#if __OHOS__
  59 + // the last argument is the NativeResourceManager
  60 + if (info.Length() != 2) {
  61 + std::ostringstream os;
  62 + os << "Expect only 2 arguments. Given: " << info.Length();
  63 +
  64 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  65 +
  66 + return {};
  67 + }
  68 +#else
  69 + if (info.Length() != 1) {
  70 + std::ostringstream os;
  71 + os << "Expect only 1 argument. Given: " << info.Length();
  72 +
  73 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  74 +
  75 + return {};
  76 + }
  77 +#endif
  78 +
  79 + if (!info[0].IsObject()) {
  80 + Napi::TypeError::New(env, "Expect an object as the argument")
  81 + .ThrowAsJavaScriptException();
  82 +
  83 + return {};
  84 + }
  85 +
  86 + Napi::Object o = info[0].As<Napi::Object>();
  87 +
  88 + SherpaOnnxOfflineSpeechDenoiserConfig c;
  89 + memset(&c, 0, sizeof(c));
  90 + c.model = GetOfflineSpeechDenoiserModelConfig(o);
  91 +
  92 +#if __OHOS__
  93 + std::unique_ptr<NativeResourceManager,
  94 + decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
  95 + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
  96 + &OH_ResourceManager_ReleaseNativeResourceManager);
  97 +
  98 + const SherpaOnnxOfflineSpeechDenoiser *sd =
  99 + SherpaOnnxCreateOfflineSpeechDenoiserOHOS(&c, mgr.get());
  100 +#else
  101 + const SherpaOnnxOfflineSpeechDenoiser *sd =
  102 + SherpaOnnxCreateOfflineSpeechDenoiser(&c);
  103 +#endif
  104 +
  105 + SHERPA_ONNX_DELETE_C_STR(c.model.gtcrn.model);
  106 + SHERPA_ONNX_DELETE_C_STR(c.model.provider);
  107 +
  108 + if (!sd) {
  109 + Napi::TypeError::New(env, "Please check your config!")
  110 + .ThrowAsJavaScriptException();
  111 +
  112 + return {};
  113 + }
  114 +
  115 + return Napi::External<SherpaOnnxOfflineSpeechDenoiser>::New(
  116 + env, const_cast<SherpaOnnxOfflineSpeechDenoiser *>(sd),
  117 + [](Napi::Env env, SherpaOnnxOfflineSpeechDenoiser *sd) {
  118 + SherpaOnnxDestroyOfflineSpeechDenoiser(sd);
  119 + });
  120 +}
  121 +
  122 +static Napi::Object OfflineSpeechDenoiserRunWrapper(
  123 + const Napi::CallbackInfo &info) {
  124 + Napi::Env env = info.Env();
  125 +
  126 + if (info.Length() != 2) {
  127 + std::ostringstream os;
  128 + os << "Expect only 2 arguments. Given: " << info.Length();
  129 +
  130 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  131 +
  132 + return {};
  133 + }
  134 +
  135 + if (!info[0].IsExternal()) {
  136 + Napi::TypeError::New(
  137 + env, "Argument 0 should be an offline speech denoiser pointer.")
  138 + .ThrowAsJavaScriptException();
  139 +
  140 + return {};
  141 + }
  142 +
  143 + const SherpaOnnxOfflineSpeechDenoiser *sd =
  144 + info[0].As<Napi::External<SherpaOnnxOfflineSpeechDenoiser>>().Data();
  145 +
  146 + if (!info[1].IsObject()) {
  147 + Napi::TypeError::New(env, "Argument 1 should be an object")
  148 + .ThrowAsJavaScriptException();
  149 +
  150 + return {};
  151 + }
  152 +
  153 + Napi::Object obj = info[1].As<Napi::Object>();
  154 +
  155 + if (!obj.Has("samples")) {
  156 + Napi::TypeError::New(env, "The argument object should have a field samples")
  157 + .ThrowAsJavaScriptException();
  158 +
  159 + return {};
  160 + }
  161 +
  162 + if (!obj.Get("samples").IsTypedArray()) {
  163 + Napi::TypeError::New(env, "The object['samples'] should be a typed array")
  164 + .ThrowAsJavaScriptException();
  165 +
  166 + return {};
  167 + }
  168 +
  169 + if (!obj.Has("sampleRate")) {
  170 + Napi::TypeError::New(env,
  171 + "The argument object should have a field sampleRate")
  172 + .ThrowAsJavaScriptException();
  173 +
  174 + return {};
  175 + }
  176 +
  177 + if (!obj.Get("sampleRate").IsNumber()) {
  178 + Napi::TypeError::New(env, "The object['samples'] should be a number")
  179 + .ThrowAsJavaScriptException();
  180 +
  181 + return {};
  182 + }
  183 +
  184 + Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
  185 + int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();
  186 +
  187 + const SherpaOnnxDenoisedAudio *audio;
  188 +
  189 +#if __OHOS__
  190 + // Note(fangjun): For unknown reasons on HarmonyOS, we need to divide it by
  191 + // sizeof(float) here
  192 + audio = SherpaOnnxOfflineSpeechDenoiserRun(
  193 + sd, samples.Data(), samples.ElementLength() / sizeof(float), sample_rate);
  194 +#else
  195 + audio = SherpaOnnxOfflineSpeechDenoiserRun(
  196 + sd, samples.Data(), samples.ElementLength(), sample_rate);
  197 +#endif
  198 +
  199 + bool enable_external_buffer = true;
  200 + if (obj.Has("enableExternalBuffer") &&
  201 + obj.Get("enableExternalBuffer").IsBoolean()) {
  202 + enable_external_buffer =
  203 + obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  204 + }
  205 +
  206 + if (enable_external_buffer) {
  207 + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
  208 + env, const_cast<float *>(audio->samples), sizeof(float) * audio->n,
  209 + [](Napi::Env /*env*/, void * /*data*/,
  210 + const SherpaOnnxDenoisedAudio *hint) {
  211 + SherpaOnnxDestroyDenoisedAudio(hint);
  212 + },
  213 + audio);
  214 + Napi::Float32Array float32Array =
  215 + Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);
  216 +
  217 + Napi::Object ans = Napi::Object::New(env);
  218 + ans.Set(Napi::String::New(env, "samples"), float32Array);
  219 + ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);
  220 + return ans;
  221 + } else {
  222 + // don't use external buffer
  223 + Napi::ArrayBuffer arrayBuffer =
  224 + Napi::ArrayBuffer::New(env, sizeof(float) * audio->n);
  225 +
  226 + Napi::Float32Array float32Array =
  227 + Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);
  228 +
  229 + std::copy(audio->samples, audio->samples + audio->n, float32Array.Data());
  230 +
  231 + Napi::Object ans = Napi::Object::New(env);
  232 + ans.Set(Napi::String::New(env, "samples"), float32Array);
  233 + ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);
  234 + SherpaOnnxDestroyDenoisedAudio(audio);
  235 + return ans;
  236 + }
  237 +}
  238 +
  239 +static Napi::Number OfflineSpeechDenoiserGetSampleRateWrapper(
  240 + const Napi::CallbackInfo &info) {
  241 + Napi::Env env = info.Env();
  242 +
  243 + if (info.Length() != 1) {
  244 + std::ostringstream os;
  245 + os << "Expect only 1 argument. Given: " << info.Length();
  246 +
  247 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  248 +
  249 + return {};
  250 + }
  251 +
  252 + if (!info[0].IsExternal()) {
  253 + Napi::TypeError::New(
  254 + env, "Argument 0 should be an offline speech denoiser pointer.")
  255 + .ThrowAsJavaScriptException();
  256 +
  257 + return {};
  258 + }
  259 +
  260 + const SherpaOnnxOfflineSpeechDenoiser *sd =
  261 + info[0].As<Napi::External<SherpaOnnxOfflineSpeechDenoiser>>().Data();
  262 +
  263 + int32_t sample_rate = SherpaOnnxOfflineSpeechDenoiserGetSampleRate(sd);
  264 +
  265 + return Napi::Number::New(env, sample_rate);
  266 +}
  267 +
  268 +void InitNonStreamingSpeechDenoiser(Napi::Env env, Napi::Object exports) {
  269 + exports.Set(Napi::String::New(env, "createOfflineSpeechDenoiser"),
  270 + Napi::Function::New(env, CreateOfflineSpeechDenoiserWrapper));
  271 +
  272 + exports.Set(Napi::String::New(env, "offlineSpeechDenoiserRunWrapper"),
  273 + Napi::Function::New(env, OfflineSpeechDenoiserRunWrapper));
  274 +
  275 + exports.Set(
  276 + Napi::String::New(env, "offlineSpeechDenoiserGetSampleRateWrapper"),
  277 + Napi::Function::New(env, OfflineSpeechDenoiserGetSampleRateWrapper));
  278 +}
@@ -217,7 +217,7 @@ static Napi::Number OfflineTtsSampleRateWrapper( @@ -217,7 +217,7 @@ static Napi::Number OfflineTtsSampleRateWrapper(
217 return {}; 217 return {};
218 } 218 }
219 219
220 - SherpaOnnxOfflineTts *tts = 220 + const SherpaOnnxOfflineTts *tts =
221 info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data(); 221 info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();
222 222
223 int32_t sample_rate = SherpaOnnxOfflineTtsSampleRate(tts); 223 int32_t sample_rate = SherpaOnnxOfflineTtsSampleRate(tts);
@@ -245,7 +245,7 @@ static Napi::Number OfflineTtsNumSpeakersWrapper( @@ -245,7 +245,7 @@ static Napi::Number OfflineTtsNumSpeakersWrapper(
245 return {}; 245 return {};
246 } 246 }
247 247
248 - SherpaOnnxOfflineTts *tts = 248 + const SherpaOnnxOfflineTts *tts =
249 info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data(); 249 info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();
250 250
251 int32_t num_speakers = SherpaOnnxOfflineTtsNumSpeakers(tts); 251 int32_t num_speakers = SherpaOnnxOfflineTtsNumSpeakers(tts);
@@ -273,7 +273,7 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) { @@ -273,7 +273,7 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
273 return {}; 273 return {};
274 } 274 }
275 275
276 - SherpaOnnxOfflineTts *tts = 276 + const SherpaOnnxOfflineTts *tts =
277 info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data(); 277 info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();
278 278
279 if (!info[1].IsObject()) { 279 if (!info[1].IsObject()) {
@@ -418,9 +418,9 @@ using TSFN = Napi::TypedThreadSafeFunction<Napi::Reference<Napi::Value>, @@ -418,9 +418,9 @@ using TSFN = Napi::TypedThreadSafeFunction<Napi::Reference<Napi::Value>,
418 418
419 class TtsGenerateWorker : public Napi::AsyncWorker { 419 class TtsGenerateWorker : public Napi::AsyncWorker {
420 public: 420 public:
421 - TtsGenerateWorker(const Napi::Env &env, TSFN tsfn, SherpaOnnxOfflineTts *tts,  
422 - const std::string &text, float speed, int32_t sid,  
423 - bool use_external_buffer) 421 + TtsGenerateWorker(const Napi::Env &env, TSFN tsfn,
  422 + const SherpaOnnxOfflineTts *tts, const std::string &text,
  423 + float speed, int32_t sid, bool use_external_buffer)
424 : tsfn_(tsfn), 424 : tsfn_(tsfn),
425 Napi::AsyncWorker{env, "TtsGenerateWorker"}, 425 Napi::AsyncWorker{env, "TtsGenerateWorker"},
426 deferred_(env), 426 deferred_(env),
@@ -506,7 +506,7 @@ class TtsGenerateWorker : public Napi::AsyncWorker { @@ -506,7 +506,7 @@ class TtsGenerateWorker : public Napi::AsyncWorker {
506 private: 506 private:
507 TSFN tsfn_; 507 TSFN tsfn_;
508 Napi::Promise::Deferred deferred_; 508 Napi::Promise::Deferred deferred_;
509 - SherpaOnnxOfflineTts *tts_; 509 + const SherpaOnnxOfflineTts *tts_;
510 std::string text_; 510 std::string text_;
511 float speed_; 511 float speed_;
512 int32_t sid_; 512 int32_t sid_;
@@ -537,7 +537,7 @@ static Napi::Object OfflineTtsGenerateAsyncWrapper( @@ -537,7 +537,7 @@ static Napi::Object OfflineTtsGenerateAsyncWrapper(
537 return {}; 537 return {};
538 } 538 }
539 539
540 - SherpaOnnxOfflineTts *tts = 540 + const SherpaOnnxOfflineTts *tts =
541 info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data(); 541 info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();
542 542
543 if (!info[1].IsObject()) { 543 if (!info[1].IsObject()) {
@@ -108,7 +108,7 @@ static Napi::String OfflinePunctuationAddPunctWraper( @@ -108,7 +108,7 @@ static Napi::String OfflinePunctuationAddPunctWraper(
108 return {}; 108 return {};
109 } 109 }
110 110
111 - SherpaOnnxOfflinePunctuation *punct = 111 + const SherpaOnnxOfflinePunctuation *punct =
112 info[0].As<Napi::External<SherpaOnnxOfflinePunctuation>>().Data(); 112 info[0].As<Napi::External<SherpaOnnxOfflinePunctuation>>().Data();
113 Napi::String js_text = info[1].As<Napi::String>(); 113 Napi::String js_text = info[1].As<Napi::String>();
114 std::string text = js_text.Utf8Value(); 114 std::string text = js_text.Utf8Value();
@@ -27,6 +27,8 @@ void InitKeywordSpotting(Napi::Env env, Napi::Object exports); @@ -27,6 +27,8 @@ void InitKeywordSpotting(Napi::Env env, Napi::Object exports);
27 27
28 void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports); 28 void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports);
29 29
  30 +void InitNonStreamingSpeechDenoiser(Napi::Env env, Napi::Object exports);
  31 +
30 #if __OHOS__ 32 #if __OHOS__
31 void InitUtils(Napi::Env env, Napi::Object exports); 33 void InitUtils(Napi::Env env, Napi::Object exports);
32 #endif 34 #endif
@@ -44,6 +46,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) { @@ -44,6 +46,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
44 InitPunctuation(env, exports); 46 InitPunctuation(env, exports);
45 InitKeywordSpotting(env, exports); 47 InitKeywordSpotting(env, exports);
46 InitNonStreamingSpeakerDiarization(env, exports); 48 InitNonStreamingSpeakerDiarization(env, exports);
  49 + InitNonStreamingSpeechDenoiser(env, exports);
47 50
48 #if __OHOS__ 51 #if __OHOS__
49 InitUtils(env, exports); 52 InitUtils(env, exports);
@@ -108,7 +108,7 @@ static Napi::Number SpeakerEmbeddingExtractorDimWrapper( @@ -108,7 +108,7 @@ static Napi::Number SpeakerEmbeddingExtractorDimWrapper(
108 return {}; 108 return {};
109 } 109 }
110 110
111 - SherpaOnnxSpeakerEmbeddingExtractor *extractor = 111 + const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
112 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data(); 112 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();
113 113
114 int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor); 114 int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor);
@@ -137,7 +137,7 @@ SpeakerEmbeddingExtractorCreateStreamWrapper(const Napi::CallbackInfo &info) { @@ -137,7 +137,7 @@ SpeakerEmbeddingExtractorCreateStreamWrapper(const Napi::CallbackInfo &info) {
137 return {}; 137 return {};
138 } 138 }
139 139
140 - SherpaOnnxSpeakerEmbeddingExtractor *extractor = 140 + const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
141 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data(); 141 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();
142 142
143 const SherpaOnnxOnlineStream *stream = 143 const SherpaOnnxOnlineStream *stream =
@@ -177,10 +177,10 @@ static Napi::Boolean SpeakerEmbeddingExtractorIsReadyWrapper( @@ -177,10 +177,10 @@ static Napi::Boolean SpeakerEmbeddingExtractorIsReadyWrapper(
177 return {}; 177 return {};
178 } 178 }
179 179
180 - SherpaOnnxSpeakerEmbeddingExtractor *extractor = 180 + const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
181 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data(); 181 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();
182 182
183 - SherpaOnnxOnlineStream *stream = 183 + const SherpaOnnxOnlineStream *stream =
184 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); 184 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
185 185
186 int32_t is_ready = 186 int32_t is_ready =
@@ -226,10 +226,10 @@ static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper( @@ -226,10 +226,10 @@ static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper(
226 } 226 }
227 } 227 }
228 228
229 - SherpaOnnxSpeakerEmbeddingExtractor *extractor = 229 + const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
230 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data(); 230 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();
231 231
232 - SherpaOnnxOnlineStream *stream = 232 + const SherpaOnnxOnlineStream *stream =
233 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); 233 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
234 234
235 const float *v = 235 const float *v =
@@ -329,7 +329,7 @@ static Napi::Boolean SpeakerEmbeddingManagerAddWrapper( @@ -329,7 +329,7 @@ static Napi::Boolean SpeakerEmbeddingManagerAddWrapper(
329 return {}; 329 return {};
330 } 330 }
331 331
332 - SherpaOnnxSpeakerEmbeddingManager *manager = 332 + const SherpaOnnxSpeakerEmbeddingManager *manager =
333 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data(); 333 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
334 334
335 Napi::Object obj = info[1].As<Napi::Object>(); 335 Napi::Object obj = info[1].As<Napi::Object>();
@@ -399,7 +399,7 @@ static Napi::Boolean SpeakerEmbeddingManagerAddListFlattenedWrapper( @@ -399,7 +399,7 @@ static Napi::Boolean SpeakerEmbeddingManagerAddListFlattenedWrapper(
399 return {}; 399 return {};
400 } 400 }
401 401
402 - SherpaOnnxSpeakerEmbeddingManager *manager = 402 + const SherpaOnnxSpeakerEmbeddingManager *manager =
403 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data(); 403 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
404 404
405 Napi::Object obj = info[1].As<Napi::Object>(); 405 Napi::Object obj = info[1].As<Napi::Object>();
@@ -486,7 +486,7 @@ static Napi::Boolean SpeakerEmbeddingManagerRemoveWrapper( @@ -486,7 +486,7 @@ static Napi::Boolean SpeakerEmbeddingManagerRemoveWrapper(
486 return {}; 486 return {};
487 } 487 }
488 488
489 - SherpaOnnxSpeakerEmbeddingManager *manager = 489 + const SherpaOnnxSpeakerEmbeddingManager *manager =
490 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data(); 490 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
491 491
492 Napi::String js_name = info[1].As<Napi::String>(); 492 Napi::String js_name = info[1].As<Napi::String>();
@@ -525,7 +525,7 @@ static Napi::String SpeakerEmbeddingManagerSearchWrapper( @@ -525,7 +525,7 @@ static Napi::String SpeakerEmbeddingManagerSearchWrapper(
525 return {}; 525 return {};
526 } 526 }
527 527
528 - SherpaOnnxSpeakerEmbeddingManager *manager = 528 + const SherpaOnnxSpeakerEmbeddingManager *manager =
529 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data(); 529 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
530 530
531 Napi::Object obj = info[1].As<Napi::Object>(); 531 Napi::Object obj = info[1].As<Napi::Object>();
@@ -603,7 +603,7 @@ static Napi::Boolean SpeakerEmbeddingManagerVerifyWrapper( @@ -603,7 +603,7 @@ static Napi::Boolean SpeakerEmbeddingManagerVerifyWrapper(
603 return {}; 603 return {};
604 } 604 }
605 605
606 - SherpaOnnxSpeakerEmbeddingManager *manager = 606 + const SherpaOnnxSpeakerEmbeddingManager *manager =
607 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data(); 607 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
608 608
609 Napi::Object obj = info[1].As<Napi::Object>(); 609 Napi::Object obj = info[1].As<Napi::Object>();
@@ -691,7 +691,7 @@ static Napi::Boolean SpeakerEmbeddingManagerContainsWrapper( @@ -691,7 +691,7 @@ static Napi::Boolean SpeakerEmbeddingManagerContainsWrapper(
691 return {}; 691 return {};
692 } 692 }
693 693
694 - SherpaOnnxSpeakerEmbeddingManager *manager = 694 + const SherpaOnnxSpeakerEmbeddingManager *manager =
695 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data(); 695 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
696 696
697 Napi::String js_name = info[1].As<Napi::String>(); 697 Napi::String js_name = info[1].As<Napi::String>();
@@ -724,7 +724,7 @@ static Napi::Number SpeakerEmbeddingManagerNumSpeakersWrapper( @@ -724,7 +724,7 @@ static Napi::Number SpeakerEmbeddingManagerNumSpeakersWrapper(
724 return {}; 724 return {};
725 } 725 }
726 726
727 - SherpaOnnxSpeakerEmbeddingManager *manager = 727 + const SherpaOnnxSpeakerEmbeddingManager *manager =
728 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data(); 728 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
729 729
730 int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager); 730 int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager);
@@ -753,7 +753,7 @@ static Napi::Array SpeakerEmbeddingManagerGetAllSpeakersWrapper( @@ -753,7 +753,7 @@ static Napi::Array SpeakerEmbeddingManagerGetAllSpeakersWrapper(
753 return {}; 753 return {};
754 } 754 }
755 755
756 - SherpaOnnxSpeakerEmbeddingManager *manager = 756 + const SherpaOnnxSpeakerEmbeddingManager *manager =
757 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data(); 757 info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
758 758
759 int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager); 759 int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager);
@@ -106,16 +106,17 @@ SpokenLanguageIdentificationCreateOfflineStreamWrapper( @@ -106,16 +106,17 @@ SpokenLanguageIdentificationCreateOfflineStreamWrapper(
106 return {}; 106 return {};
107 } 107 }
108 108
109 - SherpaOnnxSpokenLanguageIdentification *slid = 109 + const SherpaOnnxSpokenLanguageIdentification *slid =
110 info[0] 110 info[0]
111 .As<Napi::External<SherpaOnnxSpokenLanguageIdentification>>() 111 .As<Napi::External<SherpaOnnxSpokenLanguageIdentification>>()
112 .Data(); 112 .Data();
113 113
114 - SherpaOnnxOfflineStream *stream = 114 + const SherpaOnnxOfflineStream *stream =
115 SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid); 115 SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid);
116 116
117 return Napi::External<SherpaOnnxOfflineStream>::New( 117 return Napi::External<SherpaOnnxOfflineStream>::New(
118 - env, stream, [](Napi::Env env, SherpaOnnxOfflineStream *stream) { 118 + env, const_cast<SherpaOnnxOfflineStream *>(stream),
  119 + [](Napi::Env env, SherpaOnnxOfflineStream *stream) {
119 SherpaOnnxDestroyOfflineStream(stream); 120 SherpaOnnxDestroyOfflineStream(stream);
120 }); 121 });
121 } 122 }
@@ -147,12 +148,12 @@ static Napi::String SpokenLanguageIdentificationComputeWrapper( @@ -147,12 +148,12 @@ static Napi::String SpokenLanguageIdentificationComputeWrapper(
147 return {}; 148 return {};
148 } 149 }
149 150
150 - SherpaOnnxSpokenLanguageIdentification *slid = 151 + const SherpaOnnxSpokenLanguageIdentification *slid =
151 info[0] 152 info[0]
152 .As<Napi::External<SherpaOnnxSpokenLanguageIdentification>>() 153 .As<Napi::External<SherpaOnnxSpokenLanguageIdentification>>()
153 .Data(); 154 .Data();
154 155
155 - SherpaOnnxOfflineStream *stream = 156 + const SherpaOnnxOfflineStream *stream =
156 info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data(); 157 info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
157 158
158 const SherpaOnnxSpokenLanguageIdentificationResult *r = 159 const SherpaOnnxSpokenLanguageIdentificationResult *r =
@@ -278,7 +278,7 @@ static Napi::External<SherpaOnnxOnlineStream> CreateOnlineStreamWrapper( @@ -278,7 +278,7 @@ static Napi::External<SherpaOnnxOnlineStream> CreateOnlineStreamWrapper(
278 return {}; 278 return {};
279 } 279 }
280 280
281 - SherpaOnnxOnlineRecognizer *recognizer = 281 + const SherpaOnnxOnlineRecognizer *recognizer =
282 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data(); 282 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();
283 283
284 const SherpaOnnxOnlineStream *stream = 284 const SherpaOnnxOnlineStream *stream =
@@ -310,7 +310,7 @@ static void AcceptWaveformWrapper(const Napi::CallbackInfo &info) { @@ -310,7 +310,7 @@ static void AcceptWaveformWrapper(const Napi::CallbackInfo &info) {
310 return; 310 return;
311 } 311 }
312 312
313 - SherpaOnnxOnlineStream *stream = 313 + const SherpaOnnxOnlineStream *stream =
314 info[0].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); 314 info[0].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
315 315
316 if (!info[1].IsObject()) { 316 if (!info[1].IsObject()) {
@@ -390,10 +390,10 @@ static Napi::Boolean IsOnlineStreamReadyWrapper( @@ -390,10 +390,10 @@ static Napi::Boolean IsOnlineStreamReadyWrapper(
390 return {}; 390 return {};
391 } 391 }
392 392
393 - SherpaOnnxOnlineRecognizer *recognizer = 393 + const SherpaOnnxOnlineRecognizer *recognizer =
394 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data(); 394 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();
395 395
396 - SherpaOnnxOnlineStream *stream = 396 + const SherpaOnnxOnlineStream *stream =
397 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); 397 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
398 398
399 int32_t is_ready = SherpaOnnxIsOnlineStreamReady(recognizer, stream); 399 int32_t is_ready = SherpaOnnxIsOnlineStreamReady(recognizer, stream);
@@ -427,10 +427,10 @@ static void DecodeOnlineStreamWrapper(const Napi::CallbackInfo &info) { @@ -427,10 +427,10 @@ static void DecodeOnlineStreamWrapper(const Napi::CallbackInfo &info) {
427 return; 427 return;
428 } 428 }
429 429
430 - SherpaOnnxOnlineRecognizer *recognizer = 430 + const SherpaOnnxOnlineRecognizer *recognizer =
431 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data(); 431 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();
432 432
433 - SherpaOnnxOnlineStream *stream = 433 + const SherpaOnnxOnlineStream *stream =
434 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); 434 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
435 435
436 SherpaOnnxDecodeOnlineStream(recognizer, stream); 436 SherpaOnnxDecodeOnlineStream(recognizer, stream);
@@ -463,10 +463,10 @@ static Napi::String GetOnlineStreamResultAsJsonWrapper( @@ -463,10 +463,10 @@ static Napi::String GetOnlineStreamResultAsJsonWrapper(
463 return {}; 463 return {};
464 } 464 }
465 465
466 - SherpaOnnxOnlineRecognizer *recognizer = 466 + const SherpaOnnxOnlineRecognizer *recognizer =
467 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data(); 467 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();
468 468
469 - SherpaOnnxOnlineStream *stream = 469 + const SherpaOnnxOnlineStream *stream =
470 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); 470 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
471 471
472 const char *json = SherpaOnnxGetOnlineStreamResultAsJson(recognizer, stream); 472 const char *json = SherpaOnnxGetOnlineStreamResultAsJson(recognizer, stream);
@@ -496,7 +496,7 @@ static void InputFinishedWrapper(const Napi::CallbackInfo &info) { @@ -496,7 +496,7 @@ static void InputFinishedWrapper(const Napi::CallbackInfo &info) {
496 return; 496 return;
497 } 497 }
498 498
499 - SherpaOnnxOnlineStream *stream = 499 + const SherpaOnnxOnlineStream *stream =
500 info[0].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); 500 info[0].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
501 501
502 SherpaOnnxOnlineStreamInputFinished(stream); 502 SherpaOnnxOnlineStreamInputFinished(stream);
@@ -528,10 +528,10 @@ static void ResetOnlineStreamWrapper(const Napi::CallbackInfo &info) { @@ -528,10 +528,10 @@ static void ResetOnlineStreamWrapper(const Napi::CallbackInfo &info) {
528 return; 528 return;
529 } 529 }
530 530
531 - SherpaOnnxOnlineRecognizer *recognizer = 531 + const SherpaOnnxOnlineRecognizer *recognizer =
532 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data(); 532 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();
533 533
534 - SherpaOnnxOnlineStream *stream = 534 + const SherpaOnnxOnlineStream *stream =
535 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); 535 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
536 536
537 SherpaOnnxOnlineStreamReset(recognizer, stream); 537 SherpaOnnxOnlineStreamReset(recognizer, stream);
@@ -563,10 +563,10 @@ static Napi::Boolean IsEndpointWrapper(const Napi::CallbackInfo &info) { @@ -563,10 +563,10 @@ static Napi::Boolean IsEndpointWrapper(const Napi::CallbackInfo &info) {
563 return {}; 563 return {};
564 } 564 }
565 565
566 - SherpaOnnxOnlineRecognizer *recognizer = 566 + const SherpaOnnxOnlineRecognizer *recognizer =
567 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data(); 567 info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();
568 568
569 - SherpaOnnxOnlineStream *stream = 569 + const SherpaOnnxOnlineStream *stream =
570 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); 570 info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
571 571
572 int32_t is_endpoint = SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream); 572 int32_t is_endpoint = SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream);
@@ -636,7 +636,7 @@ static void PrintWrapper(const Napi::CallbackInfo &info) { @@ -636,7 +636,7 @@ static void PrintWrapper(const Napi::CallbackInfo &info) {
636 return; 636 return;
637 } 637 }
638 638
639 - SherpaOnnxDisplay *display = 639 + const SherpaOnnxDisplay *display =
640 info[0].As<Napi::External<SherpaOnnxDisplay>>().Data(); 640 info[0].As<Napi::External<SherpaOnnxDisplay>>().Data();
641 641
642 int32_t idx = info[1].As<Napi::Number>().Int32Value(); 642 int32_t idx = info[1].As<Napi::Number>().Int32Value();
@@ -28,11 +28,12 @@ static Napi::External<SherpaOnnxCircularBuffer> CreateCircularBufferWrapper( @@ -28,11 +28,12 @@ static Napi::External<SherpaOnnxCircularBuffer> CreateCircularBufferWrapper(
28 return {}; 28 return {};
29 } 29 }
30 30
31 - SherpaOnnxCircularBuffer *buf = 31 + const SherpaOnnxCircularBuffer *buf =
32 SherpaOnnxCreateCircularBuffer(info[0].As<Napi::Number>().Int32Value()); 32 SherpaOnnxCreateCircularBuffer(info[0].As<Napi::Number>().Int32Value());
33 33
34 return Napi::External<SherpaOnnxCircularBuffer>::New( 34 return Napi::External<SherpaOnnxCircularBuffer>::New(
35 - env, buf, [](Napi::Env env, SherpaOnnxCircularBuffer *p) { 35 + env, const_cast<SherpaOnnxCircularBuffer *>(buf),
  36 + [](Napi::Env env, SherpaOnnxCircularBuffer *p) {
36 SherpaOnnxDestroyCircularBuffer(p); 37 SherpaOnnxDestroyCircularBuffer(p);
37 }); 38 });
38 } 39 }
@@ -56,7 +57,7 @@ static void CircularBufferPushWrapper(const Napi::CallbackInfo &info) { @@ -56,7 +57,7 @@ static void CircularBufferPushWrapper(const Napi::CallbackInfo &info) {
56 return; 57 return;
57 } 58 }
58 59
59 - SherpaOnnxCircularBuffer *buf = 60 + const SherpaOnnxCircularBuffer *buf =
60 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data(); 61 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
61 62
62 if (!info[1].IsTypedArray()) { 63 if (!info[1].IsTypedArray()) {
@@ -101,7 +102,7 @@ static Napi::Float32Array CircularBufferGetWrapper( @@ -101,7 +102,7 @@ static Napi::Float32Array CircularBufferGetWrapper(
101 return {}; 102 return {};
102 } 103 }
103 104
104 - SherpaOnnxCircularBuffer *buf = 105 + const SherpaOnnxCircularBuffer *buf =
105 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data(); 106 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
106 107
107 if (!info[1].IsNumber()) { 108 if (!info[1].IsNumber()) {
@@ -179,7 +180,7 @@ static void CircularBufferPopWrapper(const Napi::CallbackInfo &info) { @@ -179,7 +180,7 @@ static void CircularBufferPopWrapper(const Napi::CallbackInfo &info) {
179 return; 180 return;
180 } 181 }
181 182
182 - SherpaOnnxCircularBuffer *buf = 183 + const SherpaOnnxCircularBuffer *buf =
183 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data(); 184 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
184 185
185 if (!info[1].IsNumber()) { 186 if (!info[1].IsNumber()) {
@@ -213,7 +214,7 @@ static Napi::Number CircularBufferSizeWrapper(const Napi::CallbackInfo &info) { @@ -213,7 +214,7 @@ static Napi::Number CircularBufferSizeWrapper(const Napi::CallbackInfo &info) {
213 return {}; 214 return {};
214 } 215 }
215 216
216 - SherpaOnnxCircularBuffer *buf = 217 + const SherpaOnnxCircularBuffer *buf =
217 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data(); 218 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
218 219
219 int32_t size = SherpaOnnxCircularBufferSize(buf); 220 int32_t size = SherpaOnnxCircularBufferSize(buf);
@@ -240,7 +241,7 @@ static Napi::Number CircularBufferHeadWrapper(const Napi::CallbackInfo &info) { @@ -240,7 +241,7 @@ static Napi::Number CircularBufferHeadWrapper(const Napi::CallbackInfo &info) {
240 return {}; 241 return {};
241 } 242 }
242 243
243 - SherpaOnnxCircularBuffer *buf = 244 + const SherpaOnnxCircularBuffer *buf =
244 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data(); 245 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
245 246
246 int32_t size = SherpaOnnxCircularBufferHead(buf); 247 int32_t size = SherpaOnnxCircularBufferHead(buf);
@@ -267,7 +268,7 @@ static void CircularBufferResetWrapper(const Napi::CallbackInfo &info) { @@ -267,7 +268,7 @@ static void CircularBufferResetWrapper(const Napi::CallbackInfo &info) {
267 return; 268 return;
268 } 269 }
269 270
270 - SherpaOnnxCircularBuffer *buf = 271 + const SherpaOnnxCircularBuffer *buf =
271 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data(); 272 info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();
272 273
273 SherpaOnnxCircularBufferReset(buf); 274 SherpaOnnxCircularBufferReset(buf);
@@ -360,18 +361,19 @@ CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) { @@ -360,18 +361,19 @@ CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) {
360 mgr(OH_ResourceManager_InitNativeResourceManager(env, info[2]), 361 mgr(OH_ResourceManager_InitNativeResourceManager(env, info[2]),
361 &OH_ResourceManager_ReleaseNativeResourceManager); 362 &OH_ResourceManager_ReleaseNativeResourceManager);
362 363
363 - SherpaOnnxVoiceActivityDetector *vad = 364 + const SherpaOnnxVoiceActivityDetector *vad =
364 SherpaOnnxCreateVoiceActivityDetectorOHOS(&c, buffer_size_in_seconds, 365 SherpaOnnxCreateVoiceActivityDetectorOHOS(&c, buffer_size_in_seconds,
365 mgr.get()); 366 mgr.get());
366 #else 367 #else
367 - SherpaOnnxVoiceActivityDetector *vad = 368 + const SherpaOnnxVoiceActivityDetector *vad =
368 SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds); 369 SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds);
369 #endif 370 #endif
370 SHERPA_ONNX_DELETE_C_STR(c.silero_vad.model); 371 SHERPA_ONNX_DELETE_C_STR(c.silero_vad.model);
371 SHERPA_ONNX_DELETE_C_STR(c.provider); 372 SHERPA_ONNX_DELETE_C_STR(c.provider);
372 373
373 return Napi::External<SherpaOnnxVoiceActivityDetector>::New( 374 return Napi::External<SherpaOnnxVoiceActivityDetector>::New(
374 - env, vad, [](Napi::Env env, SherpaOnnxVoiceActivityDetector *p) { 375 + env, const_cast<SherpaOnnxVoiceActivityDetector *>(vad),
  376 + [](Napi::Env env, SherpaOnnxVoiceActivityDetector *p) {
375 SherpaOnnxDestroyVoiceActivityDetector(p); 377 SherpaOnnxDestroyVoiceActivityDetector(p);
376 }); 378 });
377 } 379 }
@@ -396,7 +398,7 @@ static void VoiceActivityDetectorAcceptWaveformWrapper( @@ -396,7 +398,7 @@ static void VoiceActivityDetectorAcceptWaveformWrapper(
396 return; 398 return;
397 } 399 }
398 400
399 - SherpaOnnxVoiceActivityDetector *vad = 401 + const SherpaOnnxVoiceActivityDetector *vad =
400 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data(); 402 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
401 403
402 if (!info[1].IsTypedArray()) { 404 if (!info[1].IsTypedArray()) {
@@ -440,7 +442,7 @@ static Napi::Boolean VoiceActivityDetectorEmptyWrapper( @@ -440,7 +442,7 @@ static Napi::Boolean VoiceActivityDetectorEmptyWrapper(
440 return {}; 442 return {};
441 } 443 }
442 444
443 - SherpaOnnxVoiceActivityDetector *vad = 445 + const SherpaOnnxVoiceActivityDetector *vad =
444 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data(); 446 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
445 447
446 int32_t is_empty = SherpaOnnxVoiceActivityDetectorEmpty(vad); 448 int32_t is_empty = SherpaOnnxVoiceActivityDetectorEmpty(vad);
@@ -468,7 +470,7 @@ static Napi::Boolean VoiceActivityDetectorDetectedWrapper( @@ -468,7 +470,7 @@ static Napi::Boolean VoiceActivityDetectorDetectedWrapper(
468 return {}; 470 return {};
469 } 471 }
470 472
471 - SherpaOnnxVoiceActivityDetector *vad = 473 + const SherpaOnnxVoiceActivityDetector *vad =
472 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data(); 474 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
473 475
474 int32_t is_detected = SherpaOnnxVoiceActivityDetectorDetected(vad); 476 int32_t is_detected = SherpaOnnxVoiceActivityDetectorDetected(vad);
@@ -495,7 +497,7 @@ static void VoiceActivityDetectorPopWrapper(const Napi::CallbackInfo &info) { @@ -495,7 +497,7 @@ static void VoiceActivityDetectorPopWrapper(const Napi::CallbackInfo &info) {
495 return; 497 return;
496 } 498 }
497 499
498 - SherpaOnnxVoiceActivityDetector *vad = 500 + const SherpaOnnxVoiceActivityDetector *vad =
499 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data(); 501 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
500 502
501 SherpaOnnxVoiceActivityDetectorPop(vad); 503 SherpaOnnxVoiceActivityDetectorPop(vad);
@@ -520,7 +522,7 @@ static void VoiceActivityDetectorClearWrapper(const Napi::CallbackInfo &info) { @@ -520,7 +522,7 @@ static void VoiceActivityDetectorClearWrapper(const Napi::CallbackInfo &info) {
520 return; 522 return;
521 } 523 }
522 524
523 - SherpaOnnxVoiceActivityDetector *vad = 525 + const SherpaOnnxVoiceActivityDetector *vad =
524 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data(); 526 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
525 527
526 SherpaOnnxVoiceActivityDetectorClear(vad); 528 SherpaOnnxVoiceActivityDetectorClear(vad);
@@ -556,7 +558,7 @@ static Napi::Object VoiceActivityDetectorFrontWrapper( @@ -556,7 +558,7 @@ static Napi::Object VoiceActivityDetectorFrontWrapper(
556 } 558 }
557 } 559 }
558 560
559 - SherpaOnnxVoiceActivityDetector *vad = 561 + const SherpaOnnxVoiceActivityDetector *vad =
560 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data(); 562 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
561 563
562 const SherpaOnnxSpeechSegment *segment = 564 const SherpaOnnxSpeechSegment *segment =
@@ -618,7 +620,7 @@ static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) { @@ -618,7 +620,7 @@ static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) {
618 return; 620 return;
619 } 621 }
620 622
621 - SherpaOnnxVoiceActivityDetector *vad = 623 + const SherpaOnnxVoiceActivityDetector *vad =
622 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data(); 624 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
623 625
624 SherpaOnnxVoiceActivityDetectorReset(vad); 626 SherpaOnnxVoiceActivityDetectorReset(vad);
@@ -643,7 +645,7 @@ static void VoiceActivityDetectorFlushWrapper(const Napi::CallbackInfo &info) { @@ -643,7 +645,7 @@ static void VoiceActivityDetectorFlushWrapper(const Napi::CallbackInfo &info) {
643 return; 645 return;
644 } 646 }
645 647
646 - SherpaOnnxVoiceActivityDetector *vad = 648 + const SherpaOnnxVoiceActivityDetector *vad =
647 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data(); 649 info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
648 650
649 SherpaOnnxVoiceActivityDetectorFlush(vad); 651 SherpaOnnxVoiceActivityDetectorFlush(vad);
@@ -43,6 +43,12 @@ export LD_LIBRARY_PATH=$PWD/node_modules/.pnpm/sherpa-onnx-node@<REPLACE-THIS-WI @@ -43,6 +43,12 @@ export LD_LIBRARY_PATH=$PWD/node_modules/.pnpm/sherpa-onnx-node@<REPLACE-THIS-WI
43 43
44 The following tables list the examples in this folder. 44 The following tables list the examples in this folder.
45 45
  46 +## Speech enhancement/denoising
  47 +
  48 +|File| Description|
  49 +|---|---|
  50 +|[./test_offline_speech_enhancement_gtcrn.js](./test_offline_speech_enhancement_gtcrn.js)| It demonstrates how to use sherpa-onnx JavaScript API for speech enhancement.|
  51 +
46 ## Speaker diarization 52 ## Speaker diarization
47 53
48 |File| Description| 54 |File| Description|
@@ -159,6 +165,15 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segm @@ -159,6 +165,15 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segm
159 node ./test_offline_speaker_diarization.js 165 node ./test_offline_speaker_diarization.js
160 ``` 166 ```
161 167
  168 +### Speech enhancement/denoising
  169 +
  170 +```bash
  171 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
  172 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
  173 +
  174 +node ./test_offline_speech_enhancement_gtcrn.js
  175 +```
  176 +
162 ### Voice Activity detection (VAD) 177 ### Voice Activity detection (VAD)
163 178
164 ```bash 179 ```bash
  1 +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const sherpa_onnx = require('sherpa-onnx-node');
  4 +
  5 +function createOfflineSpeechDenoiser() {
  6 + // please download models from
  7 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  8 + const config = {
  9 + model: {
  10 + gtcrn: {model: './gtcrn_simple.onnx'},
  11 + debug: true,
  12 + numThreads: 1,
  13 + },
  14 + };
  15 +
  16 + return new sherpa_onnx.OfflineSpeechDenoiser(config);
  17 +}
  18 +
  19 +const sd = createOfflineSpeechDenoiser();
  20 +
  21 +const waveFilename = './inp_16k.wav';
  22 +const wave = sherpa_onnx.readWave(waveFilename);
  23 +const denoised = sd.run({
  24 + samples: wave.samples,
  25 + sampleRate: wave.sampleRate,
  26 + enableExternalBuffer: true
  27 +});
  28 +sherpa_onnx.writeWave(
  29 + './enhanced-16k.wav',
  30 + {samples: denoised.samples, sampleRate: denoised.sampleRate});
  31 +
  32 +console.log(`Saved to ./enhanced-16k.wav`);
@@ -20,6 +20,7 @@ set(srcs @@ -20,6 +20,7 @@ set(srcs
20 src/keyword-spotting.cc 20 src/keyword-spotting.cc
21 src/non-streaming-asr.cc 21 src/non-streaming-asr.cc
22 src/non-streaming-speaker-diarization.cc 22 src/non-streaming-speaker-diarization.cc
  23 + src/non-streaming-speech-denoiser.cc
23 src/non-streaming-tts.cc 24 src/non-streaming-tts.cc
24 src/punctuation.cc 25 src/punctuation.cc
25 src/sherpa-onnx-node-addon-api.cc 26 src/sherpa-onnx-node-addon-api.cc
  1 +const addon = require('./addon.js');
  2 +
  3 +class OfflineSpeechDenoiser {
  4 + constructor(config) {
  5 + this.handle = addon.createOfflineSpeechDenoiser(config);
  6 + this.config = config;
  7 +
  8 + this.sampleRate =
  9 + addon.offlineSpeechDenoiserGetSampleRateWrapper(this.handle);
  10 + }
  11 +
  12 + /*
  13 + obj is
  14 + {samples: samples, sampleRate: sampleRate, enableExternalBuffer: true}
  15 +
  16 + samples is a float32 array containing samples in the range [-1, 1]
  17 + sampleRate is a number
  18 +
  19 + return an object {samples: Float32Array, sampleRate: <a number>}
  20 + */
  21 + run(obj) {
  22 + return addon.offlineSpeechDenoiserRunWrapper(this.handle, obj);
  23 + }
  24 +}
  25 +
  26 +module.exports = {
  27 + OfflineSpeechDenoiser,
  28 +}
@@ -9,6 +9,7 @@ const at = require('./audio-tagg.js'); @@ -9,6 +9,7 @@ const at = require('./audio-tagg.js');
9 const punct = require('./punctuation.js'); 9 const punct = require('./punctuation.js');
10 const kws = require('./keyword-spotter.js'); 10 const kws = require('./keyword-spotter.js');
11 const sd = require('./non-streaming-speaker-diarization.js'); 11 const sd = require('./non-streaming-speaker-diarization.js');
  12 +const speech_denoiser = require('./non-streaming-speech-denoiser.js');
12 13
13 module.exports = { 14 module.exports = {
14 OnlineRecognizer: streaming_asr.OnlineRecognizer, 15 OnlineRecognizer: streaming_asr.OnlineRecognizer,
@@ -26,4 +27,5 @@ module.exports = { @@ -26,4 +27,5 @@ module.exports = {
26 Punctuation: punct.Punctuation, 27 Punctuation: punct.Punctuation,
27 KeywordSpotter: kws.KeywordSpotter, 28 KeywordSpotter: kws.KeywordSpotter,
28 OfflineSpeakerDiarization: sd.OfflineSpeakerDiarization, 29 OfflineSpeakerDiarization: sd.OfflineSpeakerDiarization,
  30 + OfflineSpeechDenoiser: speech_denoiser.OfflineSpeechDenoiser,
29 } 31 }
  1 +../../../harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speech-denoiser.cc
@@ -922,22 +922,23 @@ struct SherpaOnnxCircularBuffer { @@ -922,22 +922,23 @@ struct SherpaOnnxCircularBuffer {
922 std::unique_ptr<sherpa_onnx::CircularBuffer> impl; 922 std::unique_ptr<sherpa_onnx::CircularBuffer> impl;
923 }; 923 };
924 924
925 -SherpaOnnxCircularBuffer *SherpaOnnxCreateCircularBuffer(int32_t capacity) { 925 +const SherpaOnnxCircularBuffer *SherpaOnnxCreateCircularBuffer(
  926 + int32_t capacity) {
926 SherpaOnnxCircularBuffer *buffer = new SherpaOnnxCircularBuffer; 927 SherpaOnnxCircularBuffer *buffer = new SherpaOnnxCircularBuffer;
927 buffer->impl = std::make_unique<sherpa_onnx::CircularBuffer>(capacity); 928 buffer->impl = std::make_unique<sherpa_onnx::CircularBuffer>(capacity);
928 return buffer; 929 return buffer;
929 } 930 }
930 931
931 -void SherpaOnnxDestroyCircularBuffer(SherpaOnnxCircularBuffer *buffer) { 932 +void SherpaOnnxDestroyCircularBuffer(const SherpaOnnxCircularBuffer *buffer) {
932 delete buffer; 933 delete buffer;
933 } 934 }
934 935
935 -void SherpaOnnxCircularBufferPush(SherpaOnnxCircularBuffer *buffer, 936 +void SherpaOnnxCircularBufferPush(const SherpaOnnxCircularBuffer *buffer,
936 const float *p, int32_t n) { 937 const float *p, int32_t n) {
937 buffer->impl->Push(p, n); 938 buffer->impl->Push(p, n);
938 } 939 }
939 940
940 -const float *SherpaOnnxCircularBufferGet(SherpaOnnxCircularBuffer *buffer, 941 +const float *SherpaOnnxCircularBufferGet(const SherpaOnnxCircularBuffer *buffer,
941 int32_t start_index, int32_t n) { 942 int32_t start_index, int32_t n) {
942 std::vector<float> v = buffer->impl->Get(start_index, n); 943 std::vector<float> v = buffer->impl->Get(start_index, n);
943 944
@@ -948,19 +949,20 @@ const float *SherpaOnnxCircularBufferGet(SherpaOnnxCircularBuffer *buffer, @@ -948,19 +949,20 @@ const float *SherpaOnnxCircularBufferGet(SherpaOnnxCircularBuffer *buffer,
948 949
949 void SherpaOnnxCircularBufferFree(const float *p) { delete[] p; } 950 void SherpaOnnxCircularBufferFree(const float *p) { delete[] p; }
950 951
951 -void SherpaOnnxCircularBufferPop(SherpaOnnxCircularBuffer *buffer, int32_t n) { 952 +void SherpaOnnxCircularBufferPop(const SherpaOnnxCircularBuffer *buffer,
  953 + int32_t n) {
952 buffer->impl->Pop(n); 954 buffer->impl->Pop(n);
953 } 955 }
954 956
955 -int32_t SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer) { 957 +int32_t SherpaOnnxCircularBufferSize(const SherpaOnnxCircularBuffer *buffer) {
956 return buffer->impl->Size(); 958 return buffer->impl->Size();
957 } 959 }
958 960
959 -int32_t SherpaOnnxCircularBufferHead(SherpaOnnxCircularBuffer *buffer) { 961 +int32_t SherpaOnnxCircularBufferHead(const SherpaOnnxCircularBuffer *buffer) {
960 return buffer->impl->Head(); 962 return buffer->impl->Head();
961 } 963 }
962 964
963 -void SherpaOnnxCircularBufferReset(SherpaOnnxCircularBuffer *buffer) { 965 +void SherpaOnnxCircularBufferReset(const SherpaOnnxCircularBuffer *buffer) {
964 buffer->impl->Reset(); 966 buffer->impl->Reset();
965 } 967 }
966 968
@@ -1008,7 +1010,7 @@ sherpa_onnx::VadModelConfig GetVadModelConfig( @@ -1008,7 +1010,7 @@ sherpa_onnx::VadModelConfig GetVadModelConfig(
1008 return vad_config; 1010 return vad_config;
1009 } 1011 }
1010 1012
1011 -SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( 1013 +const SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
1012 const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds) { 1014 const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds) {
1013 auto vad_config = GetVadModelConfig(config); 1015 auto vad_config = GetVadModelConfig(config);
1014 1016
@@ -1025,35 +1027,37 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( @@ -1025,35 +1027,37 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
1025 } 1027 }
1026 1028
1027 void SherpaOnnxDestroyVoiceActivityDetector( 1029 void SherpaOnnxDestroyVoiceActivityDetector(
1028 - SherpaOnnxVoiceActivityDetector *p) { 1030 + const SherpaOnnxVoiceActivityDetector *p) {
1029 delete p; 1031 delete p;
1030 } 1032 }
1031 1033
1032 void SherpaOnnxVoiceActivityDetectorAcceptWaveform( 1034 void SherpaOnnxVoiceActivityDetectorAcceptWaveform(
1033 - SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n) { 1035 + const SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n) {
1034 p->impl->AcceptWaveform(samples, n); 1036 p->impl->AcceptWaveform(samples, n);
1035 } 1037 }
1036 1038
1037 int32_t SherpaOnnxVoiceActivityDetectorEmpty( 1039 int32_t SherpaOnnxVoiceActivityDetectorEmpty(
1038 - SherpaOnnxVoiceActivityDetector *p) { 1040 + const SherpaOnnxVoiceActivityDetector *p) {
1039 return p->impl->Empty(); 1041 return p->impl->Empty();
1040 } 1042 }
1041 1043
1042 int32_t SherpaOnnxVoiceActivityDetectorDetected( 1044 int32_t SherpaOnnxVoiceActivityDetectorDetected(
1043 - SherpaOnnxVoiceActivityDetector *p) { 1045 + const SherpaOnnxVoiceActivityDetector *p) {
1044 return p->impl->IsSpeechDetected(); 1046 return p->impl->IsSpeechDetected();
1045 } 1047 }
1046 1048
1047 -void SherpaOnnxVoiceActivityDetectorPop(SherpaOnnxVoiceActivityDetector *p) { 1049 +void SherpaOnnxVoiceActivityDetectorPop(
  1050 + const SherpaOnnxVoiceActivityDetector *p) {
1048 p->impl->Pop(); 1051 p->impl->Pop();
1049 } 1052 }
1050 1053
1051 -void SherpaOnnxVoiceActivityDetectorClear(SherpaOnnxVoiceActivityDetector *p) { 1054 +void SherpaOnnxVoiceActivityDetectorClear(
  1055 + const SherpaOnnxVoiceActivityDetector *p) {
1052 p->impl->Clear(); 1056 p->impl->Clear();
1053 } 1057 }
1054 1058
1055 const SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront( 1059 const SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront(
1056 - SherpaOnnxVoiceActivityDetector *p) { 1060 + const SherpaOnnxVoiceActivityDetector *p) {
1057 const sherpa_onnx::SpeechSegment &segment = p->impl->Front(); 1061 const sherpa_onnx::SpeechSegment &segment = p->impl->Front();
1058 1062
1059 SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment; 1063 SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment;
@@ -1072,11 +1076,13 @@ void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) { @@ -1072,11 +1076,13 @@ void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) {
1072 } 1076 }
1073 } 1077 }
1074 1078
1075 -void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) { 1079 +void SherpaOnnxVoiceActivityDetectorReset(
  1080 + const SherpaOnnxVoiceActivityDetector *p) {
1076 p->impl->Reset(); 1081 p->impl->Reset();
1077 } 1082 }
1078 1083
1079 -void SherpaOnnxVoiceActivityDetectorFlush(SherpaOnnxVoiceActivityDetector *p) { 1084 +void SherpaOnnxVoiceActivityDetectorFlush(
  1085 + const SherpaOnnxVoiceActivityDetector *p) {
1080 p->impl->Flush(); 1086 p->impl->Flush();
1081 } 1087 }
1082 1088
@@ -1915,7 +1921,7 @@ struct SherpaOnnxLinearResampler { @@ -1915,7 +1921,7 @@ struct SherpaOnnxLinearResampler {
1915 std::unique_ptr<sherpa_onnx::LinearResample> impl; 1921 std::unique_ptr<sherpa_onnx::LinearResample> impl;
1916 }; 1922 };
1917 1923
1918 -SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler( 1924 +const SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler(
1919 int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz, 1925 int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz,
1920 int32_t num_zeros) { 1926 int32_t num_zeros) {
1921 SherpaOnnxLinearResampler *p = new SherpaOnnxLinearResampler; 1927 SherpaOnnxLinearResampler *p = new SherpaOnnxLinearResampler;
@@ -1925,12 +1931,12 @@ SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler( @@ -1925,12 +1931,12 @@ SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler(
1925 return p; 1931 return p;
1926 } 1932 }
1927 1933
1928 -void SherpaOnnxDestroyLinearResampler(SherpaOnnxLinearResampler *p) { 1934 +void SherpaOnnxDestroyLinearResampler(const SherpaOnnxLinearResampler *p) {
1929 delete p; 1935 delete p;
1930 } 1936 }
1931 1937
1932 const SherpaOnnxResampleOut *SherpaOnnxLinearResamplerResample( 1938 const SherpaOnnxResampleOut *SherpaOnnxLinearResamplerResample(
1933 - SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim, 1939 + const SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim,
1934 int32_t flush) { 1940 int32_t flush) {
1935 std::vector<float> o; 1941 std::vector<float> o;
1936 p->impl->Resample(input, input_dim, flush, &o); 1942 p->impl->Resample(input, input_dim, flush, &o);
@@ -2320,7 +2326,7 @@ const SherpaOnnxOfflineSpeechDenoiser * @@ -2320,7 +2326,7 @@ const SherpaOnnxOfflineSpeechDenoiser *
2320 SherpaOnnxCreateOfflineSpeechDenoiserOHOS( 2326 SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
2321 const SherpaOnnxOfflineSpeechDenoiserConfig *config, 2327 const SherpaOnnxOfflineSpeechDenoiserConfig *config,
2322 NativeResourceManager *mgr) { 2328 NativeResourceManager *mgr) {
2323 - auto sd_config = GetOfflineSpeechDenoiserConfia(config); 2329 + auto sd_config = GetOfflineSpeechDenoiserConfig(config);
2324 2330
2325 SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser; 2331 SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser;
2326 2332
@@ -2361,7 +2367,8 @@ const SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizerOHOS( @@ -2361,7 +2367,8 @@ const SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizerOHOS(
2361 return recognizer; 2367 return recognizer;
2362 } 2368 }
2363 2369
2364 -SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetectorOHOS( 2370 +const SherpaOnnxVoiceActivityDetector *
  2371 +SherpaOnnxCreateVoiceActivityDetectorOHOS(
2365 const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds, 2372 const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
2366 NativeResourceManager *mgr) { 2373 NativeResourceManager *mgr) {
2367 if (mgr == nullptr) { 2374 if (mgr == nullptr) {
@@ -808,15 +808,15 @@ SHERPA_ONNX_API typedef struct SherpaOnnxCircularBuffer @@ -808,15 +808,15 @@ SHERPA_ONNX_API typedef struct SherpaOnnxCircularBuffer
808 // Return an instance of circular buffer. The user has to use 808 // Return an instance of circular buffer. The user has to use
809 // SherpaOnnxDestroyCircularBuffer() to free the returned pointer to avoid 809 // SherpaOnnxDestroyCircularBuffer() to free the returned pointer to avoid
810 // memory leak. 810 // memory leak.
811 -SHERPA_ONNX_API SherpaOnnxCircularBuffer *SherpaOnnxCreateCircularBuffer( 811 +SHERPA_ONNX_API const SherpaOnnxCircularBuffer *SherpaOnnxCreateCircularBuffer(
812 int32_t capacity); 812 int32_t capacity);
813 813
814 // Free the pointer returned by SherpaOnnxCreateCircularBuffer() 814 // Free the pointer returned by SherpaOnnxCreateCircularBuffer()
815 SHERPA_ONNX_API void SherpaOnnxDestroyCircularBuffer( 815 SHERPA_ONNX_API void SherpaOnnxDestroyCircularBuffer(
816 - SherpaOnnxCircularBuffer *buffer); 816 + const SherpaOnnxCircularBuffer *buffer);
817 817
818 SHERPA_ONNX_API void SherpaOnnxCircularBufferPush( 818 SHERPA_ONNX_API void SherpaOnnxCircularBufferPush(
819 - SherpaOnnxCircularBuffer *buffer, const float *p, int32_t n); 819 + const SherpaOnnxCircularBuffer *buffer, const float *p, int32_t n);
820 820
821 // Return n samples starting at the given index. 821 // Return n samples starting at the given index.
822 // 822 //
@@ -824,27 +824,27 @@ SHERPA_ONNX_API void SherpaOnnxCircularBufferPush( @@ -824,27 +824,27 @@ SHERPA_ONNX_API void SherpaOnnxCircularBufferPush(
824 // The user has to use SherpaOnnxCircularBufferFree() to free the returned 824 // The user has to use SherpaOnnxCircularBufferFree() to free the returned
825 // pointer to avoid memory leak. 825 // pointer to avoid memory leak.
826 SHERPA_ONNX_API const float *SherpaOnnxCircularBufferGet( 826 SHERPA_ONNX_API const float *SherpaOnnxCircularBufferGet(
827 - SherpaOnnxCircularBuffer *buffer, int32_t start_index, int32_t n); 827 + const SherpaOnnxCircularBuffer *buffer, int32_t start_index, int32_t n);
828 828
829 // Free the pointer returned by SherpaOnnxCircularBufferGet(). 829 // Free the pointer returned by SherpaOnnxCircularBufferGet().
830 SHERPA_ONNX_API void SherpaOnnxCircularBufferFree(const float *p); 830 SHERPA_ONNX_API void SherpaOnnxCircularBufferFree(const float *p);
831 831
832 // Remove n elements from the buffer 832 // Remove n elements from the buffer
833 SHERPA_ONNX_API void SherpaOnnxCircularBufferPop( 833 SHERPA_ONNX_API void SherpaOnnxCircularBufferPop(
834 - SherpaOnnxCircularBuffer *buffer, int32_t n); 834 + const SherpaOnnxCircularBuffer *buffer, int32_t n);
835 835
836 // Return number of elements in the buffer. 836 // Return number of elements in the buffer.
837 SHERPA_ONNX_API int32_t 837 SHERPA_ONNX_API int32_t
838 -SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer); 838 +SherpaOnnxCircularBufferSize(const SherpaOnnxCircularBuffer *buffer);
839 839
840 // Return the head of the buffer. It's always non-decreasing until you 840 // Return the head of the buffer. It's always non-decreasing until you
841 // invoke SherpaOnnxCircularBufferReset() which resets head to 0. 841 // invoke SherpaOnnxCircularBufferReset() which resets head to 0.
842 SHERPA_ONNX_API int32_t 842 SHERPA_ONNX_API int32_t
843 -SherpaOnnxCircularBufferHead(SherpaOnnxCircularBuffer *buffer); 843 +SherpaOnnxCircularBufferHead(const SherpaOnnxCircularBuffer *buffer);
844 844
845 // Clear all elements in the buffer 845 // Clear all elements in the buffer
846 SHERPA_ONNX_API void SherpaOnnxCircularBufferReset( 846 SHERPA_ONNX_API void SherpaOnnxCircularBufferReset(
847 - SherpaOnnxCircularBuffer *buffer); 847 + const SherpaOnnxCircularBuffer *buffer);
848 848
849 SHERPA_ONNX_API typedef struct SherpaOnnxSpeechSegment { 849 SHERPA_ONNX_API typedef struct SherpaOnnxSpeechSegment {
850 // The start index in samples of this segment 850 // The start index in samples of this segment
@@ -862,40 +862,40 @@ typedef struct SherpaOnnxVoiceActivityDetector SherpaOnnxVoiceActivityDetector; @@ -862,40 +862,40 @@ typedef struct SherpaOnnxVoiceActivityDetector SherpaOnnxVoiceActivityDetector;
862 // Return an instance of VoiceActivityDetector. 862 // Return an instance of VoiceActivityDetector.
863 // The user has to use SherpaOnnxDestroyVoiceActivityDetector() to free 863 // The user has to use SherpaOnnxDestroyVoiceActivityDetector() to free
864 // the returned pointer to avoid memory leak. 864 // the returned pointer to avoid memory leak.
865 -SHERPA_ONNX_API SherpaOnnxVoiceActivityDetector * 865 +SHERPA_ONNX_API const SherpaOnnxVoiceActivityDetector *
866 SherpaOnnxCreateVoiceActivityDetector(const SherpaOnnxVadModelConfig *config, 866 SherpaOnnxCreateVoiceActivityDetector(const SherpaOnnxVadModelConfig *config,
867 float buffer_size_in_seconds); 867 float buffer_size_in_seconds);
868 868
869 SHERPA_ONNX_API void SherpaOnnxDestroyVoiceActivityDetector( 869 SHERPA_ONNX_API void SherpaOnnxDestroyVoiceActivityDetector(
870 - SherpaOnnxVoiceActivityDetector *p); 870 + const SherpaOnnxVoiceActivityDetector *p);
871 871
872 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorAcceptWaveform( 872 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorAcceptWaveform(
873 - SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n); 873 + const SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n);
874 874
875 // Return 1 if there are no speech segments available. 875 // Return 1 if there are no speech segments available.
876 // Return 0 if there are speech segments. 876 // Return 0 if there are speech segments.
877 SHERPA_ONNX_API int32_t 877 SHERPA_ONNX_API int32_t
878 -SherpaOnnxVoiceActivityDetectorEmpty(SherpaOnnxVoiceActivityDetector *p); 878 +SherpaOnnxVoiceActivityDetectorEmpty(const SherpaOnnxVoiceActivityDetector *p);
879 879
880 // Return 1 if there is voice detected. 880 // Return 1 if there is voice detected.
881 // Return 0 if no voice is detected. 881 // Return 0 if no voice is detected.
882 -SHERPA_ONNX_API int32_t  
883 -SherpaOnnxVoiceActivityDetectorDetected(SherpaOnnxVoiceActivityDetector *p); 882 +SHERPA_ONNX_API int32_t SherpaOnnxVoiceActivityDetectorDetected(
  883 + const SherpaOnnxVoiceActivityDetector *p);
884 884
885 // Remove the first speech segment. 885 // Remove the first speech segment.
886 // It throws if SherpaOnnxVoiceActivityDetectorEmpty() returns 1. 886 // It throws if SherpaOnnxVoiceActivityDetectorEmpty() returns 1.
887 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorPop( 887 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorPop(
888 - SherpaOnnxVoiceActivityDetector *p); 888 + const SherpaOnnxVoiceActivityDetector *p);
889 889
890 // Clear current speech segments. 890 // Clear current speech segments.
891 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorClear( 891 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorClear(
892 - SherpaOnnxVoiceActivityDetector *p); 892 + const SherpaOnnxVoiceActivityDetector *p);
893 893
894 // Return the first speech segment. 894 // Return the first speech segment.
895 // The user has to use SherpaOnnxDestroySpeechSegment() to free the returned 895 // The user has to use SherpaOnnxDestroySpeechSegment() to free the returned
896 // pointer to avoid memory leak. 896 // pointer to avoid memory leak.
897 SHERPA_ONNX_API const SherpaOnnxSpeechSegment * 897 SHERPA_ONNX_API const SherpaOnnxSpeechSegment *
898 -SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p); 898 +SherpaOnnxVoiceActivityDetectorFront(const SherpaOnnxVoiceActivityDetector *p);
899 899
900 // Free the pointer returned by SherpaOnnxVoiceActivityDetectorFront(). 900 // Free the pointer returned by SherpaOnnxVoiceActivityDetectorFront().
901 SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment( 901 SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
@@ -903,10 +903,10 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment( @@ -903,10 +903,10 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
903 903
904 // Re-initialize the voice activity detector. 904 // Re-initialize the voice activity detector.
905 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset( 905 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
906 - SherpaOnnxVoiceActivityDetector *p); 906 + const SherpaOnnxVoiceActivityDetector *p);
907 907
908 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorFlush( 908 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorFlush(
909 - SherpaOnnxVoiceActivityDetector *p); 909 + const SherpaOnnxVoiceActivityDetector *p);
910 910
911 // ============================================================ 911 // ============================================================
912 // For offline Text-to-Speech (i.e., non-streaming TTS) 912 // For offline Text-to-Speech (i.e., non-streaming TTS)
@@ -1481,15 +1481,16 @@ SHERPA_ONNX_API typedef struct SherpaOnnxLinearResampler @@ -1481,15 +1481,16 @@ SHERPA_ONNX_API typedef struct SherpaOnnxLinearResampler
1481 */ 1481 */
1482 // The user has to invoke SherpaOnnxDestroyLinearResampler() 1482 // The user has to invoke SherpaOnnxDestroyLinearResampler()
1483 // to free the returned pointer to avoid memory leak 1483 // to free the returned pointer to avoid memory leak
1484 -SHERPA_ONNX_API SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler(  
1485 - int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz,  
1486 - int32_t num_zeros); 1484 +SHERPA_ONNX_API const SherpaOnnxLinearResampler *
  1485 +SherpaOnnxCreateLinearResampler(int32_t samp_rate_in_hz,
  1486 + int32_t samp_rate_out_hz,
  1487 + float filter_cutoff_hz, int32_t num_zeros);
1487 1488
1488 SHERPA_ONNX_API void SherpaOnnxDestroyLinearResampler( 1489 SHERPA_ONNX_API void SherpaOnnxDestroyLinearResampler(
1489 - SherpaOnnxLinearResampler *p); 1490 + const SherpaOnnxLinearResampler *p);
1490 1491
1491 SHERPA_ONNX_API void SherpaOnnxLinearResamplerReset( 1492 SHERPA_ONNX_API void SherpaOnnxLinearResamplerReset(
1492 - SherpaOnnxLinearResampler *p); 1493 + const SherpaOnnxLinearResampler *p);
1493 1494
1494 typedef struct SherpaOnnxResampleOut { 1495 typedef struct SherpaOnnxResampleOut {
1495 const float *samples; 1496 const float *samples;
@@ -1501,7 +1502,7 @@ typedef struct SherpaOnnxResampleOut { @@ -1501,7 +1502,7 @@ typedef struct SherpaOnnxResampleOut {
1501 // If this is the last segment, you can set flush to 1; otherwise, please 1502 // If this is the last segment, you can set flush to 1; otherwise, please
1502 // set flush to 0 1503 // set flush to 0
1503 SHERPA_ONNX_API const SherpaOnnxResampleOut *SherpaOnnxLinearResamplerResample( 1504 SHERPA_ONNX_API const SherpaOnnxResampleOut *SherpaOnnxLinearResamplerResample(
1504 - SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim, 1505 + const SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim,
1505 int32_t flush); 1506 int32_t flush);
1506 1507
1507 SHERPA_ONNX_API void SherpaOnnxLinearResamplerResampleFree( 1508 SHERPA_ONNX_API void SherpaOnnxLinearResamplerResampleFree(
@@ -1724,7 +1725,7 @@ SherpaOnnxCreateOfflineRecognizerOHOS( @@ -1724,7 +1725,7 @@ SherpaOnnxCreateOfflineRecognizerOHOS(
1724 // Return an instance of VoiceActivityDetector. 1725 // Return an instance of VoiceActivityDetector.
1725 // The user has to use SherpaOnnxDestroyVoiceActivityDetector() to free 1726 // The user has to use SherpaOnnxDestroyVoiceActivityDetector() to free
1726 // the returned pointer to avoid memory leak. 1727 // the returned pointer to avoid memory leak.
1727 -SHERPA_ONNX_API SherpaOnnxVoiceActivityDetector * 1728 +SHERPA_ONNX_API const SherpaOnnxVoiceActivityDetector *
1728 SherpaOnnxCreateVoiceActivityDetectorOHOS( 1729 SherpaOnnxCreateVoiceActivityDetectorOHOS(
1729 const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds, 1730 const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
1730 NativeResourceManager *mgr); 1731 NativeResourceManager *mgr);