Fangjun Kuang
Committed by GitHub

Add keyword spotting API for node-addon-api (#877)

@@ -18,7 +18,7 @@ fi @@ -18,7 +18,7 @@ fi
18 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) 18 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
19 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" 19 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
20 20
21 -# SHERPA_ONNX_VERSION=1.0.23 21 +# SHERPA_ONNX_VERSION=1.0.24
22 22
23 if [ -z $owner ]; then 23 if [ -z $owner ]; then
24 owner=k2-fsa 24 owner=k2-fsa
@@ -6,6 +6,15 @@ d=nodejs-addon-examples @@ -6,6 +6,15 @@ d=nodejs-addon-examples
6 echo "dir: $d" 6 echo "dir: $d"
7 cd $d 7 cd $d
8 8
  9 +echo "----------keyword spotting----------"
  10 +
  11 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  12 +tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  13 +rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  14 +
  15 +node ./test_keyword_spotter_transducer.js
  16 +rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
  17 +
9 echo "----------add punctuations----------" 18 echo "----------add punctuations----------"
10 19
11 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 20 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
@@ -55,7 +55,7 @@ jobs: @@ -55,7 +55,7 @@ jobs:
55 55
56 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) 56 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
57 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" 57 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
58 - # SHERPA_ONNX_VERSION=1.0.23 58 + # SHERPA_ONNX_VERSION=1.0.24
59 59
60 src_dir=.github/scripts/node-addon 60 src_dir=.github/scripts/node-addon
61 sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json 61 sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json
@@ -62,6 +62,13 @@ The following tables list the examples in this folder. @@ -62,6 +62,13 @@ The following tables list the examples in this folder.
62 |[./test_audio_tagging_zipformer.js](./test_audio_tagging_zipformer.js)| Audio tagging with a Zipformer model| 62 |[./test_audio_tagging_zipformer.js](./test_audio_tagging_zipformer.js)| Audio tagging with a Zipformer model|
63 |[./test_audio_tagging_ced.js](./test_audio_tagging_ced.js)| Audio tagging with a [CED](https://github.com/RicherMans/CED) model| 63 |[./test_audio_tagging_ced.js](./test_audio_tagging_ced.js)| Audio tagging with a [CED](https://github.com/RicherMans/CED) model|
64 64
  65 +## Keyword spotting
  66 +
  67 +|File| Description|
  68 +|---|---|
  69 +|[./test_keyword_spotter_transducer.js](./test_keyword_spotter_transducer.js)| Keyword spotting from a file using a Zipformer model|
  70 +|[./test_keyword_spotter_transducer_microphone.js](./test_keyword_spotter_transducer_microphone.js)| Keyword spotting from a microphone using a Zipformer model|
  71 +
65 ## Streaming speech-to-text from files 72 ## Streaming speech-to-text from files
66 73
67 |File| Description| 74 |File| Description|
@@ -325,3 +332,17 @@ rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 @@ -325,3 +332,17 @@ rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
325 332
326 node ./test_punctuation.js 333 node ./test_punctuation.js
327 ``` 334 ```
  335 +
  336 +## Keyword spotting
  337 +
  338 +```bash
  339 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  340 +tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  341 +rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  342 +
  343 +node ./test_keyword_spotter_transducer.js
  344 +
  345 +# To run keyword spotting using a microphone
  346 +npm install naudiodon2
  347 +node ./test_keyword_spotter_transducer_microphone.js
  348 +```
@@ -79,11 +79,5 @@ ai.on('data', data => { @@ -79,11 +79,5 @@ ai.on('data', data => {
79 } 79 }
80 }); 80 });
81 81
82 -ai.on('close', () => {  
83 - console.log('Free resources');  
84 - stream.free();  
85 - recognizer.free();  
86 -});  
87 -  
88 ai.start(); 82 ai.start();
89 console.log('Started! Please speak') 83 console.log('Started! Please speak')
@@ -78,11 +78,6 @@ ai.on('data', data => { @@ -78,11 +78,6 @@ ai.on('data', data => {
78 } 78 }
79 }); 79 });
80 80
81 -ai.on('close', () => {  
82 - console.log('Free resources');  
83 - stream.free();  
84 - recognizer.free();  
85 -});  
86 81
87 ai.start(); 82 ai.start();
88 console.log('Started! Please speak') 83 console.log('Started! Please speak')
@@ -94,11 +94,5 @@ ai.on('data', data => { @@ -94,11 +94,5 @@ ai.on('data', data => {
94 } 94 }
95 }); 95 });
96 96
97 -ai.on('close', () => {  
98 - console.log('Free resources');  
99 - stream.free();  
100 - recognizer.free();  
101 -});  
102 -  
103 ai.start(); 97 ai.start();
104 console.log('Started! Please speak') 98 console.log('Started! Please speak')
@@ -82,11 +82,5 @@ ai.on('data', data => { @@ -82,11 +82,5 @@ ai.on('data', data => {
82 } 82 }
83 }); 83 });
84 84
85 -ai.on('close', () => {  
86 - console.log('Free resources');  
87 - stream.free();  
88 - recognizer.free();  
89 -});  
90 -  
91 ai.start(); 85 ai.start();
92 console.log('Started! Please speak') 86 console.log('Started! Please speak')
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +const sherpa_onnx = require('sherpa-onnx-node');
  3 +const performance = require('perf_hooks').performance;
  4 +
  5 +
  6 +// Please download test files from
  7 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
// Configuration for the keyword spotter. Every path is relative to the
// current working directory and points into the extracted model archive.
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'transducer': {
      'encoder':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
      'decoder':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
      'joiner':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
    },
    'tokens':
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
    'numThreads': 1,
    'provider': 'cpu',
    'debug': 1,
  },
  'keywordsFile':
      './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt',
};

const waveFilename =
    './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';

const kws = new sherpa_onnx.KeywordSpotter(config);
console.log('Started');

// Everything from stream creation to the end of decoding is timed.
const start = performance.now();
const stream = kws.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Feed 0.4 seconds of silence after the real audio (presumably so that the
// tail of the recording can still be decoded -- confirm against the
// sherpa-onnx documentation).
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

const detectedKeywords = [];
while (kws.isReady(stream)) {
  // Decode first and read the result afterwards. Reading before decoding
  // would never look at the result produced by the final decode step.
  kws.decode(stream);

  const keyword = kws.getResult(stream).keyword;
  if (keyword !== '') {
    detectedKeywords.push(keyword);
  }
}
const stop = performance.now();

console.log('Done');

// The real-time factor is computed against the duration of the original
// wave only; the silence padding is not counted.
const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', detectedKeywords);
  1 +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const portAudio = require('naudiodon2');
  4 +// console.log(portAudio.getDevices());
  5 +
  6 +const sherpa_onnx = require('sherpa-onnx-node');
  7 +
/**
 * Builds a keyword spotter for the wenetspeech Zipformer KWS model.
 *
 * All model paths are relative to the current working directory and point
 * into the extracted model archive.
 *
 * @returns {sherpa_onnx.KeywordSpotter} A spotter constructed from the
 *     configuration assembled below.
 */
function createKeywordSpotter() {
  const featConfig = {
    sampleRate: 16000,
    featureDim: 80,
  };

  const transducer = {
    encoder:
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
    decoder:
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
    joiner:
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
  };

  const modelConfig = {
    transducer,
    tokens:
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
    numThreads: 2,
    provider: 'cpu',
    debug: 1,
  };

  return new sherpa_onnx.KeywordSpotter({
    featConfig,
    modelConfig,
    keywordsFile:
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt',
  });
}
  35 +
const kws = createKeywordSpotter();
const stream = kws.createStream();

// Number of keywords detected so far; passed to display.print() as the
// index of the line being printed.
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: kws.config.featConfig.sampleRate,
  },
});

const display = new sherpa_onnx.Display(50);

ai.on('data', (data) => {
  // NOTE(review): this views the whole ArrayBuffer behind `data`; it assumes
  // the Buffer delivered by naudiodon2 is not a slice of a larger pool --
  // confirm.
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: kws.config.featConfig.sampleRate, samples: samples});

  while (kws.isReady(stream)) {
    kws.decode(stream);

    // Check the result after every decode step. Polling only once after the
    // loop can lose a keyword detected by an earlier step in the same
    // callback.
    const keyword = kws.getResult(stream).keyword;
    if (keyword !== '') {
      display.print(segmentIndex, keyword);
      segmentIndex += 1;
    }
  }
});

ai.start();
console.log('Started! Please speak.');
console.log(`Only words from ${kws.config.keywordsFile} can be recognized`);
@@ -19,6 +19,7 @@ include_directories(${CMAKE_JS_INC}) @@ -19,6 +19,7 @@ include_directories(${CMAKE_JS_INC})
19 19
20 set(srcs 20 set(srcs
21 src/audio-tagging.cc 21 src/audio-tagging.cc
  22 + src/keyword-spotting.cc
22 src/non-streaming-asr.cc 23 src/non-streaming-asr.cc
23 src/non-streaming-tts.cc 24 src/non-streaming-tts.cc
24 src/punctuation.cc 25 src/punctuation.cc
  1 +const addon = require('./addon.js');
  2 +const streaming_asr = require('./streaming-asr.js');
  3 +
/**
 * JavaScript wrapper around the native keyword-spotting functions exposed by
 * the addon.
 */
class KeywordSpotter {
  /**
   * @param {object} config Passed unchanged to addon.createKeywordSpotter()
   *     and also stored on the instance as `config`.
   */
  constructor(config) {
    this.handle = addon.createKeywordSpotter(config);
    this.config = config;
  }

  /**
   * Creates a new stream bound to this spotter.
   *
   * @returns {OnlineStream} Wrapper around the native stream handle.
   */
  createStream() {
    return new streaming_asr.OnlineStream(
        addon.createKeywordStream(this.handle));
  }

  /**
   * @param {OnlineStream} stream A stream created by createStream().
   * @returns {boolean} Whatever addon.isKeywordStreamReady() reports.
   */
  isReady(stream) {
    return addon.isKeywordStreamReady(this.handle, stream.handle);
  }

  /**
   * Runs one native decode call on the stream.
   *
   * @param {OnlineStream} stream A stream created by createStream().
   */
  decode(stream) {
    addon.decodeKeywordStream(this.handle, stream.handle);
  }

  /**
   * @param {OnlineStream} stream A stream created by createStream().
   * @returns {object} The JSON result string of the addon, parsed.
   */
  getResult(stream) {
    return JSON.parse(
        addon.getKeywordResultAsJson(this.handle, stream.handle));
  }
}
  29 +
  30 +module.exports = {
  31 + KeywordSpotter,
  32 +}
@@ -7,6 +7,7 @@ const slid = require('./spoken-language-identification.js'); @@ -7,6 +7,7 @@ const slid = require('./spoken-language-identification.js');
7 const sid = require('./speaker-identification.js'); 7 const sid = require('./speaker-identification.js');
8 const at = require('./audio-tagg.js'); 8 const at = require('./audio-tagg.js');
9 const punct = require('./punctuation.js'); 9 const punct = require('./punctuation.js');
  10 +const kws = require('./keyword-spotter.js');
10 11
11 module.exports = { 12 module.exports = {
12 OnlineRecognizer: streaming_asr.OnlineRecognizer, 13 OnlineRecognizer: streaming_asr.OnlineRecognizer,
@@ -22,4 +23,5 @@ module.exports = { @@ -22,4 +23,5 @@ module.exports = {
22 SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager, 23 SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager,
23 AudioTagging: at.AudioTagging, 24 AudioTagging: at.AudioTagging,
24 Punctuation: punct.Punctuation, 25 Punctuation: punct.Punctuation,
  26 + KeywordSpotter: kws.KeywordSpotter,
25 } 27 }
  1 +// scripts/node-addon-api/src/keyword-spotting.cc
  2 +//
  3 +// Copyright (c) 2024 Xiaomi Corporation
  4 +#include <sstream>
  5 +
  6 +#include "macros.h" // NOLINT
  7 +#include "napi.h" // NOLINT
  8 +#include "sherpa-onnx/c-api/c-api.h"
  9 +
  10 +// defined in ./streaming-asr.cc
  11 +SherpaOnnxFeatureConfig GetFeatureConfig(Napi::Object obj);
  12 +
  13 +// defined in ./streaming-asr.cc
  14 +SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj);
  15 +
  16 +static Napi::External<SherpaOnnxKeywordSpotter> CreateKeywordSpotterWrapper(
  17 + const Napi::CallbackInfo &info) {
  18 + Napi::Env env = info.Env();
  19 + if (info.Length() != 1) {
  20 + std::ostringstream os;
  21 + os << "Expect only 1 argument. Given: " << info.Length();
  22 +
  23 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  24 +
  25 + return {};
  26 + }
  27 +
  28 + if (!info[0].IsObject()) {
  29 + Napi::TypeError::New(env, "Expect an object as the argument")
  30 + .ThrowAsJavaScriptException();
  31 +
  32 + return {};
  33 + }
  34 +
  35 + Napi::Object o = info[0].As<Napi::Object>();
  36 + SherpaOnnxKeywordSpotterConfig c;
  37 + memset(&c, 0, sizeof(c));
  38 + c.feat_config = GetFeatureConfig(o);
  39 + c.model_config = GetOnlineModelConfig(o);
  40 +
  41 + SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths);
  42 + SHERPA_ONNX_ASSIGN_ATTR_INT32(num_trailing_blanks, numTrailingBlanks);
  43 + SHERPA_ONNX_ASSIGN_ATTR_FLOAT(keywords_score, keywordsScore);
  44 + SHERPA_ONNX_ASSIGN_ATTR_FLOAT(keywords_threshold, keywordsThreshold);
  45 + SHERPA_ONNX_ASSIGN_ATTR_STR(keywords_file, keywordsFile);
  46 +
  47 + SherpaOnnxKeywordSpotter *kws = CreateKeywordSpotter(&c);
  48 +
  49 + if (c.model_config.transducer.encoder) {
  50 + delete[] c.model_config.transducer.encoder;
  51 + }
  52 +
  53 + if (c.model_config.transducer.decoder) {
  54 + delete[] c.model_config.transducer.decoder;
  55 + }
  56 +
  57 + if (c.model_config.transducer.joiner) {
  58 + delete[] c.model_config.transducer.joiner;
  59 + }
  60 +
  61 + if (c.model_config.paraformer.encoder) {
  62 + delete[] c.model_config.paraformer.encoder;
  63 + }
  64 +
  65 + if (c.model_config.paraformer.decoder) {
  66 + delete[] c.model_config.paraformer.decoder;
  67 + }
  68 +
  69 + if (c.model_config.zipformer2_ctc.model) {
  70 + delete[] c.model_config.zipformer2_ctc.model;
  71 + }
  72 +
  73 + if (c.model_config.tokens) {
  74 + delete[] c.model_config.tokens;
  75 + }
  76 +
  77 + if (c.model_config.provider) {
  78 + delete[] c.model_config.provider;
  79 + }
  80 +
  81 + if (c.model_config.model_type) {
  82 + delete[] c.model_config.model_type;
  83 + }
  84 +
  85 + if (c.keywords_file) {
  86 + delete[] c.keywords_file;
  87 + }
  88 +
  89 + if (!kws) {
  90 + Napi::TypeError::New(env, "Please check your config!")
  91 + .ThrowAsJavaScriptException();
  92 +
  93 + return {};
  94 + }
  95 +
  96 + return Napi::External<SherpaOnnxKeywordSpotter>::New(
  97 + env, kws, [](Napi::Env env, SherpaOnnxKeywordSpotter *kws) {
  98 + DestroyKeywordSpotter(kws);
  99 + });
  100 +}
  101 +
  102 +static Napi::External<SherpaOnnxOnlineStream> CreateKeywordStreamWrapper(
  103 + const Napi::CallbackInfo &info) {
  104 + Napi::Env env = info.Env();
  105 + if (info.Length() != 1) {
  106 + std::ostringstream os;
  107 + os << "Expect only 1 argument. Given: " << info.Length();
  108 +
  109 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  110 +
  111 + return {};
  112 + }
  113 +
  114 + if (!info[0].IsExternal()) {
  115 + Napi::TypeError::New(
  116 + env, "You should pass a keyword spotter pointer as the only argument")
  117 + .ThrowAsJavaScriptException();
  118 +
  119 + return {};
  120 + }
  121 +
  122 + SherpaOnnxKeywordSpotter *kws =
  123 + info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data();
  124 +
  125 + SherpaOnnxOnlineStream *stream = CreateKeywordStream(kws);
  126 +
  127 + return Napi::External<SherpaOnnxOnlineStream>::New(
  128 + env, stream, [](Napi::Env env, SherpaOnnxOnlineStream *stream) {
  129 + DestroyOnlineStream(stream);
  130 + });
  131 +}
  132 +
  133 +static Napi::Boolean IsKeywordStreamReadyWrapper(
  134 + const Napi::CallbackInfo &info) {
  135 + Napi::Env env = info.Env();
  136 + if (info.Length() != 2) {
  137 + std::ostringstream os;
  138 + os << "Expect only 2 arguments. Given: " << info.Length();
  139 +
  140 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  141 +
  142 + return {};
  143 + }
  144 +
  145 + if (!info[0].IsExternal()) {
  146 + Napi::TypeError::New(env, "Argument 0 should be a keyword spotter pointer.")
  147 + .ThrowAsJavaScriptException();
  148 +
  149 + return {};
  150 + }
  151 +
  152 + if (!info[1].IsExternal()) {
  153 + Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
  154 + .ThrowAsJavaScriptException();
  155 +
  156 + return {};
  157 + }
  158 +
  159 + SherpaOnnxKeywordSpotter *kws =
  160 + info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data();
  161 +
  162 + SherpaOnnxOnlineStream *stream =
  163 + info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
  164 +
  165 + int32_t is_ready = IsKeywordStreamReady(kws, stream);
  166 +
  167 + return Napi::Boolean::New(env, is_ready);
  168 +}
  169 +
  170 +static void DecodeKeywordStreamWrapper(const Napi::CallbackInfo &info) {
  171 + Napi::Env env = info.Env();
  172 + if (info.Length() != 2) {
  173 + std::ostringstream os;
  174 + os << "Expect only 2 arguments. Given: " << info.Length();
  175 +
  176 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  177 +
  178 + return;
  179 + }
  180 +
  181 + if (!info[0].IsExternal()) {
  182 + Napi::TypeError::New(env, "Argument 0 should be a keyword spotter pointer.")
  183 + .ThrowAsJavaScriptException();
  184 +
  185 + return;
  186 + }
  187 +
  188 + if (!info[1].IsExternal()) {
  189 + Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
  190 + .ThrowAsJavaScriptException();
  191 +
  192 + return;
  193 + }
  194 +
  195 + SherpaOnnxKeywordSpotter *kws =
  196 + info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data();
  197 +
  198 + SherpaOnnxOnlineStream *stream =
  199 + info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
  200 +
  201 + DecodeKeywordStream(kws, stream);
  202 +}
  203 +
  204 +static Napi::String GetKeywordResultAsJsonWrapper(
  205 + const Napi::CallbackInfo &info) {
  206 + Napi::Env env = info.Env();
  207 + if (info.Length() != 2) {
  208 + std::ostringstream os;
  209 + os << "Expect only 2 arguments. Given: " << info.Length();
  210 +
  211 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  212 +
  213 + return {};
  214 + }
  215 +
  216 + if (!info[0].IsExternal()) {
  217 + Napi::TypeError::New(env, "Argument 0 should be a keyword spotter pointer.")
  218 + .ThrowAsJavaScriptException();
  219 +
  220 + return {};
  221 + }
  222 +
  223 + if (!info[1].IsExternal()) {
  224 + Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
  225 + .ThrowAsJavaScriptException();
  226 +
  227 + return {};
  228 + }
  229 +
  230 + SherpaOnnxKeywordSpotter *kws =
  231 + info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data();
  232 +
  233 + SherpaOnnxOnlineStream *stream =
  234 + info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
  235 +
  236 + const char *json = GetKeywordResultAsJson(kws, stream);
  237 +
  238 + Napi::String s = Napi::String::New(env, json);
  239 +
  240 + FreeKeywordResultJson(json);
  241 +
  242 + return s;
  243 +}
  244 +
  245 +void InitKeywordSpotting(Napi::Env env, Napi::Object exports) {
  246 + exports.Set(Napi::String::New(env, "createKeywordSpotter"),
  247 + Napi::Function::New(env, CreateKeywordSpotterWrapper));
  248 +
  249 + exports.Set(Napi::String::New(env, "createKeywordStream"),
  250 + Napi::Function::New(env, CreateKeywordStreamWrapper));
  251 +
  252 + exports.Set(Napi::String::New(env, "isKeywordStreamReady"),
  253 + Napi::Function::New(env, IsKeywordStreamReadyWrapper));
  254 +
  255 + exports.Set(Napi::String::New(env, "decodeKeywordStream"),
  256 + Napi::Function::New(env, DecodeKeywordStreamWrapper));
  257 +
  258 + exports.Set(Napi::String::New(env, "getKeywordResultAsJson"),
  259 + Napi::Function::New(env, GetKeywordResultAsJsonWrapper));
  260 +}
@@ -23,6 +23,8 @@ void InitAudioTagging(Napi::Env env, Napi::Object exports); @@ -23,6 +23,8 @@ void InitAudioTagging(Napi::Env env, Napi::Object exports);
23 23
24 void InitPunctuation(Napi::Env env, Napi::Object exports); 24 void InitPunctuation(Napi::Env env, Napi::Object exports);
25 25
  26 +void InitKeywordSpotting(Napi::Env env, Napi::Object exports);
  27 +
26 Napi::Object Init(Napi::Env env, Napi::Object exports) { 28 Napi::Object Init(Napi::Env env, Napi::Object exports) {
27 InitStreamingAsr(env, exports); 29 InitStreamingAsr(env, exports);
28 InitNonStreamingAsr(env, exports); 30 InitNonStreamingAsr(env, exports);
@@ -34,6 +36,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) { @@ -34,6 +36,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
34 InitSpeakerID(env, exports); 36 InitSpeakerID(env, exports);
35 InitAudioTagging(env, exports); 37 InitAudioTagging(env, exports);
36 InitPunctuation(env, exports); 38 InitPunctuation(env, exports);
  39 + InitKeywordSpotting(env, exports);
37 40
38 return exports; 41 return exports;
39 } 42 }
@@ -90,7 +90,7 @@ static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig( @@ -90,7 +90,7 @@ static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig(
90 return c; 90 return c;
91 } 91 }
92 92
93 -static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { 93 +SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
94 SherpaOnnxOnlineModelConfig c; 94 SherpaOnnxOnlineModelConfig c;
95 memset(&c, 0, sizeof(c)); 95 memset(&c, 0, sizeof(c));
96 96
@@ -653,6 +653,20 @@ void DestroyKeywordResult(const SherpaOnnxKeywordResult *r) { @@ -653,6 +653,20 @@ void DestroyKeywordResult(const SherpaOnnxKeywordResult *r) {
653 } 653 }
654 } 654 }
655 655
  656 +const char *GetKeywordResultAsJson(SherpaOnnxKeywordSpotter *spotter,
  657 + SherpaOnnxOnlineStream *stream) {
  658 + const sherpa_onnx::KeywordResult &result =
  659 + spotter->impl->GetResult(stream->impl.get());
  660 +
  661 + std::string json = result.AsJsonString();
  662 + char *pJson = new char[json.size() + 1];
  663 + std::copy(json.begin(), json.end(), pJson);
  664 + pJson[json.size()] = 0;
  665 + return pJson;
  666 +}
  667 +
// Releases a string with delete[]; meant for pointers returned by
// GetKeywordResultAsJson(). Passing a null pointer is a no-op.
void FreeKeywordResultJson(const char *s) {
  delete[] s;
}
  669 +
656 // ============================================================ 670 // ============================================================
657 // For VAD 671 // For VAD
658 // ============================================================ 672 // ============================================================
@@ -625,6 +625,13 @@ SHERPA_ONNX_API const SherpaOnnxKeywordResult *GetKeywordResult( @@ -625,6 +625,13 @@ SHERPA_ONNX_API const SherpaOnnxKeywordResult *GetKeywordResult(
625 /// @param r A pointer returned by GetKeywordResult() 625 /// @param r A pointer returned by GetKeywordResult()
626 SHERPA_ONNX_API void DestroyKeywordResult(const SherpaOnnxKeywordResult *r); 626 SHERPA_ONNX_API void DestroyKeywordResult(const SherpaOnnxKeywordResult *r);
627 627
  628 +// the user has to call FreeKeywordResultJson() to free the returned pointer
  629 +// to avoid memory leak
  630 +SHERPA_ONNX_API const char *GetKeywordResultAsJson(
  631 + SherpaOnnxKeywordSpotter *spotter, SherpaOnnxOnlineStream *stream);
  632 +
  633 +SHERPA_ONNX_API void FreeKeywordResultJson(const char *s);
  634 +
628 // ============================================================ 635 // ============================================================
629 // For VAD 636 // For VAD
630 // ============================================================ 637 // ============================================================