Committed by
GitHub
Add keyword spotting API for node-addon-api (#877)
正在显示
18 个修改的文件
包含
492 行增加
和
26 行删除
| @@ -18,7 +18,7 @@ fi | @@ -18,7 +18,7 @@ fi | ||
| 18 | SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | 18 | SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) |
| 19 | echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" | 19 | echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" |
| 20 | 20 | ||
| 21 | -# SHERPA_ONNX_VERSION=1.0.23 | 21 | +# SHERPA_ONNX_VERSION=1.0.24 |
| 22 | 22 | ||
| 23 | if [ -z $owner ]; then | 23 | if [ -z $owner ]; then |
| 24 | owner=k2-fsa | 24 | owner=k2-fsa |
| @@ -6,6 +6,15 @@ d=nodejs-addon-examples | @@ -6,6 +6,15 @@ d=nodejs-addon-examples | ||
| 6 | echo "dir: $d" | 6 | echo "dir: $d" |
| 7 | cd $d | 7 | cd $d |
| 8 | 8 | ||
| 9 | +echo "----------keyword spotting----------" | ||
| 10 | + | ||
| 11 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 12 | +tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 13 | +rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 14 | + | ||
| 15 | +node ./test_keyword_spotter_transducer.js | ||
| 16 | +rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 | ||
| 17 | + | ||
| 9 | echo "----------add punctuations----------" | 18 | echo "----------add punctuations----------" |
| 10 | 19 | ||
| 11 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 | 20 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 |
| @@ -55,7 +55,7 @@ jobs: | @@ -55,7 +55,7 @@ jobs: | ||
| 55 | 55 | ||
| 56 | SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | 56 | SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) |
| 57 | echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" | 57 | echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" |
| 58 | - # SHERPA_ONNX_VERSION=1.0.23 | 58 | + # SHERPA_ONNX_VERSION=1.0.24 |
| 59 | 59 | ||
| 60 | src_dir=.github/scripts/node-addon | 60 | src_dir=.github/scripts/node-addon |
| 61 | sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json | 61 | sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json |
| @@ -62,6 +62,13 @@ The following tables list the examples in this folder. | @@ -62,6 +62,13 @@ The following tables list the examples in this folder. | ||
| 62 | |[./test_audio_tagging_zipformer.js](./test_audio_tagging_zipformer.js)| Audio tagging with a Zipformer model| | 62 | |[./test_audio_tagging_zipformer.js](./test_audio_tagging_zipformer.js)| Audio tagging with a Zipformer model| |
| 63 | |[./test_audio_tagging_ced.js](./test_audio_tagging_ced.js)| Audio tagging with a [CED](https://github.com/RicherMans/CED) model| | 63 | |[./test_audio_tagging_ced.js](./test_audio_tagging_ced.js)| Audio tagging with a [CED](https://github.com/RicherMans/CED) model| |
| 64 | 64 | ||
| 65 | +## Keyword spotting | ||
| 66 | + | ||
| 67 | +|File| Description| | ||
| 68 | +|---|---| | ||
| 69 | +|[./test_keyword_spotter_transducer.js](./test_keyword_spotter_transducer.js)| Keyword spotting from a file using a Zipformer model| | ||
| 70 | +|[./test_keyword_spotter_transducer_microphone.js](./test_keyword_spotter_transducer_microphone.js)| Keyword spotting from a microphone using a Zipformer model| | ||
| 71 | + | ||
| 65 | ## Streaming speech-to-text from files | 72 | ## Streaming speech-to-text from files |
| 66 | 73 | ||
| 67 | |File| Description| | 74 | |File| Description| |
| @@ -325,3 +332,17 @@ rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 | @@ -325,3 +332,17 @@ rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 | ||
| 325 | 332 | ||
| 326 | node ./test_punctuation.js | 333 | node ./test_punctuation.js |
| 327 | ``` | 334 | ``` |
| 335 | + | ||
| 336 | +## Keyword spotting | ||
| 337 | + | ||
| 338 | +```bash | ||
| 339 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 340 | +tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 341 | +rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 342 | + | ||
| 343 | +node ./test_keyword_spotter_transducer.js | ||
| 344 | + | ||
| 345 | +# To run keyword spotting using a microphone | ||
| 346 | +npm install naudiodon2 | ||
| 347 | +node ./test_keyword_spotter_transducer_microphone.js | ||
| 348 | +``` |
| @@ -79,11 +79,5 @@ ai.on('data', data => { | @@ -79,11 +79,5 @@ ai.on('data', data => { | ||
| 79 | } | 79 | } |
| 80 | }); | 80 | }); |
| 81 | 81 | ||
| 82 | -ai.on('close', () => { | ||
| 83 | - console.log('Free resources'); | ||
| 84 | - stream.free(); | ||
| 85 | - recognizer.free(); | ||
| 86 | -}); | ||
| 87 | - | ||
| 88 | ai.start(); | 82 | ai.start(); |
| 89 | console.log('Started! Please speak') | 83 | console.log('Started! Please speak') |
| @@ -78,11 +78,6 @@ ai.on('data', data => { | @@ -78,11 +78,6 @@ ai.on('data', data => { | ||
| 78 | } | 78 | } |
| 79 | }); | 79 | }); |
| 80 | 80 | ||
| 81 | -ai.on('close', () => { | ||
| 82 | - console.log('Free resources'); | ||
| 83 | - stream.free(); | ||
| 84 | - recognizer.free(); | ||
| 85 | -}); | ||
| 86 | 81 | ||
| 87 | ai.start(); | 82 | ai.start(); |
| 88 | console.log('Started! Please speak') | 83 | console.log('Started! Please speak') |
| @@ -94,11 +94,5 @@ ai.on('data', data => { | @@ -94,11 +94,5 @@ ai.on('data', data => { | ||
| 94 | } | 94 | } |
| 95 | }); | 95 | }); |
| 96 | 96 | ||
| 97 | -ai.on('close', () => { | ||
| 98 | - console.log('Free resources'); | ||
| 99 | - stream.free(); | ||
| 100 | - recognizer.free(); | ||
| 101 | -}); | ||
| 102 | - | ||
| 103 | ai.start(); | 97 | ai.start(); |
| 104 | console.log('Started! Please speak') | 98 | console.log('Started! Please speak') |
| @@ -82,11 +82,5 @@ ai.on('data', data => { | @@ -82,11 +82,5 @@ ai.on('data', data => { | ||
| 82 | } | 82 | } |
| 83 | }); | 83 | }); |
| 84 | 84 | ||
| 85 | -ai.on('close', () => { | ||
| 86 | - console.log('Free resources'); | ||
| 87 | - stream.free(); | ||
| 88 | - recognizer.free(); | ||
| 89 | -}); | ||
| 90 | - | ||
| 91 | ai.start(); | 85 | ai.start(); |
| 92 | console.log('Started! Please speak') | 86 | console.log('Started! Please speak') |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +const sherpa_onnx = require('sherpa-onnx-node'); | ||
| 3 | +const performance = require('perf_hooks').performance; | ||
| 4 | + | ||
| 5 | + | ||
| 6 | +// Please download test files from | ||
| 7 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models | ||
// Feature-extraction settings handed to the keyword spotter.
const featConfig = {
  sampleRate: 16000,
  featureDim: 80,
};

// The three transducer model files inside the extracted model directory.
const transducer = {
  encoder:
      './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
  decoder:
      './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
  joiner:
      './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
};

// Model settings: model files, token table and runtime options.
const modelConfig = {
  transducer,
  tokens: './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
  numThreads: 1,
  provider: 'cpu',
  debug: 1,
};

// Complete configuration object passed to sherpa_onnx.KeywordSpotter.
const config = {
  featConfig,
  modelConfig,
  keywordsFile:
      './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt',
};

// The wave file that is decoded by this example.
const waveFilename =
    './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';
| 34 | + | ||
// Create the spotter, feed it the whole wave file plus some trailing
// silence, and collect every keyword reported while decoding.
const kws = new sherpa_onnx.KeywordSpotter(config);
console.log('Started');
const start = performance.now();
const stream = kws.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Append 0.4 seconds of zeros after the real audio.
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

const detectedKeywords = [];
while (kws.isReady(stream)) {
  // Decode first, then look at the result. Reading the result before
  // decoding (as was done previously) never inspects the result produced by
  // the final decode() call, so a keyword found there would be lost.
  kws.decode(stream);

  const keyword = kws.getResult(stream).keyword;
  if (keyword != '') {
    detectedKeywords.push(keyword);
  }
}
const stop = performance.now();

console.log('Done');

// performance.now() is in milliseconds; report seconds and the real-time
// factor (processing time divided by audio duration, padding excluded).
const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', detectedKeywords);
| 1 | +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +const portAudio = require('naudiodon2'); | ||
| 4 | +// console.log(portAudio.getDevices()); | ||
| 5 | + | ||
| 6 | +const sherpa_onnx = require('sherpa-onnx-node'); | ||
| 7 | + | ||
/**
 * Build a sherpa_onnx.KeywordSpotter configured with the model files from
 * the sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 directory.
 *
 * @returns {sherpa_onnx.KeywordSpotter} the newly created spotter
 */
function createKeywordSpotter() {
  const featConfig = {
    sampleRate: 16000,
    featureDim: 80,
  };

  const transducer = {
    encoder:
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
    decoder:
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
    joiner:
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
  };

  const modelConfig = {
    transducer,
    tokens:
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
    numThreads: 2,
    provider: 'cpu',
    debug: 1,
  };

  return new sherpa_onnx.KeywordSpotter({
    featConfig,
    modelConfig,
    keywordsFile:
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt',
  });
}
| 35 | + | ||
const kws = createKeywordSpotter();
const stream = kws.createStream();

// Index passed to display.print(); bumped after every detected keyword.
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: kws.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  // `data` is a Node.js Buffer. Its backing ArrayBuffer may be larger than
  // the Buffer itself (e.g. when it comes from the shared pool), so the view
  // must be restricted to [byteOffset, byteOffset + length) instead of
  // covering the whole ArrayBuffer.
  // NOTE(review): this assumes byteOffset is 4-byte aligned; confirm for the
  // buffers produced by naudiodon2.
  const samples = new Float32Array(
      data.buffer, data.byteOffset,
      Math.floor(data.length / Float32Array.BYTES_PER_ELEMENT));

  stream.acceptWaveform(
      {sampleRate: kws.config.featConfig.sampleRate, samples: samples});

  // Consume everything that is ready before asking for the result.
  while (kws.isReady(stream)) {
    kws.decode(stream);
  }

  const keyword = kws.getResult(stream).keyword;
  if (keyword != '') {
    display.print(segmentIndex, keyword);
    segmentIndex += 1;
  }
});

ai.start();
console.log('Started! Please speak.');
console.log(`Only words from ${kws.config.keywordsFile} can be recognized`);
| @@ -19,6 +19,7 @@ include_directories(${CMAKE_JS_INC}) | @@ -19,6 +19,7 @@ include_directories(${CMAKE_JS_INC}) | ||
| 19 | 19 | ||
| 20 | set(srcs | 20 | set(srcs |
| 21 | src/audio-tagging.cc | 21 | src/audio-tagging.cc |
| 22 | + src/keyword-spotting.cc | ||
| 22 | src/non-streaming-asr.cc | 23 | src/non-streaming-asr.cc |
| 23 | src/non-streaming-tts.cc | 24 | src/non-streaming-tts.cc |
| 24 | src/punctuation.cc | 25 | src/punctuation.cc |
| 1 | +const addon = require('./addon.js'); | ||
| 2 | +const streaming_asr = require('./streaming-asr.js'); | ||
| 3 | + | ||
/**
 * JavaScript wrapper around the keyword-spotting functions exported by the
 * native addon. Each method forwards to the matching addon function, passing
 * the native spotter handle (and the stream's handle where applicable).
 */
class KeywordSpotter {
  /**
   * @param {object} config Configuration forwarded to
   *     addon.createKeywordSpotter(); also kept as `this.config`.
   */
  constructor(config) {
    this.handle = addon.createKeywordSpotter(config);
    this.config = config;
  }

  /** @returns {OnlineStream} a new stream created for this spotter */
  createStream() {
    return new streaming_asr.OnlineStream(
        addon.createKeywordStream(this.handle));
  }

  /** @returns whatever addon.isKeywordStreamReady() reports for `stream` */
  isReady(stream) {
    const ready = addon.isKeywordStreamReady(this.handle, stream.handle);
    return ready;
  }

  /** Runs addon.decodeKeywordStream() on `stream`. */
  decode(stream) {
    addon.decodeKeywordStream(this.handle, stream.handle);
  }

  /** @returns {object} the addon's JSON result string, parsed */
  getResult(stream) {
    return JSON.parse(
        addon.getKeywordResultAsJson(this.handle, stream.handle));
  }
}
| 29 | + | ||
| 30 | +module.exports = { | ||
| 31 | + KeywordSpotter, | ||
| 32 | +} |
| @@ -7,6 +7,7 @@ const slid = require('./spoken-language-identification.js'); | @@ -7,6 +7,7 @@ const slid = require('./spoken-language-identification.js'); | ||
| 7 | const sid = require('./speaker-identification.js'); | 7 | const sid = require('./speaker-identification.js'); |
| 8 | const at = require('./audio-tagg.js'); | 8 | const at = require('./audio-tagg.js'); |
| 9 | const punct = require('./punctuation.js'); | 9 | const punct = require('./punctuation.js'); |
| 10 | +const kws = require('./keyword-spotter.js'); | ||
| 10 | 11 | ||
| 11 | module.exports = { | 12 | module.exports = { |
| 12 | OnlineRecognizer: streaming_asr.OnlineRecognizer, | 13 | OnlineRecognizer: streaming_asr.OnlineRecognizer, |
| @@ -22,4 +23,5 @@ module.exports = { | @@ -22,4 +23,5 @@ module.exports = { | ||
| 22 | SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager, | 23 | SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager, |
| 23 | AudioTagging: at.AudioTagging, | 24 | AudioTagging: at.AudioTagging, |
| 24 | Punctuation: punct.Punctuation, | 25 | Punctuation: punct.Punctuation, |
| 26 | + KeywordSpotter: kws.KeywordSpotter, | ||
| 25 | } | 27 | } |
| 1 | +// scripts/node-addon-api/src/keyword-spotting.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 4 | +#include <sstream> | ||
| 5 | + | ||
| 6 | +#include "macros.h" // NOLINT | ||
| 7 | +#include "napi.h" // NOLINT | ||
| 8 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 9 | + | ||
| 10 | +// Defined in ./streaming-asr.cc | ||
| 11 | +SherpaOnnxFeatureConfig GetFeatureConfig(Napi::Object obj); | ||
| 12 | + | ||
| 13 | +// Defined in ./streaming-asr.cc | ||
| 14 | +SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj); | ||
| 15 | + | ||
| 16 | +static Napi::External<SherpaOnnxKeywordSpotter> CreateKeywordSpotterWrapper( | ||
| 17 | + const Napi::CallbackInfo &info) { | ||
| 18 | + Napi::Env env = info.Env(); | ||
| 19 | + if (info.Length() != 1) { | ||
| 20 | + std::ostringstream os; | ||
| 21 | + os << "Expect only 1 argument. Given: " << info.Length(); | ||
| 22 | + | ||
| 23 | + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 24 | + | ||
| 25 | + return {}; | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + if (!info[0].IsObject()) { | ||
| 29 | + Napi::TypeError::New(env, "Expect an object as the argument") | ||
| 30 | + .ThrowAsJavaScriptException(); | ||
| 31 | + | ||
| 32 | + return {}; | ||
| 33 | + } | ||
| 34 | + | ||
| 35 | + Napi::Object o = info[0].As<Napi::Object>(); | ||
| 36 | + SherpaOnnxKeywordSpotterConfig c; | ||
| 37 | + memset(&c, 0, sizeof(c)); | ||
| 38 | + c.feat_config = GetFeatureConfig(o); | ||
| 39 | + c.model_config = GetOnlineModelConfig(o); | ||
| 40 | + | ||
| 41 | + SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths); | ||
| 42 | + SHERPA_ONNX_ASSIGN_ATTR_INT32(num_trailing_blanks, numTrailingBlanks); | ||
| 43 | + SHERPA_ONNX_ASSIGN_ATTR_FLOAT(keywords_score, keywordsScore); | ||
| 44 | + SHERPA_ONNX_ASSIGN_ATTR_FLOAT(keywords_threshold, keywordsThreshold); | ||
| 45 | + SHERPA_ONNX_ASSIGN_ATTR_STR(keywords_file, keywordsFile); | ||
| 46 | + | ||
| 47 | + SherpaOnnxKeywordSpotter *kws = CreateKeywordSpotter(&c); | ||
| 48 | + | ||
| 49 | + if (c.model_config.transducer.encoder) { | ||
| 50 | + delete[] c.model_config.transducer.encoder; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + if (c.model_config.transducer.decoder) { | ||
| 54 | + delete[] c.model_config.transducer.decoder; | ||
| 55 | + } | ||
| 56 | + | ||
| 57 | + if (c.model_config.transducer.joiner) { | ||
| 58 | + delete[] c.model_config.transducer.joiner; | ||
| 59 | + } | ||
| 60 | + | ||
| 61 | + if (c.model_config.paraformer.encoder) { | ||
| 62 | + delete[] c.model_config.paraformer.encoder; | ||
| 63 | + } | ||
| 64 | + | ||
| 65 | + if (c.model_config.paraformer.decoder) { | ||
| 66 | + delete[] c.model_config.paraformer.decoder; | ||
| 67 | + } | ||
| 68 | + | ||
| 69 | + if (c.model_config.zipformer2_ctc.model) { | ||
| 70 | + delete[] c.model_config.zipformer2_ctc.model; | ||
| 71 | + } | ||
| 72 | + | ||
| 73 | + if (c.model_config.tokens) { | ||
| 74 | + delete[] c.model_config.tokens; | ||
| 75 | + } | ||
| 76 | + | ||
| 77 | + if (c.model_config.provider) { | ||
| 78 | + delete[] c.model_config.provider; | ||
| 79 | + } | ||
| 80 | + | ||
| 81 | + if (c.model_config.model_type) { | ||
| 82 | + delete[] c.model_config.model_type; | ||
| 83 | + } | ||
| 84 | + | ||
| 85 | + if (c.keywords_file) { | ||
| 86 | + delete[] c.keywords_file; | ||
| 87 | + } | ||
| 88 | + | ||
| 89 | + if (!kws) { | ||
| 90 | + Napi::TypeError::New(env, "Please check your config!") | ||
| 91 | + .ThrowAsJavaScriptException(); | ||
| 92 | + | ||
| 93 | + return {}; | ||
| 94 | + } | ||
| 95 | + | ||
| 96 | + return Napi::External<SherpaOnnxKeywordSpotter>::New( | ||
| 97 | + env, kws, [](Napi::Env env, SherpaOnnxKeywordSpotter *kws) { | ||
| 98 | + DestroyKeywordSpotter(kws); | ||
| 99 | + }); | ||
| 100 | +} | ||
| 101 | + | ||
| 102 | +static Napi::External<SherpaOnnxOnlineStream> CreateKeywordStreamWrapper( | ||
| 103 | + const Napi::CallbackInfo &info) { | ||
| 104 | + Napi::Env env = info.Env(); | ||
| 105 | + if (info.Length() != 1) { | ||
| 106 | + std::ostringstream os; | ||
| 107 | + os << "Expect only 1 argument. Given: " << info.Length(); | ||
| 108 | + | ||
| 109 | + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 110 | + | ||
| 111 | + return {}; | ||
| 112 | + } | ||
| 113 | + | ||
| 114 | + if (!info[0].IsExternal()) { | ||
| 115 | + Napi::TypeError::New( | ||
| 116 | + env, "You should pass a keyword spotter pointer as the only argument") | ||
| 117 | + .ThrowAsJavaScriptException(); | ||
| 118 | + | ||
| 119 | + return {}; | ||
| 120 | + } | ||
| 121 | + | ||
| 122 | + SherpaOnnxKeywordSpotter *kws = | ||
| 123 | + info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data(); | ||
| 124 | + | ||
| 125 | + SherpaOnnxOnlineStream *stream = CreateKeywordStream(kws); | ||
| 126 | + | ||
| 127 | + return Napi::External<SherpaOnnxOnlineStream>::New( | ||
| 128 | + env, stream, [](Napi::Env env, SherpaOnnxOnlineStream *stream) { | ||
| 129 | + DestroyOnlineStream(stream); | ||
| 130 | + }); | ||
| 131 | +} | ||
| 132 | + | ||
| 133 | +static Napi::Boolean IsKeywordStreamReadyWrapper( | ||
| 134 | + const Napi::CallbackInfo &info) { | ||
| 135 | + Napi::Env env = info.Env(); | ||
| 136 | + if (info.Length() != 2) { | ||
| 137 | + std::ostringstream os; | ||
| 138 | + os << "Expect only 2 arguments. Given: " << info.Length(); | ||
| 139 | + | ||
| 140 | + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 141 | + | ||
| 142 | + return {}; | ||
| 143 | + } | ||
| 144 | + | ||
| 145 | + if (!info[0].IsExternal()) { | ||
| 146 | + Napi::TypeError::New(env, "Argument 0 should be a keyword spotter pointer.") | ||
| 147 | + .ThrowAsJavaScriptException(); | ||
| 148 | + | ||
| 149 | + return {}; | ||
| 150 | + } | ||
| 151 | + | ||
| 152 | + if (!info[1].IsExternal()) { | ||
| 153 | + Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.") | ||
| 154 | + .ThrowAsJavaScriptException(); | ||
| 155 | + | ||
| 156 | + return {}; | ||
| 157 | + } | ||
| 158 | + | ||
| 159 | + SherpaOnnxKeywordSpotter *kws = | ||
| 160 | + info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data(); | ||
| 161 | + | ||
| 162 | + SherpaOnnxOnlineStream *stream = | ||
| 163 | + info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); | ||
| 164 | + | ||
| 165 | + int32_t is_ready = IsKeywordStreamReady(kws, stream); | ||
| 166 | + | ||
| 167 | + return Napi::Boolean::New(env, is_ready); | ||
| 168 | +} | ||
| 169 | + | ||
| 170 | +static void DecodeKeywordStreamWrapper(const Napi::CallbackInfo &info) { | ||
| 171 | + Napi::Env env = info.Env(); | ||
| 172 | + if (info.Length() != 2) { | ||
| 173 | + std::ostringstream os; | ||
| 174 | + os << "Expect only 2 arguments. Given: " << info.Length(); | ||
| 175 | + | ||
| 176 | + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 177 | + | ||
| 178 | + return; | ||
| 179 | + } | ||
| 180 | + | ||
| 181 | + if (!info[0].IsExternal()) { | ||
| 182 | + Napi::TypeError::New(env, "Argument 0 should be a keyword spotter pointer.") | ||
| 183 | + .ThrowAsJavaScriptException(); | ||
| 184 | + | ||
| 185 | + return; | ||
| 186 | + } | ||
| 187 | + | ||
| 188 | + if (!info[1].IsExternal()) { | ||
| 189 | + Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.") | ||
| 190 | + .ThrowAsJavaScriptException(); | ||
| 191 | + | ||
| 192 | + return; | ||
| 193 | + } | ||
| 194 | + | ||
| 195 | + SherpaOnnxKeywordSpotter *kws = | ||
| 196 | + info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data(); | ||
| 197 | + | ||
| 198 | + SherpaOnnxOnlineStream *stream = | ||
| 199 | + info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); | ||
| 200 | + | ||
| 201 | + DecodeKeywordStream(kws, stream); | ||
| 202 | +} | ||
| 203 | + | ||
| 204 | +static Napi::String GetKeywordResultAsJsonWrapper( | ||
| 205 | + const Napi::CallbackInfo &info) { | ||
| 206 | + Napi::Env env = info.Env(); | ||
| 207 | + if (info.Length() != 2) { | ||
| 208 | + std::ostringstream os; | ||
| 209 | + os << "Expect only 2 arguments. Given: " << info.Length(); | ||
| 210 | + | ||
| 211 | + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 212 | + | ||
| 213 | + return {}; | ||
| 214 | + } | ||
| 215 | + | ||
| 216 | + if (!info[0].IsExternal()) { | ||
| 217 | + Napi::TypeError::New(env, "Argument 0 should be a keyword spotter pointer.") | ||
| 218 | + .ThrowAsJavaScriptException(); | ||
| 219 | + | ||
| 220 | + return {}; | ||
| 221 | + } | ||
| 222 | + | ||
| 223 | + if (!info[1].IsExternal()) { | ||
| 224 | + Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.") | ||
| 225 | + .ThrowAsJavaScriptException(); | ||
| 226 | + | ||
| 227 | + return {}; | ||
| 228 | + } | ||
| 229 | + | ||
| 230 | + SherpaOnnxKeywordSpotter *kws = | ||
| 231 | + info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data(); | ||
| 232 | + | ||
| 233 | + SherpaOnnxOnlineStream *stream = | ||
| 234 | + info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data(); | ||
| 235 | + | ||
| 236 | + const char *json = GetKeywordResultAsJson(kws, stream); | ||
| 237 | + | ||
| 238 | + Napi::String s = Napi::String::New(env, json); | ||
| 239 | + | ||
| 240 | + FreeKeywordResultJson(json); | ||
| 241 | + | ||
| 242 | + return s; | ||
| 243 | +} | ||
| 244 | + | ||
| 245 | +void InitKeywordSpotting(Napi::Env env, Napi::Object exports) { | ||
| 246 | + exports.Set(Napi::String::New(env, "createKeywordSpotter"), | ||
| 247 | + Napi::Function::New(env, CreateKeywordSpotterWrapper)); | ||
| 248 | + | ||
| 249 | + exports.Set(Napi::String::New(env, "createKeywordStream"), | ||
| 250 | + Napi::Function::New(env, CreateKeywordStreamWrapper)); | ||
| 251 | + | ||
| 252 | + exports.Set(Napi::String::New(env, "isKeywordStreamReady"), | ||
| 253 | + Napi::Function::New(env, IsKeywordStreamReadyWrapper)); | ||
| 254 | + | ||
| 255 | + exports.Set(Napi::String::New(env, "decodeKeywordStream"), | ||
| 256 | + Napi::Function::New(env, DecodeKeywordStreamWrapper)); | ||
| 257 | + | ||
| 258 | + exports.Set(Napi::String::New(env, "getKeywordResultAsJson"), | ||
| 259 | + Napi::Function::New(env, GetKeywordResultAsJsonWrapper)); | ||
| 260 | +} |
| @@ -23,6 +23,8 @@ void InitAudioTagging(Napi::Env env, Napi::Object exports); | @@ -23,6 +23,8 @@ void InitAudioTagging(Napi::Env env, Napi::Object exports); | ||
| 23 | 23 | ||
| 24 | void InitPunctuation(Napi::Env env, Napi::Object exports); | 24 | void InitPunctuation(Napi::Env env, Napi::Object exports); |
| 25 | 25 | ||
| 26 | +void InitKeywordSpotting(Napi::Env env, Napi::Object exports); | ||
| 27 | + | ||
| 26 | Napi::Object Init(Napi::Env env, Napi::Object exports) { | 28 | Napi::Object Init(Napi::Env env, Napi::Object exports) { |
| 27 | InitStreamingAsr(env, exports); | 29 | InitStreamingAsr(env, exports); |
| 28 | InitNonStreamingAsr(env, exports); | 30 | InitNonStreamingAsr(env, exports); |
| @@ -34,6 +36,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) { | @@ -34,6 +36,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) { | ||
| 34 | InitSpeakerID(env, exports); | 36 | InitSpeakerID(env, exports); |
| 35 | InitAudioTagging(env, exports); | 37 | InitAudioTagging(env, exports); |
| 36 | InitPunctuation(env, exports); | 38 | InitPunctuation(env, exports); |
| 39 | + InitKeywordSpotting(env, exports); | ||
| 37 | 40 | ||
| 38 | return exports; | 41 | return exports; |
| 39 | } | 42 | } |
| @@ -90,7 +90,7 @@ static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig( | @@ -90,7 +90,7 @@ static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig( | ||
| 90 | return c; | 90 | return c; |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | -static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { | 93 | +SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { |
| 94 | SherpaOnnxOnlineModelConfig c; | 94 | SherpaOnnxOnlineModelConfig c; |
| 95 | memset(&c, 0, sizeof(c)); | 95 | memset(&c, 0, sizeof(c)); |
| 96 | 96 |
| @@ -653,6 +653,20 @@ void DestroyKeywordResult(const SherpaOnnxKeywordResult *r) { | @@ -653,6 +653,20 @@ void DestroyKeywordResult(const SherpaOnnxKeywordResult *r) { | ||
| 653 | } | 653 | } |
| 654 | } | 654 | } |
| 655 | 655 | ||
| 656 | +const char *GetKeywordResultAsJson(SherpaOnnxKeywordSpotter *spotter, | ||
| 657 | + SherpaOnnxOnlineStream *stream) { | ||
| 658 | + const sherpa_onnx::KeywordResult &result = | ||
| 659 | + spotter->impl->GetResult(stream->impl.get()); | ||
| 660 | + | ||
| 661 | + std::string json = result.AsJsonString(); | ||
| 662 | + char *pJson = new char[json.size() + 1]; | ||
| 663 | + std::copy(json.begin(), json.end(), pJson); | ||
| 664 | + pJson[json.size()] = 0; | ||
| 665 | + return pJson; | ||
| 666 | +} | ||
| 667 | + | ||
// Frees a string with delete[]; passing a null pointer is a no-op.
void FreeKeywordResultJson(const char *s) {
  delete[] s;
}
| 669 | + | ||
| 656 | // ============================================================ | 670 | // ============================================================ |
| 657 | // For VAD | 671 | // For VAD |
| 658 | // ============================================================ | 672 | // ============================================================ |
| @@ -625,6 +625,13 @@ SHERPA_ONNX_API const SherpaOnnxKeywordResult *GetKeywordResult( | @@ -625,6 +625,13 @@ SHERPA_ONNX_API const SherpaOnnxKeywordResult *GetKeywordResult( | ||
| 625 | /// @param r A pointer returned by GetKeywordResult() | 625 | /// @param r A pointer returned by GetKeywordResult() |
| 626 | SHERPA_ONNX_API void DestroyKeywordResult(const SherpaOnnxKeywordResult *r); | 626 | SHERPA_ONNX_API void DestroyKeywordResult(const SherpaOnnxKeywordResult *r); |
| 627 | 627 | ||
| 628 | +// The caller must pass the returned pointer to FreeKeywordResultJson() | ||
| 629 | +// to avoid a memory leak. | ||
| 630 | +SHERPA_ONNX_API const char *GetKeywordResultAsJson( | ||
| 631 | + SherpaOnnxKeywordSpotter *spotter, SherpaOnnxOnlineStream *stream); | ||
| 632 | + | ||
| 633 | +SHERPA_ONNX_API void FreeKeywordResultJson(const char *s); | ||
| 634 | + | ||
| 628 | // ============================================================ | 635 | // ============================================================ |
| 629 | // For VAD | 636 | // For VAD |
| 630 | // ============================================================ | 637 | // ============================================================ |
-
请 注册 或 登录 后发表评论