Fangjun Kuang
Committed by GitHub

Add streaming CTC ASR APIs for node-addon-api (#867)
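This commit wires streaming zipformer2 CTC models, with optional HLG.fst decoding, into the node-addon-api bindings and adds matching examples under nodejs-addon-examples. For orientation, a minimal usage sketch (assuming the sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 model from the asr-models release has been downloaded and unpacked, as in the updated test script below):

```js
const sherpa_onnx = require('sherpa-onnx-node');

const recognizer = new sherpa_onnx.OnlineRecognizer({
  featConfig: {sampleRate: 16000, featureDim: 80},
  modelConfig: {
    zipformer2Ctc: {
      model:
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    tokens: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
  },
  // Optional: decode with an HLG graph. Omit ctcFstDecoderConfig to decode
  // without a graph, as in test_asr_streaming_ctc.js.
  ctcFstDecoderConfig: {
    graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
  },
});
const stream = recognizer.createStream();
```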

.github/scripts/test-nodejs-addon-npm.sh
@@ -5,15 +5,6 @@ set -ex
 d=nodejs-addon-examples
 echo "dir: $d"
 cd $d
-npm install --verbose
-git status
-ls -lh
-ls -lh node_modules
-
-export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
-export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
-export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
-export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
 
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
 tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
@@ -22,3 +13,14 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
 node test_asr_streaming_transducer.js
 
 rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+
+node ./test_asr_streaming_ctc.js
+
+# To decode with HLG.fst
+node ./test_asr_streaming_ctc_hlg.js
+
+rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
@@ -152,17 +152,23 @@ jobs:
 
           ./node_modules/.bin/cmake-js compile --log-level verbose
 
-      - name: Test streaming transducer
+      - name: Run tests
         shell: bash
         run: |
           export PATH=$PWD/build/install/lib:$PATH
           export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
-
-          cd scripts/node-addon-api
-          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
-          tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
-          rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
-
-          node test/test_asr_streaming_transducer.js
-
-          rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
+          d=nodejs-addon-examples
+          cd $d
+          files=$(ls *.js)
+          echo $files
+          for f in ${files[@]}; do
+            echo $f
+            sed -i.bak s%sherpa-onnx-node%./sherpa-onnx% ./$f
+          done
+          cd ..
+
+          cp -v scripts/node-addon-api/build/Release/sherpa-onnx.node $d/
+          cp -v scripts/node-addon-api/lib/*.js $d/
+          cp -v ./build/install/lib/lib* $d/
+
+          .github/scripts/test-nodejs-addon-npm.sh
@@ -63,4 +63,19 @@ jobs:
       - name: Run tests
         shell: bash
         run: |
+          d=nodejs-addon-examples
+          echo "dir: $d"
+          cd $d
+          npm install --verbose
+          git status
+          ls -lh
+          ls -lh node_modules
+
+          export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
+          export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
+          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
+          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
+
+          cd ../
+
           .github/scripts/test-nodejs-addon-npm.sh
@@ -27,6 +27,18 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
 ```
 
+# Voice Activity Detection (VAD)
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+
+
+# To run the test with a microphone, you need to install the package naudiodon2
+npm install naudiodon2
+
+node ./test_vad_microphone.js
+```
+
 ## Streaming speech recognition with zipformer transducer
 
 ```bash
@@ -36,21 +48,27 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
 
 node ./test_asr_streaming_transducer.js
 
-# To run the test with microphone, you need to install the package naudiodon2
+# To run the test with a microphone, you need to install the package naudiodon2
 npm install naudiodon2
 
 node ./test_asr_streaming_transducer_microphone.js
 ```
 
-# VAD
+## Streaming speech recognition with zipformer CTC
 
 ```bash
-wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
 
+node ./test_asr_streaming_ctc.js
 
-# To run the test with microphone, you need to install the package naudiodon2
+# To decode with HLG.fst
+node ./test_asr_streaming_ctc_hlg.js
+
+# To run the test with a microphone, you need to install the package naudiodon2
 npm install naudiodon2
 
-node ./test_vad_microphone.js
+node ./test_asr_streaming_ctc_microphone.js
+node ./test_asr_streaming_ctc_hlg_microphone.js
 ```
-
nodejs-addon-examples/test_asr_streaming_ctc.js
+// Copyright (c) 2024 Xiaomi Corporation
+const sherpa_onnx = require('sherpa-onnx-node');
+const performance = require('perf_hooks').performance;
+
+
+// Please download test files from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+const config = {
+  'featConfig': {
+    'sampleRate': 16000,
+    'featureDim': 80,
+  },
+  'modelConfig': {
+    'zipformer2Ctc': {
+      'model':
+          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
+    },
+    'tokens':
+        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
+    'numThreads': 2,
+    'provider': 'cpu',
+    'debug': 1,
+  }
+};
+
+const waveFilename =
+    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/0.wav';
+
+const recognizer = new sherpa_onnx.OnlineRecognizer(config);
+console.log('Started');
+let start = performance.now();
+const stream = recognizer.createStream();
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
+
+// Add 0.4 seconds of tail padding so the last word is fully decoded.
+const tailPadding = new Float32Array(wave.sampleRate * 0.4);
+stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
+
+while (recognizer.isReady(stream)) {
+  recognizer.decode(stream);
+}
+const result = recognizer.getResult(stream);
+let stop = performance.now();
+console.log('Done');
+
+const elapsed_seconds = (stop - start) / 1000;
+const duration = wave.samples.length / wave.sampleRate;
+const real_time_factor = elapsed_seconds / duration;
+console.log('Wave duration', duration.toFixed(3), 'seconds');
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3));
+console.log(waveFilename);
+console.log('result\n', result);
nodejs-addon-examples/test_asr_streaming_ctc_hlg.js
+// Copyright (c) 2024 Xiaomi Corporation
+const sherpa_onnx = require('sherpa-onnx-node');
+const performance = require('perf_hooks').performance;
+
+
+// Please download test files from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+const config = {
+  'featConfig': {
+    'sampleRate': 16000,
+    'featureDim': 80,
+  },
+  'modelConfig': {
+    'zipformer2Ctc': {
+      'model':
+          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
+    },
+    'tokens':
+        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
+    'numThreads': 2,
+    'provider': 'cpu',
+    'debug': 1,
+  },
+  'ctcFstDecoderConfig': {
+    'graph': './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
+  },
+};
+
+const waveFilename =
+    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav';
+
+const recognizer = new sherpa_onnx.OnlineRecognizer(config);
+console.log('Started');
+let start = performance.now();
+const stream = recognizer.createStream();
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
+
+const tailPadding = new Float32Array(wave.sampleRate * 0.4);
+stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
+
+while (recognizer.isReady(stream)) {
+  recognizer.decode(stream);
+}
+const result = recognizer.getResult(stream);
+let stop = performance.now();
+console.log('Done');
+
+const elapsed_seconds = (stop - start) / 1000;
+const duration = wave.samples.length / wave.sampleRate;
+const real_time_factor = elapsed_seconds / duration;
+console.log('Wave duration', duration.toFixed(3), 'seconds');
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3));
+console.log(waveFilename);
+console.log('result\n', result);
nodejs-addon-examples/test_asr_streaming_ctc_hlg_microphone.js
+// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
+//
+const portAudio = require('naudiodon2');
+// console.log(portAudio.getDevices());
+
+const sherpa_onnx = require('sherpa-onnx-node');
+
+function createOnlineRecognizer() {
+  const config = {
+    'featConfig': {
+      'sampleRate': 16000,
+      'featureDim': 80,
+    },
+    'modelConfig': {
+      'zipformer2Ctc': {
+        'model':
+            './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
+      },
+      'tokens':
+          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
+      'numThreads': 2,
+      'provider': 'cpu',
+      'debug': 1,
+    },
+    'ctcFstDecoderConfig': {
+      'graph': './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
+    },
+    'enableEndpoint': true,
+    'rule1MinTrailingSilence': 2.4,
+    'rule2MinTrailingSilence': 1.2,
+    'rule3MinUtteranceLength': 20
+  };
+
+  return new sherpa_onnx.OnlineRecognizer(config);
+}
+
+const recognizer = createOnlineRecognizer();
+const stream = recognizer.createStream();
+
+let lastText = '';
+let segmentIndex = 0;
+
+const ai = new portAudio.AudioIO({
+  inOptions: {
+    channelCount: 1,
+    closeOnError: true,  // Close the stream if an audio error is detected;
+                         // if set to false, just log the error
+    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
+    sampleFormat: portAudio.SampleFormatFloat32,
+    sampleRate: recognizer.config.featConfig.sampleRate
+  }
+});
+
+const display = new sherpa_onnx.Display(50);
+
+ai.on('data', data => {
+  const samples = new Float32Array(data.buffer);
+
+  stream.acceptWaveform(
+      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});
+
+  while (recognizer.isReady(stream)) {
+    recognizer.decode(stream);
+  }
+
+  const isEndpoint = recognizer.isEndpoint(stream);
+  const text = recognizer.getResult(stream).text.toLowerCase();
+
+  if (text.length > 0 && lastText != text) {
+    lastText = text;
+    display.print(segmentIndex, lastText);
+  }
+  if (isEndpoint) {
+    if (text.length > 0) {
+      lastText = text;
+      segmentIndex += 1;
+    }
+    recognizer.reset(stream);
+  }
+});
+
+ai.on('close', () => {
+  console.log('Free resources');
+  stream.free();
+  recognizer.free();
+});
+
+ai.start();
+console.log('Started! Please speak');
nodejs-addon-examples/test_asr_streaming_ctc_microphone.js
+// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
+//
+const portAudio = require('naudiodon2');
+// console.log(portAudio.getDevices());
+
+const sherpa_onnx = require('sherpa-onnx-node');
+
+function createOnlineRecognizer() {
+  const config = {
+    'featConfig': {
+      'sampleRate': 16000,
+      'featureDim': 80,
+    },
+    'modelConfig': {
+      'zipformer2Ctc': {
+        'model':
+            './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
+      },
+      'tokens':
+          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
+      'numThreads': 2,
+      'provider': 'cpu',
+      'debug': 1,
+    },
+    'decodingMethod': 'greedy_search',
+    'maxActivePaths': 4,
+    'enableEndpoint': true,
+    'rule1MinTrailingSilence': 2.4,
+    'rule2MinTrailingSilence': 1.2,
+    'rule3MinUtteranceLength': 20
+  };
+
+  return new sherpa_onnx.OnlineRecognizer(config);
+}
+
+const recognizer = createOnlineRecognizer();
+const stream = recognizer.createStream();
+
+let lastText = '';
+let segmentIndex = 0;
+
+const ai = new portAudio.AudioIO({
+  inOptions: {
+    channelCount: 1,
+    closeOnError: true,  // Close the stream if an audio error is detected;
+                         // if set to false, just log the error
+    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
+    sampleFormat: portAudio.SampleFormatFloat32,
+    sampleRate: recognizer.config.featConfig.sampleRate
+  }
+});
+
+const display = new sherpa_onnx.Display(50);
+
+ai.on('data', data => {
+  const samples = new Float32Array(data.buffer);
+
+  stream.acceptWaveform(
+      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});
+
+  while (recognizer.isReady(stream)) {
+    recognizer.decode(stream);
+  }
+
+  const isEndpoint = recognizer.isEndpoint(stream);
+  const text = recognizer.getResult(stream).text.toLowerCase();
+
+  if (text.length > 0 && lastText != text) {
+    lastText = text;
+    display.print(segmentIndex, lastText);
+  }
+  if (isEndpoint) {
+    if (text.length > 0) {
+      lastText = text;
+      segmentIndex += 1;
+    }
+    recognizer.reset(stream);
+  }
+});
+
+ai.on('close', () => {
+  console.log('Free resources');
+  stream.free();
+  recognizer.free();
+});
+
+ai.start();
+console.log('Started! Please speak');
@@ -24,7 +24,6 @@ const config = {
     'numThreads': 2,
     'provider': 'cpu',
     'debug': 1,
-    'modelType': 'zipformer',
   }
 };
 
@@ -53,5 +52,8 @@ const duration = wave.samples.length / wave.sampleRate;
 const real_time_factor = elapsed_seconds / duration;
 console.log('Wave duration', duration.toFixed(3), 'seconds')
 console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
-console.log('RTF', real_time_factor.toFixed(3))
-console.log('result', result.text)
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3))
+console.log(waveFilename)
+console.log('result\n', result)
@@ -25,7 +25,6 @@ function createOnlineRecognizer() {
     'numThreads': 2,
     'provider': 'cpu',
     'debug': 1,
-    'modelType': 'zipformer',
   },
   'decodingMethod': 'greedy_search',
   'maxActivePaths': 4,
@@ -68,7 +67,7 @@ ai.on('data', data => {
   }
 
   const isEndpoint = recognizer.isEndpoint(stream);
-  const text = recognizer.getResult(stream).text;
+  const text = recognizer.getResult(stream).text.toLowerCase();
 
   if (text.length > 0 && lastText != text) {
     lastText = text;
@@ -158,7 +158,7 @@ def get_piper_models() -> List[TtsModel]:
         TtsModel(model_dir="vits-piper-fa_IR-gyro-medium"),
         TtsModel(model_dir="vits-piper-fi_FI-harri-low"),
         TtsModel(model_dir="vits-piper-fi_FI-harri-medium"),
-        TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
+        # TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
         TtsModel(model_dir="vits-piper-fr_FR-siwis-low"),
         TtsModel(model_dir="vits-piper-fr_FR-siwis-medium"),
         TtsModel(model_dir="vits-piper-fr_FR-upmc-medium"),
@@ -9,6 +9,7 @@ const possible_paths = [
   '../build/Debug/sherpa-onnx.node',
   `./node_modules/sherpa-onnx-${platform_arch}/sherpa-onnx.node`,
   `../sherpa-onnx-${platform_arch}/sherpa-onnx.node`,
+  './sherpa-onnx.node',
 ];
 
 let found = false;
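The new './sherpa-onnx.node' entry lets the loader pick up an addon binary copied next to the examples, which is what the CI step above relies on after its cp commands. A minimal sketch (hypothetical and simplified, not the verbatim loader code) of how the candidates in possible_paths are tried:

```js
// Try each candidate path in order; the first one that require() can
// load provides the native addon.
let addon = null;
for (const p of possible_paths) {
  try {
    addon = require(p);
    found = true;
    break;
  } catch (err) {
    // Not loadable from this path; fall through to the next candidate.
  }
}
```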
+#!/usr/bin/env bash
+
+set -ex
+
+if [[ ! -f ../../build/install/lib/libsherpa-onnx-core.dylib && ! -f ../../build/install/lib/libsherpa-onnx-core.so ]]; then
+  pushd ../../
+  mkdir -p build
+  cd build
+  cmake -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON ..
+  make install
+  popd
+fi
+export SHERPA_ONNX_INSTALL_DIR=$PWD/../../build/install
+
+./node_modules/.bin/cmake-js compile
@@ -89,6 +89,30 @@ static SherpaOnnxOnlineTransducerModelConfig GetOnlineTransducerModelConfig(
   return config;
 }
 
+static SherpaOnnxOnlineZipformer2CtcModelConfig
+GetOnlineZipformer2CtcModelConfig(Napi::Object obj) {
+  SherpaOnnxOnlineZipformer2CtcModelConfig config;
+  memset(&config, 0, sizeof(config));
+
+  if (!obj.Has("zipformer2Ctc") || !obj.Get("zipformer2Ctc").IsObject()) {
+    return config;
+  }
+
+  Napi::Object o = obj.Get("zipformer2Ctc").As<Napi::Object>();
+
+  if (o.Has("model") && o.Get("model").IsString()) {
+    Napi::String model = o.Get("model").As<Napi::String>();
+    std::string s = model.Utf8Value();
+    char *p = new char[s.size() + 1];
+    std::copy(s.begin(), s.end(), p);
+    p[s.size()] = 0;
+
+    config.model = p;
+  }
+
+  return config;
+}
+
 static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
   SherpaOnnxOnlineModelConfig config;
   memset(&config, 0, sizeof(config));
@@ -100,6 +124,7 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
   Napi::Object o = obj.Get("modelConfig").As<Napi::Object>();
 
   config.transducer = GetOnlineTransducerModelConfig(o);
+  config.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o);
 
   if (o.Has("tokens") && o.Get("tokens").IsString()) {
     Napi::String tokens = o.Get("tokens").As<Napi::String>();
@@ -147,6 +172,35 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
   return config;
 }
 
+static SherpaOnnxOnlineCtcFstDecoderConfig GetCtcFstDecoderConfig(
+    Napi::Object obj) {
+  SherpaOnnxOnlineCtcFstDecoderConfig config;
+  memset(&config, 0, sizeof(config));
+
+  if (!obj.Has("ctcFstDecoderConfig") ||
+      !obj.Get("ctcFstDecoderConfig").IsObject()) {
+    return config;
+  }
+
+  Napi::Object o = obj.Get("ctcFstDecoderConfig").As<Napi::Object>();
+
+  if (o.Has("graph") && o.Get("graph").IsString()) {
+    Napi::String graph = o.Get("graph").As<Napi::String>();
+    std::string s = graph.Utf8Value();
+    char *p = new char[s.size() + 1];
+    std::copy(s.begin(), s.end(), p);
+    p[s.size()] = 0;
+
+    config.graph = p;
+  }
+
+  if (o.Has("maxActive") && o.Get("maxActive").IsNumber()) {
+    config.max_active = o.Get("maxActive").As<Napi::Number>().Int32Value();
+  }
+
+  return config;
+}
+
 static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
     const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
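Besides graph, the decoder config also accepts an optional maxActive, which the helper above copies into max_active of SherpaOnnxOnlineCtcFstDecoderConfig. A hedged sketch of what that looks like from the JavaScript side (the value 3000 is purely illustrative, not a recommended setting):

```js
const config = {
  // ... featConfig and modelConfig as in the CTC examples above ...
  ctcFstDecoderConfig: {
    graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
    maxActive: 3000,  // illustrative: caps the active states during FST search
  },
};
```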
@@ -234,6 +288,8 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
         config.Get("hotwordsScore").As<Napi::Number>().FloatValue();
   }
 
+  c.ctc_fst_decoder_config = GetCtcFstDecoderConfig(config);
+
 #if 0
   printf("encoder: %s\n", c.model_config.transducer.encoder
                               ? c.model_config.transducer.encoder
@@ -277,6 +333,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
     delete[] c.model_config.transducer.joiner;
   }
 
+  if (c.model_config.zipformer2_ctc.model) {
+    delete[] c.model_config.zipformer2_ctc.model;
+  }
+
   if (c.model_config.tokens) {
     delete[] c.model_config.tokens;
   }
@@ -297,6 +357,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
     delete[] c.hotwords_file;
   }
 
+  if (c.ctc_fst_decoder_config.graph) {
+    delete[] c.ctc_fst_decoder_config.graph;
+  }
+
   if (!recognizer) {
     Napi::TypeError::New(env, "Please check your config!")
         .ThrowAsJavaScriptException();
@@ -216,6 +216,8 @@ class OnlineRecognizerCtcImpl : public OnlineRecognizerImpl {
     // clear states
     s->SetStates(model_->GetInitStates());
 
+    s->GetFasterDecoderProcessedFrames() = 0;
+
     // Note: We only update counters. The underlying audio samples
     // are not discarded.
     s->Reset();