Fangjun Kuang
Committed by GitHub

Add WebAssembly for SenseVoice (#1158)

@@ -10,6 +10,13 @@ ls -lh @@ -10,6 +10,13 @@ ls -lh
10 ls -lh node_modules 10 ls -lh node_modules
11 11
12 # offline asr 12 # offline asr
  13 +#
  14 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  15 +tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  16 +rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  17 +
  18 +node ./test-offline-sense-voice.js
  19 +rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
13 20
14 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 21 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
15 ls -lh 22 ls -lh
@@ -48,6 +48,11 @@ jobs: @@ -48,6 +48,11 @@ jobs:
48 with: 48 with:
49 fetch-depth: 0 49 fetch-depth: 0
50 50
  51 + - name: ccache
  52 + uses: hendrikmuhs/ccache-action@v1.2
  53 + with:
  54 + key: ${{ matrix.os }}-${{ matrix.build_type }}-wasm-nodejs
  55 +
51 - name: Install emsdk 56 - name: Install emsdk
52 uses: mymindstorm/setup-emsdk@v14 57 uses: mymindstorm/setup-emsdk@v14
53 58
@@ -77,6 +82,10 @@ jobs: @@ -77,6 +82,10 @@ jobs:
77 env: 82 env:
78 NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 83 NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
79 run: | 84 run: |
  85 + export CMAKE_CXX_COMPILER_LAUNCHER=ccache
  86 + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
  87 + cmake --version
  88 +
80 ./build-wasm-simd-nodejs.sh 89 ./build-wasm-simd-nodejs.sh
81 cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/ 90 cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
82 cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/ 91 cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/
@@ -88,6 +88,21 @@ tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 @@ -88,6 +88,21 @@ tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
88 node ./test-offline-paraformer.js 88 node ./test-offline-paraformer.js
89 ``` 89 ```
90 90
  91 +## ./test-offline-sense-voice.js
  92 +
  93 +[./test-offline-sense-voice.js](./test-offline-sense-voice.js) demonstrates
  94 +how to decode a file with a non-streaming SenseVoice model.
  95 +
  96 +You can use the following command to run it:
  97 +
  98 +```bash
  99 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  100 +tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  101 +rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  102 +
  103 +node ./test-offline-sense-voice.js
  104 +```
  105 +
91 ## ./test-offline-transducer.js 106 ## ./test-offline-transducer.js
92 107
93 [./test-offline-transducer.js](./test-offline-transducer.js) demonstrates 108 [./test-offline-transducer.js](./test-offline-transducer.js) demonstrates
@@ -13,27 +13,9 @@ function createOfflineRecognizer() { @@ -13,27 +13,9 @@ function createOfflineRecognizer() {
13 }; 13 };
14 14
15 let modelConfig = { 15 let modelConfig = {
16 - transducer: {  
17 - encoder: '',  
18 - decoder: '',  
19 - joiner: '',  
20 - },  
21 - paraformer: {  
22 - model: '',  
23 - },  
24 nemoCtc: { 16 nemoCtc: {
25 model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx', 17 model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
26 }, 18 },
27 - whisper: {  
28 - encoder: '',  
29 - decoder: '',  
30 - language: '',  
31 - task: '',  
32 - tailPaddings: -1,  
33 - },  
34 - tdnn: {  
35 - model: '',  
36 - },  
37 tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt', 19 tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
38 numThreads: 1, 20 numThreads: 1,
39 debug: 0, 21 debug: 0,
@@ -41,19 +23,11 @@ function createOfflineRecognizer() { @@ -41,19 +23,11 @@ function createOfflineRecognizer() {
41 modelType: 'nemo_ctc', 23 modelType: 'nemo_ctc',
42 }; 24 };
43 25
44 - let lmConfig = {  
45 - model: '',  
46 - scale: 1.0,  
47 - };  
48 -  
49 let config = { 26 let config = {
50 featConfig: featConfig, 27 featConfig: featConfig,
51 modelConfig: modelConfig, 28 modelConfig: modelConfig,
52 - lmConfig: lmConfig,  
53 decodingMethod: 'greedy_search', 29 decodingMethod: 'greedy_search',
54 maxActivePaths: 4, 30 maxActivePaths: 4,
55 - hotwordsFile: '',  
56 - hotwordsScore: 1.5,  
57 }; 31 };
58 32
59 return sherpa_onnx.createOfflineRecognizer(config); 33 return sherpa_onnx.createOfflineRecognizer(config);
@@ -13,27 +13,9 @@ function createOfflineRecognizer() { @@ -13,27 +13,9 @@ function createOfflineRecognizer() {
13 }; 13 };
14 14
15 let modelConfig = { 15 let modelConfig = {
16 - transducer: {  
17 - encoder: '',  
18 - decoder: '',  
19 - joiner: '',  
20 - },  
21 paraformer: { 16 paraformer: {
22 model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx', 17 model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
23 }, 18 },
24 - nemoCtc: {  
25 - model: '',  
26 - },  
27 - whisper: {  
28 - encoder: '',  
29 - decoder: '',  
30 - language: '',  
31 - task: '',  
32 - tailPaddings: -1,  
33 - },  
34 - tdnn: {  
35 - model: '',  
36 - },  
37 tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt', 19 tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
38 numThreads: 1, 20 numThreads: 1,
39 debug: 0, 21 debug: 0,
@@ -41,19 +23,11 @@ function createOfflineRecognizer() { @@ -41,19 +23,11 @@ function createOfflineRecognizer() {
41 modelType: 'paraformer', 23 modelType: 'paraformer',
42 }; 24 };
43 25
44 - let lmConfig = {  
45 - model: '',  
46 - scale: 1.0,  
47 - };  
48 26
49 let config = { 27 let config = {
50 featConfig: featConfig, 28 featConfig: featConfig,
51 modelConfig: modelConfig, 29 modelConfig: modelConfig,
52 - lmConfig: lmConfig,  
53 decodingMethod: 'greedy_search', 30 decodingMethod: 'greedy_search',
54 - maxActivePaths: 4,  
55 - hotwordsFile: '',  
56 - hotwordsScore: 1.5,  
57 // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst 31 // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
58 ruleFsts: './itn_zh_number.fst', 32 ruleFsts: './itn_zh_number.fst',
59 }; 33 };
@@ -13,27 +13,9 @@ function createOfflineRecognizer() { @@ -13,27 +13,9 @@ function createOfflineRecognizer() {
13 }; 13 };
14 14
15 let modelConfig = { 15 let modelConfig = {
16 - transducer: {  
17 - encoder: '',  
18 - decoder: '',  
19 - joiner: '',  
20 - },  
21 paraformer: { 16 paraformer: {
22 model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx', 17 model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
23 }, 18 },
24 - nemoCtc: {  
25 - model: '',  
26 - },  
27 - whisper: {  
28 - encoder: '',  
29 - decoder: '',  
30 - language: '',  
31 - task: '',  
32 - tailPaddings: -1,  
33 - },  
34 - tdnn: {  
35 - model: '',  
36 - },  
37 tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt', 19 tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
38 numThreads: 1, 20 numThreads: 1,
39 debug: 0, 21 debug: 0,
@@ -41,19 +23,10 @@ function createOfflineRecognizer() { @@ -41,19 +23,10 @@ function createOfflineRecognizer() {
41 modelType: 'paraformer', 23 modelType: 'paraformer',
42 }; 24 };
43 25
44 - let lmConfig = {  
45 - model: '',  
46 - scale: 1.0,  
47 - };  
48 -  
49 let config = { 26 let config = {
50 featConfig: featConfig, 27 featConfig: featConfig,
51 modelConfig: modelConfig, 28 modelConfig: modelConfig,
52 - lmConfig: lmConfig,  
53 decodingMethod: 'greedy_search', 29 decodingMethod: 'greedy_search',
54 - maxActivePaths: 4,  
55 - hotwordsFile: '',  
56 - hotwordsScore: 1.5,  
57 }; 30 };
58 31
59 return sherpa_onnx.createOfflineRecognizer(config); 32 return sherpa_onnx.createOfflineRecognizer(config);
  1 +// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createOfflineRecognizer() {
  10 + let featConfig = {
  11 + sampleRate: 16000,
  12 + featureDim: 80,
  13 + };
  14 +
  15 + let modelConfig = {
  16 + senseVoice: {
  17 + model:
  18 + './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
  19 + language: '',
  20 + useInverseTextNormalization: 1,
  21 + },
  22 + tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
  23 + numThreads: 1,
  24 + debug: 0,
  25 + provider: 'cpu',
  26 + };
  27 +
  28 + let config = {
  29 + featConfig: featConfig,
  30 + modelConfig: modelConfig,
  31 + decodingMethod: 'greedy_search',
  32 + };
  33 +
  34 + return sherpa_onnx.createOfflineRecognizer(config);
  35 +}
  36 +
  37 +
  38 +const recognizer = createOfflineRecognizer();
  39 +const stream = recognizer.createStream();
  40 +
  41 +const waveFilename =
  42 + './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';
  43 +
  44 +const reader = new wav.Reader();
  45 +const readable = new Readable().wrap(reader);
  46 +const buf = [];
  47 +
  48 +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  49 + if (sampleRate != recognizer.config.featConfig.sampleRate) {
  50 + throw new Error(`Only support sampleRate ${
  51 + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  52 + }
  53 +
  54 + if (audioFormat != 1) {
  55 + throw new Error(`Only support PCM format. Given ${audioFormat}`);
  56 + }
  57 +
  58 + if (channels != 1) {
  59 + throw new Error(`Only a single channel. Given ${channels}`);
  60 + }
  61 +
  62 + if (bitDepth != 16) {
  63 + throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  64 + }
  65 +});
  66 +
  67 +fs.createReadStream(waveFilename, {'highWaterMark': 4096})
  68 + .pipe(reader)
  69 + .on('finish', function(err) {
  70 + // tail padding
  71 + const floatSamples =
  72 + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
  73 +
  74 + buf.push(floatSamples);
  75 + const flattened =
  76 + Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
  77 +
  78 + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
  79 + recognizer.decode(stream);
  80 + const text = recognizer.getResult(stream).text;
  81 + console.log(text);
  82 +
  83 + stream.free();
  84 + recognizer.free();
  85 + });
  86 +
  87 +readable.on('readable', function() {
  88 + let chunk;
  89 + while ((chunk = readable.read()) != null) {
  90 + const int16Samples = new Int16Array(
  91 + chunk.buffer, chunk.byteOffset,
  92 + chunk.length / Int16Array.BYTES_PER_ELEMENT);
  93 +
  94 + const floatSamples = new Float32Array(int16Samples.length);
  95 + for (let i = 0; i < floatSamples.length; i++) {
  96 + floatSamples[i] = int16Samples[i] / 32768.0;
  97 + }
  98 +
  99 + buf.push(floatSamples);
  100 + }
  101 +});
@@ -21,22 +21,6 @@ function createOfflineRecognizer() { @@ -21,22 +21,6 @@ function createOfflineRecognizer() {
21 joiner: 21 joiner:
22 './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx', 22 './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
23 }, 23 },
24 - paraformer: {  
25 - model: '',  
26 - },  
27 - nemoCtc: {  
28 - model: '',  
29 - },  
30 - whisper: {  
31 - encoder: '',  
32 - decoder: '',  
33 - language: '',  
34 - task: '',  
35 - tailPaddings: -1,  
36 - },  
37 - tdnn: {  
38 - model: '',  
39 - },  
40 tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt', 24 tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
41 numThreads: 1, 25 numThreads: 1,
42 debug: 0, 26 debug: 0,
@@ -44,15 +28,9 @@ function createOfflineRecognizer() { @@ -44,15 +28,9 @@ function createOfflineRecognizer() {
44 modelType: 'transducer', 28 modelType: 'transducer',
45 }; 29 };
46 30
47 - let lmConfig = {  
48 - model: '',  
49 - scale: 1.0,  
50 - };  
51 -  
52 let config = { 31 let config = {
53 featConfig: featConfig, 32 featConfig: featConfig,
54 modelConfig: modelConfig, 33 modelConfig: modelConfig,
55 - lmConfig: lmConfig,  
56 decodingMethod: 'greedy_search', 34 decodingMethod: 'greedy_search',
57 maxActivePaths: 4, 35 maxActivePaths: 4,
58 hotwordsFile: '', 36 hotwordsFile: '',
@@ -5,10 +5,8 @@ const sherpa_onnx = require('sherpa-onnx'); @@ -5,10 +5,8 @@ const sherpa_onnx = require('sherpa-onnx');
5 function createOfflineTts() { 5 function createOfflineTts() {
6 let offlineTtsVitsModelConfig = { 6 let offlineTtsVitsModelConfig = {
7 model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx', 7 model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx',
8 - lexicon: '',  
9 tokens: './vits-piper-en_US-amy-low/tokens.txt', 8 tokens: './vits-piper-en_US-amy-low/tokens.txt',
10 dataDir: './vits-piper-en_US-amy-low/espeak-ng-data', 9 dataDir: './vits-piper-en_US-amy-low/espeak-ng-data',
11 - dictDir: '',  
12 noiseScale: 0.667, 10 noiseScale: 0.667,
13 noiseScaleW: 0.8, 11 noiseScaleW: 0.8,
14 lengthScale: 1.0, 12 lengthScale: 1.0,
@@ -22,8 +20,6 @@ function createOfflineTts() { @@ -22,8 +20,6 @@ function createOfflineTts() {
22 20
23 let offlineTtsConfig = { 21 let offlineTtsConfig = {
24 offlineTtsModelConfig: offlineTtsModelConfig, 22 offlineTtsModelConfig: offlineTtsModelConfig,
25 - ruleFsts: '',  
26 - ruleFars: '',  
27 maxNumSentences: 1, 23 maxNumSentences: 1,
28 }; 24 };
29 25
@@ -7,8 +7,6 @@ function createOfflineTts() { @@ -7,8 +7,6 @@ function createOfflineTts() {
7 model: './vits-icefall-zh-aishell3/model.onnx', 7 model: './vits-icefall-zh-aishell3/model.onnx',
8 lexicon: './vits-icefall-zh-aishell3/lexicon.txt', 8 lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
9 tokens: './vits-icefall-zh-aishell3/tokens.txt', 9 tokens: './vits-icefall-zh-aishell3/tokens.txt',
10 - dataDir: '',  
11 - dictDir: '',  
12 noiseScale: 0.667, 10 noiseScale: 0.667,
13 noiseScaleW: 0.8, 11 noiseScaleW: 0.8,
14 lengthScale: 1.0, 12 lengthScale: 1.0,
@@ -31,7 +29,6 @@ function createOfflineTts() { @@ -31,7 +29,6 @@ function createOfflineTts() {
31 return sherpa_onnx.createOfflineTts(offlineTtsConfig); 29 return sherpa_onnx.createOfflineTts(offlineTtsConfig);
32 } 30 }
33 31
34 -  
35 const tts = createOfflineTts(); 32 const tts = createOfflineTts();
36 const speakerId = 66; 33 const speakerId = 66;
37 const speed = 1.0; 34 const speed = 1.0;
@@ -13,17 +13,6 @@ function createOfflineRecognizer() { @@ -13,17 +13,6 @@ function createOfflineRecognizer() {
13 }; 13 };
14 14
15 let modelConfig = { 15 let modelConfig = {
16 - transducer: {  
17 - encoder: '',  
18 - decoder: '',  
19 - joiner: '',  
20 - },  
21 - paraformer: {  
22 - model: '',  
23 - },  
24 - nemoCtc: {  
25 - model: '',  
26 - },  
27 whisper: { 16 whisper: {
28 encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx', 17 encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
29 decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx', 18 decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
@@ -31,9 +20,6 @@ function createOfflineRecognizer() { @@ -31,9 +20,6 @@ function createOfflineRecognizer() {
31 task: 'transcribe', 20 task: 'transcribe',
32 tailPaddings: -1, 21 tailPaddings: -1,
33 }, 22 },
34 - tdnn: {  
35 - model: '',  
36 - },  
37 tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt', 23 tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
38 numThreads: 1, 24 numThreads: 1,
39 debug: 0, 25 debug: 0,
@@ -41,19 +27,10 @@ function createOfflineRecognizer() { @@ -41,19 +27,10 @@ function createOfflineRecognizer() {
41 modelType: 'whisper', 27 modelType: 'whisper',
42 }; 28 };
43 29
44 - let lmConfig = {  
45 - model: '',  
46 - scale: 1.0,  
47 - };  
48 -  
49 let config = { 30 let config = {
50 featConfig: featConfig, 31 featConfig: featConfig,
51 modelConfig: modelConfig, 32 modelConfig: modelConfig,
52 - lmConfig: lmConfig,  
53 decodingMethod: 'greedy_search', 33 decodingMethod: 'greedy_search',
54 - maxActivePaths: 4,  
55 - hotwordsFile: '',  
56 - hotwordsScore: 1.5,  
57 }; 34 };
58 35
59 return sherpa_onnx.createOfflineRecognizer(config); 36 return sherpa_onnx.createOfflineRecognizer(config);
@@ -6,12 +6,6 @@ console.log(portAudio.getDevices()); @@ -6,12 +6,6 @@ console.log(portAudio.getDevices());
6 const sherpa_onnx = require('sherpa-onnx'); 6 const sherpa_onnx = require('sherpa-onnx');
7 7
8 function createOnlineRecognizer() { 8 function createOnlineRecognizer() {
9 - let onlineTransducerModelConfig = {  
10 - encoder: '',  
11 - decoder: '',  
12 - joiner: '',  
13 - };  
14 -  
15 let onlineParaformerModelConfig = { 9 let onlineParaformerModelConfig = {
16 encoder: 10 encoder:
17 './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx', 11 './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
@@ -19,14 +13,8 @@ function createOnlineRecognizer() { @@ -19,14 +13,8 @@ function createOnlineRecognizer() {
19 './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx', 13 './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
20 }; 14 };
21 15
22 - let onlineZipformer2CtcModelConfig = {  
23 - model: '',  
24 - };  
25 -  
26 let onlineModelConfig = { 16 let onlineModelConfig = {
27 - transducer: onlineTransducerModelConfig,  
28 paraformer: onlineParaformerModelConfig, 17 paraformer: onlineParaformerModelConfig,
29 - zipformer2Ctc: onlineZipformer2CtcModelConfig,  
30 tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt', 18 tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
31 numThreads: 1, 19 numThreads: 1,
32 provider: 'cpu', 20 provider: 'cpu',
@@ -48,12 +36,6 @@ function createOnlineRecognizer() { @@ -48,12 +36,6 @@ function createOnlineRecognizer() {
48 rule1MinTrailingSilence: 2.4, 36 rule1MinTrailingSilence: 2.4,
49 rule2MinTrailingSilence: 1.2, 37 rule2MinTrailingSilence: 1.2,
50 rule3MinUtteranceLength: 20, 38 rule3MinUtteranceLength: 20,
51 - hotwordsFile: '',  
52 - hotwordsScore: 1.5,  
53 - ctcFstDecoderConfig: {  
54 - graph: '',  
55 - maxActive: 3000,  
56 - }  
57 }; 39 };
58 40
59 return sherpa_onnx.createOnlineRecognizer(recognizerConfig); 41 return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
@@ -7,12 +7,6 @@ const wav = require('wav'); @@ -7,12 +7,6 @@ const wav = require('wav');
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 function createOnlineRecognizer() { 9 function createOnlineRecognizer() {
10 - let onlineTransducerModelConfig = {  
11 - encoder: '',  
12 - decoder: '',  
13 - joiner: '',  
14 - };  
15 -  
16 let onlineParaformerModelConfig = { 10 let onlineParaformerModelConfig = {
17 encoder: 11 encoder:
18 './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx', 12 './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
@@ -20,14 +14,8 @@ function createOnlineRecognizer() { @@ -20,14 +14,8 @@ function createOnlineRecognizer() {
20 './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx', 14 './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
21 }; 15 };
22 16
23 - let onlineZipformer2CtcModelConfig = {  
24 - model: '',  
25 - };  
26 -  
27 let onlineModelConfig = { 17 let onlineModelConfig = {
28 - transducer: onlineTransducerModelConfig,  
29 paraformer: onlineParaformerModelConfig, 18 paraformer: onlineParaformerModelConfig,
30 - zipformer2Ctc: onlineZipformer2CtcModelConfig,  
31 tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt', 19 tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
32 numThreads: 1, 20 numThreads: 1,
33 provider: 'cpu', 21 provider: 'cpu',
@@ -49,12 +37,6 @@ function createOnlineRecognizer() { @@ -49,12 +37,6 @@ function createOnlineRecognizer() {
49 rule1MinTrailingSilence: 2.4, 37 rule1MinTrailingSilence: 2.4,
50 rule2MinTrailingSilence: 1.2, 38 rule2MinTrailingSilence: 1.2,
51 rule3MinUtteranceLength: 20, 39 rule3MinUtteranceLength: 20,
52 - hotwordsFile: '',  
53 - hotwordsScore: 1.5,  
54 - ctcFstDecoderConfig: {  
55 - graph: '',  
56 - maxActive: 3000,  
57 - }  
58 }; 40 };
59 41
60 return sherpa_onnx.createOnlineRecognizer(recognizerConfig); 42 return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
@@ -16,19 +16,8 @@ function createOnlineRecognizer() { @@ -16,19 +16,8 @@ function createOnlineRecognizer() {
16 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', 16 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
17 }; 17 };
18 18
19 - let onlineParaformerModelConfig = {  
20 - encoder: '',  
21 - decoder: '',  
22 - };  
23 -  
24 - let onlineZipformer2CtcModelConfig = {  
25 - model: '',  
26 - };  
27 -  
28 let onlineModelConfig = { 19 let onlineModelConfig = {
29 transducer: onlineTransducerModelConfig, 20 transducer: onlineTransducerModelConfig,
30 - paraformer: onlineParaformerModelConfig,  
31 - zipformer2Ctc: onlineZipformer2CtcModelConfig,  
32 tokens: 21 tokens:
33 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', 22 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
34 numThreads: 1, 23 numThreads: 1,
@@ -51,12 +40,6 @@ function createOnlineRecognizer() { @@ -51,12 +40,6 @@ function createOnlineRecognizer() {
51 rule1MinTrailingSilence: 2.4, 40 rule1MinTrailingSilence: 2.4,
52 rule2MinTrailingSilence: 1.2, 41 rule2MinTrailingSilence: 1.2,
53 rule3MinUtteranceLength: 20, 42 rule3MinUtteranceLength: 20,
54 - hotwordsFile: '',  
55 - hotwordsScore: 1.5,  
56 - ctcFstDecoderConfig: {  
57 - graph: '',  
58 - maxActive: 3000,  
59 - },  
60 // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst 43 // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
61 ruleFsts: './itn_zh_number.fst', 44 ruleFsts: './itn_zh_number.fst',
62 }; 45 };
@@ -15,19 +15,8 @@ function createOnlineRecognizer() { @@ -15,19 +15,8 @@ function createOnlineRecognizer() {
15 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', 15 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
16 }; 16 };
17 17
18 - let onlineParaformerModelConfig = {  
19 - encoder: '',  
20 - decoder: '',  
21 - };  
22 -  
23 - let onlineZipformer2CtcModelConfig = {  
24 - model: '',  
25 - };  
26 -  
27 let onlineModelConfig = { 18 let onlineModelConfig = {
28 transducer: onlineTransducerModelConfig, 19 transducer: onlineTransducerModelConfig,
29 - paraformer: onlineParaformerModelConfig,  
30 - zipformer2Ctc: onlineZipformer2CtcModelConfig,  
31 tokens: 20 tokens:
32 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', 21 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
33 numThreads: 1, 22 numThreads: 1,
@@ -50,12 +39,6 @@ function createOnlineRecognizer() { @@ -50,12 +39,6 @@ function createOnlineRecognizer() {
50 rule1MinTrailingSilence: 2.4, 39 rule1MinTrailingSilence: 2.4,
51 rule2MinTrailingSilence: 1.2, 40 rule2MinTrailingSilence: 1.2,
52 rule3MinUtteranceLength: 20, 41 rule3MinUtteranceLength: 20,
53 - hotwordsFile: '',  
54 - hotwordsScore: 1.5,  
55 - ctcFstDecoderConfig: {  
56 - graph: '',  
57 - maxActive: 3000,  
58 - }  
59 }; 42 };
60 43
61 return sherpa_onnx.createOnlineRecognizer(recognizerConfig); 44 return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
@@ -16,19 +16,8 @@ function createOnlineRecognizer() { @@ -16,19 +16,8 @@ function createOnlineRecognizer() {
16 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', 16 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
17 }; 17 };
18 18
19 - let onlineParaformerModelConfig = {  
20 - encoder: '',  
21 - decoder: '',  
22 - };  
23 -  
24 - let onlineZipformer2CtcModelConfig = {  
25 - model: '',  
26 - };  
27 -  
28 let onlineModelConfig = { 19 let onlineModelConfig = {
29 transducer: onlineTransducerModelConfig, 20 transducer: onlineTransducerModelConfig,
30 - paraformer: onlineParaformerModelConfig,  
31 - zipformer2Ctc: onlineZipformer2CtcModelConfig,  
32 tokens: 21 tokens:
33 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', 22 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
34 numThreads: 1, 23 numThreads: 1,
@@ -51,12 +40,6 @@ function createOnlineRecognizer() { @@ -51,12 +40,6 @@ function createOnlineRecognizer() {
51 rule1MinTrailingSilence: 2.4, 40 rule1MinTrailingSilence: 2.4,
52 rule2MinTrailingSilence: 1.2, 41 rule2MinTrailingSilence: 1.2,
53 rule3MinUtteranceLength: 20, 42 rule3MinUtteranceLength: 20,
54 - hotwordsFile: '',  
55 - hotwordsScore: 1.5,  
56 - ctcFstDecoderConfig: {  
57 - graph: '',  
58 - maxActive: 3000,  
59 - }  
60 }; 43 };
61 44
62 return sherpa_onnx.createOnlineRecognizer(recognizerConfig); 45 return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
@@ -7,25 +7,12 @@ const wav = require('wav'); @@ -7,25 +7,12 @@ const wav = require('wav');
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 function createOnlineRecognizer() { 9 function createOnlineRecognizer() {
10 - let onlineTransducerModelConfig = {  
11 - encoder: '',  
12 - decoder: '',  
13 - joiner: '',  
14 - };  
15 -  
16 - let onlineParaformerModelConfig = {  
17 - encoder: '',  
18 - decoder: '',  
19 - };  
20 -  
21 let onlineZipformer2CtcModelConfig = { 10 let onlineZipformer2CtcModelConfig = {
22 model: 11 model:
23 './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx', 12 './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
24 }; 13 };
25 14
26 let onlineModelConfig = { 15 let onlineModelConfig = {
27 - transducer: onlineTransducerModelConfig,  
28 - paraformer: onlineParaformerModelConfig,  
29 zipformer2Ctc: onlineZipformer2CtcModelConfig, 16 zipformer2Ctc: onlineZipformer2CtcModelConfig,
30 tokens: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt', 17 tokens: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
31 numThreads: 1, 18 numThreads: 1,
@@ -48,8 +35,6 @@ function createOnlineRecognizer() { @@ -48,8 +35,6 @@ function createOnlineRecognizer() {
48 rule1MinTrailingSilence: 2.4, 35 rule1MinTrailingSilence: 2.4,
49 rule2MinTrailingSilence: 1.2, 36 rule2MinTrailingSilence: 1.2,
50 rule3MinUtteranceLength: 20, 37 rule3MinUtteranceLength: 20,
51 - hotwordsFile: '',  
52 - hotwordsScore: 1.5,  
53 ctcFstDecoderConfig: { 38 ctcFstDecoderConfig: {
54 graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst', 39 graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
55 maxActive: 3000, 40 maxActive: 3000,
@@ -7,32 +7,18 @@ const wav = require('wav'); @@ -7,32 +7,18 @@ const wav = require('wav');
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 function createOnlineRecognizer() { 9 function createOnlineRecognizer() {
10 - let onlineTransducerModelConfig = {  
11 - encoder: '',  
12 - decoder: '',  
13 - joiner: '',  
14 - };  
15 -  
16 - let onlineParaformerModelConfig = {  
17 - encoder: '',  
18 - decoder: '',  
19 - };  
20 -  
21 let onlineZipformer2CtcModelConfig = { 10 let onlineZipformer2CtcModelConfig = {
22 model: 11 model:
23 './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx', 12 './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx',
24 }; 13 };
25 14
26 let onlineModelConfig = { 15 let onlineModelConfig = {
27 - transducer: onlineTransducerModelConfig,  
28 - paraformer: onlineParaformerModelConfig,  
29 zipformer2Ctc: onlineZipformer2CtcModelConfig, 16 zipformer2Ctc: onlineZipformer2CtcModelConfig,
30 tokens: 17 tokens:
31 './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt', 18 './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt',
32 numThreads: 1, 19 numThreads: 1,
33 provider: 'cpu', 20 provider: 'cpu',
34 debug: 1, 21 debug: 1,
35 - modelType: '',  
36 }; 22 };
37 23
38 let featureConfig = { 24 let featureConfig = {
@@ -49,12 +35,6 @@ function createOnlineRecognizer() { @@ -49,12 +35,6 @@ function createOnlineRecognizer() {
49 rule1MinTrailingSilence: 2.4, 35 rule1MinTrailingSilence: 2.4,
50 rule2MinTrailingSilence: 1.2, 36 rule2MinTrailingSilence: 1.2,
51 rule3MinUtteranceLength: 20, 37 rule3MinUtteranceLength: 20,
52 - hotwordsFile: '',  
53 - hotwordsScore: 1.5,  
54 - ctcFstDecoderConfig: {  
55 - graph: '',  
56 - maxActive: 3000,  
57 - }  
58 }; 38 };
59 39
60 return sherpa_onnx.createOnlineRecognizer(recognizerConfig); 40 return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
@@ -39,6 +39,10 @@ function freeConfig(config, Module) { @@ -39,6 +39,10 @@ function freeConfig(config, Module) {
39 freeConfig(config.tdnn, Module) 39 freeConfig(config.tdnn, Module)
40 } 40 }
41 41
  42 + if ('senseVoice' in config) {
  43 + freeConfig(config.senseVoice, Module)
  44 + }
  45 +
42 if ('lm' in config) { 46 if ('lm' in config) {
43 freeConfig(config.lm, Module) 47 freeConfig(config.lm, Module)
44 } 48 }
@@ -52,9 +56,9 @@ function freeConfig(config, Module) { @@ -52,9 +56,9 @@ function freeConfig(config, Module) {
52 56
53 // The user should free the returned pointers 57 // The user should free the returned pointers
54 function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { 58 function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
55 - const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;  
56 - const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;  
57 - const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1; 59 + const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  60 + const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
  61 + const joinerLen = Module.lengthBytesUTF8(config.joiner || '') + 1;
58 62
59 const n = encoderLen + decoderLen + joinerLen; 63 const n = encoderLen + decoderLen + joinerLen;
60 64
@@ -64,13 +68,13 @@ function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { @@ -64,13 +68,13 @@ function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
64 const ptr = Module._malloc(len); 68 const ptr = Module._malloc(len);
65 69
66 let offset = 0; 70 let offset = 0;
67 - Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); 71 + Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
68 offset += encoderLen; 72 offset += encoderLen;
69 73
70 - Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); 74 + Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
71 offset += decoderLen; 75 offset += decoderLen;
72 76
73 - Module.stringToUTF8(config.joiner, buffer + offset, joinerLen); 77 + Module.stringToUTF8(config.joiner || '', buffer + offset, joinerLen);
74 78
75 offset = 0; 79 offset = 0;
76 Module.setValue(ptr, buffer + offset, 'i8*'); 80 Module.setValue(ptr, buffer + offset, 'i8*');
@@ -87,8 +91,8 @@ function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { @@ -87,8 +91,8 @@ function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
87 } 91 }
88 92
89 function initSherpaOnnxOnlineParaformerModelConfig(config, Module) { 93 function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
90 - const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;  
91 - const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1; 94 + const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  95 + const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
92 96
93 const n = encoderLen + decoderLen; 97 const n = encoderLen + decoderLen;
94 const buffer = Module._malloc(n); 98 const buffer = Module._malloc(n);
@@ -97,10 +101,10 @@ function initSherpaOnnxOnlineParaformerModelConfig(config, Module) { @@ -97,10 +101,10 @@ function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
97 const ptr = Module._malloc(len); 101 const ptr = Module._malloc(len);
98 102
99 let offset = 0; 103 let offset = 0;
100 - Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); 104 + Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
101 offset += encoderLen; 105 offset += encoderLen;
102 106
103 - Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); 107 + Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
104 108
105 offset = 0; 109 offset = 0;
106 Module.setValue(ptr, buffer + offset, 'i8*'); 110 Module.setValue(ptr, buffer + offset, 'i8*');
@@ -114,13 +118,13 @@ function initSherpaOnnxOnlineParaformerModelConfig(config, Module) { @@ -114,13 +118,13 @@ function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
114 } 118 }
115 119
116 function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) { 120 function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
117 - const n = Module.lengthBytesUTF8(config.model) + 1; 121 + const n = Module.lengthBytesUTF8(config.model || '') + 1;
118 const buffer = Module._malloc(n); 122 const buffer = Module._malloc(n);
119 123
120 const len = 1 * 4; // 1 pointer 124 const len = 1 * 4; // 1 pointer
121 const ptr = Module._malloc(len); 125 const ptr = Module._malloc(len);
122 126
123 - Module.stringToUTF8(config.model, buffer, n); 127 + Module.stringToUTF8(config.model || '', buffer, n);
124 128
125 Module.setValue(ptr, buffer, 'i8*'); 129 Module.setValue(ptr, buffer, 'i8*');
126 130
@@ -130,10 +134,33 @@ function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) { @@ -130,10 +134,33 @@ function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
130 } 134 }
131 135
132 function initSherpaOnnxOnlineModelConfig(config, Module) { 136 function initSherpaOnnxOnlineModelConfig(config, Module) {
  137 + if (!('transducer' in config)) {
  138 + config.transducer = {
  139 + encoder: '',
  140 + decoder: '',
  141 + joiner: '',
  142 + };
  143 + }
  144 +
  145 + if (!('paraformer' in config)) {
  146 + config.paraformer = {
  147 + encoder: '',
  148 + decoder: '',
  149 + };
  150 + }
  151 +
  152 + if (!('zipformer2Ctc' in config)) {
  153 + config.zipformer2Ctc = {
  154 + model: '',
  155 + };
  156 + }
  157 +
133 const transducer = 158 const transducer =
134 initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module); 159 initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
  160 +
135 const paraformer = 161 const paraformer =
136 initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module); 162 initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);
  163 +
137 const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig( 164 const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
138 config.zipformer2Ctc, Module); 165 config.zipformer2Ctc, Module);
139 166
@@ -150,9 +177,9 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -150,9 +177,9 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
150 Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset); 177 Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset);
151 offset += ctc.len; 178 offset += ctc.len;
152 179
153 - const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;  
154 - const providerLen = Module.lengthBytesUTF8(config.provider) + 1;  
155 - const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1; 180 + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  181 + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  182 + const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
156 const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1; 183 const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
157 const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1; 184 const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
158 185
@@ -161,13 +188,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -161,13 +188,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
161 const buffer = Module._malloc(bufferLen); 188 const buffer = Module._malloc(bufferLen);
162 189
163 offset = 0; 190 offset = 0;
164 - Module.stringToUTF8(config.tokens, buffer, tokensLen); 191 + Module.stringToUTF8(config.tokens || '', buffer, tokensLen);
165 offset += tokensLen; 192 offset += tokensLen;
166 193
167 - Module.stringToUTF8(config.provider, buffer + offset, providerLen); 194 + Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
168 offset += providerLen; 195 offset += providerLen;
169 196
170 - Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen); 197 + Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen);
171 offset += modelTypeLen; 198 offset += modelTypeLen;
172 199
173 Module.stringToUTF8( 200 Module.stringToUTF8(
@@ -181,13 +208,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -181,13 +208,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
181 Module.setValue(ptr + offset, buffer, 'i8*'); // tokens 208 Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
182 offset += 4; 209 offset += 4;
183 210
184 - Module.setValue(ptr + offset, config.numThreads, 'i32'); 211 + Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
185 offset += 4; 212 offset += 4;
186 213
187 Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider 214 Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
188 offset += 4; 215 offset += 4;
189 216
190 - Module.setValue(ptr + offset, config.debug, 'i32'); 217 + Module.setValue(ptr + offset, config.debug || 0, 'i32');
191 offset += 4; 218 offset += 4;
192 219
193 Module.setValue( 220 Module.setValue(
@@ -215,8 +242,8 @@ function initSherpaOnnxFeatureConfig(config, Module) { @@ -215,8 +242,8 @@ function initSherpaOnnxFeatureConfig(config, Module) {
215 const len = 2 * 4; // 2 int32 fields 242 const len = 2 * 4; // 2 int32 fields
216 const ptr = Module._malloc(len); 243 const ptr = Module._malloc(len);
217 244
218 - Module.setValue(ptr, config.sampleRate, 'i32');  
219 - Module.setValue(ptr + 4, config.featureDim, 'i32'); 245 + Module.setValue(ptr, config.sampleRate || 16000, 'i32');
  246 + Module.setValue(ptr + 4, config.featureDim || 80, 'i32');
220 return {ptr: ptr, len: len}; 247 return {ptr: ptr, len: len};
221 } 248 }
222 249
@@ -224,16 +251,30 @@ function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) { @@ -224,16 +251,30 @@ function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
224 const len = 2 * 4; 251 const len = 2 * 4;
225 const ptr = Module._malloc(len); 252 const ptr = Module._malloc(len);
226 253
227 - const graphLen = Module.lengthBytesUTF8(config.graph) + 1; 254 + const graphLen = Module.lengthBytesUTF8(config.graph || '') + 1;
228 const buffer = Module._malloc(graphLen); 255 const buffer = Module._malloc(graphLen);
229 Module.stringToUTF8(config.graph, buffer, graphLen); 256 Module.stringToUTF8(config.graph, buffer, graphLen);
230 257
231 Module.setValue(ptr, buffer, 'i8*'); 258 Module.setValue(ptr, buffer, 'i8*');
232 - Module.setValue(ptr + 4, config.maxActive, 'i32'); 259 + Module.setValue(ptr + 4, config.maxActive || 3000, 'i32');
233 return {ptr: ptr, len: len, buffer: buffer}; 260 return {ptr: ptr, len: len, buffer: buffer};
234 } 261 }
235 262
236 function initSherpaOnnxOnlineRecognizerConfig(config, Module) { 263 function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
  264 + if (!('featConfig' in config)) {
  265 + config.featConfig = {
  266 + sampleRate: 16000,
  267 + featureDim: 80,
  268 + };
  269 + }
  270 +
  271 + if (!('ctcFstDecoderConfig' in config)) {
  272 + config.ctcFstDecoderConfig = {
  273 + graph: '',
  274 + maxActive: 3000,
  275 + };
  276 + }
  277 +
237 const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module); 278 const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
238 const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module); 279 const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
239 const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig( 280 const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
@@ -249,8 +290,9 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { @@ -249,8 +290,9 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
249 Module._CopyHeap(model.ptr, model.len, ptr + offset); 290 Module._CopyHeap(model.ptr, model.len, ptr + offset);
250 offset += model.len; 291 offset += model.len;
251 292
252 - const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;  
253 - const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1; 293 + const decodingMethodLen =
  294 + Module.lengthBytesUTF8(config.decodingMethod || 'greedy_search') + 1;
  295 + const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile || '') + 1;
254 const ruleFstsFileLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; 296 const ruleFstsFileLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
255 const ruleFarsFileLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; 297 const ruleFarsFileLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
256 const bufferLen = 298 const bufferLen =
@@ -258,10 +300,12 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { @@ -258,10 +300,12 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
258 const buffer = Module._malloc(bufferLen); 300 const buffer = Module._malloc(bufferLen);
259 301
260 offset = 0; 302 offset = 0;
261 - Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen); 303 + Module.stringToUTF8(
  304 + config.decodingMethod || 'greedy_search', buffer, decodingMethodLen);
262 offset += decodingMethodLen; 305 offset += decodingMethodLen;
263 306
264 - Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); 307 + Module.stringToUTF8(
  308 + config.hotwordsFile || '', buffer + offset, hotwordsFileLen);
265 offset += hotwordsFileLen; 309 offset += hotwordsFileLen;
266 310
267 Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsFileLen); 311 Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsFileLen);
@@ -274,25 +318,25 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { @@ -274,25 +318,25 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
274 Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method 318 Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
275 offset += 4; 319 offset += 4;
276 320
277 - Module.setValue(ptr + offset, config.maxActivePaths, 'i32'); 321 + Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32');
278 offset += 4; 322 offset += 4;
279 323
280 - Module.setValue(ptr + offset, config.enableEndpoint, 'i32'); 324 + Module.setValue(ptr + offset, config.enableEndpoint || 0, 'i32');
281 offset += 4; 325 offset += 4;
282 326
283 - Module.setValue(ptr + offset, config.rule1MinTrailingSilence, 'float'); 327 + Module.setValue(ptr + offset, config.rule1MinTrailingSilence || 2.4, 'float');
284 offset += 4; 328 offset += 4;
285 329
286 - Module.setValue(ptr + offset, config.rule2MinTrailingSilence, 'float'); 330 + Module.setValue(ptr + offset, config.rule2MinTrailingSilence || 1.2, 'float');
287 offset += 4; 331 offset += 4;
288 332
289 - Module.setValue(ptr + offset, config.rule3MinUtteranceLength, 'float'); 333 + Module.setValue(ptr + offset, config.rule3MinUtteranceLength || 20, 'float');
290 offset += 4; 334 offset += 4;
291 335
292 Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*'); 336 Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
293 offset += 4; 337 offset += 4;
294 338
295 - Module.setValue(ptr + offset, config.hotwordsScore, 'float'); 339 + Module.setValue(ptr + offset, config.hotwordsScore || 1.5, 'float');
296 offset += 4; 340 offset += 4;
297 341
298 Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset); 342 Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset);
@@ -313,7 +357,6 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { @@ -313,7 +357,6 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
313 } 357 }
314 } 358 }
315 359
316 -  
317 function createOnlineRecognizer(Module, myConfig) { 360 function createOnlineRecognizer(Module, myConfig) {
318 const onlineTransducerModelConfig = { 361 const onlineTransducerModelConfig = {
319 encoder: '', 362 encoder: '',
@@ -395,9 +438,9 @@ function createOnlineRecognizer(Module, myConfig) { @@ -395,9 +438,9 @@ function createOnlineRecognizer(Module, myConfig) {
395 } 438 }
396 439
397 function initSherpaOnnxOfflineTransducerModelConfig(config, Module) { 440 function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
398 - const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;  
399 - const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;  
400 - const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1; 441 + const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  442 + const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
  443 + const joinerLen = Module.lengthBytesUTF8(config.joiner || '') + 1;
401 444
402 const n = encoderLen + decoderLen + joinerLen; 445 const n = encoderLen + decoderLen + joinerLen;
403 446
@@ -407,13 +450,13 @@ function initSherpaOnnxOfflineTransducerModelConfig(config, Module) { @@ -407,13 +450,13 @@ function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
407 const ptr = Module._malloc(len); 450 const ptr = Module._malloc(len);
408 451
409 let offset = 0; 452 let offset = 0;
410 - Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); 453 + Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
411 offset += encoderLen; 454 offset += encoderLen;
412 455
413 - Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); 456 + Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
414 offset += decoderLen; 457 offset += decoderLen;
415 458
416 - Module.stringToUTF8(config.joiner, buffer + offset, joinerLen); 459 + Module.stringToUTF8(config.joiner || '', buffer + offset, joinerLen);
417 460
418 offset = 0; 461 offset = 0;
419 Module.setValue(ptr, buffer + offset, 'i8*'); 462 Module.setValue(ptr, buffer + offset, 'i8*');
@@ -430,14 +473,14 @@ function initSherpaOnnxOfflineTransducerModelConfig(config, Module) { @@ -430,14 +473,14 @@ function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
430 } 473 }
431 474
432 function initSherpaOnnxOfflineParaformerModelConfig(config, Module) { 475 function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
433 - const n = Module.lengthBytesUTF8(config.model) + 1; 476 + const n = Module.lengthBytesUTF8(config.model || '') + 1;
434 477
435 const buffer = Module._malloc(n); 478 const buffer = Module._malloc(n);
436 479
437 const len = 1 * 4; // 1 pointer 480 const len = 1 * 4; // 1 pointer
438 const ptr = Module._malloc(len); 481 const ptr = Module._malloc(len);
439 482
440 - Module.stringToUTF8(config.model, buffer, n); 483 + Module.stringToUTF8(config.model || '', buffer, n);
441 484
442 Module.setValue(ptr, buffer, 'i8*'); 485 Module.setValue(ptr, buffer, 'i8*');
443 486
@@ -447,14 +490,14 @@ function initSherpaOnnxOfflineParaformerModelConfig(config, Module) { @@ -447,14 +490,14 @@ function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
447 } 490 }
448 491
449 function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) { 492 function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
450 - const n = Module.lengthBytesUTF8(config.model) + 1; 493 + const n = Module.lengthBytesUTF8(config.model || '') + 1;
451 494
452 const buffer = Module._malloc(n); 495 const buffer = Module._malloc(n);
453 496
454 const len = 1 * 4; // 1 pointer 497 const len = 1 * 4; // 1 pointer
455 const ptr = Module._malloc(len); 498 const ptr = Module._malloc(len);
456 499
457 - Module.stringToUTF8(config.model, buffer, n); 500 + Module.stringToUTF8(config.model || '', buffer, n);
458 501
459 Module.setValue(ptr, buffer, 'i8*'); 502 Module.setValue(ptr, buffer, 'i8*');
460 503
@@ -464,10 +507,10 @@ function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) { @@ -464,10 +507,10 @@ function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
464 } 507 }
465 508
466 function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { 509 function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
467 - const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;  
468 - const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;  
469 - const languageLen = Module.lengthBytesUTF8(config.language) + 1;  
470 - const taskLen = Module.lengthBytesUTF8(config.task) + 1; 510 + const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  511 + const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
  512 + const languageLen = Module.lengthBytesUTF8(config.language || '') + 1;
  513 + const taskLen = Module.lengthBytesUTF8(config.task || '') + 1;
471 514
472 const n = encoderLen + decoderLen + languageLen + taskLen; 515 const n = encoderLen + decoderLen + languageLen + taskLen;
473 const buffer = Module._malloc(n); 516 const buffer = Module._malloc(n);
@@ -476,16 +519,16 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { @@ -476,16 +519,16 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
476 const ptr = Module._malloc(len); 519 const ptr = Module._malloc(len);
477 520
478 let offset = 0; 521 let offset = 0;
479 - Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); 522 + Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
480 offset += encoderLen; 523 offset += encoderLen;
481 524
482 - Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); 525 + Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
483 offset += decoderLen; 526 offset += decoderLen;
484 527
485 - Module.stringToUTF8(config.language, buffer + offset, languageLen); 528 + Module.stringToUTF8(config.language || '', buffer + offset, languageLen);
486 offset += languageLen; 529 offset += languageLen;
487 530
488 - Module.stringToUTF8(config.task, buffer + offset, taskLen); 531 + Module.stringToUTF8(config.task || '', buffer + offset, taskLen);
489 532
490 offset = 0; 533 offset = 0;
491 Module.setValue(ptr, buffer + offset, 'i8*'); 534 Module.setValue(ptr, buffer + offset, 'i8*');
@@ -508,13 +551,13 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { @@ -508,13 +551,13 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
508 } 551 }
509 552
510 function initSherpaOnnxOfflineTdnnModelConfig(config, Module) { 553 function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
511 - const n = Module.lengthBytesUTF8(config.model) + 1; 554 + const n = Module.lengthBytesUTF8(config.model || '') + 1;
512 const buffer = Module._malloc(n); 555 const buffer = Module._malloc(n);
513 556
514 const len = 1 * 4; // 1 pointer 557 const len = 1 * 4; // 1 pointer
515 const ptr = Module._malloc(len); 558 const ptr = Module._malloc(len);
516 559
517 - Module.stringToUTF8(config.model, buffer, n); 560 + Module.stringToUTF8(config.model || '', buffer, n);
518 561
519 Module.setValue(ptr, buffer, 'i8*'); 562 Module.setValue(ptr, buffer, 'i8*');
520 563
@@ -523,16 +566,48 @@ function initSherpaOnnxOfflineTdnnModelConfig(config, Module) { @@ -523,16 +566,48 @@ function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
523 } 566 }
524 } 567 }
525 568
  569 +function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) {
  570 + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;
  571 + const languageLen = Module.lengthBytesUTF8(config.language || '') + 1;
  572 +
  573 + // useItn is an integer with 4 bytes
  574 + const n = modelLen + languageLen;
  575 + const buffer = Module._malloc(n);
  576 +
  577 + const len = 3 * 4; // 2 pointers + 1 int
  578 + const ptr = Module._malloc(len);
  579 +
  580 + let offset = 0;
  581 + Module.stringToUTF8(config.model || '', buffer + offset, modelLen);
  582 + offset += modelLen;
  583 +
  584 + Module.stringToUTF8(config.language || '', buffer + offset, languageLen);
  585 + offset += languageLen;
  586 +
  587 + offset = 0;
  588 + Module.setValue(ptr, buffer + offset, 'i8*');
  589 + offset += modelLen;
  590 +
  591 + Module.setValue(ptr + 4, buffer + offset, 'i8*');
  592 + offset += languageLen;
  593 +
  594 + Module.setValue(ptr + 8, config.useInverseTextNormalization || 0, 'i32');
  595 +
  596 + return {
  597 + buffer: buffer, ptr: ptr, len: len,
  598 + }
  599 +}
  600 +
526 function initSherpaOnnxOfflineLMConfig(config, Module) { 601 function initSherpaOnnxOfflineLMConfig(config, Module) {
527 - const n = Module.lengthBytesUTF8(config.model) + 1; 602 + const n = Module.lengthBytesUTF8(config.model || '') + 1;
528 const buffer = Module._malloc(n); 603 const buffer = Module._malloc(n);
529 604
530 const len = 2 * 4; 605 const len = 2 * 4;
531 const ptr = Module._malloc(len); 606 const ptr = Module._malloc(len);
532 607
533 - Module.stringToUTF8(config.model, buffer, n); 608 + Module.stringToUTF8(config.model || '', buffer, n);
534 Module.setValue(ptr, buffer, 'i8*'); 609 Module.setValue(ptr, buffer, 'i8*');
535 - Module.setValue(ptr + 4, config.scale, 'float'); 610 + Module.setValue(ptr + 4, config.scale || 1, 'float');
536 611
537 return { 612 return {
538 buffer: buffer, ptr: ptr, len: len, 613 buffer: buffer, ptr: ptr, len: len,
@@ -540,18 +615,70 @@ function initSherpaOnnxOfflineLMConfig(config, Module) { @@ -540,18 +615,70 @@ function initSherpaOnnxOfflineLMConfig(config, Module) {
540 } 615 }
541 616
542 function initSherpaOnnxOfflineModelConfig(config, Module) { 617 function initSherpaOnnxOfflineModelConfig(config, Module) {
  618 + if (!('transducer' in config)) {
  619 + config.transducer = {
  620 + encoder: '',
  621 + decoder: '',
  622 + joiner: '',
  623 + };
  624 + }
  625 +
  626 + if (!('paraformer' in config)) {
  627 + config.paraformer = {
  628 + model: '',
  629 + };
  630 + }
  631 +
  632 + if (!('nemoCtc' in config)) {
  633 + config.nemoCtc = {
  634 + model: '',
  635 + };
  636 + }
  637 +
  638 + if (!('whisper' in config)) {
  639 + config.whisper = {
  640 + encoder: '',
  641 + decoder: '',
  642 + language: '',
  643 + task: '',
  644 + tailPaddings: -1,
  645 + };
  646 + }
  647 +
  648 + if (!('tdnn' in config)) {
  649 + config.tdnn = {
  650 + model: '',
  651 + };
  652 + }
  653 +
  654 + if (!('senseVoice' in config)) {
  655 + config.senseVoice = {
  656 + model: '',
  657 + language: '',
  658 + useInverseTextNormalization: 0,
  659 + };
  660 + }
  661 +
543 const transducer = 662 const transducer =
544 initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module); 663 initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
  664 +
545 const paraformer = 665 const paraformer =
546 initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module); 666 initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);
  667 +
547 const nemoCtc = 668 const nemoCtc =
548 initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module); 669 initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);
  670 +
549 const whisper = 671 const whisper =
550 initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module); 672 initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);
  673 +
551 const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module); 674 const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);
552 675
  676 + const senseVoice =
  677 + initSherpaOnnxOfflineSenseVoiceModelConfig(config.senseVoice, Module);
  678 +
553 const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + 679 const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
554 - tdnn.len + 8 * 4; 680 + tdnn.len + 8 * 4 + senseVoice.len;
  681 +
555 const ptr = Module._malloc(len); 682 const ptr = Module._malloc(len);
556 683
557 let offset = 0; 684 let offset = 0;
@@ -570,9 +697,10 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { @@ -570,9 +697,10 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
570 Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset); 697 Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset);
571 offset += tdnn.len; 698 offset += tdnn.len;
572 699
573 - const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;  
574 - const providerLen = Module.lengthBytesUTF8(config.provider) + 1;  
575 - const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1; 700 +
  701 + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  702 + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  703 + const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
576 const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1; 704 const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
577 const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1; 705 const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
578 const teleSpeechCtcLen = 706 const teleSpeechCtcLen =
@@ -580,16 +708,17 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { @@ -580,16 +708,17 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
580 708
581 const bufferLen = tokensLen + providerLen + modelTypeLen + modelingUnitLen + 709 const bufferLen = tokensLen + providerLen + modelTypeLen + modelingUnitLen +
582 bpeVocabLen + teleSpeechCtcLen; 710 bpeVocabLen + teleSpeechCtcLen;
  711 +
583 const buffer = Module._malloc(bufferLen); 712 const buffer = Module._malloc(bufferLen);
584 713
585 offset = 0; 714 offset = 0;
586 Module.stringToUTF8(config.tokens, buffer, tokensLen); 715 Module.stringToUTF8(config.tokens, buffer, tokensLen);
587 offset += tokensLen; 716 offset += tokensLen;
588 717
589 - Module.stringToUTF8(config.provider, buffer + offset, providerLen); 718 + Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
590 offset += providerLen; 719 offset += providerLen;
591 720
592 - Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen); 721 + Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen);
593 offset += modelTypeLen; 722 offset += modelTypeLen;
594 723
595 Module.stringToUTF8( 724 Module.stringToUTF8(
@@ -608,10 +737,10 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { @@ -608,10 +737,10 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
608 Module.setValue(ptr + offset, buffer, 'i8*'); // tokens 737 Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
609 offset += 4; 738 offset += 4;
610 739
611 - Module.setValue(ptr + offset, config.numThreads, 'i32'); 740 + Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
612 offset += 4; 741 offset += 4;
613 742
614 - Module.setValue(ptr + offset, config.debug, 'i32'); 743 + Module.setValue(ptr + offset, config.debug || 0, 'i32');
615 offset += 4; 744 offset += 4;
616 745
617 Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider 746 Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
@@ -639,13 +768,30 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { @@ -639,13 +768,30 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
639 'i8*'); // teleSpeechCtc 768 'i8*'); // teleSpeechCtc
640 offset += 4; 769 offset += 4;
641 770
  771 + Module._CopyHeap(senseVoice.ptr, senseVoice.len, ptr + offset);
  772 +
642 return { 773 return {
643 buffer: buffer, ptr: ptr, len: len, transducer: transducer, 774 buffer: buffer, ptr: ptr, len: len, transducer: transducer,
644 - paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn 775 + paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
  776 + senseVoice: senseVoice,
645 } 777 }
646 } 778 }
647 779
648 function initSherpaOnnxOfflineRecognizerConfig(config, Module) { 780 function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
  781 + if (!('featConfig' in config)) {
  782 + config.featConfig = {
  783 + sampleRate: 16000,
  784 + featureDim: 80,
  785 + };
  786 + }
  787 +
  788 + if (!('lmConfig' in config)) {
  789 + config.lmConfig = {
  790 + model: '',
  791 + scale: 1.0,
  792 + };
  793 + }
  794 +
649 const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module); 795 const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
650 const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module); 796 const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
651 const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module); 797 const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
@@ -663,8 +809,9 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { @@ -663,8 +809,9 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
663 Module._CopyHeap(lm.ptr, lm.len, ptr + offset); 809 Module._CopyHeap(lm.ptr, lm.len, ptr + offset);
664 offset += lm.len; 810 offset += lm.len;
665 811
666 - const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;  
667 - const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1; 812 + const decodingMethodLen =
  813 + Module.lengthBytesUTF8(config.decodingMethod || 'greedy_search') + 1;
  814 + const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile || '') + 1;
668 const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; 815 const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
669 const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; 816 const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
670 const bufferLen = 817 const bufferLen =
@@ -672,10 +819,12 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { @@ -672,10 +819,12 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
672 const buffer = Module._malloc(bufferLen); 819 const buffer = Module._malloc(bufferLen);
673 820
674 offset = 0; 821 offset = 0;
675 - Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen); 822 + Module.stringToUTF8(
  823 + config.decodingMethod || 'greedy_search', buffer, decodingMethodLen);
676 offset += decodingMethodLen; 824 offset += decodingMethodLen;
677 825
678 - Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); 826 + Module.stringToUTF8(
  827 + config.hotwordsFile || '', buffer + offset, hotwordsFileLen);
679 offset += hotwordsFileLen; 828 offset += hotwordsFileLen;
680 829
681 Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen); 830 Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
@@ -689,13 +838,13 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { @@ -689,13 +838,13 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
689 Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method 838 Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
690 offset += 4; 839 offset += 4;
691 840
692 - Module.setValue(ptr + offset, config.maxActivePaths, 'i32'); 841 + Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32');
693 offset += 4; 842 offset += 4;
694 843
695 Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*'); 844 Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
696 offset += 4; 845 offset += 4;
697 846
698 - Module.setValue(ptr + offset, config.hotwordsScore, 'float'); 847 + Module.setValue(ptr + offset, config.hotwordsScore || 1.5, 'float');
699 offset += 4; 848 offset += 4;
700 849
701 Module.setValue( 850 Module.setValue(
@@ -16,6 +16,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, ""); @@ -16,6 +16,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
16 static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, ""); 16 static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
17 static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, ""); 17 static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
18 static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, ""); 18 static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
  19 +static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, "");
19 static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, ""); 20 static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");
20 21
21 static_assert(sizeof(SherpaOnnxOfflineModelConfig) == 22 static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
@@ -23,7 +24,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) == @@ -23,7 +24,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
23 sizeof(SherpaOnnxOfflineParaformerModelConfig) + 24 sizeof(SherpaOnnxOfflineParaformerModelConfig) +
24 sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) + 25 sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
25 sizeof(SherpaOnnxOfflineWhisperModelConfig) + 26 sizeof(SherpaOnnxOfflineWhisperModelConfig) +
26 - sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4, 27 + sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 +
  28 + sizeof(SherpaOnnxOfflineSenseVoiceModelConfig),
27 ""); 29 "");
28 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); 30 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
29 static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) == 31 static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
@@ -63,6 +65,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { @@ -63,6 +65,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
63 auto nemo_ctc = &model_config->nemo_ctc; 65 auto nemo_ctc = &model_config->nemo_ctc;
64 auto whisper = &model_config->whisper; 66 auto whisper = &model_config->whisper;
65 auto tdnn = &model_config->tdnn; 67 auto tdnn = &model_config->tdnn;
  68 + auto sense_voice = &model_config->sense_voice;
66 69
67 fprintf(stdout, "----------offline transducer model config----------\n"); 70 fprintf(stdout, "----------offline transducer model config----------\n");
68 fprintf(stdout, "encoder: %s\n", transducer->encoder); 71 fprintf(stdout, "encoder: %s\n", transducer->encoder);
@@ -85,6 +88,11 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { @@ -85,6 +88,11 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
85 fprintf(stdout, "----------offline tdnn model config----------\n"); 88 fprintf(stdout, "----------offline tdnn model config----------\n");
86 fprintf(stdout, "model: %s\n", tdnn->model); 89 fprintf(stdout, "model: %s\n", tdnn->model);
87 90
  91 + fprintf(stdout, "----------offline sense_voice model config----------\n");
  92 + fprintf(stdout, "model: %s\n", sense_voice->model);
  93 + fprintf(stdout, "language: %s\n", sense_voice->language);
  94 + fprintf(stdout, "use_itn: %d\n", sense_voice->use_itn);
  95 +
88 fprintf(stdout, "tokens: %s\n", model_config->tokens); 96 fprintf(stdout, "tokens: %s\n", model_config->tokens);
89 fprintf(stdout, "num_threads: %d\n", model_config->num_threads); 97 fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
90 fprintf(stdout, "provider: %s\n", model_config->provider); 98 fprintf(stdout, "provider: %s\n", model_config->provider);
@@ -14,14 +14,10 @@ function freeConfig(config, Module) { @@ -14,14 +14,10 @@ function freeConfig(config, Module) {
14 // The user should free the returned pointers 14 // The user should free the returned pointers
15 function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { 15 function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
16 const modelLen = Module.lengthBytesUTF8(config.model) + 1; 16 const modelLen = Module.lengthBytesUTF8(config.model) + 1;
17 - const lexiconLen = Module.lengthBytesUTF8(config.lexicon) + 1;  
18 - const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;  
19 - const dataDirLen = Module.lengthBytesUTF8(config.dataDir) + 1;  
20 -  
21 - if (!('dictDir' in config)) {  
22 - config.dictDir = ''  
23 - }  
24 - const dictDirLen = Module.lengthBytesUTF8(config.dictDir) + 1; 17 + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
  18 + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  19 + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
  20 + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1;
25 21
26 const n = modelLen + lexiconLen + tokensLen + dataDirLen + dictDirLen; 22 const n = modelLen + lexiconLen + tokensLen + dataDirLen + dictDirLen;
27 23
@@ -31,19 +27,19 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { @@ -31,19 +27,19 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
31 const ptr = Module._malloc(len); 27 const ptr = Module._malloc(len);
32 28
33 let offset = 0; 29 let offset = 0;
34 - Module.stringToUTF8(config.model, buffer + offset, modelLen); 30 + Module.stringToUTF8(config.model || '', buffer + offset, modelLen);
35 offset += modelLen; 31 offset += modelLen;
36 32
37 - Module.stringToUTF8(config.lexicon, buffer + offset, lexiconLen); 33 + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
38 offset += lexiconLen; 34 offset += lexiconLen;
39 35
40 - Module.stringToUTF8(config.tokens, buffer + offset, tokensLen); 36 + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen);
41 offset += tokensLen; 37 offset += tokensLen;
42 38
43 - Module.stringToUTF8(config.dataDir, buffer + offset, dataDirLen); 39 + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
44 offset += dataDirLen; 40 offset += dataDirLen;
45 41
46 - Module.stringToUTF8(config.dictDir, buffer + offset, dictDirLen); 42 + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen);
47 offset += dictDirLen; 43 offset += dictDirLen;
48 44
49 offset = 0; 45 offset = 0;
@@ -59,9 +55,9 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { @@ -59,9 +55,9 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
59 Module.setValue(ptr + 12, buffer + offset, 'i8*'); 55 Module.setValue(ptr + 12, buffer + offset, 'i8*');
60 offset += dataDirLen; 56 offset += dataDirLen;
61 57
62 - Module.setValue(ptr + 16, config.noiseScale, 'float');  
63 - Module.setValue(ptr + 20, config.noiseScaleW, 'float');  
64 - Module.setValue(ptr + 24, config.lengthScale, 'float'); 58 + Module.setValue(ptr + 16, config.noiseScale || 0.667, 'float');
  59 + Module.setValue(ptr + 20, config.noiseScaleW || 0.8, 'float');
  60 + Module.setValue(ptr + 24, config.lengthScale || 1.0, 'float');
65 Module.setValue(ptr + 28, buffer + offset, 'i8*'); 61 Module.setValue(ptr + 28, buffer + offset, 'i8*');
66 offset += dictDirLen; 62 offset += dictDirLen;
67 63
@@ -81,13 +77,13 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { @@ -81,13 +77,13 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
81 Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset); 77 Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset);
82 offset += vitsModelConfig.len; 78 offset += vitsModelConfig.len;
83 79
84 - Module.setValue(ptr + offset, config.numThreads, 'i32'); 80 + Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
85 offset += 4; 81 offset += 4;
86 82
87 - Module.setValue(ptr + offset, config.debug, 'i32'); 83 + Module.setValue(ptr + offset, config.debug || 0, 'i32');
88 offset += 4; 84 offset += 4;
89 85
90 - const providerLen = Module.lengthBytesUTF8(config.provider) + 1; 86 + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
91 const buffer = Module._malloc(providerLen); 87 const buffer = Module._malloc(providerLen);
92 Module.stringToUTF8(config.provider, buffer, providerLen); 88 Module.stringToUTF8(config.provider, buffer, providerLen);
93 Module.setValue(ptr + offset, buffer, 'i8*'); 89 Module.setValue(ptr + offset, buffer, 'i8*');
@@ -107,17 +103,17 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) { @@ -107,17 +103,17 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) {
107 Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); 103 Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset);
108 offset += modelConfig.len; 104 offset += modelConfig.len;
109 105
110 - const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1;  
111 - const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars) + 1; 106 + const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
  107 + const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
112 108
113 const buffer = Module._malloc(ruleFstsLen + ruleFarsLen); 109 const buffer = Module._malloc(ruleFstsLen + ruleFarsLen);
114 - Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);  
115 - Module.stringToUTF8(config.ruleFars, buffer + ruleFstsLen, ruleFarsLen); 110 + Module.stringToUTF8(config.ruleFsts || '', buffer, ruleFstsLen);
  111 + Module.stringToUTF8(config.ruleFars || '', buffer + ruleFstsLen, ruleFarsLen);
116 112
117 Module.setValue(ptr + offset, buffer, 'i8*'); 113 Module.setValue(ptr + offset, buffer, 'i8*');
118 offset += 4; 114 offset += 4;
119 115
120 - Module.setValue(ptr + offset, config.maxNumSentences, 'i32'); 116 + Module.setValue(ptr + offset, config.maxNumSentences || 1, 'i32');
121 offset += 4; 117 offset += 4;
122 118
123 Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*'); 119 Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*');