正在显示
21 个修改的文件
包含
383 行增加
和
351 行删除
| @@ -10,6 +10,13 @@ ls -lh | @@ -10,6 +10,13 @@ ls -lh | ||
| 10 | ls -lh node_modules | 10 | ls -lh node_modules |
| 11 | 11 | ||
| 12 | # offline asr | 12 | # offline asr |
| 13 | +# | ||
| 14 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 15 | +tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 16 | +rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 17 | + | ||
| 18 | +node ./test-offline-sense-voice.js | ||
| 19 | +rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 | ||
| 13 | 20 | ||
| 14 | curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | 21 | curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 |
| 15 | ls -lh | 22 | ls -lh |
| @@ -48,6 +48,11 @@ jobs: | @@ -48,6 +48,11 @@ jobs: | ||
| 48 | with: | 48 | with: |
| 49 | fetch-depth: 0 | 49 | fetch-depth: 0 |
| 50 | 50 | ||
| 51 | + - name: ccache | ||
| 52 | + uses: hendrikmuhs/ccache-action@v1.2 | ||
| 53 | + with: | ||
| 54 | + key: ${{ matrix.os }}-${{ matrix.build_type }}-wasm-nodejs | ||
| 55 | + | ||
| 51 | - name: Install emsdk | 56 | - name: Install emsdk |
| 52 | uses: mymindstorm/setup-emsdk@v14 | 57 | uses: mymindstorm/setup-emsdk@v14 |
| 53 | 58 | ||
| @@ -77,6 +82,10 @@ jobs: | @@ -77,6 +82,10 @@ jobs: | ||
| 77 | env: | 82 | env: |
| 78 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} | 83 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} |
| 79 | run: | | 84 | run: | |
| 85 | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache | ||
| 86 | + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" | ||
| 87 | + cmake --version | ||
| 88 | + | ||
| 80 | ./build-wasm-simd-nodejs.sh | 89 | ./build-wasm-simd-nodejs.sh |
| 81 | cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/ | 90 | cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/ |
| 82 | cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/ | 91 | cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/ |
| @@ -88,6 +88,21 @@ tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | @@ -88,6 +88,21 @@ tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | ||
| 88 | node ./test-offline-paraformer.js | 88 | node ./test-offline-paraformer.js |
| 89 | ``` | 89 | ``` |
| 90 | 90 | ||
| 91 | +## ./test-offline-sense-voice.js | ||
| 92 | + | ||
| 93 | +[./test-offline-sense-voice.js](./test-offline-sense-voice.js) demonstrates | ||
| 94 | +how to decode a file with a non-streaming SenseVoice model. | ||
| 95 | + | ||
| 96 | +You can use the following command to run it: | ||
| 97 | + | ||
| 98 | +```bash | ||
| 99 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 100 | +tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 101 | +rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 102 | + | ||
| 103 | +node ./test-offline-sense-voice.js | ||
| 104 | +``` | ||
| 105 | + | ||
| 91 | ## ./test-offline-transducer.js | 106 | ## ./test-offline-transducer.js |
| 92 | 107 | ||
| 93 | [./test-offline-transducer.js](./test-offline-transducer.js) demonstrates | 108 | [./test-offline-transducer.js](./test-offline-transducer.js) demonstrates |
| @@ -13,27 +13,9 @@ function createOfflineRecognizer() { | @@ -13,27 +13,9 @@ function createOfflineRecognizer() { | ||
| 13 | }; | 13 | }; |
| 14 | 14 | ||
| 15 | let modelConfig = { | 15 | let modelConfig = { |
| 16 | - transducer: { | ||
| 17 | - encoder: '', | ||
| 18 | - decoder: '', | ||
| 19 | - joiner: '', | ||
| 20 | - }, | ||
| 21 | - paraformer: { | ||
| 22 | - model: '', | ||
| 23 | - }, | ||
| 24 | nemoCtc: { | 16 | nemoCtc: { |
| 25 | model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx', | 17 | model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx', |
| 26 | }, | 18 | }, |
| 27 | - whisper: { | ||
| 28 | - encoder: '', | ||
| 29 | - decoder: '', | ||
| 30 | - language: '', | ||
| 31 | - task: '', | ||
| 32 | - tailPaddings: -1, | ||
| 33 | - }, | ||
| 34 | - tdnn: { | ||
| 35 | - model: '', | ||
| 36 | - }, | ||
| 37 | tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt', | 19 | tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt', |
| 38 | numThreads: 1, | 20 | numThreads: 1, |
| 39 | debug: 0, | 21 | debug: 0, |
| @@ -41,19 +23,11 @@ function createOfflineRecognizer() { | @@ -41,19 +23,11 @@ function createOfflineRecognizer() { | ||
| 41 | modelType: 'nemo_ctc', | 23 | modelType: 'nemo_ctc', |
| 42 | }; | 24 | }; |
| 43 | 25 | ||
| 44 | - let lmConfig = { | ||
| 45 | - model: '', | ||
| 46 | - scale: 1.0, | ||
| 47 | - }; | ||
| 48 | - | ||
| 49 | let config = { | 26 | let config = { |
| 50 | featConfig: featConfig, | 27 | featConfig: featConfig, |
| 51 | modelConfig: modelConfig, | 28 | modelConfig: modelConfig, |
| 52 | - lmConfig: lmConfig, | ||
| 53 | decodingMethod: 'greedy_search', | 29 | decodingMethod: 'greedy_search', |
| 54 | maxActivePaths: 4, | 30 | maxActivePaths: 4, |
| 55 | - hotwordsFile: '', | ||
| 56 | - hotwordsScore: 1.5, | ||
| 57 | }; | 31 | }; |
| 58 | 32 | ||
| 59 | return sherpa_onnx.createOfflineRecognizer(config); | 33 | return sherpa_onnx.createOfflineRecognizer(config); |
| @@ -13,27 +13,9 @@ function createOfflineRecognizer() { | @@ -13,27 +13,9 @@ function createOfflineRecognizer() { | ||
| 13 | }; | 13 | }; |
| 14 | 14 | ||
| 15 | let modelConfig = { | 15 | let modelConfig = { |
| 16 | - transducer: { | ||
| 17 | - encoder: '', | ||
| 18 | - decoder: '', | ||
| 19 | - joiner: '', | ||
| 20 | - }, | ||
| 21 | paraformer: { | 16 | paraformer: { |
| 22 | model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx', | 17 | model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx', |
| 23 | }, | 18 | }, |
| 24 | - nemoCtc: { | ||
| 25 | - model: '', | ||
| 26 | - }, | ||
| 27 | - whisper: { | ||
| 28 | - encoder: '', | ||
| 29 | - decoder: '', | ||
| 30 | - language: '', | ||
| 31 | - task: '', | ||
| 32 | - tailPaddings: -1, | ||
| 33 | - }, | ||
| 34 | - tdnn: { | ||
| 35 | - model: '', | ||
| 36 | - }, | ||
| 37 | tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt', | 19 | tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt', |
| 38 | numThreads: 1, | 20 | numThreads: 1, |
| 39 | debug: 0, | 21 | debug: 0, |
| @@ -41,19 +23,11 @@ function createOfflineRecognizer() { | @@ -41,19 +23,11 @@ function createOfflineRecognizer() { | ||
| 41 | modelType: 'paraformer', | 23 | modelType: 'paraformer', |
| 42 | }; | 24 | }; |
| 43 | 25 | ||
| 44 | - let lmConfig = { | ||
| 45 | - model: '', | ||
| 46 | - scale: 1.0, | ||
| 47 | - }; | ||
| 48 | 26 | ||
| 49 | let config = { | 27 | let config = { |
| 50 | featConfig: featConfig, | 28 | featConfig: featConfig, |
| 51 | modelConfig: modelConfig, | 29 | modelConfig: modelConfig, |
| 52 | - lmConfig: lmConfig, | ||
| 53 | decodingMethod: 'greedy_search', | 30 | decodingMethod: 'greedy_search', |
| 54 | - maxActivePaths: 4, | ||
| 55 | - hotwordsFile: '', | ||
| 56 | - hotwordsScore: 1.5, | ||
| 57 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | 31 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst |
| 58 | ruleFsts: './itn_zh_number.fst', | 32 | ruleFsts: './itn_zh_number.fst', |
| 59 | }; | 33 | }; |
| @@ -13,27 +13,9 @@ function createOfflineRecognizer() { | @@ -13,27 +13,9 @@ function createOfflineRecognizer() { | ||
| 13 | }; | 13 | }; |
| 14 | 14 | ||
| 15 | let modelConfig = { | 15 | let modelConfig = { |
| 16 | - transducer: { | ||
| 17 | - encoder: '', | ||
| 18 | - decoder: '', | ||
| 19 | - joiner: '', | ||
| 20 | - }, | ||
| 21 | paraformer: { | 16 | paraformer: { |
| 22 | model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx', | 17 | model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx', |
| 23 | }, | 18 | }, |
| 24 | - nemoCtc: { | ||
| 25 | - model: '', | ||
| 26 | - }, | ||
| 27 | - whisper: { | ||
| 28 | - encoder: '', | ||
| 29 | - decoder: '', | ||
| 30 | - language: '', | ||
| 31 | - task: '', | ||
| 32 | - tailPaddings: -1, | ||
| 33 | - }, | ||
| 34 | - tdnn: { | ||
| 35 | - model: '', | ||
| 36 | - }, | ||
| 37 | tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt', | 19 | tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt', |
| 38 | numThreads: 1, | 20 | numThreads: 1, |
| 39 | debug: 0, | 21 | debug: 0, |
| @@ -41,19 +23,10 @@ function createOfflineRecognizer() { | @@ -41,19 +23,10 @@ function createOfflineRecognizer() { | ||
| 41 | modelType: 'paraformer', | 23 | modelType: 'paraformer', |
| 42 | }; | 24 | }; |
| 43 | 25 | ||
| 44 | - let lmConfig = { | ||
| 45 | - model: '', | ||
| 46 | - scale: 1.0, | ||
| 47 | - }; | ||
| 48 | - | ||
| 49 | let config = { | 26 | let config = { |
| 50 | featConfig: featConfig, | 27 | featConfig: featConfig, |
| 51 | modelConfig: modelConfig, | 28 | modelConfig: modelConfig, |
| 52 | - lmConfig: lmConfig, | ||
| 53 | decodingMethod: 'greedy_search', | 29 | decodingMethod: 'greedy_search', |
| 54 | - maxActivePaths: 4, | ||
| 55 | - hotwordsFile: '', | ||
| 56 | - hotwordsScore: 1.5, | ||
| 57 | }; | 30 | }; |
| 58 | 31 | ||
| 59 | return sherpa_onnx.createOfflineRecognizer(config); | 32 | return sherpa_onnx.createOfflineRecognizer(config); |
nodejs-examples/test-offline-sense-voice.js
0 → 100644
| 1 | +// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | + | ||
| 3 | +const fs = require('fs'); | ||
| 4 | +const {Readable} = require('stream'); | ||
| 5 | +const wav = require('wav'); | ||
| 6 | + | ||
| 7 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 8 | + | ||
| 9 | +function createOfflineRecognizer() { | ||
| 10 | + let featConfig = { | ||
| 11 | + sampleRate: 16000, | ||
| 12 | + featureDim: 80, | ||
| 13 | + }; | ||
| 14 | + | ||
| 15 | + let modelConfig = { | ||
| 16 | + senseVoice: { | ||
| 17 | + model: | ||
| 18 | + './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx', | ||
| 19 | + language: '', | ||
| 20 | + useInverseTextNormalization: 1, | ||
| 21 | + }, | ||
| 22 | + tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt', | ||
| 23 | + numThreads: 1, | ||
| 24 | + debug: 0, | ||
| 25 | + provider: 'cpu', | ||
| 26 | + }; | ||
| 27 | + | ||
| 28 | + let config = { | ||
| 29 | + featConfig: featConfig, | ||
| 30 | + modelConfig: modelConfig, | ||
| 31 | + decodingMethod: 'greedy_search', | ||
| 32 | + }; | ||
| 33 | + | ||
| 34 | + return sherpa_onnx.createOfflineRecognizer(config); | ||
| 35 | +} | ||
| 36 | + | ||
| 37 | + | ||
| 38 | +const recognizer = createOfflineRecognizer(); | ||
| 39 | +const stream = recognizer.createStream(); | ||
| 40 | + | ||
| 41 | +const waveFilename = | ||
| 42 | + './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav'; | ||
| 43 | + | ||
| 44 | +const reader = new wav.Reader(); | ||
| 45 | +const readable = new Readable().wrap(reader); | ||
| 46 | +const buf = []; | ||
| 47 | + | ||
| 48 | +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { | ||
| 49 | + if (sampleRate != recognizer.config.featConfig.sampleRate) { | ||
| 50 | + throw new Error(`Only support sampleRate ${ | ||
| 51 | + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`); | ||
| 52 | + } | ||
| 53 | + | ||
| 54 | + if (audioFormat != 1) { | ||
| 55 | + throw new Error(`Only support PCM format. Given ${audioFormat}`); | ||
| 56 | + } | ||
| 57 | + | ||
| 58 | + if (channels != 1) { | ||
| 59 | + throw new Error(`Only a single channel. Given ${channels}`); | ||
| 60 | + } | ||
| 61 | + | ||
| 62 | + if (bitDepth != 16) { | ||
| 63 | + throw new Error(`Only support 16-bit samples. Given ${bitDepth}`); | ||
| 64 | + } | ||
| 65 | +}); | ||
| 66 | + | ||
| 67 | +fs.createReadStream(waveFilename, {'highWaterMark': 4096}) | ||
| 68 | + .pipe(reader) | ||
| 69 | + .on('finish', function(err) { | ||
| 70 | + // tail padding | ||
| 71 | + const floatSamples = | ||
| 72 | + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5); | ||
| 73 | + | ||
| 74 | + buf.push(floatSamples); | ||
| 75 | + const flattened = | ||
| 76 | + Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); | ||
| 77 | + | ||
| 78 | + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); | ||
| 79 | + recognizer.decode(stream); | ||
| 80 | + const text = recognizer.getResult(stream).text; | ||
| 81 | + console.log(text); | ||
| 82 | + | ||
| 83 | + stream.free(); | ||
| 84 | + recognizer.free(); | ||
| 85 | + }); | ||
| 86 | + | ||
| 87 | +readable.on('readable', function() { | ||
| 88 | + let chunk; | ||
| 89 | + while ((chunk = readable.read()) != null) { | ||
| 90 | + const int16Samples = new Int16Array( | ||
| 91 | + chunk.buffer, chunk.byteOffset, | ||
| 92 | + chunk.length / Int16Array.BYTES_PER_ELEMENT); | ||
| 93 | + | ||
| 94 | + const floatSamples = new Float32Array(int16Samples.length); | ||
| 95 | + for (let i = 0; i < floatSamples.length; i++) { | ||
| 96 | + floatSamples[i] = int16Samples[i] / 32768.0; | ||
| 97 | + } | ||
| 98 | + | ||
| 99 | + buf.push(floatSamples); | ||
| 100 | + } | ||
| 101 | +}); |
| @@ -21,22 +21,6 @@ function createOfflineRecognizer() { | @@ -21,22 +21,6 @@ function createOfflineRecognizer() { | ||
| 21 | joiner: | 21 | joiner: |
| 22 | './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx', | 22 | './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx', |
| 23 | }, | 23 | }, |
| 24 | - paraformer: { | ||
| 25 | - model: '', | ||
| 26 | - }, | ||
| 27 | - nemoCtc: { | ||
| 28 | - model: '', | ||
| 29 | - }, | ||
| 30 | - whisper: { | ||
| 31 | - encoder: '', | ||
| 32 | - decoder: '', | ||
| 33 | - language: '', | ||
| 34 | - task: '', | ||
| 35 | - tailPaddings: -1, | ||
| 36 | - }, | ||
| 37 | - tdnn: { | ||
| 38 | - model: '', | ||
| 39 | - }, | ||
| 40 | tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt', | 24 | tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt', |
| 41 | numThreads: 1, | 25 | numThreads: 1, |
| 42 | debug: 0, | 26 | debug: 0, |
| @@ -44,15 +28,9 @@ function createOfflineRecognizer() { | @@ -44,15 +28,9 @@ function createOfflineRecognizer() { | ||
| 44 | modelType: 'transducer', | 28 | modelType: 'transducer', |
| 45 | }; | 29 | }; |
| 46 | 30 | ||
| 47 | - let lmConfig = { | ||
| 48 | - model: '', | ||
| 49 | - scale: 1.0, | ||
| 50 | - }; | ||
| 51 | - | ||
| 52 | let config = { | 31 | let config = { |
| 53 | featConfig: featConfig, | 32 | featConfig: featConfig, |
| 54 | modelConfig: modelConfig, | 33 | modelConfig: modelConfig, |
| 55 | - lmConfig: lmConfig, | ||
| 56 | decodingMethod: 'greedy_search', | 34 | decodingMethod: 'greedy_search', |
| 57 | maxActivePaths: 4, | 35 | maxActivePaths: 4, |
| 58 | hotwordsFile: '', | 36 | hotwordsFile: '', |
| @@ -5,10 +5,8 @@ const sherpa_onnx = require('sherpa-onnx'); | @@ -5,10 +5,8 @@ const sherpa_onnx = require('sherpa-onnx'); | ||
| 5 | function createOfflineTts() { | 5 | function createOfflineTts() { |
| 6 | let offlineTtsVitsModelConfig = { | 6 | let offlineTtsVitsModelConfig = { |
| 7 | model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx', | 7 | model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx', |
| 8 | - lexicon: '', | ||
| 9 | tokens: './vits-piper-en_US-amy-low/tokens.txt', | 8 | tokens: './vits-piper-en_US-amy-low/tokens.txt', |
| 10 | dataDir: './vits-piper-en_US-amy-low/espeak-ng-data', | 9 | dataDir: './vits-piper-en_US-amy-low/espeak-ng-data', |
| 11 | - dictDir: '', | ||
| 12 | noiseScale: 0.667, | 10 | noiseScale: 0.667, |
| 13 | noiseScaleW: 0.8, | 11 | noiseScaleW: 0.8, |
| 14 | lengthScale: 1.0, | 12 | lengthScale: 1.0, |
| @@ -22,8 +20,6 @@ function createOfflineTts() { | @@ -22,8 +20,6 @@ function createOfflineTts() { | ||
| 22 | 20 | ||
| 23 | let offlineTtsConfig = { | 21 | let offlineTtsConfig = { |
| 24 | offlineTtsModelConfig: offlineTtsModelConfig, | 22 | offlineTtsModelConfig: offlineTtsModelConfig, |
| 25 | - ruleFsts: '', | ||
| 26 | - ruleFars: '', | ||
| 27 | maxNumSentences: 1, | 23 | maxNumSentences: 1, |
| 28 | }; | 24 | }; |
| 29 | 25 |
| @@ -7,8 +7,6 @@ function createOfflineTts() { | @@ -7,8 +7,6 @@ function createOfflineTts() { | ||
| 7 | model: './vits-icefall-zh-aishell3/model.onnx', | 7 | model: './vits-icefall-zh-aishell3/model.onnx', |
| 8 | lexicon: './vits-icefall-zh-aishell3/lexicon.txt', | 8 | lexicon: './vits-icefall-zh-aishell3/lexicon.txt', |
| 9 | tokens: './vits-icefall-zh-aishell3/tokens.txt', | 9 | tokens: './vits-icefall-zh-aishell3/tokens.txt', |
| 10 | - dataDir: '', | ||
| 11 | - dictDir: '', | ||
| 12 | noiseScale: 0.667, | 10 | noiseScale: 0.667, |
| 13 | noiseScaleW: 0.8, | 11 | noiseScaleW: 0.8, |
| 14 | lengthScale: 1.0, | 12 | lengthScale: 1.0, |
| @@ -31,7 +29,6 @@ function createOfflineTts() { | @@ -31,7 +29,6 @@ function createOfflineTts() { | ||
| 31 | return sherpa_onnx.createOfflineTts(offlineTtsConfig); | 29 | return sherpa_onnx.createOfflineTts(offlineTtsConfig); |
| 32 | } | 30 | } |
| 33 | 31 | ||
| 34 | - | ||
| 35 | const tts = createOfflineTts(); | 32 | const tts = createOfflineTts(); |
| 36 | const speakerId = 66; | 33 | const speakerId = 66; |
| 37 | const speed = 1.0; | 34 | const speed = 1.0; |
| @@ -13,17 +13,6 @@ function createOfflineRecognizer() { | @@ -13,17 +13,6 @@ function createOfflineRecognizer() { | ||
| 13 | }; | 13 | }; |
| 14 | 14 | ||
| 15 | let modelConfig = { | 15 | let modelConfig = { |
| 16 | - transducer: { | ||
| 17 | - encoder: '', | ||
| 18 | - decoder: '', | ||
| 19 | - joiner: '', | ||
| 20 | - }, | ||
| 21 | - paraformer: { | ||
| 22 | - model: '', | ||
| 23 | - }, | ||
| 24 | - nemoCtc: { | ||
| 25 | - model: '', | ||
| 26 | - }, | ||
| 27 | whisper: { | 16 | whisper: { |
| 28 | encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx', | 17 | encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx', |
| 29 | decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx', | 18 | decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx', |
| @@ -31,9 +20,6 @@ function createOfflineRecognizer() { | @@ -31,9 +20,6 @@ function createOfflineRecognizer() { | ||
| 31 | task: 'transcribe', | 20 | task: 'transcribe', |
| 32 | tailPaddings: -1, | 21 | tailPaddings: -1, |
| 33 | }, | 22 | }, |
| 34 | - tdnn: { | ||
| 35 | - model: '', | ||
| 36 | - }, | ||
| 37 | tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt', | 23 | tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt', |
| 38 | numThreads: 1, | 24 | numThreads: 1, |
| 39 | debug: 0, | 25 | debug: 0, |
| @@ -41,19 +27,10 @@ function createOfflineRecognizer() { | @@ -41,19 +27,10 @@ function createOfflineRecognizer() { | ||
| 41 | modelType: 'whisper', | 27 | modelType: 'whisper', |
| 42 | }; | 28 | }; |
| 43 | 29 | ||
| 44 | - let lmConfig = { | ||
| 45 | - model: '', | ||
| 46 | - scale: 1.0, | ||
| 47 | - }; | ||
| 48 | - | ||
| 49 | let config = { | 30 | let config = { |
| 50 | featConfig: featConfig, | 31 | featConfig: featConfig, |
| 51 | modelConfig: modelConfig, | 32 | modelConfig: modelConfig, |
| 52 | - lmConfig: lmConfig, | ||
| 53 | decodingMethod: 'greedy_search', | 33 | decodingMethod: 'greedy_search', |
| 54 | - maxActivePaths: 4, | ||
| 55 | - hotwordsFile: '', | ||
| 56 | - hotwordsScore: 1.5, | ||
| 57 | }; | 34 | }; |
| 58 | 35 | ||
| 59 | return sherpa_onnx.createOfflineRecognizer(config); | 36 | return sherpa_onnx.createOfflineRecognizer(config); |
| @@ -6,12 +6,6 @@ console.log(portAudio.getDevices()); | @@ -6,12 +6,6 @@ console.log(portAudio.getDevices()); | ||
| 6 | const sherpa_onnx = require('sherpa-onnx'); | 6 | const sherpa_onnx = require('sherpa-onnx'); |
| 7 | 7 | ||
| 8 | function createOnlineRecognizer() { | 8 | function createOnlineRecognizer() { |
| 9 | - let onlineTransducerModelConfig = { | ||
| 10 | - encoder: '', | ||
| 11 | - decoder: '', | ||
| 12 | - joiner: '', | ||
| 13 | - }; | ||
| 14 | - | ||
| 15 | let onlineParaformerModelConfig = { | 9 | let onlineParaformerModelConfig = { |
| 16 | encoder: | 10 | encoder: |
| 17 | './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx', | 11 | './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx', |
| @@ -19,14 +13,8 @@ function createOnlineRecognizer() { | @@ -19,14 +13,8 @@ function createOnlineRecognizer() { | ||
| 19 | './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx', | 13 | './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx', |
| 20 | }; | 14 | }; |
| 21 | 15 | ||
| 22 | - let onlineZipformer2CtcModelConfig = { | ||
| 23 | - model: '', | ||
| 24 | - }; | ||
| 25 | - | ||
| 26 | let onlineModelConfig = { | 16 | let onlineModelConfig = { |
| 27 | - transducer: onlineTransducerModelConfig, | ||
| 28 | paraformer: onlineParaformerModelConfig, | 17 | paraformer: onlineParaformerModelConfig, |
| 29 | - zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 30 | tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt', | 18 | tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt', |
| 31 | numThreads: 1, | 19 | numThreads: 1, |
| 32 | provider: 'cpu', | 20 | provider: 'cpu', |
| @@ -48,12 +36,6 @@ function createOnlineRecognizer() { | @@ -48,12 +36,6 @@ function createOnlineRecognizer() { | ||
| 48 | rule1MinTrailingSilence: 2.4, | 36 | rule1MinTrailingSilence: 2.4, |
| 49 | rule2MinTrailingSilence: 1.2, | 37 | rule2MinTrailingSilence: 1.2, |
| 50 | rule3MinUtteranceLength: 20, | 38 | rule3MinUtteranceLength: 20, |
| 51 | - hotwordsFile: '', | ||
| 52 | - hotwordsScore: 1.5, | ||
| 53 | - ctcFstDecoderConfig: { | ||
| 54 | - graph: '', | ||
| 55 | - maxActive: 3000, | ||
| 56 | - } | ||
| 57 | }; | 39 | }; |
| 58 | 40 | ||
| 59 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | 41 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); |
| @@ -7,12 +7,6 @@ const wav = require('wav'); | @@ -7,12 +7,6 @@ const wav = require('wav'); | ||
| 7 | const sherpa_onnx = require('sherpa-onnx'); | 7 | const sherpa_onnx = require('sherpa-onnx'); |
| 8 | 8 | ||
| 9 | function createOnlineRecognizer() { | 9 | function createOnlineRecognizer() { |
| 10 | - let onlineTransducerModelConfig = { | ||
| 11 | - encoder: '', | ||
| 12 | - decoder: '', | ||
| 13 | - joiner: '', | ||
| 14 | - }; | ||
| 15 | - | ||
| 16 | let onlineParaformerModelConfig = { | 10 | let onlineParaformerModelConfig = { |
| 17 | encoder: | 11 | encoder: |
| 18 | './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx', | 12 | './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx', |
| @@ -20,14 +14,8 @@ function createOnlineRecognizer() { | @@ -20,14 +14,8 @@ function createOnlineRecognizer() { | ||
| 20 | './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx', | 14 | './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx', |
| 21 | }; | 15 | }; |
| 22 | 16 | ||
| 23 | - let onlineZipformer2CtcModelConfig = { | ||
| 24 | - model: '', | ||
| 25 | - }; | ||
| 26 | - | ||
| 27 | let onlineModelConfig = { | 17 | let onlineModelConfig = { |
| 28 | - transducer: onlineTransducerModelConfig, | ||
| 29 | paraformer: onlineParaformerModelConfig, | 18 | paraformer: onlineParaformerModelConfig, |
| 30 | - zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 31 | tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt', | 19 | tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt', |
| 32 | numThreads: 1, | 20 | numThreads: 1, |
| 33 | provider: 'cpu', | 21 | provider: 'cpu', |
| @@ -49,12 +37,6 @@ function createOnlineRecognizer() { | @@ -49,12 +37,6 @@ function createOnlineRecognizer() { | ||
| 49 | rule1MinTrailingSilence: 2.4, | 37 | rule1MinTrailingSilence: 2.4, |
| 50 | rule2MinTrailingSilence: 1.2, | 38 | rule2MinTrailingSilence: 1.2, |
| 51 | rule3MinUtteranceLength: 20, | 39 | rule3MinUtteranceLength: 20, |
| 52 | - hotwordsFile: '', | ||
| 53 | - hotwordsScore: 1.5, | ||
| 54 | - ctcFstDecoderConfig: { | ||
| 55 | - graph: '', | ||
| 56 | - maxActive: 3000, | ||
| 57 | - } | ||
| 58 | }; | 40 | }; |
| 59 | 41 | ||
| 60 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | 42 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); |
| @@ -16,19 +16,8 @@ function createOnlineRecognizer() { | @@ -16,19 +16,8 @@ function createOnlineRecognizer() { | ||
| 16 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', | 16 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', |
| 17 | }; | 17 | }; |
| 18 | 18 | ||
| 19 | - let onlineParaformerModelConfig = { | ||
| 20 | - encoder: '', | ||
| 21 | - decoder: '', | ||
| 22 | - }; | ||
| 23 | - | ||
| 24 | - let onlineZipformer2CtcModelConfig = { | ||
| 25 | - model: '', | ||
| 26 | - }; | ||
| 27 | - | ||
| 28 | let onlineModelConfig = { | 19 | let onlineModelConfig = { |
| 29 | transducer: onlineTransducerModelConfig, | 20 | transducer: onlineTransducerModelConfig, |
| 30 | - paraformer: onlineParaformerModelConfig, | ||
| 31 | - zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 32 | tokens: | 21 | tokens: |
| 33 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', | 22 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', |
| 34 | numThreads: 1, | 23 | numThreads: 1, |
| @@ -51,12 +40,6 @@ function createOnlineRecognizer() { | @@ -51,12 +40,6 @@ function createOnlineRecognizer() { | ||
| 51 | rule1MinTrailingSilence: 2.4, | 40 | rule1MinTrailingSilence: 2.4, |
| 52 | rule2MinTrailingSilence: 1.2, | 41 | rule2MinTrailingSilence: 1.2, |
| 53 | rule3MinUtteranceLength: 20, | 42 | rule3MinUtteranceLength: 20, |
| 54 | - hotwordsFile: '', | ||
| 55 | - hotwordsScore: 1.5, | ||
| 56 | - ctcFstDecoderConfig: { | ||
| 57 | - graph: '', | ||
| 58 | - maxActive: 3000, | ||
| 59 | - }, | ||
| 60 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | 43 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst |
| 61 | ruleFsts: './itn_zh_number.fst', | 44 | ruleFsts: './itn_zh_number.fst', |
| 62 | }; | 45 | }; |
| @@ -15,19 +15,8 @@ function createOnlineRecognizer() { | @@ -15,19 +15,8 @@ function createOnlineRecognizer() { | ||
| 15 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', | 15 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', |
| 16 | }; | 16 | }; |
| 17 | 17 | ||
| 18 | - let onlineParaformerModelConfig = { | ||
| 19 | - encoder: '', | ||
| 20 | - decoder: '', | ||
| 21 | - }; | ||
| 22 | - | ||
| 23 | - let onlineZipformer2CtcModelConfig = { | ||
| 24 | - model: '', | ||
| 25 | - }; | ||
| 26 | - | ||
| 27 | let onlineModelConfig = { | 18 | let onlineModelConfig = { |
| 28 | transducer: onlineTransducerModelConfig, | 19 | transducer: onlineTransducerModelConfig, |
| 29 | - paraformer: onlineParaformerModelConfig, | ||
| 30 | - zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 31 | tokens: | 20 | tokens: |
| 32 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', | 21 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', |
| 33 | numThreads: 1, | 22 | numThreads: 1, |
| @@ -50,12 +39,6 @@ function createOnlineRecognizer() { | @@ -50,12 +39,6 @@ function createOnlineRecognizer() { | ||
| 50 | rule1MinTrailingSilence: 2.4, | 39 | rule1MinTrailingSilence: 2.4, |
| 51 | rule2MinTrailingSilence: 1.2, | 40 | rule2MinTrailingSilence: 1.2, |
| 52 | rule3MinUtteranceLength: 20, | 41 | rule3MinUtteranceLength: 20, |
| 53 | - hotwordsFile: '', | ||
| 54 | - hotwordsScore: 1.5, | ||
| 55 | - ctcFstDecoderConfig: { | ||
| 56 | - graph: '', | ||
| 57 | - maxActive: 3000, | ||
| 58 | - } | ||
| 59 | }; | 42 | }; |
| 60 | 43 | ||
| 61 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | 44 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); |
| @@ -16,19 +16,8 @@ function createOnlineRecognizer() { | @@ -16,19 +16,8 @@ function createOnlineRecognizer() { | ||
| 16 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', | 16 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', |
| 17 | }; | 17 | }; |
| 18 | 18 | ||
| 19 | - let onlineParaformerModelConfig = { | ||
| 20 | - encoder: '', | ||
| 21 | - decoder: '', | ||
| 22 | - }; | ||
| 23 | - | ||
| 24 | - let onlineZipformer2CtcModelConfig = { | ||
| 25 | - model: '', | ||
| 26 | - }; | ||
| 27 | - | ||
| 28 | let onlineModelConfig = { | 19 | let onlineModelConfig = { |
| 29 | transducer: onlineTransducerModelConfig, | 20 | transducer: onlineTransducerModelConfig, |
| 30 | - paraformer: onlineParaformerModelConfig, | ||
| 31 | - zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 32 | tokens: | 21 | tokens: |
| 33 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', | 22 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', |
| 34 | numThreads: 1, | 23 | numThreads: 1, |
| @@ -51,12 +40,6 @@ function createOnlineRecognizer() { | @@ -51,12 +40,6 @@ function createOnlineRecognizer() { | ||
| 51 | rule1MinTrailingSilence: 2.4, | 40 | rule1MinTrailingSilence: 2.4, |
| 52 | rule2MinTrailingSilence: 1.2, | 41 | rule2MinTrailingSilence: 1.2, |
| 53 | rule3MinUtteranceLength: 20, | 42 | rule3MinUtteranceLength: 20, |
| 54 | - hotwordsFile: '', | ||
| 55 | - hotwordsScore: 1.5, | ||
| 56 | - ctcFstDecoderConfig: { | ||
| 57 | - graph: '', | ||
| 58 | - maxActive: 3000, | ||
| 59 | - } | ||
| 60 | }; | 43 | }; |
| 61 | 44 | ||
| 62 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | 45 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); |
| @@ -7,25 +7,12 @@ const wav = require('wav'); | @@ -7,25 +7,12 @@ const wav = require('wav'); | ||
| 7 | const sherpa_onnx = require('sherpa-onnx'); | 7 | const sherpa_onnx = require('sherpa-onnx'); |
| 8 | 8 | ||
| 9 | function createOnlineRecognizer() { | 9 | function createOnlineRecognizer() { |
| 10 | - let onlineTransducerModelConfig = { | ||
| 11 | - encoder: '', | ||
| 12 | - decoder: '', | ||
| 13 | - joiner: '', | ||
| 14 | - }; | ||
| 15 | - | ||
| 16 | - let onlineParaformerModelConfig = { | ||
| 17 | - encoder: '', | ||
| 18 | - decoder: '', | ||
| 19 | - }; | ||
| 20 | - | ||
| 21 | let onlineZipformer2CtcModelConfig = { | 10 | let onlineZipformer2CtcModelConfig = { |
| 22 | model: | 11 | model: |
| 23 | './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx', | 12 | './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx', |
| 24 | }; | 13 | }; |
| 25 | 14 | ||
| 26 | let onlineModelConfig = { | 15 | let onlineModelConfig = { |
| 27 | - transducer: onlineTransducerModelConfig, | ||
| 28 | - paraformer: onlineParaformerModelConfig, | ||
| 29 | zipformer2Ctc: onlineZipformer2CtcModelConfig, | 16 | zipformer2Ctc: onlineZipformer2CtcModelConfig, |
| 30 | tokens: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt', | 17 | tokens: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt', |
| 31 | numThreads: 1, | 18 | numThreads: 1, |
| @@ -48,8 +35,6 @@ function createOnlineRecognizer() { | @@ -48,8 +35,6 @@ function createOnlineRecognizer() { | ||
| 48 | rule1MinTrailingSilence: 2.4, | 35 | rule1MinTrailingSilence: 2.4, |
| 49 | rule2MinTrailingSilence: 1.2, | 36 | rule2MinTrailingSilence: 1.2, |
| 50 | rule3MinUtteranceLength: 20, | 37 | rule3MinUtteranceLength: 20, |
| 51 | - hotwordsFile: '', | ||
| 52 | - hotwordsScore: 1.5, | ||
| 53 | ctcFstDecoderConfig: { | 38 | ctcFstDecoderConfig: { |
| 54 | graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst', | 39 | graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst', |
| 55 | maxActive: 3000, | 40 | maxActive: 3000, |
| @@ -7,32 +7,18 @@ const wav = require('wav'); | @@ -7,32 +7,18 @@ const wav = require('wav'); | ||
| 7 | const sherpa_onnx = require('sherpa-onnx'); | 7 | const sherpa_onnx = require('sherpa-onnx'); |
| 8 | 8 | ||
| 9 | function createOnlineRecognizer() { | 9 | function createOnlineRecognizer() { |
| 10 | - let onlineTransducerModelConfig = { | ||
| 11 | - encoder: '', | ||
| 12 | - decoder: '', | ||
| 13 | - joiner: '', | ||
| 14 | - }; | ||
| 15 | - | ||
| 16 | - let onlineParaformerModelConfig = { | ||
| 17 | - encoder: '', | ||
| 18 | - decoder: '', | ||
| 19 | - }; | ||
| 20 | - | ||
| 21 | let onlineZipformer2CtcModelConfig = { | 10 | let onlineZipformer2CtcModelConfig = { |
| 22 | model: | 11 | model: |
| 23 | './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx', | 12 | './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx', |
| 24 | }; | 13 | }; |
| 25 | 14 | ||
| 26 | let onlineModelConfig = { | 15 | let onlineModelConfig = { |
| 27 | - transducer: onlineTransducerModelConfig, | ||
| 28 | - paraformer: onlineParaformerModelConfig, | ||
| 29 | zipformer2Ctc: onlineZipformer2CtcModelConfig, | 16 | zipformer2Ctc: onlineZipformer2CtcModelConfig, |
| 30 | tokens: | 17 | tokens: |
| 31 | './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt', | 18 | './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt', |
| 32 | numThreads: 1, | 19 | numThreads: 1, |
| 33 | provider: 'cpu', | 20 | provider: 'cpu', |
| 34 | debug: 1, | 21 | debug: 1, |
| 35 | - modelType: '', | ||
| 36 | }; | 22 | }; |
| 37 | 23 | ||
| 38 | let featureConfig = { | 24 | let featureConfig = { |
| @@ -49,12 +35,6 @@ function createOnlineRecognizer() { | @@ -49,12 +35,6 @@ function createOnlineRecognizer() { | ||
| 49 | rule1MinTrailingSilence: 2.4, | 35 | rule1MinTrailingSilence: 2.4, |
| 50 | rule2MinTrailingSilence: 1.2, | 36 | rule2MinTrailingSilence: 1.2, |
| 51 | rule3MinUtteranceLength: 20, | 37 | rule3MinUtteranceLength: 20, |
| 52 | - hotwordsFile: '', | ||
| 53 | - hotwordsScore: 1.5, | ||
| 54 | - ctcFstDecoderConfig: { | ||
| 55 | - graph: '', | ||
| 56 | - maxActive: 3000, | ||
| 57 | - } | ||
| 58 | }; | 38 | }; |
| 59 | 39 | ||
| 60 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | 40 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); |
| @@ -39,6 +39,10 @@ function freeConfig(config, Module) { | @@ -39,6 +39,10 @@ function freeConfig(config, Module) { | ||
| 39 | freeConfig(config.tdnn, Module) | 39 | freeConfig(config.tdnn, Module) |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | + if ('senseVoice' in config) { | ||
| 43 | + freeConfig(config.senseVoice, Module) | ||
| 44 | + } | ||
| 45 | + | ||
| 42 | if ('lm' in config) { | 46 | if ('lm' in config) { |
| 43 | freeConfig(config.lm, Module) | 47 | freeConfig(config.lm, Module) |
| 44 | } | 48 | } |
| @@ -52,9 +56,9 @@ function freeConfig(config, Module) { | @@ -52,9 +56,9 @@ function freeConfig(config, Module) { | ||
| 52 | 56 | ||
| 53 | // The user should free the returned pointers | 57 | // The user should free the returned pointers |
| 54 | function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { | 58 | function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { |
| 55 | - const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1; | ||
| 56 | - const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1; | ||
| 57 | - const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1; | 59 | + const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; |
| 60 | + const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1; | ||
| 61 | + const joinerLen = Module.lengthBytesUTF8(config.joiner || '') + 1; | ||
| 58 | 62 | ||
| 59 | const n = encoderLen + decoderLen + joinerLen; | 63 | const n = encoderLen + decoderLen + joinerLen; |
| 60 | 64 | ||
| @@ -64,13 +68,13 @@ function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { | @@ -64,13 +68,13 @@ function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { | ||
| 64 | const ptr = Module._malloc(len); | 68 | const ptr = Module._malloc(len); |
| 65 | 69 | ||
| 66 | let offset = 0; | 70 | let offset = 0; |
| 67 | - Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); | 71 | + Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen); |
| 68 | offset += encoderLen; | 72 | offset += encoderLen; |
| 69 | 73 | ||
| 70 | - Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); | 74 | + Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen); |
| 71 | offset += decoderLen; | 75 | offset += decoderLen; |
| 72 | 76 | ||
| 73 | - Module.stringToUTF8(config.joiner, buffer + offset, joinerLen); | 77 | + Module.stringToUTF8(config.joiner || '', buffer + offset, joinerLen); |
| 74 | 78 | ||
| 75 | offset = 0; | 79 | offset = 0; |
| 76 | Module.setValue(ptr, buffer + offset, 'i8*'); | 80 | Module.setValue(ptr, buffer + offset, 'i8*'); |
| @@ -87,8 +91,8 @@ function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { | @@ -87,8 +91,8 @@ function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { | ||
| 87 | } | 91 | } |
| 88 | 92 | ||
| 89 | function initSherpaOnnxOnlineParaformerModelConfig(config, Module) { | 93 | function initSherpaOnnxOnlineParaformerModelConfig(config, Module) { |
| 90 | - const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1; | ||
| 91 | - const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1; | 94 | + const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; |
| 95 | + const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1; | ||
| 92 | 96 | ||
| 93 | const n = encoderLen + decoderLen; | 97 | const n = encoderLen + decoderLen; |
| 94 | const buffer = Module._malloc(n); | 98 | const buffer = Module._malloc(n); |
| @@ -97,10 +101,10 @@ function initSherpaOnnxOnlineParaformerModelConfig(config, Module) { | @@ -97,10 +101,10 @@ function initSherpaOnnxOnlineParaformerModelConfig(config, Module) { | ||
| 97 | const ptr = Module._malloc(len); | 101 | const ptr = Module._malloc(len); |
| 98 | 102 | ||
| 99 | let offset = 0; | 103 | let offset = 0; |
| 100 | - Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); | 104 | + Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen); |
| 101 | offset += encoderLen; | 105 | offset += encoderLen; |
| 102 | 106 | ||
| 103 | - Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); | 107 | + Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen); |
| 104 | 108 | ||
| 105 | offset = 0; | 109 | offset = 0; |
| 106 | Module.setValue(ptr, buffer + offset, 'i8*'); | 110 | Module.setValue(ptr, buffer + offset, 'i8*'); |
| @@ -114,13 +118,13 @@ function initSherpaOnnxOnlineParaformerModelConfig(config, Module) { | @@ -114,13 +118,13 @@ function initSherpaOnnxOnlineParaformerModelConfig(config, Module) { | ||
| 114 | } | 118 | } |
| 115 | 119 | ||
| 116 | function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) { | 120 | function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) { |
| 117 | - const n = Module.lengthBytesUTF8(config.model) + 1; | 121 | + const n = Module.lengthBytesUTF8(config.model || '') + 1; |
| 118 | const buffer = Module._malloc(n); | 122 | const buffer = Module._malloc(n); |
| 119 | 123 | ||
| 120 | const len = 1 * 4; // 1 pointer | 124 | const len = 1 * 4; // 1 pointer |
| 121 | const ptr = Module._malloc(len); | 125 | const ptr = Module._malloc(len); |
| 122 | 126 | ||
| 123 | - Module.stringToUTF8(config.model, buffer, n); | 127 | + Module.stringToUTF8(config.model || '', buffer, n); |
| 124 | 128 | ||
| 125 | Module.setValue(ptr, buffer, 'i8*'); | 129 | Module.setValue(ptr, buffer, 'i8*'); |
| 126 | 130 | ||
| @@ -130,10 +134,33 @@ function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) { | @@ -130,10 +134,33 @@ function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) { | ||
| 130 | } | 134 | } |
| 131 | 135 | ||
| 132 | function initSherpaOnnxOnlineModelConfig(config, Module) { | 136 | function initSherpaOnnxOnlineModelConfig(config, Module) { |
| 137 | + if (!('transducer' in config)) { | ||
| 138 | + config.transducer = { | ||
| 139 | + encoder: '', | ||
| 140 | + decoder: '', | ||
| 141 | + joiner: '', | ||
| 142 | + }; | ||
| 143 | + } | ||
| 144 | + | ||
| 145 | + if (!('paraformer' in config)) { | ||
| 146 | + config.paraformer = { | ||
| 147 | + encoder: '', | ||
| 148 | + decoder: '', | ||
| 149 | + }; | ||
| 150 | + } | ||
| 151 | + | ||
| 152 | + if (!('zipformer2Ctc' in config)) { | ||
| 153 | + config.zipformer2Ctc = { | ||
| 154 | + model: '', | ||
| 155 | + }; | ||
| 156 | + } | ||
| 157 | + | ||
| 133 | const transducer = | 158 | const transducer = |
| 134 | initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module); | 159 | initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module); |
| 160 | + | ||
| 135 | const paraformer = | 161 | const paraformer = |
| 136 | initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module); | 162 | initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module); |
| 163 | + | ||
| 137 | const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig( | 164 | const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig( |
| 138 | config.zipformer2Ctc, Module); | 165 | config.zipformer2Ctc, Module); |
| 139 | 166 | ||
| @@ -150,9 +177,9 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -150,9 +177,9 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 150 | Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset); | 177 | Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset); |
| 151 | offset += ctc.len; | 178 | offset += ctc.len; |
| 152 | 179 | ||
| 153 | - const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; | ||
| 154 | - const providerLen = Module.lengthBytesUTF8(config.provider) + 1; | ||
| 155 | - const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1; | 180 | + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; |
| 181 | + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; | ||
| 182 | + const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1; | ||
| 156 | const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1; | 183 | const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1; |
| 157 | const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1; | 184 | const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1; |
| 158 | 185 | ||
| @@ -161,13 +188,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -161,13 +188,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 161 | const buffer = Module._malloc(bufferLen); | 188 | const buffer = Module._malloc(bufferLen); |
| 162 | 189 | ||
| 163 | offset = 0; | 190 | offset = 0; |
| 164 | - Module.stringToUTF8(config.tokens, buffer, tokensLen); | 191 | + Module.stringToUTF8(config.tokens || '', buffer, tokensLen); |
| 165 | offset += tokensLen; | 192 | offset += tokensLen; |
| 166 | 193 | ||
| 167 | - Module.stringToUTF8(config.provider, buffer + offset, providerLen); | 194 | + Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen); |
| 168 | offset += providerLen; | 195 | offset += providerLen; |
| 169 | 196 | ||
| 170 | - Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen); | 197 | + Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen); |
| 171 | offset += modelTypeLen; | 198 | offset += modelTypeLen; |
| 172 | 199 | ||
| 173 | Module.stringToUTF8( | 200 | Module.stringToUTF8( |
| @@ -181,13 +208,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -181,13 +208,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 181 | Module.setValue(ptr + offset, buffer, 'i8*'); // tokens | 208 | Module.setValue(ptr + offset, buffer, 'i8*'); // tokens |
| 182 | offset += 4; | 209 | offset += 4; |
| 183 | 210 | ||
| 184 | - Module.setValue(ptr + offset, config.numThreads, 'i32'); | 211 | + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); |
| 185 | offset += 4; | 212 | offset += 4; |
| 186 | 213 | ||
| 187 | Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider | 214 | Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider |
| 188 | offset += 4; | 215 | offset += 4; |
| 189 | 216 | ||
| 190 | - Module.setValue(ptr + offset, config.debug, 'i32'); | 217 | + Module.setValue(ptr + offset, config.debug || 0, 'i32'); |
| 191 | offset += 4; | 218 | offset += 4; |
| 192 | 219 | ||
| 193 | Module.setValue( | 220 | Module.setValue( |
| @@ -215,8 +242,8 @@ function initSherpaOnnxFeatureConfig(config, Module) { | @@ -215,8 +242,8 @@ function initSherpaOnnxFeatureConfig(config, Module) { | ||
| 215 | const len = 2 * 4; // 2 int32 fields | 242 | const len = 2 * 4; // 2 int32 fields |
| 216 | const ptr = Module._malloc(len); | 243 | const ptr = Module._malloc(len); |
| 217 | 244 | ||
| 218 | - Module.setValue(ptr, config.sampleRate, 'i32'); | ||
| 219 | - Module.setValue(ptr + 4, config.featureDim, 'i32'); | 245 | + Module.setValue(ptr, config.sampleRate || 16000, 'i32'); |
| 246 | + Module.setValue(ptr + 4, config.featureDim || 80, 'i32'); | ||
| 220 | return {ptr: ptr, len: len}; | 247 | return {ptr: ptr, len: len}; |
| 221 | } | 248 | } |
| 222 | 249 | ||
| @@ -224,16 +251,30 @@ function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) { | @@ -224,16 +251,30 @@ function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) { | ||
| 224 | const len = 2 * 4; | 251 | const len = 2 * 4; |
| 225 | const ptr = Module._malloc(len); | 252 | const ptr = Module._malloc(len); |
| 226 | 253 | ||
| 227 | - const graphLen = Module.lengthBytesUTF8(config.graph) + 1; | 254 | + const graphLen = Module.lengthBytesUTF8(config.graph || '') + 1; |
| 228 | const buffer = Module._malloc(graphLen); | 255 | const buffer = Module._malloc(graphLen); |
| 229 | Module.stringToUTF8(config.graph, buffer, graphLen); | 256 | Module.stringToUTF8(config.graph, buffer, graphLen); |
| 230 | 257 | ||
| 231 | Module.setValue(ptr, buffer, 'i8*'); | 258 | Module.setValue(ptr, buffer, 'i8*'); |
| 232 | - Module.setValue(ptr + 4, config.maxActive, 'i32'); | 259 | + Module.setValue(ptr + 4, config.maxActive || 3000, 'i32'); |
| 233 | return {ptr: ptr, len: len, buffer: buffer}; | 260 | return {ptr: ptr, len: len, buffer: buffer}; |
| 234 | } | 261 | } |
| 235 | 262 | ||
| 236 | function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | 263 | function initSherpaOnnxOnlineRecognizerConfig(config, Module) { |
| 264 | + if (!('featConfig' in config)) { | ||
| 265 | + config.featConfig = { | ||
| 266 | + sampleRate: 16000, | ||
| 267 | + featureDim: 80, | ||
| 268 | + }; | ||
| 269 | + } | ||
| 270 | + | ||
| 271 | + if (!('ctcFstDecoderConfig' in config)) { | ||
| 272 | + config.ctcFstDecoderConfig = { | ||
| 273 | + graph: '', | ||
| 274 | + maxActive: 3000, | ||
| 275 | + }; | ||
| 276 | + } | ||
| 277 | + | ||
| 237 | const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module); | 278 | const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module); |
| 238 | const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module); | 279 | const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module); |
| 239 | const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig( | 280 | const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig( |
| @@ -249,8 +290,9 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | @@ -249,8 +290,9 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | ||
| 249 | Module._CopyHeap(model.ptr, model.len, ptr + offset); | 290 | Module._CopyHeap(model.ptr, model.len, ptr + offset); |
| 250 | offset += model.len; | 291 | offset += model.len; |
| 251 | 292 | ||
| 252 | - const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1; | ||
| 253 | - const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1; | 293 | + const decodingMethodLen = |
| 294 | + Module.lengthBytesUTF8(config.decodingMethod || 'greedy_search') + 1; | ||
| 295 | + const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile || '') + 1; | ||
| 254 | const ruleFstsFileLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; | 296 | const ruleFstsFileLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; |
| 255 | const ruleFarsFileLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; | 297 | const ruleFarsFileLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; |
| 256 | const bufferLen = | 298 | const bufferLen = |
| @@ -258,10 +300,12 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | @@ -258,10 +300,12 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | ||
| 258 | const buffer = Module._malloc(bufferLen); | 300 | const buffer = Module._malloc(bufferLen); |
| 259 | 301 | ||
| 260 | offset = 0; | 302 | offset = 0; |
| 261 | - Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen); | 303 | + Module.stringToUTF8( |
| 304 | + config.decodingMethod || 'greedy_search', buffer, decodingMethodLen); | ||
| 262 | offset += decodingMethodLen; | 305 | offset += decodingMethodLen; |
| 263 | 306 | ||
| 264 | - Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); | 307 | + Module.stringToUTF8( |
| 308 | + config.hotwordsFile || '', buffer + offset, hotwordsFileLen); | ||
| 265 | offset += hotwordsFileLen; | 309 | offset += hotwordsFileLen; |
| 266 | 310 | ||
| 267 | Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsFileLen); | 311 | Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsFileLen); |
| @@ -274,25 +318,25 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | @@ -274,25 +318,25 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | ||
| 274 | Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method | 318 | Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method |
| 275 | offset += 4; | 319 | offset += 4; |
| 276 | 320 | ||
| 277 | - Module.setValue(ptr + offset, config.maxActivePaths, 'i32'); | 321 | + Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32'); |
| 278 | offset += 4; | 322 | offset += 4; |
| 279 | 323 | ||
| 280 | - Module.setValue(ptr + offset, config.enableEndpoint, 'i32'); | 324 | + Module.setValue(ptr + offset, config.enableEndpoint || 0, 'i32'); |
| 281 | offset += 4; | 325 | offset += 4; |
| 282 | 326 | ||
| 283 | - Module.setValue(ptr + offset, config.rule1MinTrailingSilence, 'float'); | 327 | + Module.setValue(ptr + offset, config.rule1MinTrailingSilence || 2.4, 'float'); |
| 284 | offset += 4; | 328 | offset += 4; |
| 285 | 329 | ||
| 286 | - Module.setValue(ptr + offset, config.rule2MinTrailingSilence, 'float'); | 330 | + Module.setValue(ptr + offset, config.rule2MinTrailingSilence || 1.2, 'float'); |
| 287 | offset += 4; | 331 | offset += 4; |
| 288 | 332 | ||
| 289 | - Module.setValue(ptr + offset, config.rule3MinUtteranceLength, 'float'); | 333 | + Module.setValue(ptr + offset, config.rule3MinUtteranceLength || 20, 'float'); |
| 290 | offset += 4; | 334 | offset += 4; |
| 291 | 335 | ||
| 292 | Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*'); | 336 | Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*'); |
| 293 | offset += 4; | 337 | offset += 4; |
| 294 | 338 | ||
| 295 | - Module.setValue(ptr + offset, config.hotwordsScore, 'float'); | 339 | + Module.setValue(ptr + offset, config.hotwordsScore || 1.5, 'float'); |
| 296 | offset += 4; | 340 | offset += 4; |
| 297 | 341 | ||
| 298 | Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset); | 342 | Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset); |
| @@ -313,7 +357,6 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | @@ -313,7 +357,6 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | ||
| 313 | } | 357 | } |
| 314 | } | 358 | } |
| 315 | 359 | ||
| 316 | - | ||
| 317 | function createOnlineRecognizer(Module, myConfig) { | 360 | function createOnlineRecognizer(Module, myConfig) { |
| 318 | const onlineTransducerModelConfig = { | 361 | const onlineTransducerModelConfig = { |
| 319 | encoder: '', | 362 | encoder: '', |
| @@ -395,9 +438,9 @@ function createOnlineRecognizer(Module, myConfig) { | @@ -395,9 +438,9 @@ function createOnlineRecognizer(Module, myConfig) { | ||
| 395 | } | 438 | } |
| 396 | 439 | ||
| 397 | function initSherpaOnnxOfflineTransducerModelConfig(config, Module) { | 440 | function initSherpaOnnxOfflineTransducerModelConfig(config, Module) { |
| 398 | - const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1; | ||
| 399 | - const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1; | ||
| 400 | - const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1; | 441 | + const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; |
| 442 | + const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1; | ||
| 443 | + const joinerLen = Module.lengthBytesUTF8(config.joiner || '') + 1; | ||
| 401 | 444 | ||
| 402 | const n = encoderLen + decoderLen + joinerLen; | 445 | const n = encoderLen + decoderLen + joinerLen; |
| 403 | 446 | ||
| @@ -407,13 +450,13 @@ function initSherpaOnnxOfflineTransducerModelConfig(config, Module) { | @@ -407,13 +450,13 @@ function initSherpaOnnxOfflineTransducerModelConfig(config, Module) { | ||
| 407 | const ptr = Module._malloc(len); | 450 | const ptr = Module._malloc(len); |
| 408 | 451 | ||
| 409 | let offset = 0; | 452 | let offset = 0; |
| 410 | - Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); | 453 | + Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen); |
| 411 | offset += encoderLen; | 454 | offset += encoderLen; |
| 412 | 455 | ||
| 413 | - Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); | 456 | + Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen); |
| 414 | offset += decoderLen; | 457 | offset += decoderLen; |
| 415 | 458 | ||
| 416 | - Module.stringToUTF8(config.joiner, buffer + offset, joinerLen); | 459 | + Module.stringToUTF8(config.joiner || '', buffer + offset, joinerLen); |
| 417 | 460 | ||
| 418 | offset = 0; | 461 | offset = 0; |
| 419 | Module.setValue(ptr, buffer + offset, 'i8*'); | 462 | Module.setValue(ptr, buffer + offset, 'i8*'); |
| @@ -430,14 +473,14 @@ function initSherpaOnnxOfflineTransducerModelConfig(config, Module) { | @@ -430,14 +473,14 @@ function initSherpaOnnxOfflineTransducerModelConfig(config, Module) { | ||
| 430 | } | 473 | } |
| 431 | 474 | ||
| 432 | function initSherpaOnnxOfflineParaformerModelConfig(config, Module) { | 475 | function initSherpaOnnxOfflineParaformerModelConfig(config, Module) { |
| 433 | - const n = Module.lengthBytesUTF8(config.model) + 1; | 476 | + const n = Module.lengthBytesUTF8(config.model || '') + 1; |
| 434 | 477 | ||
| 435 | const buffer = Module._malloc(n); | 478 | const buffer = Module._malloc(n); |
| 436 | 479 | ||
| 437 | const len = 1 * 4; // 1 pointer | 480 | const len = 1 * 4; // 1 pointer |
| 438 | const ptr = Module._malloc(len); | 481 | const ptr = Module._malloc(len); |
| 439 | 482 | ||
| 440 | - Module.stringToUTF8(config.model, buffer, n); | 483 | + Module.stringToUTF8(config.model || '', buffer, n); |
| 441 | 484 | ||
| 442 | Module.setValue(ptr, buffer, 'i8*'); | 485 | Module.setValue(ptr, buffer, 'i8*'); |
| 443 | 486 | ||
| @@ -447,14 +490,14 @@ function initSherpaOnnxOfflineParaformerModelConfig(config, Module) { | @@ -447,14 +490,14 @@ function initSherpaOnnxOfflineParaformerModelConfig(config, Module) { | ||
| 447 | } | 490 | } |
| 448 | 491 | ||
| 449 | function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) { | 492 | function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) { |
| 450 | - const n = Module.lengthBytesUTF8(config.model) + 1; | 493 | + const n = Module.lengthBytesUTF8(config.model || '') + 1; |
| 451 | 494 | ||
| 452 | const buffer = Module._malloc(n); | 495 | const buffer = Module._malloc(n); |
| 453 | 496 | ||
| 454 | const len = 1 * 4; // 1 pointer | 497 | const len = 1 * 4; // 1 pointer |
| 455 | const ptr = Module._malloc(len); | 498 | const ptr = Module._malloc(len); |
| 456 | 499 | ||
| 457 | - Module.stringToUTF8(config.model, buffer, n); | 500 | + Module.stringToUTF8(config.model || '', buffer, n); |
| 458 | 501 | ||
| 459 | Module.setValue(ptr, buffer, 'i8*'); | 502 | Module.setValue(ptr, buffer, 'i8*'); |
| 460 | 503 | ||
| @@ -464,10 +507,10 @@ function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) { | @@ -464,10 +507,10 @@ function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) { | ||
| 464 | } | 507 | } |
| 465 | 508 | ||
| 466 | function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { | 509 | function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { |
| 467 | - const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1; | ||
| 468 | - const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1; | ||
| 469 | - const languageLen = Module.lengthBytesUTF8(config.language) + 1; | ||
| 470 | - const taskLen = Module.lengthBytesUTF8(config.task) + 1; | 510 | + const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; |
| 511 | + const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1; | ||
| 512 | + const languageLen = Module.lengthBytesUTF8(config.language || '') + 1; | ||
| 513 | + const taskLen = Module.lengthBytesUTF8(config.task || '') + 1; | ||
| 471 | 514 | ||
| 472 | const n = encoderLen + decoderLen + languageLen + taskLen; | 515 | const n = encoderLen + decoderLen + languageLen + taskLen; |
| 473 | const buffer = Module._malloc(n); | 516 | const buffer = Module._malloc(n); |
| @@ -476,16 +519,16 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { | @@ -476,16 +519,16 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { | ||
| 476 | const ptr = Module._malloc(len); | 519 | const ptr = Module._malloc(len); |
| 477 | 520 | ||
| 478 | let offset = 0; | 521 | let offset = 0; |
| 479 | - Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); | 522 | + Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen); |
| 480 | offset += encoderLen; | 523 | offset += encoderLen; |
| 481 | 524 | ||
| 482 | - Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); | 525 | + Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen); |
| 483 | offset += decoderLen; | 526 | offset += decoderLen; |
| 484 | 527 | ||
| 485 | - Module.stringToUTF8(config.language, buffer + offset, languageLen); | 528 | + Module.stringToUTF8(config.language || '', buffer + offset, languageLen); |
| 486 | offset += languageLen; | 529 | offset += languageLen; |
| 487 | 530 | ||
| 488 | - Module.stringToUTF8(config.task, buffer + offset, taskLen); | 531 | + Module.stringToUTF8(config.task || '', buffer + offset, taskLen); |
| 489 | 532 | ||
| 490 | offset = 0; | 533 | offset = 0; |
| 491 | Module.setValue(ptr, buffer + offset, 'i8*'); | 534 | Module.setValue(ptr, buffer + offset, 'i8*'); |
| @@ -508,13 +551,13 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { | @@ -508,13 +551,13 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { | ||
| 508 | } | 551 | } |
| 509 | 552 | ||
| 510 | function initSherpaOnnxOfflineTdnnModelConfig(config, Module) { | 553 | function initSherpaOnnxOfflineTdnnModelConfig(config, Module) { |
| 511 | - const n = Module.lengthBytesUTF8(config.model) + 1; | 554 | + const n = Module.lengthBytesUTF8(config.model || '') + 1; |
| 512 | const buffer = Module._malloc(n); | 555 | const buffer = Module._malloc(n); |
| 513 | 556 | ||
| 514 | const len = 1 * 4; // 1 pointer | 557 | const len = 1 * 4; // 1 pointer |
| 515 | const ptr = Module._malloc(len); | 558 | const ptr = Module._malloc(len); |
| 516 | 559 | ||
| 517 | - Module.stringToUTF8(config.model, buffer, n); | 560 | + Module.stringToUTF8(config.model || '', buffer, n); |
| 518 | 561 | ||
| 519 | Module.setValue(ptr, buffer, 'i8*'); | 562 | Module.setValue(ptr, buffer, 'i8*'); |
| 520 | 563 | ||
| @@ -523,16 +566,48 @@ function initSherpaOnnxOfflineTdnnModelConfig(config, Module) { | @@ -523,16 +566,48 @@ function initSherpaOnnxOfflineTdnnModelConfig(config, Module) { | ||
| 523 | } | 566 | } |
| 524 | } | 567 | } |
| 525 | 568 | ||
| 569 | +function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) { | ||
| 570 | + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; | ||
| 571 | + const languageLen = Module.lengthBytesUTF8(config.language || '') + 1; | ||
| 572 | + | ||
| 573 | + // useItn is a 4-byte integer stored in the struct, not in the string buffer | ||
| 574 | + const n = modelLen + languageLen; | ||
| 575 | + const buffer = Module._malloc(n); | ||
| 576 | + | ||
| 577 | + const len = 3 * 4; // 2 pointers + 1 int | ||
| 578 | + const ptr = Module._malloc(len); | ||
| 579 | + | ||
| 580 | + let offset = 0; | ||
| 581 | + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); | ||
| 582 | + offset += modelLen; | ||
| 583 | + | ||
| 584 | + Module.stringToUTF8(config.language || '', buffer + offset, languageLen); | ||
| 585 | + offset += languageLen; | ||
| 586 | + | ||
| 587 | + offset = 0; | ||
| 588 | + Module.setValue(ptr, buffer + offset, 'i8*'); | ||
| 589 | + offset += modelLen; | ||
| 590 | + | ||
| 591 | + Module.setValue(ptr + 4, buffer + offset, 'i8*'); | ||
| 592 | + offset += languageLen; | ||
| 593 | + | ||
| 594 | + Module.setValue(ptr + 8, config.useInverseTextNormalization || 0, 'i32'); | ||
| 595 | + | ||
| 596 | + return { | ||
| 597 | + buffer: buffer, ptr: ptr, len: len, | ||
| 598 | + } | ||
| 599 | +} | ||
| 600 | + | ||
| 526 | function initSherpaOnnxOfflineLMConfig(config, Module) { | 601 | function initSherpaOnnxOfflineLMConfig(config, Module) { |
| 527 | - const n = Module.lengthBytesUTF8(config.model) + 1; | 602 | + const n = Module.lengthBytesUTF8(config.model || '') + 1; |
| 528 | const buffer = Module._malloc(n); | 603 | const buffer = Module._malloc(n); |
| 529 | 604 | ||
| 530 | const len = 2 * 4; | 605 | const len = 2 * 4; |
| 531 | const ptr = Module._malloc(len); | 606 | const ptr = Module._malloc(len); |
| 532 | 607 | ||
| 533 | - Module.stringToUTF8(config.model, buffer, n); | 608 | + Module.stringToUTF8(config.model || '', buffer, n); |
| 534 | Module.setValue(ptr, buffer, 'i8*'); | 609 | Module.setValue(ptr, buffer, 'i8*'); |
| 535 | - Module.setValue(ptr + 4, config.scale, 'float'); | 610 | + Module.setValue(ptr + 4, config.scale || 1, 'float'); |
| 536 | 611 | ||
| 537 | return { | 612 | return { |
| 538 | buffer: buffer, ptr: ptr, len: len, | 613 | buffer: buffer, ptr: ptr, len: len, |
| @@ -540,18 +615,70 @@ function initSherpaOnnxOfflineLMConfig(config, Module) { | @@ -540,18 +615,70 @@ function initSherpaOnnxOfflineLMConfig(config, Module) { | ||
| 540 | } | 615 | } |
| 541 | 616 | ||
| 542 | function initSherpaOnnxOfflineModelConfig(config, Module) { | 617 | function initSherpaOnnxOfflineModelConfig(config, Module) { |
| 618 | + if (!('transducer' in config)) { | ||
| 619 | + config.transducer = { | ||
| 620 | + encoder: '', | ||
| 621 | + decoder: '', | ||
| 622 | + joiner: '', | ||
| 623 | + }; | ||
| 624 | + } | ||
| 625 | + | ||
| 626 | + if (!('paraformer' in config)) { | ||
| 627 | + config.paraformer = { | ||
| 628 | + model: '', | ||
| 629 | + }; | ||
| 630 | + } | ||
| 631 | + | ||
| 632 | + if (!('nemoCtc' in config)) { | ||
| 633 | + config.nemoCtc = { | ||
| 634 | + model: '', | ||
| 635 | + }; | ||
| 636 | + } | ||
| 637 | + | ||
| 638 | + if (!('whisper' in config)) { | ||
| 639 | + config.whisper = { | ||
| 640 | + encoder: '', | ||
| 641 | + decoder: '', | ||
| 642 | + language: '', | ||
| 643 | + task: '', | ||
| 644 | + tailPaddings: -1, | ||
| 645 | + }; | ||
| 646 | + } | ||
| 647 | + | ||
| 648 | + if (!('tdnn' in config)) { | ||
| 649 | + config.tdnn = { | ||
| 650 | + model: '', | ||
| 651 | + }; | ||
| 652 | + } | ||
| 653 | + | ||
| 654 | + if (!('senseVoice' in config)) { | ||
| 655 | + config.senseVoice = { | ||
| 656 | + model: '', | ||
| 657 | + language: '', | ||
| 658 | + useInverseTextNormalization: 0, | ||
| 659 | + }; | ||
| 660 | + } | ||
| 661 | + | ||
| 543 | const transducer = | 662 | const transducer = |
| 544 | initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module); | 663 | initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module); |
| 664 | + | ||
| 545 | const paraformer = | 665 | const paraformer = |
| 546 | initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module); | 666 | initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module); |
| 667 | + | ||
| 547 | const nemoCtc = | 668 | const nemoCtc = |
| 548 | initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module); | 669 | initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module); |
| 670 | + | ||
| 549 | const whisper = | 671 | const whisper = |
| 550 | initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module); | 672 | initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module); |
| 673 | + | ||
| 551 | const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module); | 674 | const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module); |
| 552 | 675 | ||
| 676 | + const senseVoice = | ||
| 677 | + initSherpaOnnxOfflineSenseVoiceModelConfig(config.senseVoice, Module); | ||
| 678 | + | ||
| 553 | const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + | 679 | const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + |
| 554 | - tdnn.len + 8 * 4; | 680 | + tdnn.len + 8 * 4 + senseVoice.len; |
| 681 | + | ||
| 555 | const ptr = Module._malloc(len); | 682 | const ptr = Module._malloc(len); |
| 556 | 683 | ||
| 557 | let offset = 0; | 684 | let offset = 0; |
| @@ -570,9 +697,10 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -570,9 +697,10 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 570 | Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset); | 697 | Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset); |
| 571 | offset += tdnn.len; | 698 | offset += tdnn.len; |
| 572 | 699 | ||
| 573 | - const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; | ||
| 574 | - const providerLen = Module.lengthBytesUTF8(config.provider) + 1; | ||
| 575 | - const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1; | 700 | + |
| 701 | + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; | ||
| 702 | + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; | ||
| 703 | + const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1; | ||
| 576 | const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1; | 704 | const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1; |
| 577 | const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1; | 705 | const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1; |
| 578 | const teleSpeechCtcLen = | 706 | const teleSpeechCtcLen = |
| @@ -580,16 +708,17 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -580,16 +708,17 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 580 | 708 | ||
| 581 | const bufferLen = tokensLen + providerLen + modelTypeLen + modelingUnitLen + | 709 | const bufferLen = tokensLen + providerLen + modelTypeLen + modelingUnitLen + |
| 582 | bpeVocabLen + teleSpeechCtcLen; | 710 | bpeVocabLen + teleSpeechCtcLen; |
| 711 | + | ||
| 583 | const buffer = Module._malloc(bufferLen); | 712 | const buffer = Module._malloc(bufferLen); |
| 584 | 713 | ||
| 585 | offset = 0; | 714 | offset = 0; |
| 586 | Module.stringToUTF8(config.tokens, buffer, tokensLen); | 715 | Module.stringToUTF8(config.tokens, buffer, tokensLen); |
| 587 | offset += tokensLen; | 716 | offset += tokensLen; |
| 588 | 717 | ||
| 589 | - Module.stringToUTF8(config.provider, buffer + offset, providerLen); | 718 | + Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen); |
| 590 | offset += providerLen; | 719 | offset += providerLen; |
| 591 | 720 | ||
| 592 | - Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen); | 721 | + Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen); |
| 593 | offset += modelTypeLen; | 722 | offset += modelTypeLen; |
| 594 | 723 | ||
| 595 | Module.stringToUTF8( | 724 | Module.stringToUTF8( |
| @@ -608,10 +737,10 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -608,10 +737,10 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 608 | Module.setValue(ptr + offset, buffer, 'i8*'); // tokens | 737 | Module.setValue(ptr + offset, buffer, 'i8*'); // tokens |
| 609 | offset += 4; | 738 | offset += 4; |
| 610 | 739 | ||
| 611 | - Module.setValue(ptr + offset, config.numThreads, 'i32'); | 740 | + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); |
| 612 | offset += 4; | 741 | offset += 4; |
| 613 | 742 | ||
| 614 | - Module.setValue(ptr + offset, config.debug, 'i32'); | 743 | + Module.setValue(ptr + offset, config.debug || 0, 'i32'); |
| 615 | offset += 4; | 744 | offset += 4; |
| 616 | 745 | ||
| 617 | Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider | 746 | Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider |
| @@ -639,13 +768,30 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -639,13 +768,30 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 639 | 'i8*'); // teleSpeechCtc | 768 | 'i8*'); // teleSpeechCtc |
| 640 | offset += 4; | 769 | offset += 4; |
| 641 | 770 | ||
| 771 | + Module._CopyHeap(senseVoice.ptr, senseVoice.len, ptr + offset); | ||
| 772 | + | ||
| 642 | return { | 773 | return { |
| 643 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, | 774 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, |
| 644 | - paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn | 775 | + paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn, |
| 776 | + senseVoice: senseVoice, | ||
| 645 | } | 777 | } |
| 646 | } | 778 | } |
| 647 | 779 | ||
| 648 | function initSherpaOnnxOfflineRecognizerConfig(config, Module) { | 780 | function initSherpaOnnxOfflineRecognizerConfig(config, Module) { |
| 781 | + if (!('featConfig' in config)) { | ||
| 782 | + config.featConfig = { | ||
| 783 | + sampleRate: 16000, | ||
| 784 | + featureDim: 80, | ||
| 785 | + }; | ||
| 786 | + } | ||
| 787 | + | ||
| 788 | + if (!('lmConfig' in config)) { | ||
| 789 | + config.lmConfig = { | ||
| 790 | + model: '', | ||
| 791 | + scale: 1.0, | ||
| 792 | + }; | ||
| 793 | + } | ||
| 794 | + | ||
| 649 | const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module); | 795 | const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module); |
| 650 | const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module); | 796 | const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module); |
| 651 | const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module); | 797 | const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module); |
| @@ -663,8 +809,9 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { | @@ -663,8 +809,9 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { | ||
| 663 | Module._CopyHeap(lm.ptr, lm.len, ptr + offset); | 809 | Module._CopyHeap(lm.ptr, lm.len, ptr + offset); |
| 664 | offset += lm.len; | 810 | offset += lm.len; |
| 665 | 811 | ||
| 666 | - const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1; | ||
| 667 | - const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1; | 812 | + const decodingMethodLen = |
| 813 | + Module.lengthBytesUTF8(config.decodingMethod || 'greedy_search') + 1; | ||
| 814 | + const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile || '') + 1; | ||
| 668 | const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; | 815 | const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; |
| 669 | const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; | 816 | const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; |
| 670 | const bufferLen = | 817 | const bufferLen = |
| @@ -672,10 +819,12 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { | @@ -672,10 +819,12 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { | ||
| 672 | const buffer = Module._malloc(bufferLen); | 819 | const buffer = Module._malloc(bufferLen); |
| 673 | 820 | ||
| 674 | offset = 0; | 821 | offset = 0; |
| 675 | - Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen); | 822 | + Module.stringToUTF8( |
| 823 | + config.decodingMethod || 'greedy_search', buffer, decodingMethodLen); | ||
| 676 | offset += decodingMethodLen; | 824 | offset += decodingMethodLen; |
| 677 | 825 | ||
| 678 | - Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); | 826 | + Module.stringToUTF8( |
| 827 | + config.hotwordsFile || '', buffer + offset, hotwordsFileLen); | ||
| 679 | offset += hotwordsFileLen; | 828 | offset += hotwordsFileLen; |
| 680 | 829 | ||
| 681 | Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen); | 830 | Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen); |
| @@ -689,13 +838,13 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { | @@ -689,13 +838,13 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { | ||
| 689 | Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method | 838 | Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method |
| 690 | offset += 4; | 839 | offset += 4; |
| 691 | 840 | ||
| 692 | - Module.setValue(ptr + offset, config.maxActivePaths, 'i32'); | 841 | + Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32'); |
| 693 | offset += 4; | 842 | offset += 4; |
| 694 | 843 | ||
| 695 | Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*'); | 844 | Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*'); |
| 696 | offset += 4; | 845 | offset += 4; |
| 697 | 846 | ||
| 698 | - Module.setValue(ptr + offset, config.hotwordsScore, 'float'); | 847 | + Module.setValue(ptr + offset, config.hotwordsScore || 1.5, 'float'); |
| 699 | offset += 4; | 848 | offset += 4; |
| 700 | 849 | ||
| 701 | Module.setValue( | 850 | Module.setValue( |
| @@ -16,6 +16,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, ""); | @@ -16,6 +16,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, ""); | ||
| 16 | static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, ""); | 16 | static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, ""); |
| 17 | static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, ""); | 17 | static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, ""); |
| 18 | static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, ""); | 18 | static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, ""); |
| 19 | +static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, ""); | ||
| 19 | static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, ""); | 20 | static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, ""); |
| 20 | 21 | ||
| 21 | static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | 22 | static_assert(sizeof(SherpaOnnxOfflineModelConfig) == |
| @@ -23,7 +24,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | @@ -23,7 +24,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | ||
| 23 | sizeof(SherpaOnnxOfflineParaformerModelConfig) + | 24 | sizeof(SherpaOnnxOfflineParaformerModelConfig) + |
| 24 | sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) + | 25 | sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) + |
| 25 | sizeof(SherpaOnnxOfflineWhisperModelConfig) + | 26 | sizeof(SherpaOnnxOfflineWhisperModelConfig) + |
| 26 | - sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4, | 27 | + sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 + |
| 28 | + sizeof(SherpaOnnxOfflineSenseVoiceModelConfig), | ||
| 27 | ""); | 29 | ""); |
| 28 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); | 30 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); |
| 29 | static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) == | 31 | static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) == |
| @@ -63,6 +65,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | @@ -63,6 +65,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | ||
| 63 | auto nemo_ctc = &model_config->nemo_ctc; | 65 | auto nemo_ctc = &model_config->nemo_ctc; |
| 64 | auto whisper = &model_config->whisper; | 66 | auto whisper = &model_config->whisper; |
| 65 | auto tdnn = &model_config->tdnn; | 67 | auto tdnn = &model_config->tdnn; |
| 68 | + auto sense_voice = &model_config->sense_voice; | ||
| 66 | 69 | ||
| 67 | fprintf(stdout, "----------offline transducer model config----------\n"); | 70 | fprintf(stdout, "----------offline transducer model config----------\n"); |
| 68 | fprintf(stdout, "encoder: %s\n", transducer->encoder); | 71 | fprintf(stdout, "encoder: %s\n", transducer->encoder); |
| @@ -85,6 +88,11 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | @@ -85,6 +88,11 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | ||
| 85 | fprintf(stdout, "----------offline tdnn model config----------\n"); | 88 | fprintf(stdout, "----------offline tdnn model config----------\n"); |
| 86 | fprintf(stdout, "model: %s\n", tdnn->model); | 89 | fprintf(stdout, "model: %s\n", tdnn->model); |
| 87 | 90 | ||
| 91 | + fprintf(stdout, "----------offline sense_voice model config----------\n"); | ||
| 92 | + fprintf(stdout, "model: %s\n", sense_voice->model); | ||
| 93 | + fprintf(stdout, "language: %s\n", sense_voice->language); | ||
| 94 | + fprintf(stdout, "use_itn: %d\n", sense_voice->use_itn); | ||
| 95 | + | ||
| 88 | fprintf(stdout, "tokens: %s\n", model_config->tokens); | 96 | fprintf(stdout, "tokens: %s\n", model_config->tokens); |
| 89 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); | 97 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); |
| 90 | fprintf(stdout, "provider: %s\n", model_config->provider); | 98 | fprintf(stdout, "provider: %s\n", model_config->provider); |
| @@ -14,14 +14,10 @@ function freeConfig(config, Module) { | @@ -14,14 +14,10 @@ function freeConfig(config, Module) { | ||
| 14 | // The user should free the returned pointers | 14 | // The user should free the returned pointers |
| 15 | function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { | 15 | function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { |
| 16 | const modelLen = Module.lengthBytesUTF8(config.model) + 1; | 16 | const modelLen = Module.lengthBytesUTF8(config.model) + 1; |
| 17 | - const lexiconLen = Module.lengthBytesUTF8(config.lexicon) + 1; | ||
| 18 | - const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; | ||
| 19 | - const dataDirLen = Module.lengthBytesUTF8(config.dataDir) + 1; | ||
| 20 | - | ||
| 21 | - if (!('dictDir' in config)) { | ||
| 22 | - config.dictDir = '' | ||
| 23 | - } | ||
| 24 | - const dictDirLen = Module.lengthBytesUTF8(config.dictDir) + 1; | 17 | + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; |
| 18 | + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; | ||
| 19 | + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; | ||
| 20 | + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; | ||
| 25 | 21 | ||
| 26 | const n = modelLen + lexiconLen + tokensLen + dataDirLen + dictDirLen; | 22 | const n = modelLen + lexiconLen + tokensLen + dataDirLen + dictDirLen; |
| 27 | 23 | ||
| @@ -31,19 +27,19 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { | @@ -31,19 +27,19 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { | ||
| 31 | const ptr = Module._malloc(len); | 27 | const ptr = Module._malloc(len); |
| 32 | 28 | ||
| 33 | let offset = 0; | 29 | let offset = 0; |
| 34 | - Module.stringToUTF8(config.model, buffer + offset, modelLen); | 30 | + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); |
| 35 | offset += modelLen; | 31 | offset += modelLen; |
| 36 | 32 | ||
| 37 | - Module.stringToUTF8(config.lexicon, buffer + offset, lexiconLen); | 33 | + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); |
| 38 | offset += lexiconLen; | 34 | offset += lexiconLen; |
| 39 | 35 | ||
| 40 | - Module.stringToUTF8(config.tokens, buffer + offset, tokensLen); | 36 | + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen); |
| 41 | offset += tokensLen; | 37 | offset += tokensLen; |
| 42 | 38 | ||
| 43 | - Module.stringToUTF8(config.dataDir, buffer + offset, dataDirLen); | 39 | + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); |
| 44 | offset += dataDirLen; | 40 | offset += dataDirLen; |
| 45 | 41 | ||
| 46 | - Module.stringToUTF8(config.dictDir, buffer + offset, dictDirLen); | 42 | + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); |
| 47 | offset += dictDirLen; | 43 | offset += dictDirLen; |
| 48 | 44 | ||
| 49 | offset = 0; | 45 | offset = 0; |
| @@ -59,9 +55,9 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { | @@ -59,9 +55,9 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { | ||
| 59 | Module.setValue(ptr + 12, buffer + offset, 'i8*'); | 55 | Module.setValue(ptr + 12, buffer + offset, 'i8*'); |
| 60 | offset += dataDirLen; | 56 | offset += dataDirLen; |
| 61 | 57 | ||
| 62 | - Module.setValue(ptr + 16, config.noiseScale, 'float'); | ||
| 63 | - Module.setValue(ptr + 20, config.noiseScaleW, 'float'); | ||
| 64 | - Module.setValue(ptr + 24, config.lengthScale, 'float'); | 58 | + Module.setValue(ptr + 16, config.noiseScale || 0.667, 'float'); |
| 59 | + Module.setValue(ptr + 20, config.noiseScaleW || 0.8, 'float'); | ||
| 60 | + Module.setValue(ptr + 24, config.lengthScale || 1.0, 'float'); | ||
| 65 | Module.setValue(ptr + 28, buffer + offset, 'i8*'); | 61 | Module.setValue(ptr + 28, buffer + offset, 'i8*'); |
| 66 | offset += dictDirLen; | 62 | offset += dictDirLen; |
| 67 | 63 | ||
| @@ -81,13 +77,13 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { | @@ -81,13 +77,13 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { | ||
| 81 | Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset); | 77 | Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset); |
| 82 | offset += vitsModelConfig.len; | 78 | offset += vitsModelConfig.len; |
| 83 | 79 | ||
| 84 | - Module.setValue(ptr + offset, config.numThreads, 'i32'); | 80 | + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); |
| 85 | offset += 4; | 81 | offset += 4; |
| 86 | 82 | ||
| 87 | - Module.setValue(ptr + offset, config.debug, 'i32'); | 83 | + Module.setValue(ptr + offset, config.debug || 0, 'i32'); |
| 88 | offset += 4; | 84 | offset += 4; |
| 89 | 85 | ||
| 90 | - const providerLen = Module.lengthBytesUTF8(config.provider) + 1; | 86 | + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; |
| 91 | const buffer = Module._malloc(providerLen); | 87 | const buffer = Module._malloc(providerLen); |
| 92 | Module.stringToUTF8(config.provider, buffer, providerLen); | 88 | Module.stringToUTF8(config.provider, buffer, providerLen); |
| 93 | Module.setValue(ptr + offset, buffer, 'i8*'); | 89 | Module.setValue(ptr + offset, buffer, 'i8*'); |
| @@ -107,17 +103,17 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) { | @@ -107,17 +103,17 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) { | ||
| 107 | Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); | 103 | Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); |
| 108 | offset += modelConfig.len; | 104 | offset += modelConfig.len; |
| 109 | 105 | ||
| 110 | - const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1; | ||
| 111 | - const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars) + 1; | 106 | + const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; |
| 107 | + const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; | ||
| 112 | 108 | ||
| 113 | const buffer = Module._malloc(ruleFstsLen + ruleFarsLen); | 109 | const buffer = Module._malloc(ruleFstsLen + ruleFarsLen); |
| 114 | - Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen); | ||
| 115 | - Module.stringToUTF8(config.ruleFars, buffer + ruleFstsLen, ruleFarsLen); | 110 | + Module.stringToUTF8(config.ruleFsts || '', buffer, ruleFstsLen); |
| 111 | + Module.stringToUTF8(config.ruleFars || '', buffer + ruleFstsLen, ruleFarsLen); | ||
| 116 | 112 | ||
| 117 | Module.setValue(ptr + offset, buffer, 'i8*'); | 113 | Module.setValue(ptr + offset, buffer, 'i8*'); |
| 118 | offset += 4; | 114 | offset += 4; |
| 119 | 115 | ||
| 120 | - Module.setValue(ptr + offset, config.maxNumSentences, 'i32'); | 116 | + Module.setValue(ptr + offset, config.maxNumSentences || 1, 'i32'); |
| 121 | offset += 4; | 117 | offset += 4; |
| 122 | 118 | ||
| 123 | Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*'); | 119 | Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*'); |
-
请 注册 或 登录 后发表评论