Fangjun Kuang
Committed by GitHub

Add WebAssembly for NodeJS. (#628)

@@ -9,6 +9,7 @@ concurrency: @@ -9,6 +9,7 @@ concurrency:
9 9
10 permissions: 10 permissions:
11 contents: read 11 contents: read
  12 + id-token: write
12 13
13 jobs: 14 jobs:
14 nodejs: 15 nodejs:
@@ -20,10 +21,20 @@ jobs: @@ -20,10 +21,20 @@ jobs:
20 python-version: ["3.8"] 21 python-version: ["3.8"]
21 22
22 steps: 23 steps:
23 - - uses: actions/checkout@v2 24 + - uses: actions/checkout@v4
24 with: 25 with:
25 fetch-depth: 0 26 fetch-depth: 0
26 27
  28 + - name: Install emsdk
  29 + uses: mymindstorm/setup-emsdk@v14
  30 +
  31 + - name: View emsdk version
  32 + shell: bash
  33 + run: |
  34 + emcc -v
  35 + echo "--------------------"
  36 + emcc --check
  37 +
27 - name: Setup Python ${{ matrix.python-version }} 38 - name: Setup Python ${{ matrix.python-version }}
28 uses: actions/setup-python@v5 39 uses: actions/setup-python@v5
29 with: 40 with:
@@ -31,28 +42,38 @@ jobs: @@ -31,28 +42,38 @@ jobs:
31 42
32 - uses: actions/setup-node@v4 43 - uses: actions/setup-node@v4
33 with: 44 with:
34 - node-version: 13  
35 registry-url: 'https://registry.npmjs.org' 45 registry-url: 'https://registry.npmjs.org'
36 46
37 - name: Display node version 47 - name: Display node version
38 shell: bash 48 shell: bash
39 run: | 49 run: |
40 node --version 50 node --version
41 - npm --version  
42 - cd nodejs-examples  
43 -  
44 - npm install npm@6.14.4 -g  
45 - npm install npm@6.14.4  
46 - npm --version  
47 51
48 - name: Build nodejs package 52 - name: Build nodejs package
49 shell: bash 53 shell: bash
50 env: 54 env:
51 NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 55 NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
52 run: | 56 run: |
  57 + ./build-wasm-simd-nodejs.sh
  58 + cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
  59 + cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/
  60 +
  61 + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
  62 + echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
  63 +
53 cd scripts/nodejs 64 cd scripts/nodejs
54 - ./run.sh 65 +
  66 + owner=${{ github.repository_owner }}
  67 + echo "owner: $owner"
  68 +
  69 + sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g ./package.json
  70 + sed -i.bak s/k2-fsa/$owner/g ./package.json
  71 +
  72 + rm package.json.bak
  73 +
  74 + git diff
  75 +
55 npm install 76 npm install
56 - rm run.sh  
57 npm ci 77 npm ci
  78 + # see https://docs.npmjs.com/generating-provenance-statements
58 npm publish --provenance --access public 79 npm publish --provenance --access public
@@ -40,7 +40,6 @@ jobs: @@ -40,7 +40,6 @@ jobs:
40 40
41 - uses: actions/setup-node@v4 41 - uses: actions/setup-node@v4
42 with: 42 with:
43 - node-version: 13  
44 registry-url: 'https://registry.npmjs.org' 43 registry-url: 'https://registry.npmjs.org'
45 44
46 - name: Display node version 45 - name: Display node version
@@ -24,7 +24,7 @@ jobs: @@ -24,7 +24,7 @@ jobs:
24 strategy: 24 strategy:
25 fail-fast: false 25 fail-fast: false
26 matrix: 26 matrix:
27 - os: [ubuntu-latest, macos-latest] #, windows-2019] 27 + os: [ubuntu-latest] #, macos-latest] #, windows-2019]
28 python-version: ["3.8"] 28 python-version: ["3.8"]
29 29
30 steps: 30 steps:
@@ -32,49 +32,38 @@ jobs: @@ -32,49 +32,38 @@ jobs:
32 with: 32 with:
33 fetch-depth: 0 33 fetch-depth: 0
34 34
35 - - name: ccache  
36 - uses: hendrikmuhs/ccache-action@v1.2  
37 - with:  
38 - key: ${{ matrix.os }}-Release-ON 35 + - name: Install emsdk
  36 + uses: mymindstorm/setup-emsdk@v14
39 37
40 - - name: Configure CMake 38 + - name: View emsdk version
41 shell: bash 39 shell: bash
42 run: | 40 run: |
43 - export CMAKE_CXX_COMPILER_LAUNCHER=ccache  
44 - export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"  
45 - cmake --version  
46 -  
47 - mkdir build  
48 - cd build  
49 - cmake -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install ..  
50 - cmake --build . --target install --config Release  
51 -  
52 - ls -lh install/lib 41 + emcc -v
  42 + echo "--------------------"
  43 + emcc --check
53 44
54 - name: Setup Python ${{ matrix.python-version }} 45 - name: Setup Python ${{ matrix.python-version }}
55 uses: actions/setup-python@v5 46 uses: actions/setup-python@v5
56 with: 47 with:
57 python-version: ${{ matrix.python-version }} 48 python-version: ${{ matrix.python-version }}
58 49
59 - - name: Copy files 50 + - uses: actions/setup-node@v4
  51 + with:
  52 + registry-url: 'https://registry.npmjs.org'
  53 +
  54 + - name: Display node version
  55 + shell: bash
  56 + run: |
  57 + node --version
  58 +
  59 + - name: Build nodejs package
60 shell: bash 60 shell: bash
  61 + env:
  62 + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
61 run: | 63 run: |
62 - os=${{ matrix.os }}  
63 - if [[ $os == 'ubuntu-latest' ]]; then  
64 - mkdir -p scripts/nodejs/lib/linux-x64  
65 - dst=scripts/nodejs/lib/linux-x64  
66 - elif [[ $os == 'macos-latest' ]]; then  
67 - mkdir -p scripts/nodejs/lib/osx-x64  
68 - dst=scripts/nodejs/lib/osx-x64  
69 - elif [[ $os == 'windows-2019' ]]; then  
70 - mkdir -p scripts/nodejs/lib/win-x64  
71 - dst=scripts/nodejs/lib/win-x64  
72 - fi  
73 - ls -lh build/install/lib/  
74 -  
75 - rm -rf build/install/lib/pkgconfig  
76 -  
77 - cp -v build/install/lib/* $dst/ 64 + ./build-wasm-simd-nodejs.sh
  65 + cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
  66 + cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/
78 67
79 - name: replace files 68 - name: replace files
80 shell: bash 69 shell: bash
@@ -89,17 +78,6 @@ jobs: @@ -89,17 +78,6 @@ jobs:
89 git diff 78 git diff
90 cp *.js ../scripts/nodejs 79 cp *.js ../scripts/nodejs
91 80
92 - - uses: actions/setup-node@v4  
93 - with:  
94 - node-version: 13  
95 - registry-url: 'https://registry.npmjs.org'  
96 -  
97 - - name: Display node version  
98 - shell: bash  
99 - run: |  
100 - node --version  
101 - npm --version  
102 -  
103 - name: Run tests 81 - name: Run tests
104 shell: bash 82 shell: bash
105 run: | 83 run: |
@@ -23,6 +23,7 @@ option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF) @@ -23,6 +23,7 @@ option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF)
23 option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF) 23 option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
24 option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF) 24 option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
25 option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) 25 option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
  26 +option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF)
26 option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) 27 option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
27 option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) 28 option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON)
28 29
@@ -108,6 +109,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") @@ -108,6 +109,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}")
108 message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}") 109 message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}")
109 message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") 110 message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
110 message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") 111 message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
  112 +message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}")
111 113
112 if(SHERPA_ONNX_ENABLE_WASM_TTS) 114 if(SHERPA_ONNX_ENABLE_WASM_TTS)
113 if(NOT SHERPA_ONNX_ENABLE_WASM) 115 if(NOT SHERPA_ONNX_ENABLE_WASM)
@@ -121,6 +123,12 @@ if(SHERPA_ONNX_ENABLE_WASM_ASR) @@ -121,6 +123,12 @@ if(SHERPA_ONNX_ENABLE_WASM_ASR)
121 endif() 123 endif()
122 endif() 124 endif()
123 125
  126 +if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
  127 + if(NOT SHERPA_ONNX_ENABLE_WASM)
  128 + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS")
  129 + endif()
  130 +endif()
  131 +
124 if(SHERPA_ONNX_ENABLE_WASM) 132 if(SHERPA_ONNX_ENABLE_WASM)
125 add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1) 133 add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1)
126 endif() 134 endif()
  1 +#!/usr/bin/env bash
  2 +# Copyright (c) 2024 Xiaomi Corporation
  3 +#
  4 +# This script is to build sherpa-onnx for WebAssembly (NodeJS)
  5 +#
  6 +# Please use NodeJS >= 18
  7 +
  8 +set -ex
  9 +
  10 +if [ x"$EMSCRIPTEN" == x"" ]; then
  11 + if ! command -v emcc &> /dev/null; then
  12 + echo "Please install emscripten first"
  13 + echo ""
  14 + echo "You can use the following commands to install it:"
  15 + echo ""
  16 + echo "git clone https://github.com/emscripten-core/emsdk.git"
  17 + echo "cd emsdk"
  18 + echo "git pull"
  19 + echo "./emsdk install latest"
  20 + echo "./emsdk activate latest"
  21 + echo "source ./emsdk_env.sh"
  22 + exit 1
  23 + else
  24 + EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
  25 + fi
  26 +fi
  27 +
  28 +export EMSCRIPTEN=$EMSCRIPTEN
  29 +echo "EMSCRIPTEN: $EMSCRIPTEN"
  30 +if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
  31 + echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
  32 + echo "Please make sure you have installed emsdk correctly"
  33 + exit 1
  34 +fi
  35 +
  36 +mkdir -p build-wasm-simd-nodejs
  37 +pushd build-wasm-simd-nodejs
  38 +
  39 +export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON
  40 +
  41 +cmake \
  42 + -DCMAKE_INSTALL_PREFIX=./install \
  43 + -DCMAKE_BUILD_TYPE=Release \
  44 + -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \
  45 + \
  46 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  47 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  48 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  49 + -DBUILD_SHARED_LIBS=OFF \
  50 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  51 + -DSHERPA_ONNX_ENABLE_JNI=OFF \
  52 + -DSHERPA_ONNX_ENABLE_C_API=ON \
  53 + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
  54 + -DSHERPA_ONNX_ENABLE_GPU=OFF \
  55 + -DSHERPA_ONNX_ENABLE_WASM=ON \
  56 + -DSHERPA_ONNX_ENABLE_WASM_NODEJS=ON \
  57 + -DSHERPA_ONNX_ENABLE_BINARY=OFF \
  58 + -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
  59 + ..
  60 +make -j10
  61 +make install
  62 +
  63 +ls -lh install/bin/wasm/nodejs
1 node_modules 1 node_modules
2 lib 2 lib
3 package-lock.json 3 package-lock.json
  4 +*.tar.bz2
@@ -2,38 +2,18 @@ @@ -2,38 +2,18 @@
2 2
3 This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). 3 This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
4 4
5 -Before you continue, please first install the npm package `sherpa-onnx` by 5 +Before you continue, please first run
6 6
7 ```bash 7 ```bash
8 -npm install sherpa-onnx 8 +cd ./nodejs-examples
  9 +
  10 +npm i
9 ``` 11 ```
10 12
11 In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) 13 In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)
12 for text-to-speech and speech-to-text. 14 for text-to-speech and speech-to-text.
13 15
14 -**Caution**: If you get the following error:  
15 -```  
16 -/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67  
17 - if (match = err.match(/^(([^ \t()])+\.so([^ \t:()])*):([ \t])*/)) {  
18 - ^  
19 -  
20 -TypeError: Cannot read properties of null (reading 'match')  
21 - at new DynamicLibrary (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67:21)  
22 - at Object.Library (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/library.js:47:10)  
23 - at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/sherpa-onnx3/index.js:268:28)  
24 - at Module._compile (node:internal/modules/cjs/loader:1376:14)  
25 - at Module._extensions..js (node:internal/modules/cjs/loader:1435:10)  
26 - at Module.load (node:internal/modules/cjs/loader:1207:32)  
27 - at Module._load (node:internal/modules/cjs/loader:1023:12)  
28 - at Module.require (node:internal/modules/cjs/loader:1235:19)  
29 - at require (node:internal/modules/helpers:176:18)  
30 - at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/test-offline-tts-zh.js:3:21)  
31 -```  
32 -  
33 -Please downgrade your node to version v13.14.0. See also  
34 -https://github.com/node-ffi-napi/node-ffi-napi/issues/244  
35 -and  
36 -https://github.com/node-ffi-napi/node-ffi-napi/issues/97 . 16 +Note: You need `Node >= 18`.
37 17
38 # Text-to-speech 18 # Text-to-speech
39 19
@@ -71,13 +51,7 @@ node ./test-offline-tts-zh.js @@ -71,13 +51,7 @@ node ./test-offline-tts-zh.js
71 # Speech-to-text 51 # Speech-to-text
72 52
73 In the following, we demonstrate how to decode files and how to perform 53 In the following, we demonstrate how to decode files and how to perform
74 -speech recognition with a microphone with `nodejs`. We need to install two additional  
75 -npm packages:  
76 -  
77 -  
78 -```bash  
79 -npm install wav naudiodon2  
80 -``` 54 +speech recognition with a microphone with `nodejs`.
81 55
82 ## ./test-offline-nemo-ctc.js 56 ## ./test-offline-nemo-ctc.js
83 57
@@ -200,60 +174,3 @@ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherp @@ -200,60 +174,3 @@ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherp
200 tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 174 tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
201 node ./test-online-zipformer2-ctc.js 175 node ./test-online-zipformer2-ctc.js
202 ``` 176 ```
203 -  
204 -## ./test-vad-microphone-offline-paraformer.js  
205 -  
206 -[./test-vad-microphone-offline-paraformer.js](./test-vad-microphone-offline-paraformer.js)  
207 -demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)  
208 -with non-streaming Paraformer for speech recognition from microphone.  
209 -  
210 -You can use the following command to run it:  
211 -  
212 -```bash  
213 -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx  
214 -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2  
215 -tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2  
216 -node ./test-vad-microphone-offline-paraformer.js  
217 -```  
218 -  
219 -## ./test-vad-microphone-offline-transducer.js  
220 -  
221 -[./test-vad-microphone-offline-transducer.js](./test-vad-microphone-offline-transducer.js)  
222 -demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)  
223 -with a non-streaming transducer model for speech recognition from microphone.  
224 -  
225 -You can use the following command to run it:  
226 -  
227 -```bash  
228 -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx  
229 -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2  
230 -tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2  
231 -node ./test-vad-microphone-offline-transducer.js  
232 -```  
233 -  
234 -## ./test-vad-microphone-offline-whisper.js  
235 -  
236 -[./test-vad-microphone-offline-whisper.js](./test-vad-microphone-offline-whisper.js)  
237 -demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)  
238 -with whisper for speech recognition from microphone.  
239 -  
240 -You can use the following command to run it:  
241 -  
242 -```bash  
243 -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx  
244 -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2  
245 -tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2  
246 -node ./test-vad-microphone-offline-whisper.js  
247 -```  
248 -  
249 -## ./test-vad-microphone.js  
250 -  
251 -[./test-vad-microphone.js](./test-vad-microphone.js)  
252 -demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad).  
253 -  
254 -You can use the following command to run it:  
255 -  
256 -```bash  
257 -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx  
258 -node ./test-vad-microphone.js  
259 -```  
1 { 1 {
2 "dependencies": { 2 "dependencies": {
3 "naudiodon2": "^2.4.0", 3 "naudiodon2": "^2.4.0",
4 - "sherpa-onnx": "^1.8.12", 4 + "sherpa-onnx": "*",
5 "wav": "^1.0.2" 5 "wav": "^1.0.2"
6 } 6 }
7 } 7 }
1 -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 1 +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
2 // 2 //
3 const fs = require('fs'); 3 const fs = require('fs');
4 const {Readable} = require('stream'); 4 const {Readable} = require('stream');
@@ -6,32 +6,58 @@ const wav = require('wav'); @@ -6,32 +6,58 @@ const wav = require('wav');
6 6
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 -function createRecognizer() {  
10 - const featConfig = new sherpa_onnx.FeatureConfig();  
11 - featConfig.sampleRate = 16000;  
12 - featConfig.featureDim = 80;  
13 -  
14 - // test online recognizer  
15 - const nemoCtc = new sherpa_onnx.OfflineNemoEncDecCtcModelConfig();  
16 - nemoCtc.model = './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx';  
17 - const tokens = './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt';  
18 -  
19 - const modelConfig = new sherpa_onnx.OfflineModelConfig();  
20 - modelConfig.nemoCtc = nemoCtc;  
21 - modelConfig.tokens = tokens;  
22 - modelConfig.modelType = 'nemo_ctc';  
23 -  
24 - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();  
25 - recognizerConfig.featConfig = featConfig;  
26 - recognizerConfig.modelConfig = modelConfig;  
27 - recognizerConfig.decodingMethod = 'greedy_search';  
28 -  
29 - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);  
30 - return recognizer; 9 +function createOfflineRecognizer() {
  10 + let featConfig = {
  11 + sampleRate: 16000,
  12 + featureDim: 80,
  13 + };
  14 +
  15 + let modelConfig = {
  16 + transducer: {
  17 + encoder: '',
  18 + decoder: '',
  19 + joiner: '',
  20 + },
  21 + paraformer: {
  22 + model: '',
  23 + },
  24 + nemoCtc: {
  25 + model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
  26 + },
  27 + whisper: {
  28 + encoder: '',
  29 + decoder: '',
  30 + },
  31 + tdnn: {
  32 + model: '',
  33 + },
  34 + tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
  35 + numThreads: 1,
  36 + debug: 0,
  37 + provider: 'cpu',
  38 + modelType: 'nemo_ctc',
  39 + };
  40 +
  41 + let lmConfig = {
  42 + model: '',
  43 + scale: 1.0,
  44 + };
  45 +
  46 + let config = {
  47 + featConfig: featConfig,
  48 + modelConfig: modelConfig,
  49 + lmConfig: lmConfig,
  50 + decodingMethod: 'greedy_search',
  51 + maxActivePaths: 4,
  52 + hotwordsFile: '',
  53 + hotwordsScore: 1.5,
  54 + };
  55 +
  56 + return sherpa_onnx.createOfflineRecognizer(config);
31 } 57 }
32 58
33 -recognizer = createRecognizer();  
34 -stream = recognizer.createStream(); 59 +const recognizer = createOfflineRecognizer();
  60 +const stream = recognizer.createStream();
35 61
36 const waveFilename = 62 const waveFilename =
37 './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav'; 63 './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';
@@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {highWaterMark: 4096}) @@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {highWaterMark: 4096})
72 98
73 stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); 99 stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
74 recognizer.decode(stream); 100 recognizer.decode(stream);
75 - const r = recognizer.getResult(stream);  
76 - console.log(r.text); 101 + const text = recognizer.getResult(stream);
  102 + console.log(text);
77 103
78 stream.free(); 104 stream.free();
79 recognizer.free(); 105 recognizer.free();
@@ -6,32 +6,59 @@ const wav = require('wav'); @@ -6,32 +6,59 @@ const wav = require('wav');
6 6
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 -function createRecognizer() {  
10 - const featConfig = new sherpa_onnx.FeatureConfig();  
11 - featConfig.sampleRate = 16000;  
12 - featConfig.featureDim = 80;  
13 -  
14 - // test online recognizer  
15 - const paraformer = new sherpa_onnx.OfflineParaformerModelConfig();  
16 - paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.onnx';  
17 - const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt';  
18 -  
19 - const modelConfig = new sherpa_onnx.OfflineModelConfig();  
20 - modelConfig.paraformer = paraformer;  
21 - modelConfig.tokens = tokens;  
22 - modelConfig.modelType = 'paraformer';  
23 -  
24 - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();  
25 - recognizerConfig.featConfig = featConfig;  
26 - recognizerConfig.modelConfig = modelConfig;  
27 - recognizerConfig.decodingMethod = 'greedy_search';  
28 -  
29 - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);  
30 - return recognizer; 9 +function createOfflineRecognizer() {
  10 + let featConfig = {
  11 + sampleRate: 16000,
  12 + featureDim: 80,
  13 + };
  14 +
  15 + let modelConfig = {
  16 + transducer: {
  17 + encoder: '',
  18 + decoder: '',
  19 + joiner: '',
  20 + },
  21 + paraformer: {
  22 + model: './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx',
  23 + },
  24 + nemoCtc: {
  25 + model: '',
  26 + },
  27 + whisper: {
  28 + encoder: '',
  29 + decoder: '',
  30 + },
  31 + tdnn: {
  32 + model: '',
  33 + },
  34 + tokens: './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt',
  35 + numThreads: 1,
  36 + debug: 0,
  37 + provider: 'cpu',
  38 + modelType: 'paraformer',
  39 + };
  40 +
  41 + let lmConfig = {
  42 + model: '',
  43 + scale: 1.0,
  44 + };
  45 +
  46 + let config = {
  47 + featConfig: featConfig,
  48 + modelConfig: modelConfig,
  49 + lmConfig: lmConfig,
  50 + decodingMethod: 'greedy_search',
  51 + maxActivePaths: 4,
  52 + hotwordsFile: '',
  53 + hotwordsScore: 1.5,
  54 + };
  55 +
  56 + return sherpa_onnx.createOfflineRecognizer(config);
31 } 57 }
32 58
33 -recognizer = createRecognizer();  
34 -stream = recognizer.createStream(); 59 +
  60 +const recognizer = createOfflineRecognizer();
  61 +const stream = recognizer.createStream();
35 62
36 const waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav'; 63 const waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav';
37 64
@@ -71,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096}) @@ -71,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096})
71 98
72 stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); 99 stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
73 recognizer.decode(stream); 100 recognizer.decode(stream);
74 - const r = recognizer.getResult(stream);  
75 - console.log(r.text); 101 + const text = recognizer.getResult(stream);
  102 + console.log(text);
76 103
77 stream.free(); 104 stream.free();
78 recognizer.free(); 105 recognizer.free();
@@ -6,37 +6,60 @@ const wav = require('wav'); @@ -6,37 +6,60 @@ const wav = require('wav');
6 6
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 -function createRecognizer() {  
10 - const featConfig = new sherpa_onnx.FeatureConfig();  
11 - featConfig.sampleRate = 16000;  
12 - featConfig.featureDim = 80;  
13 -  
14 - // test online recognizer  
15 - const transducer = new sherpa_onnx.OfflineTransducerModelConfig();  
16 - transducer.encoder =  
17 - './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx';  
18 - transducer.decoder =  
19 - './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx';  
20 - transducer.joiner =  
21 - './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx';  
22 - const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt';  
23 -  
24 - const modelConfig = new sherpa_onnx.OfflineModelConfig();  
25 - modelConfig.transducer = transducer;  
26 - modelConfig.tokens = tokens;  
27 - modelConfig.modelType = 'transducer';  
28 -  
29 - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();  
30 - recognizerConfig.featConfig = featConfig;  
31 - recognizerConfig.modelConfig = modelConfig;  
32 - recognizerConfig.decodingMethod = 'greedy_search';  
33 -  
34 - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);  
35 - return recognizer; 9 +function createOfflineRecognizer() {
  10 + let featConfig = {
  11 + sampleRate: 16000,
  12 + featureDim: 80,
  13 + };
  14 +
  15 + let modelConfig = {
  16 + transducer: {
  17 + encoder:
  18 + './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.int8.onnx',
  19 + decoder:
  20 + './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx',
  21 + joiner:
  22 + './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
  23 + },
  24 + paraformer: {
  25 + model: '',
  26 + },
  27 + nemoCtc: {
  28 + model: '',
  29 + },
  30 + whisper: {
  31 + encoder: '',
  32 + decoder: '',
  33 + },
  34 + tdnn: {
  35 + model: '',
  36 + },
  37 + tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
  38 + numThreads: 1,
  39 + debug: 0,
  40 + provider: 'cpu',
  41 + modelType: 'transducer',
  42 + };
  43 +
  44 + let lmConfig = {
  45 + model: '',
  46 + scale: 1.0,
  47 + };
  48 +
  49 + let config = {
  50 + featConfig: featConfig,
  51 + modelConfig: modelConfig,
  52 + lmConfig: lmConfig,
  53 + decodingMethod: 'greedy_search',
  54 + maxActivePaths: 4,
  55 + hotwordsFile: '',
  56 + hotwordsScore: 1.5,
  57 + };
  58 +
  59 + return sherpa_onnx.createOfflineRecognizer(config);
36 } 60 }
37 -  
38 -recognizer = createRecognizer();  
39 -stream = recognizer.createStream(); 61 +const recognizer = createOfflineRecognizer();
  62 +const stream = recognizer.createStream();
40 63
41 const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav'; 64 const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
42 65
@@ -76,8 +99,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096}) @@ -76,8 +99,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096})
76 99
77 stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); 100 stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
78 recognizer.decode(stream); 101 recognizer.decode(stream);
79 - const r = recognizer.getResult(stream);  
80 - console.log(r.text); 102 + const text = recognizer.getResult(stream);
  103 + console.log(text);
81 104
82 stream.free(); 105 stream.free();
83 recognizer.free(); 106 recognizer.free();
1 -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 1 +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
2 2
3 const sherpa_onnx = require('sherpa-onnx'); 3 const sherpa_onnx = require('sherpa-onnx');
4 4
5 function createOfflineTts() { 5 function createOfflineTts() {
6 - const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();  
7 - vits.model = 'vits-piper-en_US-amy-low/en_US-amy-low.onnx'  
8 - vits.tokens = './vits-piper-en_US-amy-low/tokens.txt';  
9 - vits.dataDir = './vits-piper-en_US-amy-low/espeak-ng-data' 6 + let offlineTtsVitsModelConfig = {
  7 + model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx',
  8 + lexicon: '',
  9 + tokens: './vits-piper-en_US-amy-low/tokens.txt',
  10 + dataDir: './vits-piper-en_US-amy-low/espeak-ng-data',
  11 + noiseScale: 0.667,
  12 + noiseScaleW: 0.8,
  13 + lengthScale: 1.0,
  14 + };
  15 + let offlineTtsModelConfig = {
  16 + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
  17 + numThreads: 1,
  18 + debug: 1,
  19 + provider: 'cpu',
  20 + };
10 21
11 - const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();  
12 - modelConfig.vits = vits; 22 + let offlineTtsConfig = {
  23 + offlineTtsModelConfig: offlineTtsModelConfig,
  24 + ruleFsts: '',
  25 + maxNumSentences: 1,
  26 + };
13 27
14 - const config = new sherpa_onnx.OfflineTtsConfig();  
15 - config.model = modelConfig;  
16 -  
17 - return new sherpa_onnx.OfflineTts(config); 28 + return sherpa_onnx.createOfflineTts(offlineTtsConfig);
18 } 29 }
19 30
  31 +
20 const tts = createOfflineTts(); 32 const tts = createOfflineTts();
21 const speakerId = 0; 33 const speakerId = 0;
22 const speed = 1.0; 34 const speed = 1.0;
23 -const audio = tts.generate(  
24 - '“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”',  
25 - speakerId, speed);  
26 -audio.save('./test-en.wav'); 35 +const audio = tts.generate({
  36 + text:
  37 + '“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”',
  38 + sid: speakerId,
  39 + speed: speed
  40 +});
  41 +
  42 +tts.save('./test-en.wav', audio);
27 console.log('Saved to test-en.wav successfully.'); 43 console.log('Saved to test-en.wav successfully.');
  44 +
28 tts.free(); 45 tts.free();
@@ -3,25 +3,37 @@ @@ -3,25 +3,37 @@
3 const sherpa_onnx = require('sherpa-onnx'); 3 const sherpa_onnx = require('sherpa-onnx');
4 4
5 function createOfflineTts() { 5 function createOfflineTts() {
6 - const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();  
7 - vits.model = './vits-zh-aishell3/vits-aishell3.onnx';  
8 - vits.lexicon = './vits-zh-aishell3/lexicon.txt';  
9 - vits.tokens = './vits-zh-aishell3/tokens.txt'; 6 + let offlineTtsVitsModelConfig = {
  7 + model: './vits-zh-aishell3/vits-aishell3.onnx',
  8 + lexicon: './vits-zh-aishell3/lexicon.txt',
  9 + tokens: './vits-zh-aishell3/tokens.txt',
  10 + dataDir: '',
  11 + noiseScale: 0.667,
  12 + noiseScaleW: 0.8,
  13 + lengthScale: 1.0,
  14 + };
  15 + let offlineTtsModelConfig = {
  16 + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
  17 + numThreads: 1,
  18 + debug: 1,
  19 + provider: 'cpu',
  20 + };
10 21
11 - const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();  
12 - modelConfig.vits = vits; 22 + let offlineTtsConfig = {
  23 + offlineTtsModelConfig: offlineTtsModelConfig,
  24 + ruleFsts: './vits-zh-aishell3/rule.fst',
  25 + maxNumSentences: 1,
  26 + };
13 27
14 - const config = new sherpa_onnx.OfflineTtsConfig();  
15 - config.model = modelConfig;  
16 - config.ruleFsts = './vits-zh-aishell3/rule.fst';  
17 -  
18 - return new sherpa_onnx.OfflineTts(config); 28 + return sherpa_onnx.createOfflineTts(offlineTtsConfig);
19 } 29 }
20 30
  31 +
21 const tts = createOfflineTts(); 32 const tts = createOfflineTts();
22 const speakerId = 66; 33 const speakerId = 66;
23 const speed = 1.0; 34 const speed = 1.0;
24 -const audio = tts.generate('3年前中国总人口是1411778724人', speakerId, speed);  
25 -audio.save('./test-zh.wav'); 35 +const audio = tts.generate(
  36 + {text: '3年前中国总人口是1411778724人', sid: speakerId, speed: speed});
  37 +tts.save('./test-zh.wav', audio);
26 console.log('Saved to test-zh.wav successfully.'); 38 console.log('Saved to test-zh.wav successfully.');
27 tts.free(); 39 tts.free();
@@ -6,32 +6,58 @@ const wav = require('wav'); @@ -6,32 +6,58 @@ const wav = require('wav');
6 6
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 -function createRecognizer() {  
10 - const featConfig = new sherpa_onnx.FeatureConfig();  
11 - featConfig.sampleRate = 16000;  
12 - featConfig.featureDim = 80;  
13 -  
14 - // test online recognizer  
15 - const whisper = new sherpa_onnx.OfflineWhisperModelConfig();  
16 - whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';  
17 - whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';  
18 - const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';  
19 -  
20 - const modelConfig = new sherpa_onnx.OfflineModelConfig();  
21 - modelConfig.whisper = whisper;  
22 - modelConfig.tokens = tokens;  
23 - modelConfig.modelType = 'whisper';  
24 -  
25 - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();  
26 - recognizerConfig.featConfig = featConfig;  
27 - recognizerConfig.modelConfig = modelConfig;  
28 - recognizerConfig.decodingMethod = 'greedy_search';  
29 -  
30 - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);  
31 - return recognizer; 9 +function createOfflineRecognizer() {
  10 + let featConfig = {
  11 + sampleRate: 16000,
  12 + featureDim: 80,
  13 + };
  14 +
  15 + let modelConfig = {
  16 + transducer: {
  17 + encoder: '',
  18 + decoder: '',
  19 + joiner: '',
  20 + },
  21 + paraformer: {
  22 + model: '',
  23 + },
  24 + nemoCtc: {
  25 + model: '',
  26 + },
  27 + whisper: {
  28 + encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
  29 + decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
  30 + },
  31 + tdnn: {
  32 + model: '',
  33 + },
  34 + tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
  35 + numThreads: 1,
  36 + debug: 0,
  37 + provider: 'cpu',
  38 + modelType: 'whisper',
  39 + };
  40 +
  41 + let lmConfig = {
  42 + model: '',
  43 + scale: 1.0,
  44 + };
  45 +
  46 + let config = {
  47 + featConfig: featConfig,
  48 + modelConfig: modelConfig,
  49 + lmConfig: lmConfig,
  50 + decodingMethod: 'greedy_search',
  51 + maxActivePaths: 4,
  52 + hotwordsFile: '',
  53 + hotwordsScore: 1.5,
  54 + };
  55 +
  56 + return sherpa_onnx.createOfflineRecognizer(config);
32 } 57 }
33 58
34 -recognizer = createRecognizer(); 59 +
  60 +recognizer = createOfflineRecognizer();
35 stream = recognizer.createStream(); 61 stream = recognizer.createStream();
36 62
37 const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav'; 63 const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
@@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096}) @@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096})
72 98
73 stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); 99 stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
74 recognizer.decode(stream); 100 recognizer.decode(stream);
75 - const r = recognizer.getResult(stream);  
76 - console.log(r.text); 101 + const text = recognizer.getResult(stream);
  102 + console.log(text);
77 103
78 stream.free(); 104 stream.free();
79 recognizer.free(); 105 recognizer.free();
@@ -5,37 +5,58 @@ console.log(portAudio.getDevices()); @@ -5,37 +5,58 @@ console.log(portAudio.getDevices());
5 5
6 const sherpa_onnx = require('sherpa-onnx'); 6 const sherpa_onnx = require('sherpa-onnx');
7 7
8 -function createRecognizer() {  
9 - const featConfig = new sherpa_onnx.FeatureConfig();  
10 - featConfig.sampleRate = 16000;  
11 - featConfig.featureDim = 80;  
12 -  
13 - const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();  
14 - paraformer.encoder =  
15 - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx';  
16 - paraformer.decoder =  
17 - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx';  
18 - const tokens =  
19 - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';  
20 -  
21 - const modelConfig = new sherpa_onnx.OnlineModelConfig();  
22 - modelConfig.paraformer = paraformer;  
23 - modelConfig.tokens = tokens;  
24 - modelConfig.modelType = 'paraformer';  
25 -  
26 - const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();  
27 - recognizerConfig.featConfig = featConfig;  
28 - recognizerConfig.modelConfig = modelConfig;  
29 - recognizerConfig.decodingMethod = 'greedy_search';  
30 - recognizerConfig.enableEndpoint = 1;  
31 -  
32 - const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);  
33 - return recognizer; 8 +function createOnlineRecognizer() {
  9 + let onlineTransducerModelConfig = {
  10 + encoder: '',
  11 + decoder: '',
  12 + joiner: '',
  13 + };
  14 +
  15 + let onlineParaformerModelConfig = {
  16 + encoder:
  17 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
  18 + decoder:
  19 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
  20 + };
  21 +
  22 + let onlineZipformer2CtcModelConfig = {
  23 + model: '',
  24 + };
  25 +
  26 + let onlineModelConfig = {
  27 + transducer: onlineTransducerModelConfig,
  28 + paraformer: onlineParaformerModelConfig,
  29 + zipformer2Ctc: onlineZipformer2CtcModelConfig,
  30 + tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
  31 + numThreads: 1,
  32 + provider: 'cpu',
  33 + debug: 1,
  34 + modelType: 'paraformer',
  35 + };
  36 +
  37 + let featureConfig = {
  38 + sampleRate: 16000,
  39 + featureDim: 80,
  40 + };
  41 +
  42 + let recognizerConfig = {
  43 + featConfig: featureConfig,
  44 + modelConfig: onlineModelConfig,
  45 + decodingMethod: 'greedy_search',
  46 + maxActivePaths: 4,
  47 + enableEndpoint: 1,
  48 + rule1MinTrailingSilence: 2.4,
  49 + rule2MinTrailingSilence: 1.2,
  50 + rule3MinUtteranceLength: 20,
  51 + hotwordsFile: '',
  52 + hotwordsScore: 1.5,
  53 + };
  54 +
  55 + return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
34 } 56 }
35 -recognizer = createRecognizer();  
36 -stream = recognizer.createStream();  
37 57
38 -display = new sherpa_onnx.Display(50); 58 +const recognizer = createOnlineRecognizer();
  59 +const stream = recognizer.createStream();
39 60
40 let lastText = ''; 61 let lastText = '';
41 let segmentIndex = 0; 62 let segmentIndex = 0;
@@ -61,11 +82,11 @@ ai.on('data', data => { @@ -61,11 +82,11 @@ ai.on('data', data => {
61 } 82 }
62 83
63 const isEndpoint = recognizer.isEndpoint(stream); 84 const isEndpoint = recognizer.isEndpoint(stream);
64 - const text = recognizer.getResult(stream).text; 85 + const text = recognizer.getResult(stream);
65 86
66 if (text.length > 0 && lastText != text) { 87 if (text.length > 0 && lastText != text) {
67 lastText = text; 88 lastText = text;
68 - display.print(segmentIndex, lastText); 89 + console.log(segmentIndex, lastText);
69 } 90 }
70 if (isEndpoint) { 91 if (isEndpoint) {
71 if (text.length > 0) { 92 if (text.length > 0) {
@@ -6,34 +6,58 @@ const wav = require('wav'); @@ -6,34 +6,58 @@ const wav = require('wav');
6 6
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 -function createRecognizer() {  
10 - const featConfig = new sherpa_onnx.FeatureConfig();  
11 - featConfig.sampleRate = 16000;  
12 - featConfig.featureDim = 80;  
13 -  
14 - const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();  
15 - paraformer.encoder =  
16 - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx';  
17 - paraformer.decoder =  
18 - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx';  
19 - const tokens =  
20 - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';  
21 -  
22 - const modelConfig = new sherpa_onnx.OnlineModelConfig();  
23 - modelConfig.paraformer = paraformer;  
24 - modelConfig.tokens = tokens;  
25 - modelConfig.modelType = 'paraformer';  
26 -  
27 - const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();  
28 - recognizerConfig.featConfig = featConfig;  
29 - recognizerConfig.modelConfig = modelConfig;  
30 - recognizerConfig.decodingMethod = 'greedy_search';  
31 -  
32 - const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);  
33 - return recognizer; 9 +function createOnlineRecognizer() {
  10 + let onlineTransducerModelConfig = {
  11 + encoder: '',
  12 + decoder: '',
  13 + joiner: '',
  14 + };
  15 +
  16 + let onlineParaformerModelConfig = {
  17 + encoder:
  18 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
  19 + decoder:
  20 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
  21 + };
  22 +
  23 + let onlineZipformer2CtcModelConfig = {
  24 + model: '',
  25 + };
  26 +
  27 + let onlineModelConfig = {
  28 + transducer: onlineTransducerModelConfig,
  29 + paraformer: onlineParaformerModelConfig,
  30 + zipformer2Ctc: onlineZipformer2CtcModelConfig,
  31 + tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
  32 + numThreads: 1,
  33 + provider: 'cpu',
  34 + debug: 1,
  35 + modelType: 'paraformer',
  36 + };
  37 +
  38 + let featureConfig = {
  39 + sampleRate: 16000,
  40 + featureDim: 80,
  41 + };
  42 +
  43 + let recognizerConfig = {
  44 + featConfig: featureConfig,
  45 + modelConfig: onlineModelConfig,
  46 + decodingMethod: 'greedy_search',
  47 + maxActivePaths: 4,
  48 + enableEndpoint: 1,
  49 + rule1MinTrailingSilence: 2.4,
  50 + rule2MinTrailingSilence: 1.2,
  51 + rule3MinUtteranceLength: 20,
  52 + hotwordsFile: '',
  53 + hotwordsScore: 1.5,
  54 + };
  55 +
  56 + return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
34 } 57 }
35 -recognizer = createRecognizer();  
36 -stream = recognizer.createStream(); 58 +
  59 +const recognizer = createOnlineRecognizer();
  60 +const stream = recognizer.createStream();
37 61
38 const waveFilename = 62 const waveFilename =
39 './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav'; 63 './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav';
@@ -47,8 +71,8 @@ function decode(samples) { @@ -47,8 +71,8 @@ function decode(samples) {
47 while (recognizer.isReady(stream)) { 71 while (recognizer.isReady(stream)) {
48 recognizer.decode(stream); 72 recognizer.decode(stream);
49 } 73 }
50 - const r = recognizer.getResult(stream);  
51 - console.log(r.text); 74 + const text = recognizer.getResult(stream);
  75 + console.log(text);
52 } 76 }
53 77
54 reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { 78 reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
@@ -5,39 +5,60 @@ const portAudio = require('naudiodon2'); @@ -5,39 +5,60 @@ const portAudio = require('naudiodon2');
5 5
6 const sherpa_onnx = require('sherpa-onnx'); 6 const sherpa_onnx = require('sherpa-onnx');
7 7
8 -function createRecognizer() {  
9 - const featConfig = new sherpa_onnx.FeatureConfig();  
10 - featConfig.sampleRate = 16000;  
11 - featConfig.featureDim = 80;  
12 -  
13 - // test online recognizer  
14 - const transducer = new sherpa_onnx.OnlineTransducerModelConfig();  
15 - transducer.encoder =  
16 - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';  
17 - transducer.decoder =  
18 - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';  
19 - transducer.joiner =  
20 - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx';  
21 - const tokens =  
22 - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';  
23 -  
24 - const modelConfig = new sherpa_onnx.OnlineModelConfig();  
25 - modelConfig.transducer = transducer;  
26 - modelConfig.tokens = tokens;  
27 - modelConfig.modelType = 'zipformer';  
28 -  
29 - const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();  
30 - recognizerConfig.featConfig = featConfig;  
31 - recognizerConfig.modelConfig = modelConfig;  
32 - recognizerConfig.decodingMethod = 'greedy_search';  
33 - recognizerConfig.enableEndpoint = 1;  
34 -  
35 - const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);  
36 - return recognizer; 8 +function createOnlineRecognizer() {
  9 + let onlineTransducerModelConfig = {
  10 + encoder:
  11 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
  12 + decoder:
  13 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
  14 + joiner:
  15 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
  16 + };
  17 +
  18 + let onlineParaformerModelConfig = {
  19 + encoder: '',
  20 + decoder: '',
  21 + };
  22 +
  23 + let onlineZipformer2CtcModelConfig = {
  24 + model: '',
  25 + };
  26 +
  27 + let onlineModelConfig = {
  28 + transducer: onlineTransducerModelConfig,
  29 + paraformer: onlineParaformerModelConfig,
  30 + zipformer2Ctc: onlineZipformer2CtcModelConfig,
  31 + tokens:
  32 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
  33 + numThreads: 1,
  34 + provider: 'cpu',
  35 + debug: 1,
  36 + modelType: 'zipformer',
  37 + };
  38 +
  39 + let featureConfig = {
  40 + sampleRate: 16000,
  41 + featureDim: 80,
  42 + };
  43 +
  44 + let recognizerConfig = {
  45 + featConfig: featureConfig,
  46 + modelConfig: onlineModelConfig,
  47 + decodingMethod: 'greedy_search',
  48 + maxActivePaths: 4,
  49 + enableEndpoint: 1,
  50 + rule1MinTrailingSilence: 2.4,
  51 + rule2MinTrailingSilence: 1.2,
  52 + rule3MinUtteranceLength: 20,
  53 + hotwordsFile: '',
  54 + hotwordsScore: 1.5,
  55 + };
  56 +
  57 + return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
37 } 58 }
38 -recognizer = createRecognizer();  
39 -stream = recognizer.createStream();  
40 -display = new sherpa_onnx.Display(50); 59 +
  60 +const recognizer = createOnlineRecognizer();
  61 +const stream = recognizer.createStream();
41 62
42 let lastText = ''; 63 let lastText = '';
43 let segmentIndex = 0; 64 let segmentIndex = 0;
@@ -63,11 +84,11 @@ ai.on('data', data => { @@ -63,11 +84,11 @@ ai.on('data', data => {
63 } 84 }
64 85
65 const isEndpoint = recognizer.isEndpoint(stream); 86 const isEndpoint = recognizer.isEndpoint(stream);
66 - const text = recognizer.getResult(stream).text; 87 + const text = recognizer.getResult(stream);
67 88
68 if (text.length > 0 && lastText != text) { 89 if (text.length > 0 && lastText != text) {
69 lastText = text; 90 lastText = text;
70 - display.print(segmentIndex, lastText); 91 + console.log(segmentIndex, lastText);
71 } 92 }
72 if (isEndpoint) { 93 if (isEndpoint) {
73 if (text.length > 0) { 94 if (text.length > 0) {
@@ -6,37 +6,60 @@ const wav = require('wav'); @@ -6,37 +6,60 @@ const wav = require('wav');
6 6
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 -function createRecognizer() {  
10 - const featConfig = new sherpa_onnx.FeatureConfig();  
11 - featConfig.sampleRate = 16000;  
12 - featConfig.featureDim = 80;  
13 -  
14 - // test online recognizer  
15 - const transducer = new sherpa_onnx.OnlineTransducerModelConfig();  
16 - transducer.encoder =  
17 - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';  
18 - transducer.decoder =  
19 - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';  
20 - transducer.joiner =  
21 - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx';  
22 - const tokens =  
23 - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';  
24 -  
25 - const modelConfig = new sherpa_onnx.OnlineModelConfig();  
26 - modelConfig.transducer = transducer;  
27 - modelConfig.tokens = tokens;  
28 - modelConfig.modelType = 'zipformer';  
29 -  
30 - const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();  
31 - recognizerConfig.featConfig = featConfig;  
32 - recognizerConfig.modelConfig = modelConfig;  
33 - recognizerConfig.decodingMethod = 'greedy_search';  
34 -  
35 - recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);  
36 - return recognizer; 9 +function createOnlineRecognizer() {
  10 + let onlineTransducerModelConfig = {
  11 + encoder:
  12 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
  13 + decoder:
  14 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
  15 + joiner:
  16 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
  17 + };
  18 +
  19 + let onlineParaformerModelConfig = {
  20 + encoder: '',
  21 + decoder: '',
  22 + };
  23 +
  24 + let onlineZipformer2CtcModelConfig = {
  25 + model: '',
  26 + };
  27 +
  28 + let onlineModelConfig = {
  29 + transducer: onlineTransducerModelConfig,
  30 + paraformer: onlineParaformerModelConfig,
  31 + zipformer2Ctc: onlineZipformer2CtcModelConfig,
  32 + tokens:
  33 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
  34 + numThreads: 1,
  35 + provider: 'cpu',
  36 + debug: 1,
  37 + modelType: 'zipformer',
  38 + };
  39 +
  40 + let featureConfig = {
  41 + sampleRate: 16000,
  42 + featureDim: 80,
  43 + };
  44 +
  45 + let recognizerConfig = {
  46 + featConfig: featureConfig,
  47 + modelConfig: onlineModelConfig,
  48 + decodingMethod: 'greedy_search',
  49 + maxActivePaths: 4,
  50 + enableEndpoint: 1,
  51 + rule1MinTrailingSilence: 2.4,
  52 + rule2MinTrailingSilence: 1.2,
  53 + rule3MinUtteranceLength: 20,
  54 + hotwordsFile: '',
  55 + hotwordsScore: 1.5,
  56 + };
  57 +
  58 + return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
37 } 59 }
38 -recognizer = createRecognizer();  
39 -stream = recognizer.createStream(); 60 +
  61 +const recognizer = createOnlineRecognizer();
  62 +const stream = recognizer.createStream();
40 63
41 const waveFilename = 64 const waveFilename =
42 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav'; 65 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';
@@ -50,8 +73,8 @@ function decode(samples) { @@ -50,8 +73,8 @@ function decode(samples) {
50 while (recognizer.isReady(stream)) { 73 while (recognizer.isReady(stream)) {
51 recognizer.decode(stream); 74 recognizer.decode(stream);
52 } 75 }
53 - const r = recognizer.getResult(stream);  
54 - console.log(r.text); 76 + const text = recognizer.getResult(stream);
  77 + console.log(text);
55 } 78 }
56 79
57 reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { 80 reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
@@ -6,32 +6,58 @@ const wav = require('wav'); @@ -6,32 +6,58 @@ const wav = require('wav');
6 6
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 -function createRecognizer() {  
10 - const featConfig = new sherpa_onnx.FeatureConfig();  
11 - featConfig.sampleRate = 16000;  
12 - featConfig.featureDim = 80;  
13 -  
14 - // test online recognizer  
15 - const zipformer2Ctc = new sherpa_onnx.OnlineZipformer2CtcModelConfig();  
16 - zipformer2Ctc.model =  
17 - './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx';  
18 - const tokens =  
19 - './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt';  
20 -  
21 - const modelConfig = new sherpa_onnx.OnlineModelConfig();  
22 - modelConfig.zipformer2Ctc = zipformer2Ctc;  
23 - modelConfig.tokens = tokens;  
24 -  
25 - const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();  
26 - recognizerConfig.featConfig = featConfig;  
27 - recognizerConfig.modelConfig = modelConfig;  
28 - recognizerConfig.decodingMethod = 'greedy_search';  
29 -  
30 - recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);  
31 - return recognizer; 9 +function createOnlineRecognizer() {
  10 + let onlineTransducerModelConfig = {
  11 + encoder: '',
  12 + decoder: '',
  13 + joiner: '',
  14 + };
  15 +
  16 + let onlineParaformerModelConfig = {
  17 + encoder: '',
  18 + decoder: '',
  19 + };
  20 +
  21 + let onlineZipformer2CtcModelConfig = {
  22 + model:
  23 + './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx',
  24 + };
  25 +
  26 + let onlineModelConfig = {
  27 + transducer: onlineTransducerModelConfig,
  28 + paraformer: onlineParaformerModelConfig,
  29 + zipformer2Ctc: onlineZipformer2CtcModelConfig,
  30 + tokens:
  31 + './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt',
  32 + numThreads: 1,
  33 + provider: 'cpu',
  34 + debug: 1,
  35 + modelType: '',
  36 + };
  37 +
  38 + let featureConfig = {
  39 + sampleRate: 16000,
  40 + featureDim: 80,
  41 + };
  42 +
  43 + let recognizerConfig = {
  44 + featConfig: featureConfig,
  45 + modelConfig: onlineModelConfig,
  46 + decodingMethod: 'greedy_search',
  47 + maxActivePaths: 4,
  48 + enableEndpoint: 1,
  49 + rule1MinTrailingSilence: 2.4,
  50 + rule2MinTrailingSilence: 1.2,
  51 + rule3MinUtteranceLength: 20,
  52 + hotwordsFile: '',
  53 + hotwordsScore: 1.5,
  54 + };
  55 +
  56 + return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
32 } 57 }
33 -recognizer = createRecognizer();  
34 -stream = recognizer.createStream(); 58 +
  59 +const recognizer = createOnlineRecognizer();
  60 +const stream = recognizer.createStream();
35 61
36 const waveFilename = 62 const waveFilename =
37 './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav'; 63 './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav';
@@ -45,8 +71,8 @@ function decode(samples) { @@ -45,8 +71,8 @@ function decode(samples) {
45 while (recognizer.isReady(stream)) { 71 while (recognizer.isReady(stream)) {
46 recognizer.decode(stream); 72 recognizer.decode(stream);
47 } 73 }
48 - const r = recognizer.getResult(stream);  
49 - console.log(r.text); 74 + const text = recognizer.getResult(stream);
  75 + console.log(text);
50 } 76 }
51 77
52 reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { 78 reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
1 -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)  
2 -//  
3 -const sherpa_onnx = require('sherpa-onnx3');  
4 -const portAudio = require('naudiodon2');  
5 -console.log(portAudio.getDevices());  
6 -  
7 -function createOfflineRecognizer() {  
8 - const featConfig = new sherpa_onnx.FeatureConfig();  
9 - featConfig.sampleRate = 16000;  
10 - featConfig.featureDim = 80;  
11 -  
12 - // test online recognizer  
13 - const paraformer = new sherpa_onnx.OfflineParaformerModelConfig();  
14 - paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx';  
15 - const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt';  
16 -  
17 - const modelConfig = new sherpa_onnx.OfflineModelConfig();  
18 - modelConfig.paraformer = paraformer;  
19 - modelConfig.tokens = tokens;  
20 - modelConfig.modelType = 'paraformer';  
21 -  
22 - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();  
23 - recognizerConfig.featConfig = featConfig;  
24 - recognizerConfig.modelConfig = modelConfig;  
25 - recognizerConfig.decodingMethod = 'greedy_search';  
26 -  
27 - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);  
28 - return recognizer  
29 -}  
30 -  
31 -function createVad() {  
32 - const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();  
33 - sileroVadModelConfig.model = './silero_vad.onnx';  
34 - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds  
35 - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds  
36 - sileroVadModelConfig.windowSize = 512;  
37 -  
38 - const vadModelConfig = new sherpa_onnx.VadModelConfig();  
39 - vadModelConfig.sileroVad = sileroVadModelConfig;  
40 - vadModelConfig.sampleRate = 16000;  
41 -  
42 - const bufferSizeInSeconds = 60;  
43 - const vad = new sherpa_onnx.VoiceActivityDetector(  
44 - vadModelConfig, bufferSizeInSeconds);  
45 - return vad;  
46 -}  
47 -  
48 -const recognizer = createOfflineRecognizer();  
49 -const vad = createVad();  
50 -  
51 -const bufferSizeInSeconds = 30;  
52 -const buffer =  
53 - new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);  
54 -  
55 -var ai = new portAudio.AudioIO({  
56 - inOptions: {  
57 - channelCount: 1,  
58 - sampleFormat: portAudio.SampleFormatFloat32,  
59 - sampleRate: vad.config.sampleRate,  
60 - deviceId: -1, // Use -1 or omit the deviceId to select the default device  
61 - closeOnError: true // Close the stream if an audio error is detected, if  
62 - // set false then just log the error  
63 - }  
64 -});  
65 -  
66 -let printed = false;  
67 -let index = 0;  
68 -ai.on('data', data => {  
69 - const windowSize = vad.config.sileroVad.windowSize;  
70 - buffer.push(new Float32Array(data.buffer));  
71 - while (buffer.size() > windowSize) {  
72 - const samples = buffer.get(buffer.head(), windowSize);  
73 - buffer.pop(windowSize);  
74 - vad.acceptWaveform(samples)  
75 - }  
76 -  
77 - while (!vad.isEmpty()) {  
78 - const segment = vad.front();  
79 - vad.pop();  
80 - const stream = recognizer.createStream();  
81 - stream.acceptWaveform(  
82 - recognizer.config.featConfig.sampleRate, segment.samples);  
83 - recognizer.decode(stream);  
84 - const r = recognizer.getResult(stream);  
85 - stream.free();  
86 - if (r.text.length > 0) {  
87 - console.log(`${index}: ${r.text}`);  
88 - index += 1;  
89 - }  
90 - }  
91 -});  
92 -  
93 -ai.on('close', () => {  
94 - console.log('Free resources');  
95 - recognizer.free();  
96 - vad.free();  
97 - buffer.free();  
98 -});  
99 -  
100 -ai.start();  
101 -console.log('Started! Please speak')  
1 -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)  
2 -//  
3 -const sherpa_onnx = require('sherpa-onnx');  
4 -const portAudio = require('naudiodon2');  
5 -console.log(portAudio.getDevices());  
6 -  
7 -function createOfflineRecognizer() {  
8 - const featConfig = new sherpa_onnx.FeatureConfig();  
9 - featConfig.sampleRate = 16000;  
10 - featConfig.featureDim = 80;  
11 -  
12 - // test online recognizer  
13 - const transducer = new sherpa_onnx.OfflineTransducerModelConfig();  
14 - transducer.encoder =  
15 - './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx';  
16 - transducer.decoder =  
17 - './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx';  
18 - transducer.joiner =  
19 - './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx';  
20 - const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt';  
21 -  
22 - const modelConfig = new sherpa_onnx.OfflineModelConfig();  
23 - modelConfig.transducer = transducer;  
24 - modelConfig.tokens = tokens;  
25 - modelConfig.modelType = 'transducer';  
26 -  
27 - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();  
28 - recognizerConfig.featConfig = featConfig;  
29 - recognizerConfig.modelConfig = modelConfig;  
30 - recognizerConfig.decodingMethod = 'greedy_search';  
31 -  
32 - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);  
33 - return recognizer;  
34 -}  
35 -  
36 -function createVad() {  
37 - const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();  
38 - sileroVadModelConfig.model = './silero_vad.onnx';  
39 - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds  
40 - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds  
41 - sileroVadModelConfig.windowSize = 512;  
42 -  
43 - const vadModelConfig = new sherpa_onnx.VadModelConfig();  
44 - vadModelConfig.sileroVad = sileroVadModelConfig;  
45 - vadModelConfig.sampleRate = 16000;  
46 -  
47 - const bufferSizeInSeconds = 60;  
48 - const vad = new sherpa_onnx.VoiceActivityDetector(  
49 - vadModelConfig, bufferSizeInSeconds);  
50 - return vad;  
51 -}  
52 -  
53 -const recognizer = createOfflineRecognizer();  
54 -const vad = createVad();  
55 -  
56 -const bufferSizeInSeconds = 30;  
57 -const buffer =  
58 - new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);  
59 -  
60 -const ai = new portAudio.AudioIO({  
61 - inOptions: {  
62 - channelCount: 1,  
63 - closeOnError: true, // Close the stream if an audio error is detected, if  
64 - // set false then just log the error  
65 - deviceId: -1, // Use -1 or omit the deviceId to select the default device  
66 - sampleFormat: portAudio.SampleFormatFloat32,  
67 - sampleRate: vad.config.sampleRate  
68 - }  
69 -});  
70 -  
71 -let printed = false;  
72 -let index = 0;  
73 -ai.on('data', data => {  
74 - const windowSize = vad.config.sileroVad.windowSize;  
75 - buffer.push(new Float32Array(data.buffer));  
76 - while (buffer.size() > windowSize) {  
77 - const samples = buffer.get(buffer.head(), windowSize);  
78 - buffer.pop(windowSize);  
79 - vad.acceptWaveform(samples)  
80 - }  
81 -  
82 - while (!vad.isEmpty()) {  
83 - const segment = vad.front();  
84 - vad.pop();  
85 - const stream = recognizer.createStream();  
86 - stream.acceptWaveform(  
87 - recognizer.config.featConfig.sampleRate, segment.samples);  
88 - recognizer.decode(stream);  
89 - const r = recognizer.getResult(stream);  
90 - stream.free();  
91 - if (r.text.length > 0) {  
92 - console.log(`${index}: ${r.text}`);  
93 - index += 1;  
94 - }  
95 - }  
96 -});  
97 -  
98 -ai.on('close', () => {  
99 - console.log('Free resources');  
100 - recognizer.free();  
101 - vad.free();  
102 - buffer.free();  
103 -});  
104 -  
105 -ai.start();  
106 -console.log('Started! Please speak')  
1 -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)  
2 -//  
3 -const sherpa_onnx = require('sherpa-onnx');  
4 -const portAudio = require('naudiodon2');  
5 -console.log(portAudio.getDevices());  
6 -  
7 -function createOfflineRecognizer() {  
8 - const featConfig = new sherpa_onnx.FeatureConfig();  
9 - featConfig.sampleRate = 16000;  
10 - featConfig.featureDim = 80;  
11 -  
12 - // test online recognizer  
13 - const whisper = new sherpa_onnx.OfflineWhisperModelConfig();  
14 - whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';  
15 - whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';  
16 - const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';  
17 -  
18 - const modelConfig = new sherpa_onnx.OfflineModelConfig();  
19 - modelConfig.whisper = whisper;  
20 - modelConfig.tokens = tokens;  
21 - modelConfig.modelType = 'whisper';  
22 -  
23 - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();  
24 - recognizerConfig.featConfig = featConfig;  
25 - recognizerConfig.modelConfig = modelConfig;  
26 - recognizerConfig.decodingMethod = 'greedy_search';  
27 -  
28 - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);  
29 - return recognizer;  
30 -}  
31 -  
32 -function createVad() {  
33 - const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();  
34 - sileroVadModelConfig.model = './silero_vad.onnx';  
35 - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds  
36 - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds  
37 - sileroVadModelConfig.windowSize = 512;  
38 -  
39 - const vadModelConfig = new sherpa_onnx.VadModelConfig();  
40 - vadModelConfig.sileroVad = sileroVadModelConfig;  
41 - vadModelConfig.sampleRate = 16000;  
42 -  
43 - const bufferSizeInSeconds = 60;  
44 - const vad = new sherpa_onnx.VoiceActivityDetector(  
45 - vadModelConfig, bufferSizeInSeconds);  
46 - return vad;  
47 -}  
48 -  
49 -const recognizer = createOfflineRecognizer();  
50 -const vad = createVad();  
51 -  
52 -const bufferSizeInSeconds = 30;  
53 -const buffer =  
54 - new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);  
55 -  
56 -const ai = new portAudio.AudioIO({  
57 - inOptions: {  
58 - channelCount: 1,  
59 - closeOnError: true, // Close the stream if an audio error is detected, if  
60 - // set false then just log the error  
61 - deviceId: -1, // Use -1 or omit the deviceId to select the default device  
62 - sampleFormat: portAudio.SampleFormatFloat32,  
63 - sampleRate: vad.config.sampleRate  
64 - }  
65 -});  
66 -  
67 -let printed = false;  
68 -let index = 0;  
69 -ai.on('data', data => {  
70 - const windowSize = vad.config.sileroVad.windowSize;  
71 - buffer.push(new Float32Array(data.buffer));  
72 - while (buffer.size() > windowSize) {  
73 - const samples = buffer.get(buffer.head(), windowSize);  
74 - buffer.pop(windowSize);  
75 - vad.acceptWaveform(samples)  
76 - }  
77 -  
78 - while (!vad.isEmpty()) {  
79 - const segment = vad.front();  
80 - vad.pop();  
81 - const stream = recognizer.createStream();  
82 - stream.acceptWaveform(  
83 - recognizer.config.featConfig.sampleRate, segment.samples);  
84 - recognizer.decode(stream);  
85 - const r = recognizer.getResult(stream);  
86 - stream.free();  
87 - if (r.text.length > 0) {  
88 - console.log(`${index}: ${r.text}`);  
89 - index += 1;  
90 - }  
91 - }  
92 -});  
93 -  
94 -ai.on('close', () => {  
95 - console.log('Free resources');  
96 - recognizer.free();  
97 - vad.free();  
98 - buffer.free();  
99 -});  
100 -  
101 -ai.start();  
102 -console.log('Started! Please speak')  
1 -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)  
2 -  
3 -const sherpa_onnx = require('sherpa-onnx');  
4 -const portAudio = require('naudiodon2');  
5 -console.log(portAudio.getDevices());  
6 -  
7 -function createVad() {  
8 - const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();  
9 - sileroVadModelConfig.model = './silero_vad.onnx';  
10 - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds  
11 - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds  
12 - sileroVadModelConfig.windowSize = 512;  
13 -  
14 - const vadModelConfig = new sherpa_onnx.VadModelConfig();  
15 - vadModelConfig.sileroVad = sileroVadModelConfig;  
16 - vadModelConfig.sampleRate = 16000;  
17 -  
18 - const bufferSizeInSeconds = 60;  
19 - const vad = new sherpa_onnx.VoiceActivityDetector(  
20 - vadModelConfig, bufferSizeInSeconds);  
21 - return vad;  
22 -}  
23 -vad = createVad();  
24 -const bufferSizeInSeconds = 30;  
25 -const buffer =  
26 - new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);  
27 -  
28 -const ai = new portAudio.AudioIO({  
29 - inOptions: {  
30 - channelCount: 1,  
31 - closeOnError: true, // Close the stream if an audio error is detected, if  
32 - // set false then just log the error  
33 - deviceId: -1, // Use -1 or omit the deviceId to select the default device  
34 - sampleFormat: portAudio.SampleFormatFloat32,  
35 - sampleRate: vad.config.sampleRate  
36 - }  
37 -});  
38 -  
39 -let printed = false;  
40 -let index = 0;  
41 -ai.on('data', data => {  
42 - const windowSize = vad.config.sileroVad.windowSize;  
43 - buffer.push(new Float32Array(data.buffer));  
44 - while (buffer.size() > windowSize) {  
45 - const samples = buffer.get(buffer.head(), windowSize);  
46 - buffer.pop(windowSize);  
47 - vad.acceptWaveform(samples)  
48 - if (vad.isDetected() && !printed) {  
49 - console.log(`${index}: Detected speech`)  
50 - printed = true;  
51 - }  
52 -  
53 - if (!vad.isDetected()) {  
54 - printed = false;  
55 - }  
56 -  
57 - while (!vad.isEmpty()) {  
58 - const segment = vad.front();  
59 - vad.pop();  
60 - const duration = segment.samples.length / vad.config.sampleRate;  
61 - console.log(`${index} End of speech. Duration: ${duration} seconds`);  
62 - index += 1;  
63 - }  
64 - }  
65 -});  
66 -  
67 -ai.on('close', () => {  
68 - console.log('Free resources');  
69 - vad.free();  
70 - buffer.free();  
71 -});  
72 -  
73 -ai.start();  
74 -console.log('Started! Please speak')  
1 -Language: JavaScript  
2 -JavaScriptQuotes: Double  
3 -  
@@ -7,3 +7,5 @@ It processes everything locally without accessing the Internet. @@ -7,3 +7,5 @@ It processes everything locally without accessing the Internet.
7 Please refer to 7 Please refer to
8 https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples 8 https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples
9 for examples. 9 for examples.
  10 +
  11 +You need Node >= 18 for this package.
1 -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)  
2 -//  
3 -// Please use  
4 -//  
5 -// npm install ffi-napi ref-struct-napi  
6 -//  
7 -// before you use this file  
8 -//  
9 -//  
10 -// Please use node 13. node 16, 18, 20, and 21 are known not working.  
11 -// See also  
12 -// https://github.com/node-ffi-napi/node-ffi-napi/issues/244  
13 -// and  
14 -// https://github.com/node-ffi-napi/node-ffi-napi/issues/97  
15 -"use strict" 1 +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +'use strict'
16 3
17 -const debug = require("debug")("sherpa-onnx");  
18 -const os = require("os");  
19 -const path = require("path");  
20 -const ffi = require("ffi-napi");  
21 -const ref = require("ref-napi");  
22 -const fs = require("fs");  
23 -var ArrayType = require("ref-array-napi"); 4 +const wasmModule = require('./sherpa-onnx-wasm-nodejs.js')();
  5 +const sherpa_onnx_asr = require('./sherpa-onnx-asr.js');
  6 +const sherpa_onnx_tts = require('./sherpa-onnx-tts.js');
24 7
25 -const FloatArray = ArrayType(ref.types.float);  
26 -const StructType = require("ref-struct-napi");  
27 -const cstring = ref.types.CString;  
28 -const cstringPtr = ref.refType(cstring);  
29 -const int32_t = ref.types.int32;  
30 -const float = ref.types.float;  
31 -const floatPtr = ref.refType(float);  
32 -  
33 -const SherpaOnnxOnlineTransducerModelConfig = StructType({  
34 - "encoder" : cstring,  
35 - "decoder" : cstring,  
36 - "joiner" : cstring,  
37 -});  
38 -  
39 -const SherpaOnnxOnlineParaformerModelConfig = StructType({  
40 - "encoder" : cstring,  
41 - "decoder" : cstring,  
42 -});  
43 -  
44 -const SherpaOnnxOnlineZipformer2CtcModelConfig = StructType({  
45 - "model" : cstring,  
46 -});  
47 -  
48 -const SherpaOnnxOnlineModelConfig = StructType({  
49 - "transducer" : SherpaOnnxOnlineTransducerModelConfig,  
50 - "paraformer" : SherpaOnnxOnlineParaformerModelConfig,  
51 - "zipformer2Ctc" : SherpaOnnxOnlineZipformer2CtcModelConfig,  
52 - "tokens" : cstring,  
53 - "numThreads" : int32_t,  
54 - "provider" : cstring,  
55 - "debug" : int32_t,  
56 - "modelType" : cstring,  
57 -});  
58 -  
59 -const SherpaOnnxFeatureConfig = StructType({  
60 - "sampleRate" : int32_t,  
61 - "featureDim" : int32_t,  
62 -});  
63 -  
64 -const SherpaOnnxOnlineRecognizerConfig = StructType({  
65 - "featConfig" : SherpaOnnxFeatureConfig,  
66 - "modelConfig" : SherpaOnnxOnlineModelConfig,  
67 - "decodingMethod" : cstring,  
68 - "maxActivePaths" : int32_t,  
69 - "enableEndpoint" : int32_t,  
70 - "rule1MinTrailingSilence" : float,  
71 - "rule2MinTrailingSilence" : float,  
72 - "rule3MinUtteranceLength" : float,  
73 - "hotwordsFile" : cstring,  
74 - "hotwordsScore" : float,  
75 -});  
76 -  
77 -const SherpaOnnxOnlineRecognizerResult = StructType({  
78 - "text" : cstring,  
79 - "tokens" : cstring,  
80 - "tokensArr" : cstringPtr,  
81 - "timestamps" : floatPtr,  
82 - "count" : int32_t,  
83 - "json" : cstring,  
84 -});  
85 -  
86 -const SherpaOnnxOnlineRecognizerPtr = ref.refType(ref.types.void);  
87 -const SherpaOnnxOnlineStreamPtr = ref.refType(ref.types.void);  
88 -const SherpaOnnxOnlineStreamPtrPtr = ref.refType(SherpaOnnxOnlineStreamPtr);  
89 -const SherpaOnnxOnlineRecognizerResultPtr =  
90 - ref.refType(SherpaOnnxOnlineRecognizerResult);  
91 -  
92 -const SherpaOnnxOnlineRecognizerConfigPtr =  
93 - ref.refType(SherpaOnnxOnlineRecognizerConfig);  
94 -  
95 -const SherpaOnnxOfflineTransducerModelConfig = StructType({  
96 - "encoder" : cstring,  
97 - "decoder" : cstring,  
98 - "joiner" : cstring,  
99 -});  
100 -  
101 -const SherpaOnnxOfflineParaformerModelConfig = StructType({  
102 - "model" : cstring,  
103 -});  
104 -  
105 -const SherpaOnnxOfflineNemoEncDecCtcModelConfig = StructType({  
106 - "model" : cstring,  
107 -});  
108 -  
109 -const SherpaOnnxOfflineWhisperModelConfig = StructType({  
110 - "encoder" : cstring,  
111 - "decoder" : cstring,  
112 -});  
113 -  
114 -const SherpaOnnxOfflineTdnnModelConfig = StructType({  
115 - "model" : cstring,  
116 -});  
117 -  
118 -const SherpaOnnxOfflineLMConfig = StructType({  
119 - "model" : cstring,  
120 - "scale" : float,  
121 -});  
122 -  
123 -const SherpaOnnxOfflineModelConfig = StructType({  
124 - "transducer" : SherpaOnnxOfflineTransducerModelConfig,  
125 - "paraformer" : SherpaOnnxOfflineParaformerModelConfig,  
126 - "nemoCtc" : SherpaOnnxOfflineNemoEncDecCtcModelConfig,  
127 - "whisper" : SherpaOnnxOfflineWhisperModelConfig,  
128 - "tdnn" : SherpaOnnxOfflineTdnnModelConfig,  
129 - "tokens" : cstring,  
130 - "numThreads" : int32_t,  
131 - "debug" : int32_t,  
132 - "provider" : cstring,  
133 - "modelType" : cstring,  
134 -});  
135 -  
136 -const SherpaOnnxOfflineRecognizerConfig = StructType({  
137 - "featConfig" : SherpaOnnxFeatureConfig,  
138 - "modelConfig" : SherpaOnnxOfflineModelConfig,  
139 - "lmConfig" : SherpaOnnxOfflineLMConfig,  
140 - "decodingMethod" : cstring,  
141 - "maxActivePaths" : int32_t,  
142 - "hotwordsFile" : cstring,  
143 - "hotwordsScore" : float,  
144 -});  
145 -  
146 -const SherpaOnnxOfflineRecognizerResult = StructType({  
147 - "text" : cstring,  
148 - "timestamps" : floatPtr,  
149 - "count" : int32_t,  
150 -});  
151 -  
152 -const SherpaOnnxOfflineRecognizerPtr = ref.refType(ref.types.void);  
153 -const SherpaOnnxOfflineStreamPtr = ref.refType(ref.types.void);  
154 -const SherpaOnnxOfflineStreamPtrPtr = ref.refType(SherpaOnnxOfflineStreamPtr);  
155 -const SherpaOnnxOfflineRecognizerResultPtr =  
156 - ref.refType(SherpaOnnxOfflineRecognizerResult);  
157 -  
158 -const SherpaOnnxOfflineRecognizerConfigPtr =  
159 - ref.refType(SherpaOnnxOfflineRecognizerConfig);  
160 -  
161 -// vad  
162 -const SherpaOnnxSileroVadModelConfig = StructType({  
163 - "model" : cstring,  
164 - "threshold" : float,  
165 - "minSilenceDuration" : float,  
166 - "minSpeechDuration" : float,  
167 - "windowSize" : int32_t,  
168 -});  
169 -  
170 -const SherpaOnnxVadModelConfig = StructType({  
171 - "sileroVad" : SherpaOnnxSileroVadModelConfig,  
172 - "sampleRate" : int32_t,  
173 - "numThreads" : int32_t,  
174 - "provider" : cstring,  
175 - "debug" : int32_t,  
176 -});  
177 -  
178 -const SherpaOnnxSpeechSegment = StructType({  
179 - "start" : int32_t,  
180 - "samples" : FloatArray,  
181 - "n" : int32_t,  
182 -});  
183 -  
184 -const SherpaOnnxVadModelConfigPtr = ref.refType(SherpaOnnxVadModelConfig);  
185 -const SherpaOnnxSpeechSegmentPtr = ref.refType(SherpaOnnxSpeechSegment);  
186 -const SherpaOnnxCircularBufferPtr = ref.refType(ref.types.void);  
187 -const SherpaOnnxVoiceActivityDetectorPtr = ref.refType(ref.types.void);  
188 -  
189 -// tts  
190 -const SherpaOnnxOfflineTtsVitsModelConfig = StructType({  
191 - "model" : cstring,  
192 - "lexicon" : cstring,  
193 - "tokens" : cstring,  
194 - "dataDir" : cstring,  
195 - "noiseScale" : float,  
196 - "noiseScaleW" : float,  
197 - "lengthScale" : float,  
198 -});  
199 -  
200 -const SherpaOnnxOfflineTtsModelConfig = StructType({  
201 - "vits" : SherpaOnnxOfflineTtsVitsModelConfig,  
202 - "numThreads" : int32_t,  
203 - "debug" : int32_t,  
204 - "provider" : cstring,  
205 -});  
206 -  
207 -const SherpaOnnxOfflineTtsConfig = StructType({  
208 - "model" : SherpaOnnxOfflineTtsModelConfig,  
209 - "ruleFsts" : cstring,  
210 - "maxNumSentences" : int32_t,  
211 -});  
212 -  
213 -const SherpaOnnxGeneratedAudio = StructType({  
214 - "samples" : FloatArray,  
215 - "n" : int32_t,  
216 - "sampleRate" : int32_t,  
217 -});  
218 -  
219 -const SherpaOnnxOfflineTtsVitsModelConfigPtr =  
220 - ref.refType(SherpaOnnxOfflineTtsVitsModelConfig);  
221 -const SherpaOnnxOfflineTtsConfigPtr = ref.refType(SherpaOnnxOfflineTtsConfig);  
222 -const SherpaOnnxGeneratedAudioPtr = ref.refType(SherpaOnnxGeneratedAudio);  
223 -const SherpaOnnxOfflineTtsPtr = ref.refType(ref.types.void);  
224 -  
225 -const SherpaOnnxDisplayPtr = ref.refType(ref.types.void);  
226 -  
227 -let soname;  
228 -if (os.platform() == "win32") {  
229 - // see https://nodejs.org/api/process.html#processarch  
230 - if (process.arch == "x64") {  
231 - let currentPath = process.env.Path;  
232 - let dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x64"));  
233 - process.env.Path = currentPath + path.delimiter + dllDirectory;  
234 -  
235 - soname = path.join(__dirname, "lib", "win-x64", "sherpa-onnx-c-api.dll")  
236 - } else if (process.arch == "ia32") {  
237 - let currentPath = process.env.Path;  
238 - let dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x86"));  
239 - process.env.Path = currentPath + path.delimiter + dllDirectory;  
240 -  
241 - soname = path.join(__dirname, "lib", "win-x86", "sherpa-onnx-c-api.dll")  
242 - } else {  
243 - throw new Error(  
244 - `Support only Windows x86 and x64 for now. Given ${process.arch}`);  
245 - }  
246 -} else if (os.platform() == "darwin") {  
247 - if (process.arch == "x64") {  
248 - soname =  
249 - path.join(__dirname, "lib", "osx-x64", "libsherpa-onnx-c-api.dylib");  
250 - } else if (process.arch == "arm64") {  
251 - soname =  
252 - path.join(__dirname, "lib", "osx-arm64", "libsherpa-onnx-c-api.dylib");  
253 - } else {  
254 - throw new Error(  
255 - `Support only macOS x64 and arm64 for now. Given ${process.arch}`);  
256 - }  
257 -} else if (os.platform() == "linux") {  
258 - if (process.arch == "x64") {  
259 - soname =  
260 - path.join(__dirname, "lib", "linux-x64", "libsherpa-onnx-c-api.so");  
261 - } else {  
262 - throw new Error(`Support only Linux x64 for now. Given ${process.arch}`);  
263 - }  
264 -} else {  
265 - throw new Error(`Unsupported platform ${os.platform()}`); 8 +function createOnlineRecognizer(config) {
  9 + return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
266 } 10 }
267 11
268 -if (!fs.existsSync(soname)) {  
269 - throw new Error(`Cannot find file ${soname}. Please make sure you have run  
270 - ./build.sh`); 12 +function createOfflineRecognizer(config) {
  13 + return new sherpa_onnx_asr.OfflineRecognizer(config, wasmModule);
271 } 14 }
272 15
273 -debug("soname ", soname)  
274 -  
275 -const libsherpa_onnx = ffi.Library(soname, {  
276 - // online asr  
277 - "CreateOnlineRecognizer" : [  
278 - SherpaOnnxOnlineRecognizerPtr, [ SherpaOnnxOnlineRecognizerConfigPtr ]  
279 - ],  
280 - "DestroyOnlineRecognizer" : [ "void", [ SherpaOnnxOnlineRecognizerPtr ] ],  
281 - "CreateOnlineStream" :  
282 - [ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr ] ],  
283 - "CreateOnlineStreamWithHotwords" :  
284 - [ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr, cstring ] ],  
285 - "DestroyOnlineStream" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ],  
286 - "AcceptWaveform" :  
287 - [ "void", [ SherpaOnnxOnlineStreamPtr, int32_t, floatPtr, int32_t ] ],  
288 - "IsOnlineStreamReady" :  
289 - [ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],  
290 - "DecodeOnlineStream" :  
291 - [ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],  
292 - "DecodeMultipleOnlineStreams" : [  
293 - "void",  
294 - [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtrPtr, int32_t ]  
295 - ],  
296 - "GetOnlineStreamResult" : [  
297 - SherpaOnnxOnlineRecognizerResultPtr,  
298 - [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ]  
299 - ],  
300 - "DestroyOnlineRecognizerResult" :  
301 - [ "void", [ SherpaOnnxOnlineRecognizerResultPtr ] ],  
302 - "Reset" :  
303 - [ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],  
304 - "InputFinished" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ],  
305 - "IsEndpoint" :  
306 - [ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],  
307 -  
308 - // offline asr  
309 - "CreateOfflineRecognizer" : [  
310 - SherpaOnnxOfflineRecognizerPtr, [ SherpaOnnxOfflineRecognizerConfigPtr ]  
311 - ],  
312 - "DestroyOfflineRecognizer" : [ "void", [ SherpaOnnxOfflineRecognizerPtr ] ],  
313 - "CreateOfflineStream" :  
314 - [ SherpaOnnxOfflineStreamPtr, [ SherpaOnnxOfflineRecognizerPtr ] ],  
315 - "DestroyOfflineStream" : [ "void", [ SherpaOnnxOfflineStreamPtr ] ],  
316 - "AcceptWaveformOffline" :  
317 - [ "void", [ SherpaOnnxOfflineStreamPtr, int32_t, floatPtr, int32_t ] ],  
318 - "DecodeOfflineStream" : [  
319 - "void", [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtr ]  
320 - ],  
321 - "DecodeMultipleOfflineStreams" : [  
322 - "void",  
323 - [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtrPtr, int32_t ]  
324 - ],  
325 - "GetOfflineStreamResult" :  
326 - [ SherpaOnnxOfflineRecognizerResultPtr, [ SherpaOnnxOfflineStreamPtr ] ],  
327 - "DestroyOfflineRecognizerResult" :  
328 - [ "void", [ SherpaOnnxOfflineRecognizerResultPtr ] ],  
329 -  
330 - // vad  
331 - "SherpaOnnxCreateCircularBuffer" :  
332 - [ SherpaOnnxCircularBufferPtr, [ int32_t ] ],  
333 - "SherpaOnnxDestroyCircularBuffer" :  
334 - [ "void", [ SherpaOnnxCircularBufferPtr ] ],  
335 - "SherpaOnnxCircularBufferPush" :  
336 - [ "void", [ SherpaOnnxCircularBufferPtr, floatPtr, int32_t ] ],  
337 - "SherpaOnnxCircularBufferGet" :  
338 - [ FloatArray, [ SherpaOnnxCircularBufferPtr, int32_t, int32_t ] ],  
339 - "SherpaOnnxCircularBufferFree" : [ "void", [ FloatArray ] ],  
340 - "SherpaOnnxCircularBufferPop" :  
341 - [ "void", [ SherpaOnnxCircularBufferPtr, int32_t ] ],  
342 - "SherpaOnnxCircularBufferSize" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ],  
343 - "SherpaOnnxCircularBufferHead" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ],  
344 - "SherpaOnnxCircularBufferReset" : [ "void", [ SherpaOnnxCircularBufferPtr ] ],  
345 - "SherpaOnnxCreateVoiceActivityDetector" : [  
346 - SherpaOnnxVoiceActivityDetectorPtr, [ SherpaOnnxVadModelConfigPtr, float ]  
347 - ],  
348 - "SherpaOnnxDestroyVoiceActivityDetector" :  
349 - [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],  
350 - "SherpaOnnxVoiceActivityDetectorAcceptWaveform" :  
351 - [ "void", [ SherpaOnnxVoiceActivityDetectorPtr, floatPtr, int32_t ] ],  
352 - "SherpaOnnxVoiceActivityDetectorEmpty" :  
353 - [ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ],  
354 - "SherpaOnnxVoiceActivityDetectorDetected" :  
355 - [ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ],  
356 - "SherpaOnnxVoiceActivityDetectorPop" :  
357 - [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],  
358 - "SherpaOnnxVoiceActivityDetectorClear" :  
359 - [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],  
360 - "SherpaOnnxVoiceActivityDetectorFront" :  
361 - [ SherpaOnnxSpeechSegmentPtr, [ SherpaOnnxVoiceActivityDetectorPtr ] ],  
362 - "SherpaOnnxDestroySpeechSegment" : [ "void", [ SherpaOnnxSpeechSegmentPtr ] ],  
363 - "SherpaOnnxVoiceActivityDetectorReset" :  
364 - [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],  
365 - // tts  
366 - "SherpaOnnxCreateOfflineTts" :  
367 - [ SherpaOnnxOfflineTtsPtr, [ SherpaOnnxOfflineTtsConfigPtr ] ],  
368 - "SherpaOnnxDestroyOfflineTts" : [ "void", [ SherpaOnnxOfflineTtsPtr ] ],  
369 - "SherpaOnnxOfflineTtsGenerate" : [  
370 - SherpaOnnxGeneratedAudioPtr,  
371 - [ SherpaOnnxOfflineTtsPtr, cstring, int32_t, float ]  
372 - ],  
373 - "SherpaOnnxDestroyOfflineTtsGeneratedAudio" :  
374 - [ "void", [ SherpaOnnxGeneratedAudioPtr ] ],  
375 - "SherpaOnnxWriteWave" : [ "void", [ floatPtr, int32_t, int32_t, cstring ] ],  
376 -  
377 - // display  
378 - "CreateDisplay" : [ SherpaOnnxDisplayPtr, [ int32_t ] ],  
379 - "DestroyDisplay" : [ "void", [ SherpaOnnxDisplayPtr ] ],  
380 - "SherpaOnnxPrint" : [ "void", [ SherpaOnnxDisplayPtr, int32_t, cstring ] ],  
381 -});  
382 -  
383 -class Display {  
384 - constructor(maxWordPerLine) {  
385 - this.handle = libsherpa_onnx.CreateDisplay(maxWordPerLine);  
386 - }  
387 - free() {  
388 - if (this.handle) {  
389 - libsherpa_onnx.DestroyDisplay(this.handle);  
390 - this.handle = null;  
391 - }  
392 - }  
393 -  
394 - print(idx, s) { libsherpa_onnx.SherpaOnnxPrint(this.handle, idx, s); }  
395 -};  
396 -  
397 -class OnlineResult {  
398 - constructor(text) { this.text = Buffer.from(text, "utf-8").toString(); }  
399 -};  
400 -  
401 -class OnlineStream {  
402 - constructor(handle) { this.handle = handle }  
403 -  
404 - free() {  
405 - if (this.handle) {  
406 - libsherpa_onnx.DestroyOnlineStream(this.handle);  
407 - this.handle = null;  
408 - }  
409 - }  
410 -  
411 - /**  
412 - * @param sampleRate {Number}  
413 - * @param samples {Float32Array} Containing samples in the range [-1, 1]  
414 - */  
415 - acceptWaveform(sampleRate, samples) {  
416 - libsherpa_onnx.AcceptWaveform(this.handle, sampleRate, samples,  
417 - samples.length);  
418 - }  
419 -};  
420 -  
421 -class OnlineRecognizer {  
422 - constructor(config) {  
423 - this.config = config;  
424 - this.recognizer_handle =  
425 - libsherpa_onnx.CreateOnlineRecognizer(config.ref());  
426 - }  
427 -  
428 - free() {  
429 - if (this.recognizer_handle) {  
430 - libsherpa_onnx.DestroyOnlineRecognizer(this.recognizer_handle);  
431 - this.recognizer_handle = null;  
432 - }  
433 - }  
434 -  
435 - createStream() {  
436 - let handle = libsherpa_onnx.CreateOnlineStream(this.recognizer_handle);  
437 - return new OnlineStream(handle);  
438 - }  
439 -  
440 - isReady(stream) {  
441 - return libsherpa_onnx.IsOnlineStreamReady(this.recognizer_handle,  
442 - stream.handle)  
443 - }  
444 -  
445 - isEndpoint(stream) {  
446 - return libsherpa_onnx.IsEndpoint(this.recognizer_handle, stream.handle);  
447 - }  
448 -  
449 - reset(stream) { libsherpa_onnx.Reset(this.recognizer_handle, stream.handle); }  
450 -  
451 - decode(stream) {  
452 - libsherpa_onnx.DecodeOnlineStream(this.recognizer_handle, stream.handle)  
453 - }  
454 -  
455 - getResult(stream) {  
456 - let handle = libsherpa_onnx.GetOnlineStreamResult(this.recognizer_handle,  
457 - stream.handle);  
458 - let r = handle.deref();  
459 - let ans = new OnlineResult(r.text);  
460 - libsherpa_onnx.DestroyOnlineRecognizerResult(handle);  
461 -  
462 - return ans  
463 - }  
464 -};  
465 -  
466 -class OfflineResult {  
467 - constructor(text) { this.text = Buffer.from(text, "utf-8").toString(); }  
468 -};  
469 -  
470 -class OfflineStream {  
471 - constructor(handle) { this.handle = handle }  
472 -  
473 - free() {  
474 - if (this.handle) {  
475 - libsherpa_onnx.DestroyOfflineStream(this.handle);  
476 - this.handle = null;  
477 - }  
478 - }  
479 -  
480 - /**  
481 - * @param sampleRate {Number}  
482 - * @param samples {Float32Array} Containing samples in the range [-1, 1]  
483 - */  
484 - acceptWaveform(sampleRate, samples) {  
485 - libsherpa_onnx.AcceptWaveformOffline(this.handle, sampleRate, samples,  
486 - samples.length);  
487 - }  
488 -};  
489 -  
490 -class OfflineRecognizer {  
491 - constructor(config) {  
492 - this.config = config;  
493 - this.recognizer_handle =  
494 - libsherpa_onnx.CreateOfflineRecognizer(config.ref());  
495 - }  
496 -  
497 - free() {  
498 - if (this.recognizer_handle) {  
499 - libsherpa_onnx.DestroyOfflineRecognizer(this.recognizer_handle);  
500 - this.recognizer_handle = null;  
501 - }  
502 - }  
503 -  
504 - createStream() {  
505 - let handle = libsherpa_onnx.CreateOfflineStream(this.recognizer_handle);  
506 - return new OfflineStream(handle);  
507 - }  
508 -  
509 - decode(stream) {  
510 - libsherpa_onnx.DecodeOfflineStream(this.recognizer_handle, stream.handle)  
511 - }  
512 -  
513 - getResult(stream) {  
514 - let handle = libsherpa_onnx.GetOfflineStreamResult(stream.handle);  
515 - let r = handle.deref();  
516 - let ans = new OfflineResult(r.text);  
517 - libsherpa_onnx.DestroyOfflineRecognizerResult(handle);  
518 -  
519 - return ans  
520 - }  
521 -};  
522 -  
523 -class SpeechSegment {  
524 - constructor(start, samples) {  
525 - this.start = start;  
526 - this.samples = samples;  
527 - }  
528 -};  
529 -  
530 -// this buffer holds only float entries.  
531 -class CircularBuffer {  
532 - /**  
533 - * @param capacity {int} The capacity of the circular buffer.  
534 - */  
535 - constructor(capacity) {  
536 - this.handle = libsherpa_onnx.SherpaOnnxCreateCircularBuffer(capacity);  
537 - }  
538 -  
539 - free() {  
540 - if (this.handle) {  
541 - libsherpa_onnx.SherpaOnnxDestroyCircularBuffer(this.handle);  
542 - this.handle = null;  
543 - }  
544 - }  
545 -  
546 - /**  
547 - * @param samples {Float32Array}  
548 - */  
549 - push(samples) {  
550 - libsherpa_onnx.SherpaOnnxCircularBufferPush(this.handle, samples,  
551 - samples.length);  
552 - }  
553 -  
554 - get(startIndex, n) {  
555 - let data =  
556 - libsherpa_onnx.SherpaOnnxCircularBufferGet(this.handle, startIndex, n);  
557 -  
558 - // https://tootallnate.github.io/ref/#exports-reinterpret  
559 - const buffer = data.buffer.reinterpret(n * ref.sizeof.float).buffer;  
560 -  
561 - // create a copy since we are going to free the buffer at the end  
562 - let s = new Float32Array(buffer).slice(0);  
563 - libsherpa_onnx.SherpaOnnxCircularBufferFree(data);  
564 - return s;  
565 - }  
566 -  
567 - pop(n) { libsherpa_onnx.SherpaOnnxCircularBufferPop(this.handle, n); }  
568 -  
569 - size() { return libsherpa_onnx.SherpaOnnxCircularBufferSize(this.handle); }  
570 -  
571 - head() { return libsherpa_onnx.SherpaOnnxCircularBufferHead(this.handle); }  
572 -  
573 - reset() { libsherpa_onnx.SherpaOnnxCircularBufferReset(this.handle); }  
574 -};  
575 -  
576 -class VoiceActivityDetector {  
577 - constructor(config, bufferSizeInSeconds) {  
578 - this.config = config;  
579 - this.handle = libsherpa_onnx.SherpaOnnxCreateVoiceActivityDetector(  
580 - config.ref(), bufferSizeInSeconds);  
581 - }  
582 -  
583 - free() {  
584 - if (this.handle) {  
585 - libsherpa_onnx.SherpaOnnxDestroyVoiceActivityDetector(this.handle);  
586 - }  
587 - }  
588 -  
589 - acceptWaveform(samples) {  
590 - libsherpa_onnx.SherpaOnnxVoiceActivityDetectorAcceptWaveform(  
591 - this.handle, samples, samples.length);  
592 - }  
593 -  
594 - isEmpty() {  
595 - return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorEmpty(this.handle);  
596 - }  
597 -  
598 - isDetected() {  
599 - return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorDetected(this.handle);  
600 - }  
601 - pop() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorPop(this.handle); }  
602 -  
603 - clear() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorClear(this.handle); }  
604 -  
605 - reset() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorReset(this.handle); }  
606 -  
607 - front() {  
608 - let segment =  
609 - libsherpa_onnx.SherpaOnnxVoiceActivityDetectorFront(this.handle);  
610 -  
611 - let buffer =  
612 - segment.deref()  
613 - .samples.buffer.reinterpret(segment.deref().n * ref.sizeof.float)  
614 - .buffer;  
615 -  
616 - let samples = new Float32Array(buffer).slice(0);  
617 - let ans = new SpeechSegment(segment.deref().start, samples);  
618 -  
619 - libsherpa_onnx.SherpaOnnxDestroySpeechSegment(segment);  
620 - return ans;  
621 - }  
622 -};  
623 -  
624 -class GeneratedAudio {  
625 - constructor(sampleRate, samples) {  
626 - this.sampleRate = sampleRate;  
627 - this.samples = samples;  
628 - }  
629 - save(filename) {  
630 - libsherpa_onnx.SherpaOnnxWriteWave(this.samples, this.samples.length,  
631 - this.sampleRate, filename);  
632 - }  
633 -};  
634 -  
635 -class OfflineTts {  
636 - constructor(config) {  
637 - this.config = config;  
638 - this.handle = libsherpa_onnx.SherpaOnnxCreateOfflineTts(config.ref());  
639 - }  
640 -  
641 - free() {  
642 - if (this.handle) {  
643 - libsherpa_onnx.SherpaOnnxDestroyOfflineTts(this.handle);  
644 - this.handle = null;  
645 - }  
646 - }  
647 - generate(text, sid, speed) {  
648 - let r = libsherpa_onnx.SherpaOnnxOfflineTtsGenerate(this.handle, text, sid,  
649 - speed);  
650 - const buffer =  
651 - r.deref()  
652 - .samples.buffer.reinterpret(r.deref().n * ref.sizeof.float)  
653 - .buffer;  
654 - let samples = new Float32Array(buffer).slice(0);  
655 - let sampleRate = r.deref().sampleRate;  
656 -  
657 - let generatedAudio = new GeneratedAudio(sampleRate, samples);  
658 -  
659 - libsherpa_onnx.SherpaOnnxDestroyOfflineTtsGeneratedAudio(r);  
660 -  
661 - return generatedAudio;  
662 - }  
663 -};  
664 -  
665 -// online asr  
666 -const OnlineTransducerModelConfig = SherpaOnnxOnlineTransducerModelConfig;  
667 -const OnlineModelConfig = SherpaOnnxOnlineModelConfig;  
668 -const FeatureConfig = SherpaOnnxFeatureConfig;  
669 -const OnlineRecognizerConfig = SherpaOnnxOnlineRecognizerConfig;  
670 -const OnlineParaformerModelConfig = SherpaOnnxOnlineParaformerModelConfig;  
671 -const OnlineZipformer2CtcModelConfig = SherpaOnnxOnlineZipformer2CtcModelConfig;  
672 -  
673 -// offline asr  
674 -const OfflineTransducerModelConfig = SherpaOnnxOfflineTransducerModelConfig;  
675 -const OfflineModelConfig = SherpaOnnxOfflineModelConfig;  
676 -const OfflineRecognizerConfig = SherpaOnnxOfflineRecognizerConfig;  
677 -const OfflineParaformerModelConfig = SherpaOnnxOfflineParaformerModelConfig;  
678 -const OfflineWhisperModelConfig = SherpaOnnxOfflineWhisperModelConfig;  
679 -const OfflineNemoEncDecCtcModelConfig =  
680 - SherpaOnnxOfflineNemoEncDecCtcModelConfig;  
681 -const OfflineTdnnModelConfig = SherpaOnnxOfflineTdnnModelConfig;  
682 -  
683 -// vad  
684 -const SileroVadModelConfig = SherpaOnnxSileroVadModelConfig;  
685 -const VadModelConfig = SherpaOnnxVadModelConfig;  
686 -  
687 -// tts  
688 -const OfflineTtsVitsModelConfig = SherpaOnnxOfflineTtsVitsModelConfig;  
689 -const OfflineTtsModelConfig = SherpaOnnxOfflineTtsModelConfig;  
690 -const OfflineTtsConfig = SherpaOnnxOfflineTtsConfig; 16 +function createOfflineTts(config) {
  17 + return sherpa_onnx_tts.createOfflineTts(wasmModule, config);
  18 +}
691 19
  20 +// Note: online means streaming and offline means non-streaming here.
  21 +// Both of them don't require internet connection.
692 module.exports = { 22 module.exports = {
693 - // online asr  
694 - OnlineTransducerModelConfig,  
695 - OnlineModelConfig,  
696 - FeatureConfig,  
697 - OnlineRecognizerConfig,  
698 - OnlineRecognizer,  
699 - OnlineStream,  
700 - OnlineParaformerModelConfig,  
701 - OnlineZipformer2CtcModelConfig,  
702 -  
703 - // offline asr  
704 - OfflineRecognizer,  
705 - OfflineStream,  
706 - OfflineTransducerModelConfig,  
707 - OfflineModelConfig,  
708 - OfflineRecognizerConfig,  
709 - OfflineParaformerModelConfig,  
710 - OfflineWhisperModelConfig,  
711 - OfflineNemoEncDecCtcModelConfig,  
712 - OfflineTdnnModelConfig,  
713 - // vad  
714 - SileroVadModelConfig,  
715 - VadModelConfig,  
716 - CircularBuffer,  
717 - VoiceActivityDetector,  
718 - // tts  
719 - OfflineTtsVitsModelConfig,  
720 - OfflineTtsModelConfig,  
721 - OfflineTtsConfig,  
722 - OfflineTts,  
723 -  
724 - //  
725 - Display, 23 + createOnlineRecognizer,
  24 + createOfflineRecognizer,
  25 + createOfflineTts,
726 }; 26 };
1 { 1 {
2 - "name": "sherpa-onnx2",  
3 - "version": "1.8.10",  
4 - "description": "Real-time speech recognition with Next-gen Kaldi", 2 + "name": "sherpa-onnx",
  3 + "version": "SHERPA_ONNX_VERSION",
  4 + "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
5 "main": "index.js", 5 "main": "index.js",
6 "scripts": { 6 "scripts": {
7 "test": "echo \"Error: no test specified\" && exit 1" 7 "test": "echo \"Error: no test specified\" && exit 1"
@@ -11,15 +11,30 @@ @@ -11,15 +11,30 @@
11 "url": "git+https://github.com/k2-fsa/sherpa-onnx.git" 11 "url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
12 }, 12 },
13 "keywords": [ 13 "keywords": [
14 - "speech-to-text",  
15 - "text-to-speech", 14 + "speech to text",
  15 + "text to speech",
  16 + "transcription",
16 "real-time speech recognition", 17 "real-time speech recognition",
17 - "without internet connect", 18 + "without internet connection",
18 "embedded systems", 19 "embedded systems",
19 "open source", 20 "open source",
20 "zipformer", 21 "zipformer",
21 "asr", 22 "asr",
22 - "speech" 23 + "tts",
  24 + "stt",
  25 + "c++",
  26 + "onnxruntime",
  27 + "onnx",
  28 + "ai",
  29 + "next-gen kaldi",
  30 + "offline",
  31 + "privacy",
  32 + "open source",
  33 + "streaming speech recognition",
  34 + "speech",
  35 + "recognition",
  36 + "WebAssembly",
  37 + "wasm"
23 ], 38 ],
24 "author": "The next-gen Kaldi team", 39 "author": "The next-gen Kaldi team",
25 "license": "Apache-2.0", 40 "license": "Apache-2.0",
@@ -28,10 +43,5 @@ @@ -28,10 +43,5 @@
28 }, 43 },
29 "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme", 44 "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
30 "dependencies": { 45 "dependencies": {
31 - "ffi-napi": "^4.0.3",  
32 - "npm": "^6.14.18",  
33 - "ref-array-napi": "^1.2.2",  
34 - "ref-napi": "^3.0.3",  
35 - "ref-struct-napi": "^1.1.1"  
36 } 46 }
37 } 47 }
1 -{  
2 - "name": "sherpa-onnx",  
3 - "version": "SHERPA_ONNX_VERSION",  
4 - "description": "Real-time speech recognition with Next-gen Kaldi",  
5 - "main": "index.js",  
6 - "scripts": {  
7 - "test": "echo \"Error: no test specified\" && exit 1"  
8 - },  
9 - "repository": {  
10 - "type": "git",  
11 - "url": "git+https://github.com/k2-fsa/sherpa-onnx.git"  
12 - },  
13 - "keywords": [  
14 - "speech to text",  
15 - "text to speech",  
16 - "transcription",  
17 - "real-time speech recognition",  
18 - "without internet connect",  
19 - "embedded systems",  
20 - "open source",  
21 - "zipformer",  
22 - "asr",  
23 - "tts",  
24 - "stt",  
25 - "c++",  
26 - "onnxruntime",  
27 - "onnx",  
28 - "ai",  
29 - "next-gen kaldi",  
30 - "offline",  
31 - "privacy",  
32 - "open source",  
33 - "streaming speech recognition",  
34 - "speech",  
35 - "recognition"  
36 - ],  
37 - "author": "The next-gen Kaldi team",  
38 - "license": "Apache-2.0",  
39 - "bugs": {  
40 - "url": "https://github.com/k2-fsa/sherpa-onnx/issues"  
41 - },  
42 - "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",  
43 - "dependencies": {  
44 - "ffi-napi": "^4.0.3",  
45 - "npm": "^6.14.18",  
46 - "ref-array-napi": "^1.2.2",  
47 - "ref-napi": "^3.0.3",  
48 - "ref-struct-napi": "^1.1.1"  
49 - }  
50 -}  
1 -#!/usr/bin/env bash  
2 -set -ex  
3 -  
4 -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )  
5 -SHERPA_ONNX_DIR=$(realpath $SCRIPT_DIR/../..)  
6 -echo "SCRIPT_DIR: $SCRIPT_DIR"  
7 -echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"  
8 -  
9 -SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)  
10 -  
11 -echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"  
12 -sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g ./package.json.in  
13 -  
14 -cp package.json.in package.json  
15 -rm package.json.in  
16 -rm package.json.in.bak  
17 -rm .clang-format  
18 -  
19 -function windows_x64() {  
20 - echo "Process Windows (x64)"  
21 - mkdir -p lib/win-x64  
22 - dst=$(realpath lib/win-x64)  
23 - mkdir t  
24 - cd t  
25 - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl  
26 - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl  
27 -  
28 - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst  
29 - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst  
30 - rm -fv $dst/sherpa-onnx-portaudio.dll  
31 -  
32 - cd ..  
33 - rm -rf t  
34 -}  
35 -  
36 -function windows_x86() {  
37 - echo "Process Windows (x86)"  
38 - mkdir -p lib/win-x86  
39 - dst=$(realpath lib/win-x86)  
40 - mkdir t  
41 - cd t  
42 - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl  
43 - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl  
44 -  
45 - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst  
46 - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst  
47 - rm -fv $dst/sherpa-onnx-portaudio.dll  
48 -  
49 - cd ..  
50 - rm -rf t  
51 -}  
52 -  
53 -function linux_x64() {  
54 - echo "Process Linux (x64)"  
55 - mkdir -p lib/linux-x64  
56 - dst=$(realpath lib/linux-x64)  
57 - mkdir t  
58 - cd t  
59 - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_28_x86_64.whl  
60 - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_28_x86_64.whl  
61 -  
62 - cp -v sherpa_onnx/lib/*.so* $dst  
63 - rm -v $dst/libcargs.so  
64 - rm -v $dst/libsherpa-onnx-portaudio.so  
65 - rm -v $dst/libsherpa-onnx-fst.so  
66 - rm -v $dst/libonnxruntime.so  
67 -  
68 - cd ..  
69 - rm -rf t  
70 -}  
71 -  
72 -function osx_x64() {  
73 - echo "Process osx-x64"  
74 - mkdir -p lib/osx-x64  
75 - dst=$(realpath lib/osx-x64)  
76 - mkdir t  
77 - cd t  
78 - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl  
79 - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl  
80 -  
81 - cp -v sherpa_onnx/lib/*.dylib $dst/  
82 - rm -v $dst/libonnxruntime.dylib  
83 - rm -v $dst/libcargs.dylib  
84 - rm -v $dst/libsherpa-onnx-fst.dylib  
85 - rm -v $dst/libsherpa-onnx-portaudio.dylib  
86 -  
87 - cd ..  
88 - rm -rf t  
89 -}  
90 -  
91 -function osx_arm64() {  
92 - echo "Process osx-arm64"  
93 - mkdir -p lib/osx-arm64  
94 - dst=$(realpath lib/osx-arm64)  
95 - mkdir t  
96 - cd t  
97 - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl  
98 - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl  
99 -  
100 - cp -v sherpa_onnx/lib/*.dylib $dst/  
101 - rm -v $dst/libonnxruntime.dylib  
102 - rm -v $dst/libcargs.dylib  
103 - rm -v $dst/libsherpa-onnx-fst.dylib  
104 - rm -v $dst/libsherpa-onnx-portaudio.dylib  
105 -  
106 - cd ..  
107 - rm -rf t  
108 -}  
109 -  
110 -windows_x64  
111 -ls -lh lib/win-x64  
112 -  
113 -windows_x86  
114 -ls -lh lib/win-x86  
115 -  
116 -linux_x64  
117 -ls -lh lib/linux-x64  
118 -  
119 -osx_x64  
120 -ls -lh lib/osx-x64  
121 -  
122 -osx_arm64  
123 -ls -lh lib/osx-arm64  
@@ -94,6 +94,11 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( @@ -94,6 +94,11 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
94 SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str()); 94 SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str());
95 } 95 }
96 96
  97 + if (!recognizer_config.Validate()) {
  98 + SHERPA_ONNX_LOGE("Errors in config!");
  99 + return nullptr;
  100 + }
  101 +
97 SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer; 102 SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer;
98 103
99 recognizer->impl = 104 recognizer->impl =
@@ -324,6 +329,11 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( @@ -324,6 +329,11 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
324 SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str()); 329 SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
325 } 330 }
326 331
  332 + if (!recognizer_config.Validate()) {
  333 + SHERPA_ONNX_LOGE("Errors in config");
  334 + return nullptr;
  335 + }
  336 +
327 SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer; 337 SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer;
328 338
329 recognizer->impl = 339 recognizer->impl =
@@ -480,6 +490,11 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( @@ -480,6 +490,11 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
480 SHERPA_ONNX_LOGE("%s", vad_config.ToString().c_str()); 490 SHERPA_ONNX_LOGE("%s", vad_config.ToString().c_str());
481 } 491 }
482 492
  493 + if (!vad_config.Validate()) {
  494 + SHERPA_ONNX_LOGE("Errors in config");
  495 + return nullptr;
  496 + }
  497 +
483 SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector; 498 SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector;
484 p->impl = std::make_unique<sherpa_onnx::VoiceActivityDetector>( 499 p->impl = std::make_unique<sherpa_onnx::VoiceActivityDetector>(
485 vad_config, buffer_size_in_seconds); 500 vad_config, buffer_size_in_seconds);
@@ -570,6 +585,11 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( @@ -570,6 +585,11 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
570 SHERPA_ONNX_LOGE("%s\n", tts_config.ToString().c_str()); 585 SHERPA_ONNX_LOGE("%s\n", tts_config.ToString().c_str());
571 } 586 }
572 587
  588 + if (!tts_config.Validate()) {
  589 + SHERPA_ONNX_LOGE("Errors in config");
  590 + return nullptr;
  591 + }
  592 +
573 SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts; 593 SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts;
574 594
575 tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config); 595 tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);
@@ -5,3 +5,7 @@ endif() @@ -5,3 +5,7 @@ endif()
5 if(SHERPA_ONNX_ENABLE_WASM_ASR) 5 if(SHERPA_ONNX_ENABLE_WASM_ASR)
6 add_subdirectory(asr) 6 add_subdirectory(asr)
7 endif() 7 endif()
  8 +
  9 +if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
  10 + add_subdirectory(nodejs)
  11 +endif()
@@ -45,7 +45,7 @@ Module.onRuntimeInitialized = function() { @@ -45,7 +45,7 @@ Module.onRuntimeInitialized = function() {
45 45
46 startBtn.disabled = false; 46 startBtn.disabled = false;
47 47
48 - recognizer = createRecognizer(); 48 + recognizer = createOnlineRecognizer(Module);
49 console.log('recognizer is created!', recognizer); 49 console.log('recognizer is created!', recognizer);
50 }; 50 };
51 51
1 -function freeConfig(config) { 1 +function freeConfig(config, Module) {
2 if ('buffer' in config) { 2 if ('buffer' in config) {
3 - _free(config.buffer); 3 + Module._free(config.buffer);
4 } 4 }
5 5
6 if ('config' in config) { 6 if ('config' in config) {
7 - freeConfig(config.config) 7 + freeConfig(config.config, Module)
8 } 8 }
9 9
10 if ('transducer' in config) { 10 if ('transducer' in config) {
11 - freeConfig(config.transducer) 11 + freeConfig(config.transducer, Module)
12 } 12 }
13 13
14 if ('paraformer' in config) { 14 if ('paraformer' in config) {
15 - freeConfig(config.paraformer) 15 + freeConfig(config.paraformer, Module)
16 } 16 }
17 17
18 if ('ctc' in config) { 18 if ('ctc' in config) {
19 - freeConfig(config.ctc) 19 + freeConfig(config.ctc, Module)
20 } 20 }
21 21
22 if ('feat' in config) { 22 if ('feat' in config) {
23 - freeConfig(config.feat) 23 + freeConfig(config.feat, Module)
24 } 24 }
25 25
26 if ('model' in config) { 26 if ('model' in config) {
27 - freeConfig(config.model) 27 + freeConfig(config.model, Module)
28 } 28 }
29 29
30 - _free(config.ptr); 30 + if ('nemoCtc' in config) {
  31 + freeConfig(config.nemoCtc, Module)
  32 + }
  33 +
  34 + if ('whisper' in config) {
  35 + freeConfig(config.whisper, Module)
  36 + }
  37 +
  38 + if ('tdnn' in config) {
  39 + freeConfig(config.tdnn, Module)
  40 + }
  41 +
  42 + if ('lm' in config) {
  43 + freeConfig(config.lm, Module)
  44 + }
  45 +
  46 + Module._free(config.ptr);
31 } 47 }
32 48
33 // The user should free the returned pointers 49 // The user should free the returned pointers
34 -function initSherpaOnnxOnlineTransducerModelConfig(config) {  
35 - let encoderLen = lengthBytesUTF8(config.encoder) + 1;  
36 - let decoderLen = lengthBytesUTF8(config.decoder) + 1;  
37 - let joinerLen = lengthBytesUTF8(config.joiner) + 1; 50 +function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
  51 + const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
  52 + const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
  53 + const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1;
38 54
39 - let n = encoderLen + decoderLen + joinerLen; 55 + const n = encoderLen + decoderLen + joinerLen;
40 56
41 - let buffer = _malloc(n); 57 + const buffer = Module._malloc(n);
42 58
43 - let len = 3 * 4; // 3 pointers  
44 - let ptr = _malloc(len); 59 + const len = 3 * 4; // 3 pointers
  60 + const ptr = Module._malloc(len);
45 61
46 let offset = 0; 62 let offset = 0;
47 - stringToUTF8(config.encoder, buffer + offset, encoderLen); 63 + Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
48 offset += encoderLen; 64 offset += encoderLen;
49 65
50 - stringToUTF8(config.decoder, buffer + offset, decoderLen); 66 + Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
51 offset += decoderLen; 67 offset += decoderLen;
52 68
53 - stringToUTF8(config.joiner, buffer + offset, joinerLen); 69 + Module.stringToUTF8(config.joiner, buffer + offset, joinerLen);
54 70
55 offset = 0; 71 offset = 0;
56 - setValue(ptr, buffer + offset, 'i8*'); 72 + Module.setValue(ptr, buffer + offset, 'i8*');
57 offset += encoderLen; 73 offset += encoderLen;
58 74
59 - setValue(ptr + 4, buffer + offset, 'i8*'); 75 + Module.setValue(ptr + 4, buffer + offset, 'i8*');
60 offset += decoderLen; 76 offset += decoderLen;
61 77
62 - setValue(ptr + 8, buffer + offset, 'i8*'); 78 + Module.setValue(ptr + 8, buffer + offset, 'i8*');
63 79
64 return { 80 return {
65 buffer: buffer, ptr: ptr, len: len, 81 buffer: buffer, ptr: ptr, len: len,
66 } 82 }
67 } 83 }
68 84
69 -function initSherpaOnnxOnlineParaformerModelConfig(config) {  
70 - let encoderLen = lengthBytesUTF8(config.encoder) + 1;  
71 - let decoderLen = lengthBytesUTF8(config.decoder) + 1; 85 +function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
  86 + const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
  87 + const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
72 88
73 - let n = encoderLen + decoderLen;  
74 - let buffer = _malloc(n); 89 + const n = encoderLen + decoderLen;
  90 + const buffer = Module._malloc(n);
75 91
76 - let len = 2 * 4; // 2 pointers  
77 - let ptr = _malloc(len); 92 + const len = 2 * 4; // 2 pointers
  93 + const ptr = Module._malloc(len);
78 94
79 let offset = 0; 95 let offset = 0;
80 - stringToUTF8(config.encoder, buffer + offset, encoderLen); 96 + Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
81 offset += encoderLen; 97 offset += encoderLen;
82 98
83 - stringToUTF8(config.decoder, buffer + offset, decoderLen); 99 + Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
84 100
85 offset = 0; 101 offset = 0;
86 - setValue(ptr, buffer + offset, 'i8*'); 102 + Module.setValue(ptr, buffer + offset, 'i8*');
87 offset += encoderLen; 103 offset += encoderLen;
88 104
89 - setValue(ptr + 4, buffer + offset, 'i8*'); 105 + Module.setValue(ptr + 4, buffer + offset, 'i8*');
90 106
91 return { 107 return {
92 buffer: buffer, ptr: ptr, len: len, 108 buffer: buffer, ptr: ptr, len: len,
93 } 109 }
94 } 110 }
95 111
96 -function initSherpaOnnxOnlineZipformer2CtcModelConfig(config) {  
97 - let n = lengthBytesUTF8(config.model) + 1;  
98 - let buffer = _malloc(n); 112 +function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
  113 + const n = Module.lengthBytesUTF8(config.model) + 1;
  114 + const buffer = Module._malloc(n);
99 115
100 - let len = 1 * 4; // 1 pointer  
101 - let ptr = _malloc(len); 116 + const len = 1 * 4; // 1 pointer
  117 + const ptr = Module._malloc(len);
102 118
103 - stringToUTF8(config.model, buffer, n); 119 + Module.stringToUTF8(config.model, buffer, n);
104 120
105 - setValue(ptr, buffer, 'i8*'); 121 + Module.setValue(ptr, buffer, 'i8*');
106 122
107 return { 123 return {
108 buffer: buffer, ptr: ptr, len: len, 124 buffer: buffer, ptr: ptr, len: len,
109 } 125 }
110 } 126 }
111 127
112 -function initSherpaOnnxOnlineModelConfig(config) {  
113 - let transducer = initSherpaOnnxOnlineTransducerModelConfig(config.transducer);  
114 - let paraformer = initSherpaOnnxOnlineParaformerModelConfig(config.paraformer);  
115 - let ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(config.zipformer2Ctc); 128 +function initSherpaOnnxOnlineModelConfig(config, Module) {
  129 + const transducer =
  130 + initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
  131 + const paraformer =
  132 + initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);
  133 + const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
  134 + config.zipformer2Ctc, Module);
116 135
117 - let len = transducer.len + paraformer.len + ctc.len + 5 * 4;  
118 - let ptr = _malloc(len); 136 + const len = transducer.len + paraformer.len + ctc.len + 5 * 4;
  137 + const ptr = Module._malloc(len);
119 138
120 let offset = 0; 139 let offset = 0;
121 - _CopyHeap(transducer.ptr, transducer.len, ptr + offset); 140 + Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
122 offset += transducer.len; 141 offset += transducer.len;
123 142
124 - _CopyHeap(paraformer.ptr, paraformer.len, ptr + offset); 143 + Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
125 offset += paraformer.len; 144 offset += paraformer.len;
126 145
127 - _CopyHeap(ctc.ptr, ctc.len, ptr + offset); 146 + Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset);
128 offset += ctc.len; 147 offset += ctc.len;
129 148
130 - let tokensLen = lengthBytesUTF8(config.tokens) + 1;  
131 - let providerLen = lengthBytesUTF8(config.provider) + 1;  
132 - let modelTypeLen = lengthBytesUTF8(config.modelType) + 1;  
133 - let bufferLen = tokensLen + providerLen + modelTypeLen;  
134 - let buffer = _malloc(bufferLen); 149 + const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
  150 + const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
  151 + const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
  152 + const bufferLen = tokensLen + providerLen + modelTypeLen;
  153 + const buffer = Module._malloc(bufferLen);
135 154
136 offset = 0; 155 offset = 0;
137 - stringToUTF8(config.tokens, buffer, tokensLen); 156 + Module.stringToUTF8(config.tokens, buffer, tokensLen);
138 offset += tokensLen; 157 offset += tokensLen;
139 158
140 - stringToUTF8(config.provider, buffer + offset, providerLen); 159 + Module.stringToUTF8(config.provider, buffer + offset, providerLen);
141 offset += providerLen; 160 offset += providerLen;
142 161
143 - stringToUTF8(config.modelType, buffer + offset, modelTypeLen); 162 + Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
144 163
145 offset = transducer.len + paraformer.len + ctc.len; 164 offset = transducer.len + paraformer.len + ctc.len;
146 - setValue(ptr + offset, buffer, 'i8*'); // tokens 165 + Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
147 offset += 4; 166 offset += 4;
148 167
149 - setValue(ptr + offset, config.numThreads, 'i32'); 168 + Module.setValue(ptr + offset, config.numThreads, 'i32');
150 offset += 4; 169 offset += 4;
151 170
152 - setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider 171 + Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
153 offset += 4; 172 offset += 4;
154 173
155 - setValue(ptr + offset, config.debug, 'i32'); 174 + Module.setValue(ptr + offset, config.debug, 'i32');
156 offset += 4; 175 offset += 4;
157 176
158 - setValue(ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType 177 + Module.setValue(
  178 + ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
159 offset += 4; 179 offset += 4;
160 180
161 return { 181 return {
@@ -164,63 +184,63 @@ function initSherpaOnnxOnlineModelConfig(config) { @@ -164,63 +184,63 @@ function initSherpaOnnxOnlineModelConfig(config) {
164 } 184 }
165 } 185 }
166 186
167 -function initSherpaOnnxFeatureConfig(config) {  
168 - let len = 2 * 4; // 2 pointers  
169 - let ptr = _malloc(len); 187 +function initSherpaOnnxFeatureConfig(config, Module) {
  188 + const len = 2 * 4; // 2 pointers
  189 + const ptr = Module._malloc(len);
170 190
171 - setValue(ptr, config.sampleRate, 'i32');  
172 - setValue(ptr + 4, config.featureDim, 'i32'); 191 + Module.setValue(ptr, config.sampleRate, 'i32');
  192 + Module.setValue(ptr + 4, config.featureDim, 'i32');
173 return {ptr: ptr, len: len}; 193 return {ptr: ptr, len: len};
174 } 194 }
175 195
176 -function initSherpaOnnxOnlineRecognizerConfig(config) {  
177 - let feat = initSherpaOnnxFeatureConfig(config.featConfig);  
178 - let model = initSherpaOnnxOnlineModelConfig(config.modelConfig); 196 +function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
  197 + const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  198 + const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
179 199
180 - let len = feat.len + model.len + 8 * 4;  
181 - let ptr = _malloc(len); 200 + const len = feat.len + model.len + 8 * 4;
  201 + const ptr = Module._malloc(len);
182 202
183 let offset = 0; 203 let offset = 0;
184 - _CopyHeap(feat.ptr, feat.len, ptr + offset); 204 + Module._CopyHeap(feat.ptr, feat.len, ptr + offset);
185 offset += feat.len; 205 offset += feat.len;
186 206
187 - _CopyHeap(model.ptr, model.len, ptr + offset); 207 + Module._CopyHeap(model.ptr, model.len, ptr + offset);
188 offset += model.len; 208 offset += model.len;
189 209
190 - let decodingMethodLen = lengthBytesUTF8(config.decodingMethod) + 1;  
191 - let hotwordsFileLen = lengthBytesUTF8(config.hotwordsFile) + 1;  
192 - let bufferLen = decodingMethodLen + hotwordsFileLen;  
193 - let buffer = _malloc(bufferLen); 210 + const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
  211 + const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
  212 + const bufferLen = decodingMethodLen + hotwordsFileLen;
  213 + const buffer = Module._malloc(bufferLen);
194 214
195 offset = 0; 215 offset = 0;
196 - stringToUTF8(config.decodingMethod, buffer, decodingMethodLen); 216 + Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen);
197 offset += decodingMethodLen; 217 offset += decodingMethodLen;
198 218
199 - stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); 219 + Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
200 220
201 offset = feat.len + model.len; 221 offset = feat.len + model.len;
202 - setValue(ptr + offset, buffer, 'i8*'); // decoding method 222 + Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
203 offset += 4; 223 offset += 4;
204 224
205 - setValue(ptr + offset, config.maxActivePaths, 'i32'); 225 + Module.setValue(ptr + offset, config.maxActivePaths, 'i32');
206 offset += 4; 226 offset += 4;
207 227
208 - setValue(ptr + offset, config.enableEndpoint, 'i32'); 228 + Module.setValue(ptr + offset, config.enableEndpoint, 'i32');
209 offset += 4; 229 offset += 4;
210 230
211 - setValue(ptr + offset, config.rule1MinTrailingSilence, 'float'); 231 + Module.setValue(ptr + offset, config.rule1MinTrailingSilence, 'float');
212 offset += 4; 232 offset += 4;
213 233
214 - setValue(ptr + offset, config.rule2MinTrailingSilence, 'float'); 234 + Module.setValue(ptr + offset, config.rule2MinTrailingSilence, 'float');
215 offset += 4; 235 offset += 4;
216 236
217 - setValue(ptr + offset, config.rule3MinUtteranceLength, 'float'); 237 + Module.setValue(ptr + offset, config.rule3MinUtteranceLength, 'float');
218 offset += 4; 238 offset += 4;
219 239
220 - setValue(ptr + offset, buffer + decodingMethodLen, 'i8*'); 240 + Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
221 offset += 4; 241 offset += 4;
222 242
223 - setValue(ptr + offset, config.hotwordsScore, 'float'); 243 + Module.setValue(ptr + offset, config.hotwordsScore, 'float');
224 offset += 4; 244 offset += 4;
225 245
226 return { 246 return {
@@ -229,21 +249,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config) { @@ -229,21 +249,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config) {
229 } 249 }
230 250
231 251
232 -function createRecognizer() {  
233 - let onlineTransducerModelConfig = { 252 +function createOnlineRecognizer(Module, myConfig) {
  253 + const onlineTransducerModelConfig = {
234 encoder: '', 254 encoder: '',
235 decoder: '', 255 decoder: '',
236 joiner: '', 256 joiner: '',
237 - } 257 + };
238 258
239 - let onlineParaformerModelConfig = { 259 + const onlineParaformerModelConfig = {
240 encoder: '', 260 encoder: '',
241 decoder: '', 261 decoder: '',
242 - } 262 + };
243 263
244 - let onlineZipformer2CtcModelConfig = { 264 + const onlineZipformer2CtcModelConfig = {
245 model: '', 265 model: '',
246 - } 266 + };
247 267
248 let type = 0; 268 let type = 0;
249 269
@@ -266,7 +286,7 @@ function createRecognizer() { @@ -266,7 +286,7 @@ function createRecognizer() {
266 } 286 }
267 287
268 288
269 - let onlineModelConfig = { 289 + const onlineModelConfig = {
270 transducer: onlineTransducerModelConfig, 290 transducer: onlineTransducerModelConfig,
271 paraformer: onlineParaformerModelConfig, 291 paraformer: onlineParaformerModelConfig,
272 zipformer2Ctc: onlineZipformer2CtcModelConfig, 292 zipformer2Ctc: onlineZipformer2CtcModelConfig,
@@ -275,12 +295,12 @@ function createRecognizer() { @@ -275,12 +295,12 @@ function createRecognizer() {
275 provider: 'cpu', 295 provider: 'cpu',
276 debug: 1, 296 debug: 1,
277 modelType: '', 297 modelType: '',
278 - } 298 + };
279 299
280 - let featureConfig = { 300 + const featureConfig = {
281 sampleRate: 16000, 301 sampleRate: 16000,
282 featureDim: 80, 302 featureDim: 80,
283 - } 303 + };
284 304
285 let recognizerConfig = { 305 let recognizerConfig = {
286 featConfig: featureConfig, 306 featConfig: featureConfig,
@@ -293,23 +313,336 @@ function createRecognizer() { @@ -293,23 +313,336 @@ function createRecognizer() {
293 rule3MinUtteranceLength: 20, 313 rule3MinUtteranceLength: 20,
294 hotwordsFile: '', 314 hotwordsFile: '',
295 hotwordsScore: 1.5, 315 hotwordsScore: 1.5,
  316 + };
  317 + if (myConfig) {
  318 + recognizerConfig = myConfig;
  319 + }
  320 +
  321 + return new OnlineRecognizer(recognizerConfig, Module);
  322 +}
  323 +
  324 +function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
  325 + const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
  326 + const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
  327 + const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1;
  328 +
  329 + const n = encoderLen + decoderLen + joinerLen;
  330 +
  331 + const buffer = Module._malloc(n);
  332 +
  333 + const len = 3 * 4; // 3 pointers
  334 + const ptr = Module._malloc(len);
  335 +
  336 + let offset = 0;
  337 + Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
  338 + offset += encoderLen;
  339 +
  340 + Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
  341 + offset += decoderLen;
  342 +
  343 + Module.stringToUTF8(config.joiner, buffer + offset, joinerLen);
  344 +
  345 + offset = 0;
  346 + Module.setValue(ptr, buffer + offset, 'i8*');
  347 + offset += encoderLen;
  348 +
  349 + Module.setValue(ptr + 4, buffer + offset, 'i8*');
  350 + offset += decoderLen;
  351 +
  352 + Module.setValue(ptr + 8, buffer + offset, 'i8*');
  353 +
  354 + return {
  355 + buffer: buffer, ptr: ptr, len: len,
  356 + }
  357 +}
  358 +
  359 +function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
  360 + const n = Module.lengthBytesUTF8(config.model) + 1;
  361 +
  362 + const buffer = Module._malloc(n);
  363 +
  364 + const len = 1 * 4; // 1 pointer
  365 + const ptr = Module._malloc(len);
  366 +
  367 + Module.stringToUTF8(config.model, buffer, n);
  368 +
  369 + Module.setValue(ptr, buffer, 'i8*');
  370 +
  371 + return {
  372 + buffer: buffer, ptr: ptr, len: len,
  373 + }
  374 +}
  375 +
  376 +function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
  377 + const n = Module.lengthBytesUTF8(config.model) + 1;
  378 +
  379 + const buffer = Module._malloc(n);
  380 +
  381 + const len = 1 * 4; // 1 pointer
  382 + const ptr = Module._malloc(len);
  383 +
  384 + Module.stringToUTF8(config.model, buffer, n);
  385 +
  386 + Module.setValue(ptr, buffer, 'i8*');
  387 +
  388 + return {
  389 + buffer: buffer, ptr: ptr, len: len,
  390 + }
  391 +}
  392 +
  393 +function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
  394 + const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
  395 + const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
  396 +
  397 + const n = encoderLen + decoderLen;
  398 + const buffer = Module._malloc(n);
  399 +
  400 + const len = 2 * 4; // 2 pointers
  401 + const ptr = Module._malloc(len);
  402 +
  403 + let offset = 0;
  404 + Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
  405 + offset += encoderLen;
  406 +
  407 + Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
  408 +
  409 + offset = 0;
  410 + Module.setValue(ptr, buffer + offset, 'i8*');
  411 + offset += encoderLen;
  412 +
  413 + Module.setValue(ptr + 4, buffer + offset, 'i8*');
  414 +
  415 + return {
  416 + buffer: buffer, ptr: ptr, len: len,
  417 + }
  418 +}
  419 +
  420 +function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
  421 + const n = Module.lengthBytesUTF8(config.model) + 1;
  422 + const buffer = Module._malloc(n);
  423 +
  424 + const len = 1 * 4; // 1 pointer
  425 + const ptr = Module._malloc(len);
  426 +
  427 + Module.stringToUTF8(config.model, buffer, n);
  428 +
  429 + Module.setValue(ptr, buffer, 'i8*');
  430 +
  431 + return {
  432 + buffer: buffer, ptr: ptr, len: len,
  433 + }
  434 +}
  435 +
  436 +function initSherpaOnnxOfflineLMConfig(config, Module) {
  437 + const n = Module.lengthBytesUTF8(config.model) + 1;
  438 + const buffer = Module._malloc(n);
  439 +
  440 + const len = 2 * 4;
  441 + const ptr = Module._malloc(len);
  442 +
  443 + Module.stringToUTF8(config.model, buffer, n);
  444 + Module.setValue(ptr, buffer, 'i8*');
  445 + Module.setValue(ptr + 4, config.scale, 'float');
  446 +
  447 + return {
  448 + buffer: buffer, ptr: ptr, len: len,
  449 + }
  450 +}
  451 +
// Serializes an offline model config into the wasm heap and returns
// {buffer, ptr, len, transducer, paraformer, nemoCtc, whisper, tdnn}:
//  - ptr/len describe a SherpaOnnxOfflineModelConfig struct: the five nested
//    sub-structs first, then tokens (char*), num_threads (i32), debug (i32),
//    provider (char*), model_type (char*). This layout is pinned by the
//    static_asserts in sherpa-onnx-wasm-main-nodejs.cc.
//  - buffer holds the packed UTF-8 copies of tokens/provider/modelType.
// NOTE(review): the nested sub-config heap blocks are returned to the caller
// but freeConfig() only follows a `.config` property, so transducer/
// paraformer/nemoCtc/whisper/tdnn allocations appear to leak — verify.
function initSherpaOnnxOfflineModelConfig(config, Module) {
  // Serialize each nested sub-config into its own temporary heap block.
  const transducer =
      initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
  const paraformer =
      initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);
  const nemoCtc =
      initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);
  const whisper =
      initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);
  const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);

  // 5 * 4 accounts for the trailing tokens/num_threads/debug/provider/
  // model_type fields, 4 bytes each.
  const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
      tdnn.len + 5 * 4;
  const ptr = Module._malloc(len);

  // Copy the nested structs into the parent struct, in declaration order.
  let offset = 0;
  Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
  offset += transducer.len;

  Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
  offset += paraformer.len;

  Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
  offset += nemoCtc.len;

  Module._CopyHeap(whisper.ptr, whisper.len, ptr + offset);
  offset += whisper.len;

  Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset);
  offset += tdnn.len;

  // Pack the three strings back-to-back (NUL-terminated) into one buffer.
  const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
  const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
  const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
  const bufferLen = tokensLen + providerLen + modelTypeLen;
  const buffer = Module._malloc(bufferLen);

  offset = 0;
  Module.stringToUTF8(config.tokens, buffer, tokensLen);
  offset += tokensLen;

  Module.stringToUTF8(config.provider, buffer + offset, providerLen);
  offset += providerLen;

  Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen);

  // Now fill in the scalar/pointer fields that follow the nested structs.
  offset =
      transducer.len + paraformer.len + nemoCtc.len + whisper.len + tdnn.len;
  Module.setValue(ptr + offset, buffer, 'i8*');  // tokens
  offset += 4;

  Module.setValue(ptr + offset, config.numThreads, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.debug, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer + tokensLen, 'i8*');  // provider
  offset += 4;

  Module.setValue(
      ptr + offset, buffer + tokensLen + providerLen, 'i8*');  // modelType
  offset += 4;

  return {
    buffer: buffer, ptr: ptr, len: len, transducer: transducer,
    paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn
  }
}
  521 +
// Serializes an offline recognizer config into the wasm heap.
// Layout mirrors SherpaOnnxOfflineRecognizerConfig (pinned by a
// static_assert in sherpa-onnx-wasm-main-nodejs.cc): feat config, model
// config, LM config, then decoding_method (char*), max_active_paths (i32),
// hotwords_file (char*), hotwords_score (float).
// NOTE(review): the returned feat/model/lm records own heap blocks that
// freeConfig() (which only follows `.config`) does not release — verify.
function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
  const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);

  // 4 * 4 covers the four trailing fields listed above, 4 bytes each.
  const len = feat.len + model.len + lm.len + 4 * 4;
  const ptr = Module._malloc(len);

  // Nested structs first, in declaration order.
  let offset = 0;
  Module._CopyHeap(feat.ptr, feat.len, ptr + offset);
  offset += feat.len;

  Module._CopyHeap(model.ptr, model.len, ptr + offset);
  offset += model.len;

  Module._CopyHeap(lm.ptr, lm.len, ptr + offset);
  offset += lm.len;

  // Both strings are packed (NUL-terminated) into a single heap buffer.
  const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
  const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
  const bufferLen = decodingMethodLen + hotwordsFileLen;
  const buffer = Module._malloc(bufferLen);

  offset = 0;
  Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen);
  offset += decodingMethodLen;

  Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);

  // Scalar/pointer fields follow the three nested structs.
  offset = feat.len + model.len + lm.len;

  Module.setValue(ptr + offset, buffer, 'i8*');  // decoding method
  offset += 4;

  Module.setValue(ptr + offset, config.maxActivePaths, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.hotwordsScore, 'float');
  offset += 4;

  return {
    buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
  }
}
300 569
// Light wrapper over a SherpaOnnxOfflineStream* created by
// OfflineRecognizer.createStream().
class OfflineStream {
  constructor(handle, Module) {
    this.handle = handle;
    this.Module = Module;
  }

  // Destroys the underlying stream; calling it more than once is harmless.
  free() {
    if (!this.handle) {
      return;
    }
    this.Module._DestroyOfflineStream(this.handle);
    this.handle = null;
  }

  /**
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    // Copy the samples into a scratch heap buffer, hand them to the C side,
    // then release the buffer again.
    const numBytes = samples.length * samples.BYTES_PER_ELEMENT;
    const buf = this.Module._malloc(numBytes);
    this.Module.HEAPF32.set(samples, buf / samples.BYTES_PER_ELEMENT);
    this.Module._AcceptWaveformOffline(
        this.handle, sampleRate, buf, samples.length);
    this.Module._free(buf);
  }
};
  596 +
// JS-side handle for the non-streaming (offline) recognizer that lives in
// the wasm module.
class OfflineRecognizer {
  constructor(configObj, Module) {
    this.config = configObj;
    const serialized = initSherpaOnnxOfflineRecognizerConfig(configObj, Module);
    this.handle = Module._CreateOfflineRecognizer(serialized.ptr);
    // The C side copies what it needs; release the serialized config.
    freeConfig(serialized, Module);
    this.Module = Module;
  }

  // Destroys the recognizer. Streams created from it are freed separately.
  free() {
    this.Module._DestroyOfflineRecognizer(this.handle);
    this.handle = 0
  }

  // Creates a fresh stream for decoding one utterance.
  createStream() {
    return new OfflineStream(
        this.Module._CreateOfflineStream(this.handle), this.Module);
  }

  // Runs recognition on everything fed into the stream so far.
  decode(stream) {
    this.Module._DecodeOfflineStream(this.handle, stream.handle);
  }

  // Returns the recognized text of a decoded stream.
  getResult(stream) {
    const result = this.Module._GetOfflineStreamResult(stream.handle);
    const text = this.Module.UTF8ToString(this.Module.getValue(result, 'i8*'));
    this.Module._DestroyOfflineRecognizerResult(result);
    return text;
  }
};
  632 +
// Wraps a SherpaOnnxOnlineStream* plus a lazily grown scratch buffer that is
// reused across acceptWaveform() calls.
class OnlineStream {
  constructor(handle, Module) {
    this.handle = handle;
    this.pointer = null;  // scratch buffer in the wasm heap
    this.n = 0;           // capacity of the scratch buffer, in samples
    this.Module = Module;
  }

  // Destroys the stream and releases the scratch buffer. Safe to call twice.
  free() {
    if (!this.handle) {
      return;
    }
    this.Module._DestroyOnlineStream(this.handle);
    this.handle = null;
    this.Module._free(this.pointer);
    this.pointer = null;
    this.n = 0;
  }

  /**
   * Feeds audio into the stream.
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    if (this.n < samples.length) {
      // Grow the scratch buffer; _free(null) is a harmless no-op.
      this.Module._free(this.pointer);
      const numBytes = samples.length * samples.BYTES_PER_ELEMENT;
      this.pointer = this.Module._malloc(numBytes);
      this.n = samples.length
    }

    this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
    this.Module._AcceptWaveform(
        this.handle, sampleRate, this.pointer, samples.length);
  }

  // Signals that no more audio will arrive for this stream.
  inputFinished() {
    this.Module._InputFinished(this.handle);
  }
};
337 672
// JS-side handle for the streaming (online) recognizer that lives in the
// wasm module.
class OnlineRecognizer {
  constructor(configObj, Module) {
    this.config = configObj;
    const serialized = initSherpaOnnxOnlineRecognizerConfig(configObj, Module)
    this.handle = Module._CreateOnlineRecognizer(serialized.ptr);
    // The C side copies what it needs; release the serialized config.
    freeConfig(serialized, Module);
    this.Module = Module;
  }

  // Destroys the recognizer. Streams created from it are freed separately.
  free() {
    this.Module._DestroyOnlineRecognizer(this.handle);
    this.handle = 0
  }

  // Creates a fresh stream to feed audio into.
  createStream() {
    return new OnlineStream(
        this.Module._CreateOnlineStream(this.handle), this.Module);
  }

  // True when enough audio is buffered for another decode() step.
  isReady(stream) {
    return this.Module._IsOnlineStreamReady(this.handle, stream.handle) === 1;
  }

  decode(stream) {
    this.Module._DecodeOnlineStream(this.handle, stream.handle);
  }

  // True when an endpoint was detected on the stream.
  isEndpoint(stream) {
    return this.Module._IsEndpoint(this.handle, stream.handle) === 1;
  }

  // Clears recognizer state for this stream so a new utterance can start.
  reset(stream) {
    this.Module._Reset(this.handle, stream.handle);
  }

  // Returns the current (possibly partial) recognition text.
  getResult(stream) {
    const result =
        this.Module._GetOnlineStreamResult(this.handle, stream.handle);
    const text = this.Module.UTF8ToString(this.Module.getValue(result, 'i8*'));
    this.Module._DestroyOnlineRecognizerResult(result);
    return text;
  }
}
  719 +
  720 +if (typeof process == 'object' && typeof process.versions == 'object' &&
  721 + typeof process.versions.node == 'string') {
  722 + module.exports = {
  723 + createOnlineRecognizer,
  724 + OfflineRecognizer,
  725 + };
  726 +}
1 -// wasm/sherpa-onnx-wasm-asr-main.cc 1 +// wasm/sherpa-onnx-wasm-main-asr.cc
2 // 2 //
3 // Copyright (c) 2024 Xiaomi Corporation 3 // Copyright (c) 2024 Xiaomi Corporation
4 #include <stdio.h> 4 #include <stdio.h>
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-nodejs.sh to build for wasm NodeJS")
endif()

# C symbols exported from the wasm module. JavaScript reaches them with a
# leading underscore, e.g. Module._CreateOnlineRecognizer.
set(exported_functions
  #tts
  PrintOfflineTtsConfig
  SherpaOnnxCreateOfflineTts
  SherpaOnnxDestroyOfflineTts
  SherpaOnnxDestroyOfflineTtsGeneratedAudio
  SherpaOnnxOfflineTtsGenerate
  SherpaOnnxOfflineTtsGenerateWithCallback
  SherpaOnnxOfflineTtsNumSpeakers
  SherpaOnnxOfflineTtsSampleRate
  SherpaOnnxWriteWave
  # streaming asr
  AcceptWaveform
  CreateOnlineRecognizer
  CreateOnlineStream
  DecodeOnlineStream
  DestroyOnlineRecognizer
  DestroyOnlineRecognizerResult
  DestroyOnlineStream
  GetOnlineStreamResult
  InputFinished
  IsEndpoint
  IsOnlineStreamReady
  Reset
  # non-streaming ASR
  PrintOfflineRecognizerConfig
  CreateOfflineRecognizer
  DestroyOfflineRecognizer
  CreateOfflineStream
  DestroyOfflineStream
  AcceptWaveformOffline
  DecodeOfflineStream
  DecodeMultipleOfflineStreams
  GetOfflineStreamResult
  DestroyOfflineRecognizerResult
)

# emscripten expects every exported C symbol to carry a leading underscore.
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
# NOTE: pass -sEXPORTED_RUNTIME_METHODS exactly once -- a repeated occurrence
# overrides (not extends) the earlier one, silently dropping methods.
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
string(APPEND MY_FLAGS " -sNODERAWFS=1 ")
string(APPEND MY_FLAGS " -sMODULARIZE=1 -sWASM_ASYNC_COMPILATION=0 ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
# Fixed typo: CMAKE_EXECUTBLE_LINKER_FLAGS is not a CMake variable and was
# never read; the linker flags belong in CMAKE_EXE_LINKER_FLAGS.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}")

add_executable(sherpa-onnx-wasm-nodejs sherpa-onnx-wasm-nodejs.cc)
target_link_libraries(sherpa-onnx-wasm-nodejs sherpa-onnx-core sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-nodejs DESTINATION bin/wasm/nodejs)

install(
  FILES
  ${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js
  ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js
  "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
  "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
  DESTINATION
    bin/wasm/nodejs
)
  1 +// wasm/sherpa-onnx-wasm-main-nodejs.cc
  2 +//
  3 +// Copyright (c) 2024 Xiaomi Corporation
  4 +#include <stdio.h>
  5 +
  6 +#include <algorithm>
  7 +#include <memory>
  8 +
  9 +#include "sherpa-onnx/c-api/c-api.h"
  10 +
extern "C" {

// The JavaScript wrapper (sherpa-onnx-asr.js) serializes these config
// structs by writing raw bytes at hand-computed offsets. The checks below
// pin the struct sizes that arithmetic relies on; if one of them fires, the
// JS-side offsets must be updated in lockstep.
static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");

static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");

// The aggregate structs must be exactly the sum of their members (5 * 4 and
// 4 * 4 extra bytes of scalar/pointer fields) -- i.e. no padding inserted.
static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
                  sizeof(SherpaOnnxOfflineTransducerModelConfig) +
                      sizeof(SherpaOnnxOfflineParaformerModelConfig) +
                      sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
                      sizeof(SherpaOnnxOfflineWhisperModelConfig) +
                      sizeof(SherpaOnnxOfflineTdnnModelConfig) + 5 * 4,
              "");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
                  sizeof(SherpaOnnxFeatureConfig) +
                      sizeof(SherpaOnnxOfflineLMConfig) +
                      sizeof(SherpaOnnxOfflineModelConfig) + 4 * 4,
              "");
// Debug helper exported to JavaScript: dumps every field of the given TTS
// config to stdout so the byte layout produced by the JS-side serializer can
// be verified by inspection.
void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
  auto tts_model_config = &tts_config->model;
  auto vits_model_config = &tts_model_config->vits;
  fprintf(stdout, "----------vits model config----------\n");
  fprintf(stdout, "model: %s\n", vits_model_config->model);
  fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon);
  fprintf(stdout, "tokens: %s\n", vits_model_config->tokens);
  fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir);
  fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale);
  fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w);
  fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale);

  fprintf(stdout, "----------tts model config----------\n");
  fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
  fprintf(stdout, "debug: %d\n", tts_model_config->debug);
  fprintf(stdout, "provider: %s\n", tts_model_config->provider);

  fprintf(stdout, "----------tts config----------\n");
  fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts);
  fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences);
}
  56 +
// Debug helper exported to JavaScript: dumps every field of the given
// offline recognizer config to stdout so the JS-side struct serialization
// (offset arithmetic) can be verified by inspection.
void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
  auto model_config = &config->model_config;
  auto feat = &config->feat_config;
  auto transducer = &model_config->transducer;
  auto paraformer = &model_config->paraformer;
  auto nemo_ctc = &model_config->nemo_ctc;
  auto whisper = &model_config->whisper;
  auto tdnn = &model_config->tdnn;

  fprintf(stdout, "----------offline transducer model config----------\n");
  fprintf(stdout, "encoder: %s\n", transducer->encoder);
  fprintf(stdout, "decoder: %s\n", transducer->decoder);
  fprintf(stdout, "joiner: %s\n", transducer->joiner);

  fprintf(stdout, "----------offline paraformer model config----------\n");
  fprintf(stdout, "model: %s\n", paraformer->model);

  fprintf(stdout, "----------offline nemo_ctc model config----------\n");
  fprintf(stdout, "model: %s\n", nemo_ctc->model);

  fprintf(stdout, "----------offline whisper model config----------\n");
  fprintf(stdout, "encoder: %s\n", whisper->encoder);
  fprintf(stdout, "decoder: %s\n", whisper->decoder);

  fprintf(stdout, "----------offline tdnn model config----------\n");
  fprintf(stdout, "model: %s\n", tdnn->model);

  fprintf(stdout, "tokens: %s\n", model_config->tokens);
  fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
  fprintf(stdout, "provider: %s\n", model_config->provider);
  fprintf(stdout, "debug: %d\n", model_config->debug);
  fprintf(stdout, "model type: %s\n", model_config->model_type);

  fprintf(stdout, "----------feat config----------\n");
  fprintf(stdout, "sample rate: %d\n", feat->sample_rate);
  fprintf(stdout, "feat dim: %d\n", feat->feature_dim);

  fprintf(stdout, "----------recognizer config----------\n");
  fprintf(stdout, "decoding method: %s\n", config->decoding_method);
  fprintf(stdout, "max active paths: %d\n", config->max_active_paths);
  fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
  fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
}
  100 +
// Copies num_bytes bytes from src to dst within the wasm heap. Exported to
// JavaScript as _CopyHeap so the wrapper can assemble nested config structs
// (the ranges are expected not to overlap).
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  for (int32_t i = 0; i != num_bytes; ++i) {
    dst[i] = src[i];
  }
}
  104 +}
@@ -22,7 +22,7 @@ Module.onRuntimeInitialized = function() { @@ -22,7 +22,7 @@ Module.onRuntimeInitialized = function() {
22 console.log('Model files downloaded!'); 22 console.log('Model files downloaded!');
23 23
24 console.log('Initializing tts ......'); 24 console.log('Initializing tts ......');
25 - tts = initSherpaOnnxOfflineTts() 25 + tts = createOfflineTts(Module)
26 if (tts.numSpeakers > 1) { 26 if (tts.numSpeakers > 1) {
27 speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`; 27 speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`;
28 } 28 }
1 1
// Releases the wasm-heap blocks owned by a record produced by the
// initSherpaOnnx*Config() helpers: the packed string buffer (when present),
// any nested sub-config stored under `.config`, and finally the struct
// block itself.
function freeConfig(config, Module) {
  const has = (key) => key in config;

  if (has('buffer')) {
    Module._free(config.buffer);
  }

  if (has('config')) {
    freeConfig(config.config, Module);
  }

  Module._free(config.ptr);
}
13 13
// The user should free the returned pointers
// Serializes a vits model config into the wasm heap. The four strings are
// packed back-to-back (NUL-terminated) into `buffer`; the struct at `ptr`
// holds four char* fields followed by three floats (7 * 4 bytes total).
function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
  const strings = [config.model, config.lexicon, config.tokens, config.dataDir];
  const byteLens = strings.map((s) => Module.lengthBytesUTF8(s) + 1);
  const totalBytes = byteLens.reduce((a, b) => a + b, 0);
  const buffer = Module._malloc(totalBytes);

  const len = 7 * 4;
  const ptr = Module._malloc(len);

  // Write each string and store its address in the matching pointer field.
  let offset = 0;
  for (let i = 0; i < strings.length; ++i) {
    Module.stringToUTF8(strings[i], buffer + offset, byteLens[i]);
    Module.setValue(ptr + 4 * i, buffer + offset, 'i8*');
    offset += byteLens[i];
  }

  Module.setValue(ptr + 16, config.noiseScale, 'float');
  Module.setValue(ptr + 20, config.noiseScaleW, 'float');
  Module.setValue(ptr + 24, config.lengthScale, 'float');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
62 62
// Serializes SherpaOnnxOfflineTtsModelConfig: the nested vits config first,
// then num_threads (i32), debug (i32) and provider (char*).
// Caller frees the returned record with freeConfig(); the nested config is
// chained via `.config` so freeConfig() releases it too.
function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
  const vits = initSherpaOnnxOfflineTtsVitsModelConfig(
      config.offlineTtsVitsModelConfig, Module);

  const len = vits.len + 3 * 4;
  const ptr = Module._malloc(len);

  // Nested struct first, then the three trailing fields at fixed offsets.
  Module._CopyHeap(vits.ptr, vits.len, ptr);
  Module.setValue(ptr + vits.len, config.numThreads, 'i32');
  Module.setValue(ptr + vits.len + 4, config.debug, 'i32');

  const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
  const buffer = Module._malloc(providerLen);
  Module.stringToUTF8(config.provider, buffer, providerLen);
  Module.setValue(ptr + vits.len + 8, buffer, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len, config: vits,
  }
}
89 89
// Serializes SherpaOnnxOfflineTtsConfig: the nested model config first, then
// rule_fsts (char*) and max_num_sentences (i32).
// Caller frees the returned record with freeConfig(); the nested config is
// chained via `.config` so freeConfig() releases it too.
function initSherpaOnnxOfflineTtsConfig(config, Module) {
  const modelConfig =
      initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module);
  const len = modelConfig.len + 2 * 4;
  const ptr = Module._malloc(len);

  // Nested struct first, then the two trailing fields at fixed offsets.
  Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr);

  const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1;
  const buffer = Module._malloc(ruleFstsLen);
  Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);
  Module.setValue(ptr + modelConfig.len, buffer, 'i8*');

  Module.setValue(ptr + modelConfig.len + 4, config.maxNumSentences, 'i32');

  return {
    buffer: buffer, ptr: ptr, len: len, config: modelConfig,
  }
}
112 112
// JS-side handle for the offline text-to-speech engine in the wasm module.
class OfflineTts {
  /**
   * @param configObj TTS configuration object (see createOfflineTts()).
   * @param Module The initialized Emscripten module.
   */
  constructor(configObj, Module) {
    // Removed leftover debug `console.log(configObj)`.
    const config = initSherpaOnnxOfflineTtsConfig(configObj, Module)
    const handle = Module._SherpaOnnxCreateOfflineTts(config.ptr);

    // The C side copies what it needs; release the serialized config.
    freeConfig(config, Module);

    this.handle = handle;
    this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle);
    this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle);
    this.Module = Module
  }

  // Destroys the underlying tts object.
  free() {
    this.Module._SherpaOnnxDestroyOfflineTts(this.handle);
    this.handle = 0
  }

  // config = {
  //   text: 'hello',
  //   sid: 0,
  //   speed: 1.0
  // }
  // Returns {samples: Float32Array, sampleRate: Number}.
  generate(config) {
    const textLen = this.Module.lengthBytesUTF8(config.text) + 1;
    const textPtr = this.Module._malloc(textLen);
    this.Module.stringToUTF8(config.text, textPtr, textLen);

    const h = this.Module._SherpaOnnxOfflineTtsGenerate(
        this.handle, textPtr, config.sid, config.speed);

    // Fix: the text buffer was never freed, leaking one allocation per call.
    this.Module._free(textPtr);

    // h points to the generated-audio struct; read its fields from the heap
    // (assumed layout: float* samples, i32 count, i32 sample rate — matches
    // the indices used below).
    const numSamples = this.Module.HEAP32[h / 4 + 1];
    const sampleRate = this.Module.HEAP32[h / 4 + 2];

    const samplesPtr = this.Module.HEAP32[h / 4] / 4;
    const samples = new Float32Array(numSamples);
    for (let i = 0; i < numSamples; i++) {
      samples[i] = this.Module.HEAPF32[samplesPtr + i];
    }

    this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h);
    return {samples: samples, sampleRate: sampleRate};
  }

  // Writes audio ({samples: Float32Array, sampleRate: Number}) to a WAV
  // file at `filename` via the module's file system support.
  save(filename, audio) {
    const samples = audio.samples;
    const sampleRate = audio.sampleRate;
    const ptr = this.Module._malloc(samples.length * 4);
    for (let i = 0; i < samples.length; i++) {
      this.Module.HEAPF32[ptr / 4 + i] = samples[i];
    }

    const filenameLen = this.Module.lengthBytesUTF8(filename) + 1;
    const buffer = this.Module._malloc(filenameLen);
    this.Module.stringToUTF8(filename, buffer, filenameLen);
    this.Module._SherpaOnnxWriteWave(ptr, samples.length, sampleRate, buffer);
    this.Module._free(buffer);
    this.Module._free(ptr);
  }
}
156 173
157 -function initSherpaOnnxOfflineTts() {  
158 - let offlineTtsVitsModelConfig = { 174 +function createOfflineTts(Module, myConfig) {
  175 + const offlineTtsVitsModelConfig = {
159 model: './model.onnx', 176 model: './model.onnx',
160 lexicon: '', 177 lexicon: '',
161 tokens: './tokens.txt', 178 tokens: './tokens.txt',
@@ -164,7 +181,7 @@ function initSherpaOnnxOfflineTts() { @@ -164,7 +181,7 @@ function initSherpaOnnxOfflineTts() {
164 noiseScaleW: 0.8, 181 noiseScaleW: 0.8,
165 lengthScale: 1.0, 182 lengthScale: 1.0,
166 }; 183 };
167 - let offlineTtsModelConfig = { 184 + const offlineTtsModelConfig = {
168 offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, 185 offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
169 numThreads: 1, 186 numThreads: 1,
170 debug: 1, 187 debug: 1,
@@ -176,5 +193,16 @@ function initSherpaOnnxOfflineTts() { @@ -176,5 +193,16 @@ function initSherpaOnnxOfflineTts() {
176 maxNumSentences: 1, 193 maxNumSentences: 1,
177 } 194 }
178 195
179 - return new OfflineTts(offlineTtsConfig); 196 + if (myConfig) {
  197 + offlineTtsConfig = myConfig;
  198 + }
  199 +
  200 + return new OfflineTts(offlineTtsConfig, Module);
  201 +}
  202 +
  203 +if (typeof process == 'object' && typeof process.versions == 'object' &&
  204 + typeof process.versions.node == 'string') {
  205 + module.exports = {
  206 + createOfflineTts,
  207 + };
180 } 208 }
1 -// wasm/sherpa-onnx-wasm-main.cc 1 +// wasm/sherpa-onnx-wasm-main-tts.cc
2 // 2 //
3 // Copyright (c) 2024 Xiaomi Corporation 3 // Copyright (c) 2024 Xiaomi Corporation
4 #include <stdio.h> 4 #include <stdio.h>