正在显示
39 个修改的文件
包含
1484 行增加
和
1921 行删除
| @@ -9,6 +9,7 @@ concurrency: | @@ -9,6 +9,7 @@ concurrency: | ||
| 9 | 9 | ||
| 10 | permissions: | 10 | permissions: |
| 11 | contents: read | 11 | contents: read |
| 12 | + id-token: write | ||
| 12 | 13 | ||
| 13 | jobs: | 14 | jobs: |
| 14 | nodejs: | 15 | nodejs: |
| @@ -20,10 +21,20 @@ jobs: | @@ -20,10 +21,20 @@ jobs: | ||
| 20 | python-version: ["3.8"] | 21 | python-version: ["3.8"] |
| 21 | 22 | ||
| 22 | steps: | 23 | steps: |
| 23 | - - uses: actions/checkout@v2 | 24 | + - uses: actions/checkout@v4 |
| 24 | with: | 25 | with: |
| 25 | fetch-depth: 0 | 26 | fetch-depth: 0 |
| 26 | 27 | ||
| 28 | + - name: Install emsdk | ||
| 29 | + uses: mymindstorm/setup-emsdk@v14 | ||
| 30 | + | ||
| 31 | + - name: View emsdk version | ||
| 32 | + shell: bash | ||
| 33 | + run: | | ||
| 34 | + emcc -v | ||
| 35 | + echo "--------------------" | ||
| 36 | + emcc --check | ||
| 37 | + | ||
| 27 | - name: Setup Python ${{ matrix.python-version }} | 38 | - name: Setup Python ${{ matrix.python-version }} |
| 28 | uses: actions/setup-python@v5 | 39 | uses: actions/setup-python@v5 |
| 29 | with: | 40 | with: |
| @@ -31,28 +42,38 @@ jobs: | @@ -31,28 +42,38 @@ jobs: | ||
| 31 | 42 | ||
| 32 | - uses: actions/setup-node@v4 | 43 | - uses: actions/setup-node@v4 |
| 33 | with: | 44 | with: |
| 34 | - node-version: 13 | ||
| 35 | registry-url: 'https://registry.npmjs.org' | 45 | registry-url: 'https://registry.npmjs.org' |
| 36 | 46 | ||
| 37 | - name: Display node version | 47 | - name: Display node version |
| 38 | shell: bash | 48 | shell: bash |
| 39 | run: | | 49 | run: | |
| 40 | node --version | 50 | node --version |
| 41 | - npm --version | ||
| 42 | - cd nodejs-examples | ||
| 43 | - | ||
| 44 | - npm install npm@6.14.4 -g | ||
| 45 | - npm install npm@6.14.4 | ||
| 46 | - npm --version | ||
| 47 | 51 | ||
| 48 | - name: Build nodejs package | 52 | - name: Build nodejs package |
| 49 | shell: bash | 53 | shell: bash |
| 50 | env: | 54 | env: |
| 51 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} | 55 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} |
| 52 | run: | | 56 | run: | |
| 57 | + ./build-wasm-simd-nodejs.sh | ||
| 58 | + cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/ | ||
| 59 | + cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/ | ||
| 60 | + | ||
| 61 | + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | ||
| 62 | + echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" | ||
| 63 | + | ||
| 53 | cd scripts/nodejs | 64 | cd scripts/nodejs |
| 54 | - ./run.sh | 65 | + |
| 66 | + owner=${{ github.repository_owner }} | ||
| 67 | + echo "owner: $owner" | ||
| 68 | + | ||
| 69 | + sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g ./package.json | ||
| 70 | + sed -i.bak s/k2-fsa/$owner/g ./package.json | ||
| 71 | + | ||
| 72 | + rm package.json.bak | ||
| 73 | + | ||
| 74 | + git diff | ||
| 75 | + | ||
| 55 | npm install | 76 | npm install |
| 56 | - rm run.sh | ||
| 57 | npm ci | 77 | npm ci |
| 78 | + # see https://docs.npmjs.com/generating-provenance-statements | ||
| 58 | npm publish --provenance --access public | 79 | npm publish --provenance --access public |
| @@ -40,7 +40,6 @@ jobs: | @@ -40,7 +40,6 @@ jobs: | ||
| 40 | 40 | ||
| 41 | - uses: actions/setup-node@v4 | 41 | - uses: actions/setup-node@v4 |
| 42 | with: | 42 | with: |
| 43 | - node-version: 13 | ||
| 44 | registry-url: 'https://registry.npmjs.org' | 43 | registry-url: 'https://registry.npmjs.org' |
| 45 | 44 | ||
| 46 | - name: Display node version | 45 | - name: Display node version |
| @@ -24,7 +24,7 @@ jobs: | @@ -24,7 +24,7 @@ jobs: | ||
| 24 | strategy: | 24 | strategy: |
| 25 | fail-fast: false | 25 | fail-fast: false |
| 26 | matrix: | 26 | matrix: |
| 27 | - os: [ubuntu-latest, macos-latest] #, windows-2019] | 27 | + os: [ubuntu-latest] #, macos-latest] #, windows-2019] |
| 28 | python-version: ["3.8"] | 28 | python-version: ["3.8"] |
| 29 | 29 | ||
| 30 | steps: | 30 | steps: |
| @@ -32,49 +32,38 @@ jobs: | @@ -32,49 +32,38 @@ jobs: | ||
| 32 | with: | 32 | with: |
| 33 | fetch-depth: 0 | 33 | fetch-depth: 0 |
| 34 | 34 | ||
| 35 | - - name: ccache | ||
| 36 | - uses: hendrikmuhs/ccache-action@v1.2 | ||
| 37 | - with: | ||
| 38 | - key: ${{ matrix.os }}-Release-ON | 35 | + - name: Install emsdk |
| 36 | + uses: mymindstorm/setup-emsdk@v14 | ||
| 39 | 37 | ||
| 40 | - - name: Configure CMake | 38 | + - name: View emsdk version |
| 41 | shell: bash | 39 | shell: bash |
| 42 | run: | | 40 | run: | |
| 43 | - export CMAKE_CXX_COMPILER_LAUNCHER=ccache | ||
| 44 | - export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" | ||
| 45 | - cmake --version | ||
| 46 | - | ||
| 47 | - mkdir build | ||
| 48 | - cd build | ||
| 49 | - cmake -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install .. | ||
| 50 | - cmake --build . --target install --config Release | ||
| 51 | - | ||
| 52 | - ls -lh install/lib | 41 | + emcc -v |
| 42 | + echo "--------------------" | ||
| 43 | + emcc --check | ||
| 53 | 44 | ||
| 54 | - name: Setup Python ${{ matrix.python-version }} | 45 | - name: Setup Python ${{ matrix.python-version }} |
| 55 | uses: actions/setup-python@v5 | 46 | uses: actions/setup-python@v5 |
| 56 | with: | 47 | with: |
| 57 | python-version: ${{ matrix.python-version }} | 48 | python-version: ${{ matrix.python-version }} |
| 58 | 49 | ||
| 59 | - - name: Copy files | 50 | + - uses: actions/setup-node@v4 |
| 51 | + with: | ||
| 52 | + registry-url: 'https://registry.npmjs.org' | ||
| 53 | + | ||
| 54 | + - name: Display node version | ||
| 55 | + shell: bash | ||
| 56 | + run: | | ||
| 57 | + node --version | ||
| 58 | + | ||
| 59 | + - name: Build nodejs package | ||
| 60 | shell: bash | 60 | shell: bash |
| 61 | + env: | ||
| 62 | + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} | ||
| 61 | run: | | 63 | run: | |
| 62 | - os=${{ matrix.os }} | ||
| 63 | - if [[ $os == 'ubuntu-latest' ]]; then | ||
| 64 | - mkdir -p scripts/nodejs/lib/linux-x64 | ||
| 65 | - dst=scripts/nodejs/lib/linux-x64 | ||
| 66 | - elif [[ $os == 'macos-latest' ]]; then | ||
| 67 | - mkdir -p scripts/nodejs/lib/osx-x64 | ||
| 68 | - dst=scripts/nodejs/lib/osx-x64 | ||
| 69 | - elif [[ $os == 'windows-2019' ]]; then | ||
| 70 | - mkdir -p scripts/nodejs/lib/win-x64 | ||
| 71 | - dst=scripts/nodejs/lib/win-x64 | ||
| 72 | - fi | ||
| 73 | - ls -lh build/install/lib/ | ||
| 74 | - | ||
| 75 | - rm -rf build/install/lib/pkgconfig | ||
| 76 | - | ||
| 77 | - cp -v build/install/lib/* $dst/ | 64 | + ./build-wasm-simd-nodejs.sh |
| 65 | + cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/ | ||
| 66 | + cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/ | ||
| 78 | 67 | ||
| 79 | - name: replace files | 68 | - name: replace files |
| 80 | shell: bash | 69 | shell: bash |
| @@ -89,17 +78,6 @@ jobs: | @@ -89,17 +78,6 @@ jobs: | ||
| 89 | git diff | 78 | git diff |
| 90 | cp *.js ../scripts/nodejs | 79 | cp *.js ../scripts/nodejs |
| 91 | 80 | ||
| 92 | - - uses: actions/setup-node@v4 | ||
| 93 | - with: | ||
| 94 | - node-version: 13 | ||
| 95 | - registry-url: 'https://registry.npmjs.org' | ||
| 96 | - | ||
| 97 | - - name: Display node version | ||
| 98 | - shell: bash | ||
| 99 | - run: | | ||
| 100 | - node --version | ||
| 101 | - npm --version | ||
| 102 | - | ||
| 103 | - name: Run tests | 81 | - name: Run tests |
| 104 | shell: bash | 82 | shell: bash |
| 105 | run: | | 83 | run: | |
| @@ -23,6 +23,7 @@ option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF) | @@ -23,6 +23,7 @@ option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF) | ||
| 23 | option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF) | 23 | option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF) |
| 24 | option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF) | 24 | option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF) |
| 25 | option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) | 25 | option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) |
| 26 | +option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF) | ||
| 26 | option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) | 27 | option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) |
| 27 | option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) | 28 | option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) |
| 28 | 29 | ||
| @@ -108,6 +109,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") | @@ -108,6 +109,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") | ||
| 108 | message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}") | 109 | message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}") |
| 109 | message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") | 110 | message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") |
| 110 | message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") | 111 | message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") |
| 112 | +message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}") | ||
| 111 | 113 | ||
| 112 | if(SHERPA_ONNX_ENABLE_WASM_TTS) | 114 | if(SHERPA_ONNX_ENABLE_WASM_TTS) |
| 113 | if(NOT SHERPA_ONNX_ENABLE_WASM) | 115 | if(NOT SHERPA_ONNX_ENABLE_WASM) |
| @@ -121,6 +123,12 @@ if(SHERPA_ONNX_ENABLE_WASM_ASR) | @@ -121,6 +123,12 @@ if(SHERPA_ONNX_ENABLE_WASM_ASR) | ||
| 121 | endif() | 123 | endif() |
| 122 | endif() | 124 | endif() |
| 123 | 125 | ||
| 126 | +if(SHERPA_ONNX_ENABLE_WASM_NODEJS) | ||
| 127 | + if(NOT SHERPA_ONNX_ENABLE_WASM) | ||
| 128 | + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS") | ||
| 129 | + endif() | ||
| 130 | +endif() | ||
| 131 | + | ||
| 124 | if(SHERPA_ONNX_ENABLE_WASM) | 132 | if(SHERPA_ONNX_ENABLE_WASM) |
| 125 | add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1) | 133 | add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1) |
| 126 | endif() | 134 | endif() |
build-wasm-simd-nodejs.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# Copyright (c) 2024 Xiaomi Corporation | ||
| 3 | +# | ||
| 4 | +# This script is to build sherpa-onnx for WebAssembly (NodeJS) | ||
| 5 | +# | ||
| 6 | +# Please use NodeJS >= 18 | ||
| 7 | + | ||
| 8 | +set -ex | ||
| 9 | + | ||
| 10 | +if [ x"$EMSCRIPTEN" == x"" ]; then | ||
| 11 | + if ! command -v emcc &> /dev/null; then | ||
| 12 | + echo "Please install emscripten first" | ||
| 13 | + echo "" | ||
| 14 | + echo "You can use the following commands to install it:" | ||
| 15 | + echo "" | ||
| 16 | + echo "git clone https://github.com/emscripten-core/emsdk.git" | ||
| 17 | + echo "cd emsdk" | ||
| 18 | + echo "git pull" | ||
| 19 | + echo "./emsdk install latest" | ||
| 20 | + echo "./emsdk activate latest" | ||
| 21 | + echo "source ./emsdk_env.sh" | ||
| 22 | + exit 1 | ||
| 23 | + else | ||
| 24 | + EMSCRIPTEN=$(dirname $(realpath $(which emcc))) | ||
| 25 | + fi | ||
| 26 | +fi | ||
| 27 | + | ||
| 28 | +export EMSCRIPTEN=$EMSCRIPTEN | ||
| 29 | +echo "EMSCRIPTEN: $EMSCRIPTEN" | ||
| 30 | +if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then | ||
| 31 | + echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" | ||
| 32 | + echo "Please make sure you have installed emsdk correctly" | ||
| 33 | + exit 1 | ||
| 34 | +fi | ||
| 35 | + | ||
| 36 | +mkdir -p build-wasm-simd-nodejs | ||
| 37 | +pushd build-wasm-simd-nodejs | ||
| 38 | + | ||
| 39 | +export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON | ||
| 40 | + | ||
| 41 | +cmake \ | ||
| 42 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 43 | + -DCMAKE_BUILD_TYPE=Release \ | ||
| 44 | + -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \ | ||
| 45 | + \ | ||
| 46 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 47 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 48 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 49 | + -DBUILD_SHARED_LIBS=OFF \ | ||
| 50 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 51 | + -DSHERPA_ONNX_ENABLE_JNI=OFF \ | ||
| 52 | + -DSHERPA_ONNX_ENABLE_C_API=ON \ | ||
| 53 | + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ | ||
| 54 | + -DSHERPA_ONNX_ENABLE_GPU=OFF \ | ||
| 55 | + -DSHERPA_ONNX_ENABLE_WASM=ON \ | ||
| 56 | + -DSHERPA_ONNX_ENABLE_WASM_NODEJS=ON \ | ||
| 57 | + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ | ||
| 58 | + -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \ | ||
| 59 | + .. | ||
| 60 | +make -j10 | ||
| 61 | +make install | ||
| 62 | + | ||
| 63 | +ls -lh install/bin/wasm/nodejs |
| @@ -2,38 +2,18 @@ | @@ -2,38 +2,18 @@ | ||
| 2 | 2 | ||
| 3 | This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). | 3 | This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). |
| 4 | 4 | ||
| 5 | -Before you continue, please first install the npm package `sherpa-onnx` by | 5 | +Before you continue, please first run |
| 6 | 6 | ||
| 7 | ```bash | 7 | ```bash |
| 8 | -npm install sherpa-onnx | 8 | +cd ./nodejs-examples |
| 9 | + | ||
| 10 | +npm i | ||
| 9 | ``` | 11 | ``` |
| 10 | 12 | ||
| 11 | In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) | 13 | In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) |
| 12 | for text-to-speech and speech-to-text. | 14 | for text-to-speech and speech-to-text. |
| 13 | 15 | ||
| 14 | -**Caution**: If you get the following error: | ||
| 15 | -``` | ||
| 16 | -/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67 | ||
| 17 | - if (match = err.match(/^(([^ \t()])+\.so([^ \t:()])*):([ \t])*/)) { | ||
| 18 | - ^ | ||
| 19 | - | ||
| 20 | -TypeError: Cannot read properties of null (reading 'match') | ||
| 21 | - at new DynamicLibrary (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67:21) | ||
| 22 | - at Object.Library (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/library.js:47:10) | ||
| 23 | - at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/sherpa-onnx3/index.js:268:28) | ||
| 24 | - at Module._compile (node:internal/modules/cjs/loader:1376:14) | ||
| 25 | - at Module._extensions..js (node:internal/modules/cjs/loader:1435:10) | ||
| 26 | - at Module.load (node:internal/modules/cjs/loader:1207:32) | ||
| 27 | - at Module._load (node:internal/modules/cjs/loader:1023:12) | ||
| 28 | - at Module.require (node:internal/modules/cjs/loader:1235:19) | ||
| 29 | - at require (node:internal/modules/helpers:176:18) | ||
| 30 | - at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/test-offline-tts-zh.js:3:21) | ||
| 31 | -``` | ||
| 32 | - | ||
| 33 | -Please downgrade your node to version v13.14.0. See also | ||
| 34 | -https://github.com/node-ffi-napi/node-ffi-napi/issues/244 | ||
| 35 | -and | ||
| 36 | -https://github.com/node-ffi-napi/node-ffi-napi/issues/97 . | 16 | +Note: You need `Node >= 18`. |
| 37 | 17 | ||
| 38 | # Text-to-speech | 18 | # Text-to-speech |
| 39 | 19 | ||
| @@ -71,13 +51,7 @@ node ./test-offline-tts-zh.js | @@ -71,13 +51,7 @@ node ./test-offline-tts-zh.js | ||
| 71 | # Speech-to-text | 51 | # Speech-to-text |
| 72 | 52 | ||
| 73 | In the following, we demonstrate how to decode files and how to perform | 53 | In the following, we demonstrate how to decode files and how to perform |
| 74 | -speech recognition with a microphone with `nodejs`. We need to install two additional | ||
| 75 | -npm packages: | ||
| 76 | - | ||
| 77 | - | ||
| 78 | -```bash | ||
| 79 | -npm install wav naudiodon2 | ||
| 80 | -``` | 54 | +speech recognition with a microphone with `nodejs`. |
| 81 | 55 | ||
| 82 | ## ./test-offline-nemo-ctc.js | 56 | ## ./test-offline-nemo-ctc.js |
| 83 | 57 | ||
| @@ -200,60 +174,3 @@ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherp | @@ -200,60 +174,3 @@ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherp | ||
| 200 | tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | 174 | tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 |
| 201 | node ./test-online-zipformer2-ctc.js | 175 | node ./test-online-zipformer2-ctc.js |
| 202 | ``` | 176 | ``` |
| 203 | - | ||
| 204 | -## ./test-vad-microphone-offline-paraformer.js | ||
| 205 | - | ||
| 206 | -[./test-vad-microphone-offline-paraformer.js](./test-vad-microphone-offline-paraformer.js) | ||
| 207 | -demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad) | ||
| 208 | -with non-streaming Paraformer for speech recognition from microphone. | ||
| 209 | - | ||
| 210 | -You can use the following command to run it: | ||
| 211 | - | ||
| 212 | -```bash | ||
| 213 | -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 214 | -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 | ||
| 215 | -tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 | ||
| 216 | -node ./test-vad-microphone-offline-paraformer.js | ||
| 217 | -``` | ||
| 218 | - | ||
| 219 | -## ./test-vad-microphone-offline-transducer.js | ||
| 220 | - | ||
| 221 | -[./test-vad-microphone-offline-transducer.js](./test-vad-microphone-offline-transducer.js) | ||
| 222 | -demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad) | ||
| 223 | -with a non-streaming transducer model for speech recognition from microphone. | ||
| 224 | - | ||
| 225 | -You can use the following command to run it: | ||
| 226 | - | ||
| 227 | -```bash | ||
| 228 | -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 229 | -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2 | ||
| 230 | -tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2 | ||
| 231 | -node ./test-vad-microphone-offline-transducer.js | ||
| 232 | -``` | ||
| 233 | - | ||
| 234 | -## ./test-vad-microphone-offline-whisper.js | ||
| 235 | - | ||
| 236 | -[./test-vad-microphone-offline-whisper.js](./test-vad-microphone-offline-whisper.js) | ||
| 237 | -demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad) | ||
| 238 | -with whisper for speech recognition from microphone. | ||
| 239 | - | ||
| 240 | -You can use the following command to run it: | ||
| 241 | - | ||
| 242 | -```bash | ||
| 243 | -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 244 | -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 245 | -tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 246 | -node ./test-vad-microphone-offline-whisper.js | ||
| 247 | -``` | ||
| 248 | - | ||
| 249 | -## ./test-vad-microphone.js | ||
| 250 | - | ||
| 251 | -[./test-vad-microphone.js](./test-vad-microphone.js) | ||
| 252 | -demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad). | ||
| 253 | - | ||
| 254 | -You can use the following command to run it: | ||
| 255 | - | ||
| 256 | -```bash | ||
| 257 | -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 258 | -node ./test-vad-microphone.js | ||
| 259 | -``` |
| 1 | -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | 1 | +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang) |
| 2 | // | 2 | // |
| 3 | const fs = require('fs'); | 3 | const fs = require('fs'); |
| 4 | const {Readable} = require('stream'); | 4 | const {Readable} = require('stream'); |
| @@ -6,32 +6,58 @@ const wav = require('wav'); | @@ -6,32 +6,58 @@ const wav = require('wav'); | ||
| 6 | 6 | ||
| 7 | const sherpa_onnx = require('sherpa-onnx'); | 7 | const sherpa_onnx = require('sherpa-onnx'); |
| 8 | 8 | ||
| 9 | -function createRecognizer() { | ||
| 10 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 11 | - featConfig.sampleRate = 16000; | ||
| 12 | - featConfig.featureDim = 80; | ||
| 13 | - | ||
| 14 | - // test online recognizer | ||
| 15 | - const nemoCtc = new sherpa_onnx.OfflineNemoEncDecCtcModelConfig(); | ||
| 16 | - nemoCtc.model = './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx'; | ||
| 17 | - const tokens = './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt'; | ||
| 18 | - | ||
| 19 | - const modelConfig = new sherpa_onnx.OfflineModelConfig(); | ||
| 20 | - modelConfig.nemoCtc = nemoCtc; | ||
| 21 | - modelConfig.tokens = tokens; | ||
| 22 | - modelConfig.modelType = 'nemo_ctc'; | ||
| 23 | - | ||
| 24 | - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); | ||
| 25 | - recognizerConfig.featConfig = featConfig; | ||
| 26 | - recognizerConfig.modelConfig = modelConfig; | ||
| 27 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 28 | - | ||
| 29 | - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); | ||
| 30 | - return recognizer; | 9 | +function createOfflineRecognizer() { |
| 10 | + let featConfig = { | ||
| 11 | + sampleRate: 16000, | ||
| 12 | + featureDim: 80, | ||
| 13 | + }; | ||
| 14 | + | ||
| 15 | + let modelConfig = { | ||
| 16 | + transducer: { | ||
| 17 | + encoder: '', | ||
| 18 | + decoder: '', | ||
| 19 | + joiner: '', | ||
| 20 | + }, | ||
| 21 | + paraformer: { | ||
| 22 | + model: '', | ||
| 23 | + }, | ||
| 24 | + nemoCtc: { | ||
| 25 | + model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx', | ||
| 26 | + }, | ||
| 27 | + whisper: { | ||
| 28 | + encoder: '', | ||
| 29 | + decoder: '', | ||
| 30 | + }, | ||
| 31 | + tdnn: { | ||
| 32 | + model: '', | ||
| 33 | + }, | ||
| 34 | + tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt', | ||
| 35 | + numThreads: 1, | ||
| 36 | + debug: 0, | ||
| 37 | + provider: 'cpu', | ||
| 38 | + modelType: 'nemo_ctc', | ||
| 39 | + }; | ||
| 40 | + | ||
| 41 | + let lmConfig = { | ||
| 42 | + model: '', | ||
| 43 | + scale: 1.0, | ||
| 44 | + }; | ||
| 45 | + | ||
| 46 | + let config = { | ||
| 47 | + featConfig: featConfig, | ||
| 48 | + modelConfig: modelConfig, | ||
| 49 | + lmConfig: lmConfig, | ||
| 50 | + decodingMethod: 'greedy_search', | ||
| 51 | + maxActivePaths: 4, | ||
| 52 | + hotwordsFile: '', | ||
| 53 | + hotwordsScore: 1.5, | ||
| 54 | + }; | ||
| 55 | + | ||
| 56 | + return sherpa_onnx.createOfflineRecognizer(config); | ||
| 31 | } | 57 | } |
| 32 | 58 | ||
| 33 | -recognizer = createRecognizer(); | ||
| 34 | -stream = recognizer.createStream(); | 59 | +const recognizer = createOfflineRecognizer(); |
| 60 | +const stream = recognizer.createStream(); | ||
| 35 | 61 | ||
| 36 | const waveFilename = | 62 | const waveFilename = |
| 37 | './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav'; | 63 | './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav'; |
| @@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {highWaterMark: 4096}) | @@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {highWaterMark: 4096}) | ||
| 72 | 98 | ||
| 73 | stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); | 99 | stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); |
| 74 | recognizer.decode(stream); | 100 | recognizer.decode(stream); |
| 75 | - const r = recognizer.getResult(stream); | ||
| 76 | - console.log(r.text); | 101 | + const text = recognizer.getResult(stream); |
| 102 | + console.log(text); | ||
| 77 | 103 | ||
| 78 | stream.free(); | 104 | stream.free(); |
| 79 | recognizer.free(); | 105 | recognizer.free(); |
| @@ -6,32 +6,59 @@ const wav = require('wav'); | @@ -6,32 +6,59 @@ const wav = require('wav'); | ||
| 6 | 6 | ||
| 7 | const sherpa_onnx = require('sherpa-onnx'); | 7 | const sherpa_onnx = require('sherpa-onnx'); |
| 8 | 8 | ||
| 9 | -function createRecognizer() { | ||
| 10 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 11 | - featConfig.sampleRate = 16000; | ||
| 12 | - featConfig.featureDim = 80; | ||
| 13 | - | ||
| 14 | - // test online recognizer | ||
| 15 | - const paraformer = new sherpa_onnx.OfflineParaformerModelConfig(); | ||
| 16 | - paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.onnx'; | ||
| 17 | - const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt'; | ||
| 18 | - | ||
| 19 | - const modelConfig = new sherpa_onnx.OfflineModelConfig(); | ||
| 20 | - modelConfig.paraformer = paraformer; | ||
| 21 | - modelConfig.tokens = tokens; | ||
| 22 | - modelConfig.modelType = 'paraformer'; | ||
| 23 | - | ||
| 24 | - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); | ||
| 25 | - recognizerConfig.featConfig = featConfig; | ||
| 26 | - recognizerConfig.modelConfig = modelConfig; | ||
| 27 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 28 | - | ||
| 29 | - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); | ||
| 30 | - return recognizer; | 9 | +function createOfflineRecognizer() { |
| 10 | + let featConfig = { | ||
| 11 | + sampleRate: 16000, | ||
| 12 | + featureDim: 80, | ||
| 13 | + }; | ||
| 14 | + | ||
| 15 | + let modelConfig = { | ||
| 16 | + transducer: { | ||
| 17 | + encoder: '', | ||
| 18 | + decoder: '', | ||
| 19 | + joiner: '', | ||
| 20 | + }, | ||
| 21 | + paraformer: { | ||
| 22 | + model: './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx', | ||
| 23 | + }, | ||
| 24 | + nemoCtc: { | ||
| 25 | + model: '', | ||
| 26 | + }, | ||
| 27 | + whisper: { | ||
| 28 | + encoder: '', | ||
| 29 | + decoder: '', | ||
| 30 | + }, | ||
| 31 | + tdnn: { | ||
| 32 | + model: '', | ||
| 33 | + }, | ||
| 34 | + tokens: './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt', | ||
| 35 | + numThreads: 1, | ||
| 36 | + debug: 0, | ||
| 37 | + provider: 'cpu', | ||
| 38 | + modelType: 'paraformer', | ||
| 39 | + }; | ||
| 40 | + | ||
| 41 | + let lmConfig = { | ||
| 42 | + model: '', | ||
| 43 | + scale: 1.0, | ||
| 44 | + }; | ||
| 45 | + | ||
| 46 | + let config = { | ||
| 47 | + featConfig: featConfig, | ||
| 48 | + modelConfig: modelConfig, | ||
| 49 | + lmConfig: lmConfig, | ||
| 50 | + decodingMethod: 'greedy_search', | ||
| 51 | + maxActivePaths: 4, | ||
| 52 | + hotwordsFile: '', | ||
| 53 | + hotwordsScore: 1.5, | ||
| 54 | + }; | ||
| 55 | + | ||
| 56 | + return sherpa_onnx.createOfflineRecognizer(config); | ||
| 31 | } | 57 | } |
| 32 | 58 | ||
| 33 | -recognizer = createRecognizer(); | ||
| 34 | -stream = recognizer.createStream(); | 59 | + |
| 60 | +const recognizer = createOfflineRecognizer(); | ||
| 61 | +const stream = recognizer.createStream(); | ||
| 35 | 62 | ||
| 36 | const waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav'; | 63 | const waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav'; |
| 37 | 64 | ||
| @@ -71,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096}) | @@ -71,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096}) | ||
| 71 | 98 | ||
| 72 | stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); | 99 | stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); |
| 73 | recognizer.decode(stream); | 100 | recognizer.decode(stream); |
| 74 | - const r = recognizer.getResult(stream); | ||
| 75 | - console.log(r.text); | 101 | + const text = recognizer.getResult(stream); |
| 102 | + console.log(text); | ||
| 76 | 103 | ||
| 77 | stream.free(); | 104 | stream.free(); |
| 78 | recognizer.free(); | 105 | recognizer.free(); |
| @@ -6,37 +6,60 @@ const wav = require('wav'); | @@ -6,37 +6,60 @@ const wav = require('wav'); | ||
| 6 | 6 | ||
| 7 | const sherpa_onnx = require('sherpa-onnx'); | 7 | const sherpa_onnx = require('sherpa-onnx'); |
| 8 | 8 | ||
| 9 | -function createRecognizer() { | ||
| 10 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 11 | - featConfig.sampleRate = 16000; | ||
| 12 | - featConfig.featureDim = 80; | ||
| 13 | - | ||
| 14 | - // test online recognizer | ||
| 15 | - const transducer = new sherpa_onnx.OfflineTransducerModelConfig(); | ||
| 16 | - transducer.encoder = | ||
| 17 | - './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx'; | ||
| 18 | - transducer.decoder = | ||
| 19 | - './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx'; | ||
| 20 | - transducer.joiner = | ||
| 21 | - './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx'; | ||
| 22 | - const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt'; | ||
| 23 | - | ||
| 24 | - const modelConfig = new sherpa_onnx.OfflineModelConfig(); | ||
| 25 | - modelConfig.transducer = transducer; | ||
| 26 | - modelConfig.tokens = tokens; | ||
| 27 | - modelConfig.modelType = 'transducer'; | ||
| 28 | - | ||
| 29 | - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); | ||
| 30 | - recognizerConfig.featConfig = featConfig; | ||
| 31 | - recognizerConfig.modelConfig = modelConfig; | ||
| 32 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 33 | - | ||
| 34 | - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); | ||
| 35 | - return recognizer; | 9 | +function createOfflineRecognizer() { |
| 10 | + let featConfig = { | ||
| 11 | + sampleRate: 16000, | ||
| 12 | + featureDim: 80, | ||
| 13 | + }; | ||
| 14 | + | ||
| 15 | + let modelConfig = { | ||
| 16 | + transducer: { | ||
| 17 | + encoder: | ||
| 18 | + './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.int8.onnx', | ||
| 19 | + decoder: | ||
| 20 | + './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx', | ||
| 21 | + joiner: | ||
| 22 | + './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx', | ||
| 23 | + }, | ||
| 24 | + paraformer: { | ||
| 25 | + model: '', | ||
| 26 | + }, | ||
| 27 | + nemoCtc: { | ||
| 28 | + model: '', | ||
| 29 | + }, | ||
| 30 | + whisper: { | ||
| 31 | + encoder: '', | ||
| 32 | + decoder: '', | ||
| 33 | + }, | ||
| 34 | + tdnn: { | ||
| 35 | + model: '', | ||
| 36 | + }, | ||
| 37 | + tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt', | ||
| 38 | + numThreads: 1, | ||
| 39 | + debug: 0, | ||
| 40 | + provider: 'cpu', | ||
| 41 | + modelType: 'transducer', | ||
| 42 | + }; | ||
| 43 | + | ||
| 44 | + let lmConfig = { | ||
| 45 | + model: '', | ||
| 46 | + scale: 1.0, | ||
| 47 | + }; | ||
| 48 | + | ||
| 49 | + let config = { | ||
| 50 | + featConfig: featConfig, | ||
| 51 | + modelConfig: modelConfig, | ||
| 52 | + lmConfig: lmConfig, | ||
| 53 | + decodingMethod: 'greedy_search', | ||
| 54 | + maxActivePaths: 4, | ||
| 55 | + hotwordsFile: '', | ||
| 56 | + hotwordsScore: 1.5, | ||
| 57 | + }; | ||
| 58 | + | ||
| 59 | + return sherpa_onnx.createOfflineRecognizer(config); | ||
| 36 | } | 60 | } |
| 37 | - | ||
| 38 | -recognizer = createRecognizer(); | ||
| 39 | -stream = recognizer.createStream(); | 61 | +const recognizer = createOfflineRecognizer(); |
| 62 | +const stream = recognizer.createStream(); | ||
| 40 | 63 | ||
| 41 | const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav'; | 64 | const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav'; |
| 42 | 65 | ||
| @@ -76,8 +99,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096}) | @@ -76,8 +99,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096}) | ||
| 76 | 99 | ||
| 77 | stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); | 100 | stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); |
| 78 | recognizer.decode(stream); | 101 | recognizer.decode(stream); |
| 79 | - const r = recognizer.getResult(stream); | ||
| 80 | - console.log(r.text); | 102 | + const text = recognizer.getResult(stream); |
| 103 | + console.log(text); | ||
| 81 | 104 | ||
| 82 | stream.free(); | 105 | stream.free(); |
| 83 | recognizer.free(); | 106 | recognizer.free(); |
| 1 | -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | 1 | +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang) |
| 2 | 2 | ||
| 3 | const sherpa_onnx = require('sherpa-onnx'); | 3 | const sherpa_onnx = require('sherpa-onnx'); |
| 4 | 4 | ||
| 5 | function createOfflineTts() { | 5 | function createOfflineTts() { |
| 6 | - const vits = new sherpa_onnx.OfflineTtsVitsModelConfig(); | ||
| 7 | - vits.model = 'vits-piper-en_US-amy-low/en_US-amy-low.onnx' | ||
| 8 | - vits.tokens = './vits-piper-en_US-amy-low/tokens.txt'; | ||
| 9 | - vits.dataDir = './vits-piper-en_US-amy-low/espeak-ng-data' | 6 | + let offlineTtsVitsModelConfig = { |
| 7 | + model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx', | ||
| 8 | + lexicon: '', | ||
| 9 | + tokens: './vits-piper-en_US-amy-low/tokens.txt', | ||
| 10 | + dataDir: './vits-piper-en_US-amy-low/espeak-ng-data', | ||
| 11 | + noiseScale: 0.667, | ||
| 12 | + noiseScaleW: 0.8, | ||
| 13 | + lengthScale: 1.0, | ||
| 14 | + }; | ||
| 15 | + let offlineTtsModelConfig = { | ||
| 16 | + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, | ||
| 17 | + numThreads: 1, | ||
| 18 | + debug: 1, | ||
| 19 | + provider: 'cpu', | ||
| 20 | + }; | ||
| 10 | 21 | ||
| 11 | - const modelConfig = new sherpa_onnx.OfflineTtsModelConfig(); | ||
| 12 | - modelConfig.vits = vits; | 22 | + let offlineTtsConfig = { |
| 23 | + offlineTtsModelConfig: offlineTtsModelConfig, | ||
| 24 | + ruleFsts: '', | ||
| 25 | + maxNumSentences: 1, | ||
| 26 | + }; | ||
| 13 | 27 | ||
| 14 | - const config = new sherpa_onnx.OfflineTtsConfig(); | ||
| 15 | - config.model = modelConfig; | ||
| 16 | - | ||
| 17 | - return new sherpa_onnx.OfflineTts(config); | 28 | + return sherpa_onnx.createOfflineTts(offlineTtsConfig); |
| 18 | } | 29 | } |
| 19 | 30 | ||
| 31 | + | ||
| 20 | const tts = createOfflineTts(); | 32 | const tts = createOfflineTts(); |
| 21 | const speakerId = 0; | 33 | const speakerId = 0; |
| 22 | const speed = 1.0; | 34 | const speed = 1.0; |
| 23 | -const audio = tts.generate( | ||
| 24 | - '“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”', | ||
| 25 | - speakerId, speed); | ||
| 26 | -audio.save('./test-en.wav'); | 35 | +const audio = tts.generate({ |
| 36 | + text: | ||
| 37 | + '“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”', | ||
| 38 | + sid: speakerId, | ||
| 39 | + speed: speed | ||
| 40 | +}); | ||
| 41 | + | ||
| 42 | +tts.save('./test-en.wav', audio); | ||
| 27 | console.log('Saved to test-en.wav successfully.'); | 43 | console.log('Saved to test-en.wav successfully.'); |
| 44 | + | ||
| 28 | tts.free(); | 45 | tts.free(); |
| @@ -3,25 +3,37 @@ | @@ -3,25 +3,37 @@ | ||
| 3 | const sherpa_onnx = require('sherpa-onnx'); | 3 | const sherpa_onnx = require('sherpa-onnx'); |
| 4 | 4 | ||
| 5 | function createOfflineTts() { | 5 | function createOfflineTts() { |
| 6 | - const vits = new sherpa_onnx.OfflineTtsVitsModelConfig(); | ||
| 7 | - vits.model = './vits-zh-aishell3/vits-aishell3.onnx'; | ||
| 8 | - vits.lexicon = './vits-zh-aishell3/lexicon.txt'; | ||
| 9 | - vits.tokens = './vits-zh-aishell3/tokens.txt'; | 6 | + let offlineTtsVitsModelConfig = { |
| 7 | + model: './vits-zh-aishell3/vits-aishell3.onnx', | ||
| 8 | + lexicon: './vits-zh-aishell3/lexicon.txt', | ||
| 9 | + tokens: './vits-zh-aishell3/tokens.txt', | ||
| 10 | + dataDir: '', | ||
| 11 | + noiseScale: 0.667, | ||
| 12 | + noiseScaleW: 0.8, | ||
| 13 | + lengthScale: 1.0, | ||
| 14 | + }; | ||
| 15 | + let offlineTtsModelConfig = { | ||
| 16 | + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, | ||
| 17 | + numThreads: 1, | ||
| 18 | + debug: 1, | ||
| 19 | + provider: 'cpu', | ||
| 20 | + }; | ||
| 10 | 21 | ||
| 11 | - const modelConfig = new sherpa_onnx.OfflineTtsModelConfig(); | ||
| 12 | - modelConfig.vits = vits; | 22 | + let offlineTtsConfig = { |
| 23 | + offlineTtsModelConfig: offlineTtsModelConfig, | ||
| 24 | + ruleFsts: './vits-zh-aishell3/rule.fst', | ||
| 25 | + maxNumSentences: 1, | ||
| 26 | + }; | ||
| 13 | 27 | ||
| 14 | - const config = new sherpa_onnx.OfflineTtsConfig(); | ||
| 15 | - config.model = modelConfig; | ||
| 16 | - config.ruleFsts = './vits-zh-aishell3/rule.fst'; | ||
| 17 | - | ||
| 18 | - return new sherpa_onnx.OfflineTts(config); | 28 | + return sherpa_onnx.createOfflineTts(offlineTtsConfig); |
| 19 | } | 29 | } |
| 20 | 30 | ||
| 31 | + | ||
| 21 | const tts = createOfflineTts(); | 32 | const tts = createOfflineTts(); |
| 22 | const speakerId = 66; | 33 | const speakerId = 66; |
| 23 | const speed = 1.0; | 34 | const speed = 1.0; |
| 24 | -const audio = tts.generate('3年前中国总人口是1411778724人', speakerId, speed); | ||
| 25 | -audio.save('./test-zh.wav'); | 35 | +const audio = tts.generate( |
| 36 | + {text: '3年前中国总人口是1411778724人', sid: speakerId, speed: speed}); | ||
| 37 | +tts.save('./test-zh.wav', audio); | ||
| 26 | console.log('Saved to test-zh.wav successfully.'); | 38 | console.log('Saved to test-zh.wav successfully.'); |
| 27 | tts.free(); | 39 | tts.free(); |
| @@ -6,32 +6,58 @@ const wav = require('wav'); | @@ -6,32 +6,58 @@ const wav = require('wav'); | ||
| 6 | 6 | ||
| 7 | const sherpa_onnx = require('sherpa-onnx'); | 7 | const sherpa_onnx = require('sherpa-onnx'); |
| 8 | 8 | ||
| 9 | -function createRecognizer() { | ||
| 10 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 11 | - featConfig.sampleRate = 16000; | ||
| 12 | - featConfig.featureDim = 80; | ||
| 13 | - | ||
| 14 | - // test online recognizer | ||
| 15 | - const whisper = new sherpa_onnx.OfflineWhisperModelConfig(); | ||
| 16 | - whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx'; | ||
| 17 | - whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx'; | ||
| 18 | - const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt'; | ||
| 19 | - | ||
| 20 | - const modelConfig = new sherpa_onnx.OfflineModelConfig(); | ||
| 21 | - modelConfig.whisper = whisper; | ||
| 22 | - modelConfig.tokens = tokens; | ||
| 23 | - modelConfig.modelType = 'whisper'; | ||
| 24 | - | ||
| 25 | - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); | ||
| 26 | - recognizerConfig.featConfig = featConfig; | ||
| 27 | - recognizerConfig.modelConfig = modelConfig; | ||
| 28 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 29 | - | ||
| 30 | - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); | ||
| 31 | - return recognizer; | 9 | +function createOfflineRecognizer() { |
| 10 | + let featConfig = { | ||
| 11 | + sampleRate: 16000, | ||
| 12 | + featureDim: 80, | ||
| 13 | + }; | ||
| 14 | + | ||
| 15 | + let modelConfig = { | ||
| 16 | + transducer: { | ||
| 17 | + encoder: '', | ||
| 18 | + decoder: '', | ||
| 19 | + joiner: '', | ||
| 20 | + }, | ||
| 21 | + paraformer: { | ||
| 22 | + model: '', | ||
| 23 | + }, | ||
| 24 | + nemoCtc: { | ||
| 25 | + model: '', | ||
| 26 | + }, | ||
| 27 | + whisper: { | ||
| 28 | + encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx', | ||
| 29 | + decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx', | ||
| 30 | + }, | ||
| 31 | + tdnn: { | ||
| 32 | + model: '', | ||
| 33 | + }, | ||
| 34 | + tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt', | ||
| 35 | + numThreads: 1, | ||
| 36 | + debug: 0, | ||
| 37 | + provider: 'cpu', | ||
| 38 | + modelType: 'whisper', | ||
| 39 | + }; | ||
| 40 | + | ||
| 41 | + let lmConfig = { | ||
| 42 | + model: '', | ||
| 43 | + scale: 1.0, | ||
| 44 | + }; | ||
| 45 | + | ||
| 46 | + let config = { | ||
| 47 | + featConfig: featConfig, | ||
| 48 | + modelConfig: modelConfig, | ||
| 49 | + lmConfig: lmConfig, | ||
| 50 | + decodingMethod: 'greedy_search', | ||
| 51 | + maxActivePaths: 4, | ||
| 52 | + hotwordsFile: '', | ||
| 53 | + hotwordsScore: 1.5, | ||
| 54 | + }; | ||
| 55 | + | ||
| 56 | + return sherpa_onnx.createOfflineRecognizer(config); | ||
| 32 | } | 57 | } |
| 33 | 58 | ||
| 34 | -recognizer = createRecognizer(); | 59 | + |
| 60 | +recognizer = createOfflineRecognizer(); | ||
| 35 | stream = recognizer.createStream(); | 61 | stream = recognizer.createStream(); |
| 36 | 62 | ||
| 37 | const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav'; | 63 | const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav'; |
| @@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096}) | @@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096}) | ||
| 72 | 98 | ||
| 73 | stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); | 99 | stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); |
| 74 | recognizer.decode(stream); | 100 | recognizer.decode(stream); |
| 75 | - const r = recognizer.getResult(stream); | ||
| 76 | - console.log(r.text); | 101 | + const text = recognizer.getResult(stream); |
| 102 | + console.log(text); | ||
| 77 | 103 | ||
| 78 | stream.free(); | 104 | stream.free(); |
| 79 | recognizer.free(); | 105 | recognizer.free(); |
| @@ -5,37 +5,58 @@ console.log(portAudio.getDevices()); | @@ -5,37 +5,58 @@ console.log(portAudio.getDevices()); | ||
| 5 | 5 | ||
| 6 | const sherpa_onnx = require('sherpa-onnx'); | 6 | const sherpa_onnx = require('sherpa-onnx'); |
| 7 | 7 | ||
| 8 | -function createRecognizer() { | ||
| 9 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 10 | - featConfig.sampleRate = 16000; | ||
| 11 | - featConfig.featureDim = 80; | ||
| 12 | - | ||
| 13 | - const paraformer = new sherpa_onnx.OnlineParaformerModelConfig(); | ||
| 14 | - paraformer.encoder = | ||
| 15 | - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx'; | ||
| 16 | - paraformer.decoder = | ||
| 17 | - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx'; | ||
| 18 | - const tokens = | ||
| 19 | - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt'; | ||
| 20 | - | ||
| 21 | - const modelConfig = new sherpa_onnx.OnlineModelConfig(); | ||
| 22 | - modelConfig.paraformer = paraformer; | ||
| 23 | - modelConfig.tokens = tokens; | ||
| 24 | - modelConfig.modelType = 'paraformer'; | ||
| 25 | - | ||
| 26 | - const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); | ||
| 27 | - recognizerConfig.featConfig = featConfig; | ||
| 28 | - recognizerConfig.modelConfig = modelConfig; | ||
| 29 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 30 | - recognizerConfig.enableEndpoint = 1; | ||
| 31 | - | ||
| 32 | - const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); | ||
| 33 | - return recognizer; | 8 | +function createOnlineRecognizer() { |
| 9 | + let onlineTransducerModelConfig = { | ||
| 10 | + encoder: '', | ||
| 11 | + decoder: '', | ||
| 12 | + joiner: '', | ||
| 13 | + }; | ||
| 14 | + | ||
| 15 | + let onlineParaformerModelConfig = { | ||
| 16 | + encoder: | ||
| 17 | + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx', | ||
| 18 | + decoder: | ||
| 19 | + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx', | ||
| 20 | + }; | ||
| 21 | + | ||
| 22 | + let onlineZipformer2CtcModelConfig = { | ||
| 23 | + model: '', | ||
| 24 | + }; | ||
| 25 | + | ||
| 26 | + let onlineModelConfig = { | ||
| 27 | + transducer: onlineTransducerModelConfig, | ||
| 28 | + paraformer: onlineParaformerModelConfig, | ||
| 29 | + zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 30 | + tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt', | ||
| 31 | + numThreads: 1, | ||
| 32 | + provider: 'cpu', | ||
| 33 | + debug: 1, | ||
| 34 | + modelType: 'paraformer', | ||
| 35 | + }; | ||
| 36 | + | ||
| 37 | + let featureConfig = { | ||
| 38 | + sampleRate: 16000, | ||
| 39 | + featureDim: 80, | ||
| 40 | + }; | ||
| 41 | + | ||
| 42 | + let recognizerConfig = { | ||
| 43 | + featConfig: featureConfig, | ||
| 44 | + modelConfig: onlineModelConfig, | ||
| 45 | + decodingMethod: 'greedy_search', | ||
| 46 | + maxActivePaths: 4, | ||
| 47 | + enableEndpoint: 1, | ||
| 48 | + rule1MinTrailingSilence: 2.4, | ||
| 49 | + rule2MinTrailingSilence: 1.2, | ||
| 50 | + rule3MinUtteranceLength: 20, | ||
| 51 | + hotwordsFile: '', | ||
| 52 | + hotwordsScore: 1.5, | ||
| 53 | + }; | ||
| 54 | + | ||
| 55 | + return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | ||
| 34 | } | 56 | } |
| 35 | -recognizer = createRecognizer(); | ||
| 36 | -stream = recognizer.createStream(); | ||
| 37 | 57 | ||
| 38 | -display = new sherpa_onnx.Display(50); | 58 | +const recognizer = createOnlineRecognizer(); |
| 59 | +const stream = recognizer.createStream(); | ||
| 39 | 60 | ||
| 40 | let lastText = ''; | 61 | let lastText = ''; |
| 41 | let segmentIndex = 0; | 62 | let segmentIndex = 0; |
| @@ -61,11 +82,11 @@ ai.on('data', data => { | @@ -61,11 +82,11 @@ ai.on('data', data => { | ||
| 61 | } | 82 | } |
| 62 | 83 | ||
| 63 | const isEndpoint = recognizer.isEndpoint(stream); | 84 | const isEndpoint = recognizer.isEndpoint(stream); |
| 64 | - const text = recognizer.getResult(stream).text; | 85 | + const text = recognizer.getResult(stream); |
| 65 | 86 | ||
| 66 | if (text.length > 0 && lastText != text) { | 87 | if (text.length > 0 && lastText != text) { |
| 67 | lastText = text; | 88 | lastText = text; |
| 68 | - display.print(segmentIndex, lastText); | 89 | + console.log(segmentIndex, lastText); |
| 69 | } | 90 | } |
| 70 | if (isEndpoint) { | 91 | if (isEndpoint) { |
| 71 | if (text.length > 0) { | 92 | if (text.length > 0) { |
| @@ -6,34 +6,58 @@ const wav = require('wav'); | @@ -6,34 +6,58 @@ const wav = require('wav'); | ||
| 6 | 6 | ||
| 7 | const sherpa_onnx = require('sherpa-onnx'); | 7 | const sherpa_onnx = require('sherpa-onnx'); |
| 8 | 8 | ||
| 9 | -function createRecognizer() { | ||
| 10 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 11 | - featConfig.sampleRate = 16000; | ||
| 12 | - featConfig.featureDim = 80; | ||
| 13 | - | ||
| 14 | - const paraformer = new sherpa_onnx.OnlineParaformerModelConfig(); | ||
| 15 | - paraformer.encoder = | ||
| 16 | - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx'; | ||
| 17 | - paraformer.decoder = | ||
| 18 | - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx'; | ||
| 19 | - const tokens = | ||
| 20 | - './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt'; | ||
| 21 | - | ||
| 22 | - const modelConfig = new sherpa_onnx.OnlineModelConfig(); | ||
| 23 | - modelConfig.paraformer = paraformer; | ||
| 24 | - modelConfig.tokens = tokens; | ||
| 25 | - modelConfig.modelType = 'paraformer'; | ||
| 26 | - | ||
| 27 | - const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); | ||
| 28 | - recognizerConfig.featConfig = featConfig; | ||
| 29 | - recognizerConfig.modelConfig = modelConfig; | ||
| 30 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 31 | - | ||
| 32 | - const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); | ||
| 33 | - return recognizer; | 9 | +function createOnlineRecognizer() { |
| 10 | + let onlineTransducerModelConfig = { | ||
| 11 | + encoder: '', | ||
| 12 | + decoder: '', | ||
| 13 | + joiner: '', | ||
| 14 | + }; | ||
| 15 | + | ||
| 16 | + let onlineParaformerModelConfig = { | ||
| 17 | + encoder: | ||
| 18 | + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx', | ||
| 19 | + decoder: | ||
| 20 | + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx', | ||
| 21 | + }; | ||
| 22 | + | ||
| 23 | + let onlineZipformer2CtcModelConfig = { | ||
| 24 | + model: '', | ||
| 25 | + }; | ||
| 26 | + | ||
| 27 | + let onlineModelConfig = { | ||
| 28 | + transducer: onlineTransducerModelConfig, | ||
| 29 | + paraformer: onlineParaformerModelConfig, | ||
| 30 | + zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 31 | + tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt', | ||
| 32 | + numThreads: 1, | ||
| 33 | + provider: 'cpu', | ||
| 34 | + debug: 1, | ||
| 35 | + modelType: 'paraformer', | ||
| 36 | + }; | ||
| 37 | + | ||
| 38 | + let featureConfig = { | ||
| 39 | + sampleRate: 16000, | ||
| 40 | + featureDim: 80, | ||
| 41 | + }; | ||
| 42 | + | ||
| 43 | + let recognizerConfig = { | ||
| 44 | + featConfig: featureConfig, | ||
| 45 | + modelConfig: onlineModelConfig, | ||
| 46 | + decodingMethod: 'greedy_search', | ||
| 47 | + maxActivePaths: 4, | ||
| 48 | + enableEndpoint: 1, | ||
| 49 | + rule1MinTrailingSilence: 2.4, | ||
| 50 | + rule2MinTrailingSilence: 1.2, | ||
| 51 | + rule3MinUtteranceLength: 20, | ||
| 52 | + hotwordsFile: '', | ||
| 53 | + hotwordsScore: 1.5, | ||
| 54 | + }; | ||
| 55 | + | ||
| 56 | + return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | ||
| 34 | } | 57 | } |
| 35 | -recognizer = createRecognizer(); | ||
| 36 | -stream = recognizer.createStream(); | 58 | + |
| 59 | +const recognizer = createOnlineRecognizer(); | ||
| 60 | +const stream = recognizer.createStream(); | ||
| 37 | 61 | ||
| 38 | const waveFilename = | 62 | const waveFilename = |
| 39 | './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav'; | 63 | './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav'; |
| @@ -47,8 +71,8 @@ function decode(samples) { | @@ -47,8 +71,8 @@ function decode(samples) { | ||
| 47 | while (recognizer.isReady(stream)) { | 71 | while (recognizer.isReady(stream)) { |
| 48 | recognizer.decode(stream); | 72 | recognizer.decode(stream); |
| 49 | } | 73 | } |
| 50 | - const r = recognizer.getResult(stream); | ||
| 51 | - console.log(r.text); | 74 | + const text = recognizer.getResult(stream); |
| 75 | + console.log(text); | ||
| 52 | } | 76 | } |
| 53 | 77 | ||
| 54 | reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { | 78 | reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { |
| @@ -5,39 +5,60 @@ const portAudio = require('naudiodon2'); | @@ -5,39 +5,60 @@ const portAudio = require('naudiodon2'); | ||
| 5 | 5 | ||
| 6 | const sherpa_onnx = require('sherpa-onnx'); | 6 | const sherpa_onnx = require('sherpa-onnx'); |
| 7 | 7 | ||
| 8 | -function createRecognizer() { | ||
| 9 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 10 | - featConfig.sampleRate = 16000; | ||
| 11 | - featConfig.featureDim = 80; | ||
| 12 | - | ||
| 13 | - // test online recognizer | ||
| 14 | - const transducer = new sherpa_onnx.OnlineTransducerModelConfig(); | ||
| 15 | - transducer.encoder = | ||
| 16 | - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx'; | ||
| 17 | - transducer.decoder = | ||
| 18 | - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx'; | ||
| 19 | - transducer.joiner = | ||
| 20 | - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx'; | ||
| 21 | - const tokens = | ||
| 22 | - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt'; | ||
| 23 | - | ||
| 24 | - const modelConfig = new sherpa_onnx.OnlineModelConfig(); | ||
| 25 | - modelConfig.transducer = transducer; | ||
| 26 | - modelConfig.tokens = tokens; | ||
| 27 | - modelConfig.modelType = 'zipformer'; | ||
| 28 | - | ||
| 29 | - const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); | ||
| 30 | - recognizerConfig.featConfig = featConfig; | ||
| 31 | - recognizerConfig.modelConfig = modelConfig; | ||
| 32 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 33 | - recognizerConfig.enableEndpoint = 1; | ||
| 34 | - | ||
| 35 | - const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); | ||
| 36 | - return recognizer; | 8 | +function createOnlineRecognizer() { |
| 9 | + let onlineTransducerModelConfig = { | ||
| 10 | + encoder: | ||
| 11 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx', | ||
| 12 | + decoder: | ||
| 13 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx', | ||
| 14 | + joiner: | ||
| 15 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', | ||
| 16 | + }; | ||
| 17 | + | ||
| 18 | + let onlineParaformerModelConfig = { | ||
| 19 | + encoder: '', | ||
| 20 | + decoder: '', | ||
| 21 | + }; | ||
| 22 | + | ||
| 23 | + let onlineZipformer2CtcModelConfig = { | ||
| 24 | + model: '', | ||
| 25 | + }; | ||
| 26 | + | ||
| 27 | + let onlineModelConfig = { | ||
| 28 | + transducer: onlineTransducerModelConfig, | ||
| 29 | + paraformer: onlineParaformerModelConfig, | ||
| 30 | + zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 31 | + tokens: | ||
| 32 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', | ||
| 33 | + numThreads: 1, | ||
| 34 | + provider: 'cpu', | ||
| 35 | + debug: 1, | ||
| 36 | + modelType: 'zipformer', | ||
| 37 | + }; | ||
| 38 | + | ||
| 39 | + let featureConfig = { | ||
| 40 | + sampleRate: 16000, | ||
| 41 | + featureDim: 80, | ||
| 42 | + }; | ||
| 43 | + | ||
| 44 | + let recognizerConfig = { | ||
| 45 | + featConfig: featureConfig, | ||
| 46 | + modelConfig: onlineModelConfig, | ||
| 47 | + decodingMethod: 'greedy_search', | ||
| 48 | + maxActivePaths: 4, | ||
| 49 | + enableEndpoint: 1, | ||
| 50 | + rule1MinTrailingSilence: 2.4, | ||
| 51 | + rule2MinTrailingSilence: 1.2, | ||
| 52 | + rule3MinUtteranceLength: 20, | ||
| 53 | + hotwordsFile: '', | ||
| 54 | + hotwordsScore: 1.5, | ||
| 55 | + }; | ||
| 56 | + | ||
| 57 | + return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | ||
| 37 | } | 58 | } |
| 38 | -recognizer = createRecognizer(); | ||
| 39 | -stream = recognizer.createStream(); | ||
| 40 | -display = new sherpa_onnx.Display(50); | 59 | + |
| 60 | +const recognizer = createOnlineRecognizer(); | ||
| 61 | +const stream = recognizer.createStream(); | ||
| 41 | 62 | ||
| 42 | let lastText = ''; | 63 | let lastText = ''; |
| 43 | let segmentIndex = 0; | 64 | let segmentIndex = 0; |
| @@ -63,11 +84,11 @@ ai.on('data', data => { | @@ -63,11 +84,11 @@ ai.on('data', data => { | ||
| 63 | } | 84 | } |
| 64 | 85 | ||
| 65 | const isEndpoint = recognizer.isEndpoint(stream); | 86 | const isEndpoint = recognizer.isEndpoint(stream); |
| 66 | - const text = recognizer.getResult(stream).text; | 87 | + const text = recognizer.getResult(stream); |
| 67 | 88 | ||
| 68 | if (text.length > 0 && lastText != text) { | 89 | if (text.length > 0 && lastText != text) { |
| 69 | lastText = text; | 90 | lastText = text; |
| 70 | - display.print(segmentIndex, lastText); | 91 | + console.log(segmentIndex, lastText); |
| 71 | } | 92 | } |
| 72 | if (isEndpoint) { | 93 | if (isEndpoint) { |
| 73 | if (text.length > 0) { | 94 | if (text.length > 0) { |
| @@ -6,37 +6,60 @@ const wav = require('wav'); | @@ -6,37 +6,60 @@ const wav = require('wav'); | ||
| 6 | 6 | ||
| 7 | const sherpa_onnx = require('sherpa-onnx'); | 7 | const sherpa_onnx = require('sherpa-onnx'); |
| 8 | 8 | ||
| 9 | -function createRecognizer() { | ||
| 10 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 11 | - featConfig.sampleRate = 16000; | ||
| 12 | - featConfig.featureDim = 80; | ||
| 13 | - | ||
| 14 | - // test online recognizer | ||
| 15 | - const transducer = new sherpa_onnx.OnlineTransducerModelConfig(); | ||
| 16 | - transducer.encoder = | ||
| 17 | - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx'; | ||
| 18 | - transducer.decoder = | ||
| 19 | - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx'; | ||
| 20 | - transducer.joiner = | ||
| 21 | - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx'; | ||
| 22 | - const tokens = | ||
| 23 | - './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt'; | ||
| 24 | - | ||
| 25 | - const modelConfig = new sherpa_onnx.OnlineModelConfig(); | ||
| 26 | - modelConfig.transducer = transducer; | ||
| 27 | - modelConfig.tokens = tokens; | ||
| 28 | - modelConfig.modelType = 'zipformer'; | ||
| 29 | - | ||
| 30 | - const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); | ||
| 31 | - recognizerConfig.featConfig = featConfig; | ||
| 32 | - recognizerConfig.modelConfig = modelConfig; | ||
| 33 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 34 | - | ||
| 35 | - recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); | ||
| 36 | - return recognizer; | 9 | +function createOnlineRecognizer() { |
| 10 | + let onlineTransducerModelConfig = { | ||
| 11 | + encoder: | ||
| 12 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx', | ||
| 13 | + decoder: | ||
| 14 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx', | ||
| 15 | + joiner: | ||
| 16 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', | ||
| 17 | + }; | ||
| 18 | + | ||
| 19 | + let onlineParaformerModelConfig = { | ||
| 20 | + encoder: '', | ||
| 21 | + decoder: '', | ||
| 22 | + }; | ||
| 23 | + | ||
| 24 | + let onlineZipformer2CtcModelConfig = { | ||
| 25 | + model: '', | ||
| 26 | + }; | ||
| 27 | + | ||
| 28 | + let onlineModelConfig = { | ||
| 29 | + transducer: onlineTransducerModelConfig, | ||
| 30 | + paraformer: onlineParaformerModelConfig, | ||
| 31 | + zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 32 | + tokens: | ||
| 33 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', | ||
| 34 | + numThreads: 1, | ||
| 35 | + provider: 'cpu', | ||
| 36 | + debug: 1, | ||
| 37 | + modelType: 'zipformer', | ||
| 38 | + }; | ||
| 39 | + | ||
| 40 | + let featureConfig = { | ||
| 41 | + sampleRate: 16000, | ||
| 42 | + featureDim: 80, | ||
| 43 | + }; | ||
| 44 | + | ||
| 45 | + let recognizerConfig = { | ||
| 46 | + featConfig: featureConfig, | ||
| 47 | + modelConfig: onlineModelConfig, | ||
| 48 | + decodingMethod: 'greedy_search', | ||
| 49 | + maxActivePaths: 4, | ||
| 50 | + enableEndpoint: 1, | ||
| 51 | + rule1MinTrailingSilence: 2.4, | ||
| 52 | + rule2MinTrailingSilence: 1.2, | ||
| 53 | + rule3MinUtteranceLength: 20, | ||
| 54 | + hotwordsFile: '', | ||
| 55 | + hotwordsScore: 1.5, | ||
| 56 | + }; | ||
| 57 | + | ||
| 58 | + return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | ||
| 37 | } | 59 | } |
| 38 | -recognizer = createRecognizer(); | ||
| 39 | -stream = recognizer.createStream(); | 60 | + |
| 61 | +const recognizer = createOnlineRecognizer(); | ||
| 62 | +const stream = recognizer.createStream(); | ||
| 40 | 63 | ||
| 41 | const waveFilename = | 64 | const waveFilename = |
| 42 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav'; | 65 | './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav'; |
| @@ -50,8 +73,8 @@ function decode(samples) { | @@ -50,8 +73,8 @@ function decode(samples) { | ||
| 50 | while (recognizer.isReady(stream)) { | 73 | while (recognizer.isReady(stream)) { |
| 51 | recognizer.decode(stream); | 74 | recognizer.decode(stream); |
| 52 | } | 75 | } |
| 53 | - const r = recognizer.getResult(stream); | ||
| 54 | - console.log(r.text); | 76 | + const text = recognizer.getResult(stream); |
| 77 | + console.log(text); | ||
| 55 | } | 78 | } |
| 56 | 79 | ||
| 57 | reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { | 80 | reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { |
| @@ -6,32 +6,58 @@ const wav = require('wav'); | @@ -6,32 +6,58 @@ const wav = require('wav'); | ||
| 6 | 6 | ||
| 7 | const sherpa_onnx = require('sherpa-onnx'); | 7 | const sherpa_onnx = require('sherpa-onnx'); |
| 8 | 8 | ||
| 9 | -function createRecognizer() { | ||
| 10 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 11 | - featConfig.sampleRate = 16000; | ||
| 12 | - featConfig.featureDim = 80; | ||
| 13 | - | ||
| 14 | - // test online recognizer | ||
| 15 | - const zipformer2Ctc = new sherpa_onnx.OnlineZipformer2CtcModelConfig(); | ||
| 16 | - zipformer2Ctc.model = | ||
| 17 | - './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx'; | ||
| 18 | - const tokens = | ||
| 19 | - './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt'; | ||
| 20 | - | ||
| 21 | - const modelConfig = new sherpa_onnx.OnlineModelConfig(); | ||
| 22 | - modelConfig.zipformer2Ctc = zipformer2Ctc; | ||
| 23 | - modelConfig.tokens = tokens; | ||
| 24 | - | ||
| 25 | - const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); | ||
| 26 | - recognizerConfig.featConfig = featConfig; | ||
| 27 | - recognizerConfig.modelConfig = modelConfig; | ||
| 28 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 29 | - | ||
| 30 | - recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); | ||
| 31 | - return recognizer; | 9 | +function createOnlineRecognizer() { |
| 10 | + let onlineTransducerModelConfig = { | ||
| 11 | + encoder: '', | ||
| 12 | + decoder: '', | ||
| 13 | + joiner: '', | ||
| 14 | + }; | ||
| 15 | + | ||
| 16 | + let onlineParaformerModelConfig = { | ||
| 17 | + encoder: '', | ||
| 18 | + decoder: '', | ||
| 19 | + }; | ||
| 20 | + | ||
| 21 | + let onlineZipformer2CtcModelConfig = { | ||
| 22 | + model: | ||
| 23 | + './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx', | ||
| 24 | + }; | ||
| 25 | + | ||
| 26 | + let onlineModelConfig = { | ||
| 27 | + transducer: onlineTransducerModelConfig, | ||
| 28 | + paraformer: onlineParaformerModelConfig, | ||
| 29 | + zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 30 | + tokens: | ||
| 31 | + './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt', | ||
| 32 | + numThreads: 1, | ||
| 33 | + provider: 'cpu', | ||
| 34 | + debug: 1, | ||
| 35 | + modelType: '', | ||
| 36 | + }; | ||
| 37 | + | ||
| 38 | + let featureConfig = { | ||
| 39 | + sampleRate: 16000, | ||
| 40 | + featureDim: 80, | ||
| 41 | + }; | ||
| 42 | + | ||
| 43 | + let recognizerConfig = { | ||
| 44 | + featConfig: featureConfig, | ||
| 45 | + modelConfig: onlineModelConfig, | ||
| 46 | + decodingMethod: 'greedy_search', | ||
| 47 | + maxActivePaths: 4, | ||
| 48 | + enableEndpoint: 1, | ||
| 49 | + rule1MinTrailingSilence: 2.4, | ||
| 50 | + rule2MinTrailingSilence: 1.2, | ||
| 51 | + rule3MinUtteranceLength: 20, | ||
| 52 | + hotwordsFile: '', | ||
| 53 | + hotwordsScore: 1.5, | ||
| 54 | + }; | ||
| 55 | + | ||
| 56 | + return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | ||
| 32 | } | 57 | } |
| 33 | -recognizer = createRecognizer(); | ||
| 34 | -stream = recognizer.createStream(); | 58 | + |
| 59 | +const recognizer = createOnlineRecognizer(); | ||
| 60 | +const stream = recognizer.createStream(); | ||
| 35 | 61 | ||
| 36 | const waveFilename = | 62 | const waveFilename = |
| 37 | './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav'; | 63 | './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav'; |
| @@ -45,8 +71,8 @@ function decode(samples) { | @@ -45,8 +71,8 @@ function decode(samples) { | ||
| 45 | while (recognizer.isReady(stream)) { | 71 | while (recognizer.isReady(stream)) { |
| 46 | recognizer.decode(stream); | 72 | recognizer.decode(stream); |
| 47 | } | 73 | } |
| 48 | - const r = recognizer.getResult(stream); | ||
| 49 | - console.log(r.text); | 74 | + const text = recognizer.getResult(stream); |
| 75 | + console.log(text); | ||
| 50 | } | 76 | } |
| 51 | 77 | ||
| 52 | reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { | 78 | reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { |
| 1 | -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | -// | ||
| 3 | -const sherpa_onnx = require('sherpa-onnx3'); | ||
| 4 | -const portAudio = require('naudiodon2'); | ||
| 5 | -console.log(portAudio.getDevices()); | ||
| 6 | - | ||
| 7 | -function createOfflineRecognizer() { | ||
| 8 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 9 | - featConfig.sampleRate = 16000; | ||
| 10 | - featConfig.featureDim = 80; | ||
| 11 | - | ||
| 12 | - // test online recognizer | ||
| 13 | - const paraformer = new sherpa_onnx.OfflineParaformerModelConfig(); | ||
| 14 | - paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx'; | ||
| 15 | - const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt'; | ||
| 16 | - | ||
| 17 | - const modelConfig = new sherpa_onnx.OfflineModelConfig(); | ||
| 18 | - modelConfig.paraformer = paraformer; | ||
| 19 | - modelConfig.tokens = tokens; | ||
| 20 | - modelConfig.modelType = 'paraformer'; | ||
| 21 | - | ||
| 22 | - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); | ||
| 23 | - recognizerConfig.featConfig = featConfig; | ||
| 24 | - recognizerConfig.modelConfig = modelConfig; | ||
| 25 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 26 | - | ||
| 27 | - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); | ||
| 28 | - return recognizer | ||
| 29 | -} | ||
| 30 | - | ||
| 31 | -function createVad() { | ||
| 32 | - const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); | ||
| 33 | - sileroVadModelConfig.model = './silero_vad.onnx'; | ||
| 34 | - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds | ||
| 35 | - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds | ||
| 36 | - sileroVadModelConfig.windowSize = 512; | ||
| 37 | - | ||
| 38 | - const vadModelConfig = new sherpa_onnx.VadModelConfig(); | ||
| 39 | - vadModelConfig.sileroVad = sileroVadModelConfig; | ||
| 40 | - vadModelConfig.sampleRate = 16000; | ||
| 41 | - | ||
| 42 | - const bufferSizeInSeconds = 60; | ||
| 43 | - const vad = new sherpa_onnx.VoiceActivityDetector( | ||
| 44 | - vadModelConfig, bufferSizeInSeconds); | ||
| 45 | - return vad; | ||
| 46 | -} | ||
| 47 | - | ||
| 48 | -const recognizer = createOfflineRecognizer(); | ||
| 49 | -const vad = createVad(); | ||
| 50 | - | ||
| 51 | -const bufferSizeInSeconds = 30; | ||
| 52 | -const buffer = | ||
| 53 | - new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); | ||
| 54 | - | ||
| 55 | -var ai = new portAudio.AudioIO({ | ||
| 56 | - inOptions: { | ||
| 57 | - channelCount: 1, | ||
| 58 | - sampleFormat: portAudio.SampleFormatFloat32, | ||
| 59 | - sampleRate: vad.config.sampleRate, | ||
| 60 | - deviceId: -1, // Use -1 or omit the deviceId to select the default device | ||
| 61 | - closeOnError: true // Close the stream if an audio error is detected, if | ||
| 62 | - // set false then just log the error | ||
| 63 | - } | ||
| 64 | -}); | ||
| 65 | - | ||
| 66 | -let printed = false; | ||
| 67 | -let index = 0; | ||
| 68 | -ai.on('data', data => { | ||
| 69 | - const windowSize = vad.config.sileroVad.windowSize; | ||
| 70 | - buffer.push(new Float32Array(data.buffer)); | ||
| 71 | - while (buffer.size() > windowSize) { | ||
| 72 | - const samples = buffer.get(buffer.head(), windowSize); | ||
| 73 | - buffer.pop(windowSize); | ||
| 74 | - vad.acceptWaveform(samples) | ||
| 75 | - } | ||
| 76 | - | ||
| 77 | - while (!vad.isEmpty()) { | ||
| 78 | - const segment = vad.front(); | ||
| 79 | - vad.pop(); | ||
| 80 | - const stream = recognizer.createStream(); | ||
| 81 | - stream.acceptWaveform( | ||
| 82 | - recognizer.config.featConfig.sampleRate, segment.samples); | ||
| 83 | - recognizer.decode(stream); | ||
| 84 | - const r = recognizer.getResult(stream); | ||
| 85 | - stream.free(); | ||
| 86 | - if (r.text.length > 0) { | ||
| 87 | - console.log(`${index}: ${r.text}`); | ||
| 88 | - index += 1; | ||
| 89 | - } | ||
| 90 | - } | ||
| 91 | -}); | ||
| 92 | - | ||
| 93 | -ai.on('close', () => { | ||
| 94 | - console.log('Free resources'); | ||
| 95 | - recognizer.free(); | ||
| 96 | - vad.free(); | ||
| 97 | - buffer.free(); | ||
| 98 | -}); | ||
| 99 | - | ||
| 100 | -ai.start(); | ||
| 101 | -console.log('Started! Please speak') |
| 1 | -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | -// | ||
| 3 | -const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | -const portAudio = require('naudiodon2'); | ||
| 5 | -console.log(portAudio.getDevices()); | ||
| 6 | - | ||
| 7 | -function createOfflineRecognizer() { | ||
| 8 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 9 | - featConfig.sampleRate = 16000; | ||
| 10 | - featConfig.featureDim = 80; | ||
| 11 | - | ||
| 12 | - // test online recognizer | ||
| 13 | - const transducer = new sherpa_onnx.OfflineTransducerModelConfig(); | ||
| 14 | - transducer.encoder = | ||
| 15 | - './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx'; | ||
| 16 | - transducer.decoder = | ||
| 17 | - './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx'; | ||
| 18 | - transducer.joiner = | ||
| 19 | - './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx'; | ||
| 20 | - const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt'; | ||
| 21 | - | ||
| 22 | - const modelConfig = new sherpa_onnx.OfflineModelConfig(); | ||
| 23 | - modelConfig.transducer = transducer; | ||
| 24 | - modelConfig.tokens = tokens; | ||
| 25 | - modelConfig.modelType = 'transducer'; | ||
| 26 | - | ||
| 27 | - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); | ||
| 28 | - recognizerConfig.featConfig = featConfig; | ||
| 29 | - recognizerConfig.modelConfig = modelConfig; | ||
| 30 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 31 | - | ||
| 32 | - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); | ||
| 33 | - return recognizer; | ||
| 34 | -} | ||
| 35 | - | ||
| 36 | -function createVad() { | ||
| 37 | - const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); | ||
| 38 | - sileroVadModelConfig.model = './silero_vad.onnx'; | ||
| 39 | - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds | ||
| 40 | - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds | ||
| 41 | - sileroVadModelConfig.windowSize = 512; | ||
| 42 | - | ||
| 43 | - const vadModelConfig = new sherpa_onnx.VadModelConfig(); | ||
| 44 | - vadModelConfig.sileroVad = sileroVadModelConfig; | ||
| 45 | - vadModelConfig.sampleRate = 16000; | ||
| 46 | - | ||
| 47 | - const bufferSizeInSeconds = 60; | ||
| 48 | - const vad = new sherpa_onnx.VoiceActivityDetector( | ||
| 49 | - vadModelConfig, bufferSizeInSeconds); | ||
| 50 | - return vad; | ||
| 51 | -} | ||
| 52 | - | ||
| 53 | -const recognizer = createOfflineRecognizer(); | ||
| 54 | -const vad = createVad(); | ||
| 55 | - | ||
| 56 | -const bufferSizeInSeconds = 30; | ||
| 57 | -const buffer = | ||
| 58 | - new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); | ||
| 59 | - | ||
| 60 | -const ai = new portAudio.AudioIO({ | ||
| 61 | - inOptions: { | ||
| 62 | - channelCount: 1, | ||
| 63 | - closeOnError: true, // Close the stream if an audio error is detected, if | ||
| 64 | - // set false then just log the error | ||
| 65 | - deviceId: -1, // Use -1 or omit the deviceId to select the default device | ||
| 66 | - sampleFormat: portAudio.SampleFormatFloat32, | ||
| 67 | - sampleRate: vad.config.sampleRate | ||
| 68 | - } | ||
| 69 | -}); | ||
| 70 | - | ||
| 71 | -let printed = false; | ||
| 72 | -let index = 0; | ||
| 73 | -ai.on('data', data => { | ||
| 74 | - const windowSize = vad.config.sileroVad.windowSize; | ||
| 75 | - buffer.push(new Float32Array(data.buffer)); | ||
| 76 | - while (buffer.size() > windowSize) { | ||
| 77 | - const samples = buffer.get(buffer.head(), windowSize); | ||
| 78 | - buffer.pop(windowSize); | ||
| 79 | - vad.acceptWaveform(samples) | ||
| 80 | - } | ||
| 81 | - | ||
| 82 | - while (!vad.isEmpty()) { | ||
| 83 | - const segment = vad.front(); | ||
| 84 | - vad.pop(); | ||
| 85 | - const stream = recognizer.createStream(); | ||
| 86 | - stream.acceptWaveform( | ||
| 87 | - recognizer.config.featConfig.sampleRate, segment.samples); | ||
| 88 | - recognizer.decode(stream); | ||
| 89 | - const r = recognizer.getResult(stream); | ||
| 90 | - stream.free(); | ||
| 91 | - if (r.text.length > 0) { | ||
| 92 | - console.log(`${index}: ${r.text}`); | ||
| 93 | - index += 1; | ||
| 94 | - } | ||
| 95 | - } | ||
| 96 | -}); | ||
| 97 | - | ||
| 98 | -ai.on('close', () => { | ||
| 99 | - console.log('Free resources'); | ||
| 100 | - recognizer.free(); | ||
| 101 | - vad.free(); | ||
| 102 | - buffer.free(); | ||
| 103 | -}); | ||
| 104 | - | ||
| 105 | -ai.start(); | ||
| 106 | -console.log('Started! Please speak') |
| 1 | -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | -// | ||
| 3 | -const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | -const portAudio = require('naudiodon2'); | ||
| 5 | -console.log(portAudio.getDevices()); | ||
| 6 | - | ||
| 7 | -function createOfflineRecognizer() { | ||
| 8 | - const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 9 | - featConfig.sampleRate = 16000; | ||
| 10 | - featConfig.featureDim = 80; | ||
| 11 | - | ||
| 12 | - // test online recognizer | ||
| 13 | - const whisper = new sherpa_onnx.OfflineWhisperModelConfig(); | ||
| 14 | - whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx'; | ||
| 15 | - whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx'; | ||
| 16 | - const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt'; | ||
| 17 | - | ||
| 18 | - const modelConfig = new sherpa_onnx.OfflineModelConfig(); | ||
| 19 | - modelConfig.whisper = whisper; | ||
| 20 | - modelConfig.tokens = tokens; | ||
| 21 | - modelConfig.modelType = 'whisper'; | ||
| 22 | - | ||
| 23 | - const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); | ||
| 24 | - recognizerConfig.featConfig = featConfig; | ||
| 25 | - recognizerConfig.modelConfig = modelConfig; | ||
| 26 | - recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 27 | - | ||
| 28 | - const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); | ||
| 29 | - return recognizer; | ||
| 30 | -} | ||
| 31 | - | ||
| 32 | -function createVad() { | ||
| 33 | - const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); | ||
| 34 | - sileroVadModelConfig.model = './silero_vad.onnx'; | ||
| 35 | - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds | ||
| 36 | - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds | ||
| 37 | - sileroVadModelConfig.windowSize = 512; | ||
| 38 | - | ||
| 39 | - const vadModelConfig = new sherpa_onnx.VadModelConfig(); | ||
| 40 | - vadModelConfig.sileroVad = sileroVadModelConfig; | ||
| 41 | - vadModelConfig.sampleRate = 16000; | ||
| 42 | - | ||
| 43 | - const bufferSizeInSeconds = 60; | ||
| 44 | - const vad = new sherpa_onnx.VoiceActivityDetector( | ||
| 45 | - vadModelConfig, bufferSizeInSeconds); | ||
| 46 | - return vad; | ||
| 47 | -} | ||
| 48 | - | ||
| 49 | -const recognizer = createOfflineRecognizer(); | ||
| 50 | -const vad = createVad(); | ||
| 51 | - | ||
| 52 | -const bufferSizeInSeconds = 30; | ||
| 53 | -const buffer = | ||
| 54 | - new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); | ||
| 55 | - | ||
| 56 | -const ai = new portAudio.AudioIO({ | ||
| 57 | - inOptions: { | ||
| 58 | - channelCount: 1, | ||
| 59 | - closeOnError: true, // Close the stream if an audio error is detected, if | ||
| 60 | - // set false then just log the error | ||
| 61 | - deviceId: -1, // Use -1 or omit the deviceId to select the default device | ||
| 62 | - sampleFormat: portAudio.SampleFormatFloat32, | ||
| 63 | - sampleRate: vad.config.sampleRate | ||
| 64 | - } | ||
| 65 | -}); | ||
| 66 | - | ||
| 67 | -let printed = false; | ||
| 68 | -let index = 0; | ||
| 69 | -ai.on('data', data => { | ||
| 70 | - const windowSize = vad.config.sileroVad.windowSize; | ||
| 71 | - buffer.push(new Float32Array(data.buffer)); | ||
| 72 | - while (buffer.size() > windowSize) { | ||
| 73 | - const samples = buffer.get(buffer.head(), windowSize); | ||
| 74 | - buffer.pop(windowSize); | ||
| 75 | - vad.acceptWaveform(samples) | ||
| 76 | - } | ||
| 77 | - | ||
| 78 | - while (!vad.isEmpty()) { | ||
| 79 | - const segment = vad.front(); | ||
| 80 | - vad.pop(); | ||
| 81 | - const stream = recognizer.createStream(); | ||
| 82 | - stream.acceptWaveform( | ||
| 83 | - recognizer.config.featConfig.sampleRate, segment.samples); | ||
| 84 | - recognizer.decode(stream); | ||
| 85 | - const r = recognizer.getResult(stream); | ||
| 86 | - stream.free(); | ||
| 87 | - if (r.text.length > 0) { | ||
| 88 | - console.log(`${index}: ${r.text}`); | ||
| 89 | - index += 1; | ||
| 90 | - } | ||
| 91 | - } | ||
| 92 | -}); | ||
| 93 | - | ||
| 94 | -ai.on('close', () => { | ||
| 95 | - console.log('Free resources'); | ||
| 96 | - recognizer.free(); | ||
| 97 | - vad.free(); | ||
| 98 | - buffer.free(); | ||
| 99 | -}); | ||
| 100 | - | ||
| 101 | -ai.start(); | ||
| 102 | -console.log('Started! Please speak') |
nodejs-examples/test-vad-microphone.js
已删除
100644 → 0
| 1 | -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | - | ||
| 3 | -const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | -const portAudio = require('naudiodon2'); | ||
| 5 | -console.log(portAudio.getDevices()); | ||
| 6 | - | ||
| 7 | -function createVad() { | ||
| 8 | - const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); | ||
| 9 | - sileroVadModelConfig.model = './silero_vad.onnx'; | ||
| 10 | - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds | ||
| 11 | - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds | ||
| 12 | - sileroVadModelConfig.windowSize = 512; | ||
| 13 | - | ||
| 14 | - const vadModelConfig = new sherpa_onnx.VadModelConfig(); | ||
| 15 | - vadModelConfig.sileroVad = sileroVadModelConfig; | ||
| 16 | - vadModelConfig.sampleRate = 16000; | ||
| 17 | - | ||
| 18 | - const bufferSizeInSeconds = 60; | ||
| 19 | - const vad = new sherpa_onnx.VoiceActivityDetector( | ||
| 20 | - vadModelConfig, bufferSizeInSeconds); | ||
| 21 | - return vad; | ||
| 22 | -} | ||
| 23 | -vad = createVad(); | ||
| 24 | -const bufferSizeInSeconds = 30; | ||
| 25 | -const buffer = | ||
| 26 | - new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); | ||
| 27 | - | ||
| 28 | -const ai = new portAudio.AudioIO({ | ||
| 29 | - inOptions: { | ||
| 30 | - channelCount: 1, | ||
| 31 | - closeOnError: true, // Close the stream if an audio error is detected, if | ||
| 32 | - // set false then just log the error | ||
| 33 | - deviceId: -1, // Use -1 or omit the deviceId to select the default device | ||
| 34 | - sampleFormat: portAudio.SampleFormatFloat32, | ||
| 35 | - sampleRate: vad.config.sampleRate | ||
| 36 | - } | ||
| 37 | -}); | ||
| 38 | - | ||
| 39 | -let printed = false; | ||
| 40 | -let index = 0; | ||
| 41 | -ai.on('data', data => { | ||
| 42 | - const windowSize = vad.config.sileroVad.windowSize; | ||
| 43 | - buffer.push(new Float32Array(data.buffer)); | ||
| 44 | - while (buffer.size() > windowSize) { | ||
| 45 | - const samples = buffer.get(buffer.head(), windowSize); | ||
| 46 | - buffer.pop(windowSize); | ||
| 47 | - vad.acceptWaveform(samples) | ||
| 48 | - if (vad.isDetected() && !printed) { | ||
| 49 | - console.log(`${index}: Detected speech`) | ||
| 50 | - printed = true; | ||
| 51 | - } | ||
| 52 | - | ||
| 53 | - if (!vad.isDetected()) { | ||
| 54 | - printed = false; | ||
| 55 | - } | ||
| 56 | - | ||
| 57 | - while (!vad.isEmpty()) { | ||
| 58 | - const segment = vad.front(); | ||
| 59 | - vad.pop(); | ||
| 60 | - const duration = segment.samples.length / vad.config.sampleRate; | ||
| 61 | - console.log(`${index} End of speech. Duration: ${duration} seconds`); | ||
| 62 | - index += 1; | ||
| 63 | - } | ||
| 64 | - } | ||
| 65 | -}); | ||
| 66 | - | ||
| 67 | -ai.on('close', () => { | ||
| 68 | - console.log('Free resources'); | ||
| 69 | - vad.free(); | ||
| 70 | - buffer.free(); | ||
| 71 | -}); | ||
| 72 | - | ||
| 73 | -ai.start(); | ||
| 74 | -console.log('Started! Please speak') |
scripts/nodejs/.clang-format
已删除
100644 → 0
| @@ -7,3 +7,5 @@ It processes everything locally without accessing the Internet. | @@ -7,3 +7,5 @@ It processes everything locally without accessing the Internet. | ||
| 7 | Please refer to | 7 | Please refer to |
| 8 | https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples | 8 | https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples |
| 9 | for examples. | 9 | for examples. |
| 10 | + | ||
| 11 | +You need Node >= 18 for this package. |
| 1 | -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | -// | ||
| 3 | -// Please use | ||
| 4 | -// | ||
| 5 | -// npm install ffi-napi ref-struct-napi | ||
| 6 | -// | ||
| 7 | -// before you use this file | ||
| 8 | -// | ||
| 9 | -// | ||
| 10 | -// Please use node 13. node 16, 18, 20, and 21 are known not working. | ||
| 11 | -// See also | ||
| 12 | -// https://github.com/node-ffi-napi/node-ffi-napi/issues/244 | ||
| 13 | -// and | ||
| 14 | -// https://github.com/node-ffi-napi/node-ffi-napi/issues/97 | ||
| 15 | -"use strict" | 1 | +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang) |
| 2 | +'use strict' | ||
| 16 | 3 | ||
| 17 | -const debug = require("debug")("sherpa-onnx"); | ||
| 18 | -const os = require("os"); | ||
| 19 | -const path = require("path"); | ||
| 20 | -const ffi = require("ffi-napi"); | ||
| 21 | -const ref = require("ref-napi"); | ||
| 22 | -const fs = require("fs"); | ||
| 23 | -var ArrayType = require("ref-array-napi"); | 4 | +const wasmModule = require('./sherpa-onnx-wasm-nodejs.js')(); |
| 5 | +const sherpa_onnx_asr = require('./sherpa-onnx-asr.js'); | ||
| 6 | +const sherpa_onnx_tts = require('./sherpa-onnx-tts.js'); | ||
| 24 | 7 | ||
| 25 | -const FloatArray = ArrayType(ref.types.float); | ||
| 26 | -const StructType = require("ref-struct-napi"); | ||
| 27 | -const cstring = ref.types.CString; | ||
| 28 | -const cstringPtr = ref.refType(cstring); | ||
| 29 | -const int32_t = ref.types.int32; | ||
| 30 | -const float = ref.types.float; | ||
| 31 | -const floatPtr = ref.refType(float); | ||
| 32 | - | ||
| 33 | -const SherpaOnnxOnlineTransducerModelConfig = StructType({ | ||
| 34 | - "encoder" : cstring, | ||
| 35 | - "decoder" : cstring, | ||
| 36 | - "joiner" : cstring, | ||
| 37 | -}); | ||
| 38 | - | ||
| 39 | -const SherpaOnnxOnlineParaformerModelConfig = StructType({ | ||
| 40 | - "encoder" : cstring, | ||
| 41 | - "decoder" : cstring, | ||
| 42 | -}); | ||
| 43 | - | ||
| 44 | -const SherpaOnnxOnlineZipformer2CtcModelConfig = StructType({ | ||
| 45 | - "model" : cstring, | ||
| 46 | -}); | ||
| 47 | - | ||
| 48 | -const SherpaOnnxOnlineModelConfig = StructType({ | ||
| 49 | - "transducer" : SherpaOnnxOnlineTransducerModelConfig, | ||
| 50 | - "paraformer" : SherpaOnnxOnlineParaformerModelConfig, | ||
| 51 | - "zipformer2Ctc" : SherpaOnnxOnlineZipformer2CtcModelConfig, | ||
| 52 | - "tokens" : cstring, | ||
| 53 | - "numThreads" : int32_t, | ||
| 54 | - "provider" : cstring, | ||
| 55 | - "debug" : int32_t, | ||
| 56 | - "modelType" : cstring, | ||
| 57 | -}); | ||
| 58 | - | ||
| 59 | -const SherpaOnnxFeatureConfig = StructType({ | ||
| 60 | - "sampleRate" : int32_t, | ||
| 61 | - "featureDim" : int32_t, | ||
| 62 | -}); | ||
| 63 | - | ||
| 64 | -const SherpaOnnxOnlineRecognizerConfig = StructType({ | ||
| 65 | - "featConfig" : SherpaOnnxFeatureConfig, | ||
| 66 | - "modelConfig" : SherpaOnnxOnlineModelConfig, | ||
| 67 | - "decodingMethod" : cstring, | ||
| 68 | - "maxActivePaths" : int32_t, | ||
| 69 | - "enableEndpoint" : int32_t, | ||
| 70 | - "rule1MinTrailingSilence" : float, | ||
| 71 | - "rule2MinTrailingSilence" : float, | ||
| 72 | - "rule3MinUtteranceLength" : float, | ||
| 73 | - "hotwordsFile" : cstring, | ||
| 74 | - "hotwordsScore" : float, | ||
| 75 | -}); | ||
| 76 | - | ||
| 77 | -const SherpaOnnxOnlineRecognizerResult = StructType({ | ||
| 78 | - "text" : cstring, | ||
| 79 | - "tokens" : cstring, | ||
| 80 | - "tokensArr" : cstringPtr, | ||
| 81 | - "timestamps" : floatPtr, | ||
| 82 | - "count" : int32_t, | ||
| 83 | - "json" : cstring, | ||
| 84 | -}); | ||
| 85 | - | ||
| 86 | -const SherpaOnnxOnlineRecognizerPtr = ref.refType(ref.types.void); | ||
| 87 | -const SherpaOnnxOnlineStreamPtr = ref.refType(ref.types.void); | ||
| 88 | -const SherpaOnnxOnlineStreamPtrPtr = ref.refType(SherpaOnnxOnlineStreamPtr); | ||
| 89 | -const SherpaOnnxOnlineRecognizerResultPtr = | ||
| 90 | - ref.refType(SherpaOnnxOnlineRecognizerResult); | ||
| 91 | - | ||
| 92 | -const SherpaOnnxOnlineRecognizerConfigPtr = | ||
| 93 | - ref.refType(SherpaOnnxOnlineRecognizerConfig); | ||
| 94 | - | ||
| 95 | -const SherpaOnnxOfflineTransducerModelConfig = StructType({ | ||
| 96 | - "encoder" : cstring, | ||
| 97 | - "decoder" : cstring, | ||
| 98 | - "joiner" : cstring, | ||
| 99 | -}); | ||
| 100 | - | ||
| 101 | -const SherpaOnnxOfflineParaformerModelConfig = StructType({ | ||
| 102 | - "model" : cstring, | ||
| 103 | -}); | ||
| 104 | - | ||
| 105 | -const SherpaOnnxOfflineNemoEncDecCtcModelConfig = StructType({ | ||
| 106 | - "model" : cstring, | ||
| 107 | -}); | ||
| 108 | - | ||
| 109 | -const SherpaOnnxOfflineWhisperModelConfig = StructType({ | ||
| 110 | - "encoder" : cstring, | ||
| 111 | - "decoder" : cstring, | ||
| 112 | -}); | ||
| 113 | - | ||
| 114 | -const SherpaOnnxOfflineTdnnModelConfig = StructType({ | ||
| 115 | - "model" : cstring, | ||
| 116 | -}); | ||
| 117 | - | ||
| 118 | -const SherpaOnnxOfflineLMConfig = StructType({ | ||
| 119 | - "model" : cstring, | ||
| 120 | - "scale" : float, | ||
| 121 | -}); | ||
| 122 | - | ||
| 123 | -const SherpaOnnxOfflineModelConfig = StructType({ | ||
| 124 | - "transducer" : SherpaOnnxOfflineTransducerModelConfig, | ||
| 125 | - "paraformer" : SherpaOnnxOfflineParaformerModelConfig, | ||
| 126 | - "nemoCtc" : SherpaOnnxOfflineNemoEncDecCtcModelConfig, | ||
| 127 | - "whisper" : SherpaOnnxOfflineWhisperModelConfig, | ||
| 128 | - "tdnn" : SherpaOnnxOfflineTdnnModelConfig, | ||
| 129 | - "tokens" : cstring, | ||
| 130 | - "numThreads" : int32_t, | ||
| 131 | - "debug" : int32_t, | ||
| 132 | - "provider" : cstring, | ||
| 133 | - "modelType" : cstring, | ||
| 134 | -}); | ||
| 135 | - | ||
| 136 | -const SherpaOnnxOfflineRecognizerConfig = StructType({ | ||
| 137 | - "featConfig" : SherpaOnnxFeatureConfig, | ||
| 138 | - "modelConfig" : SherpaOnnxOfflineModelConfig, | ||
| 139 | - "lmConfig" : SherpaOnnxOfflineLMConfig, | ||
| 140 | - "decodingMethod" : cstring, | ||
| 141 | - "maxActivePaths" : int32_t, | ||
| 142 | - "hotwordsFile" : cstring, | ||
| 143 | - "hotwordsScore" : float, | ||
| 144 | -}); | ||
| 145 | - | ||
| 146 | -const SherpaOnnxOfflineRecognizerResult = StructType({ | ||
| 147 | - "text" : cstring, | ||
| 148 | - "timestamps" : floatPtr, | ||
| 149 | - "count" : int32_t, | ||
| 150 | -}); | ||
| 151 | - | ||
| 152 | -const SherpaOnnxOfflineRecognizerPtr = ref.refType(ref.types.void); | ||
| 153 | -const SherpaOnnxOfflineStreamPtr = ref.refType(ref.types.void); | ||
| 154 | -const SherpaOnnxOfflineStreamPtrPtr = ref.refType(SherpaOnnxOfflineStreamPtr); | ||
| 155 | -const SherpaOnnxOfflineRecognizerResultPtr = | ||
| 156 | - ref.refType(SherpaOnnxOfflineRecognizerResult); | ||
| 157 | - | ||
| 158 | -const SherpaOnnxOfflineRecognizerConfigPtr = | ||
| 159 | - ref.refType(SherpaOnnxOfflineRecognizerConfig); | ||
| 160 | - | ||
| 161 | -// vad | ||
| 162 | -const SherpaOnnxSileroVadModelConfig = StructType({ | ||
| 163 | - "model" : cstring, | ||
| 164 | - "threshold" : float, | ||
| 165 | - "minSilenceDuration" : float, | ||
| 166 | - "minSpeechDuration" : float, | ||
| 167 | - "windowSize" : int32_t, | ||
| 168 | -}); | ||
| 169 | - | ||
| 170 | -const SherpaOnnxVadModelConfig = StructType({ | ||
| 171 | - "sileroVad" : SherpaOnnxSileroVadModelConfig, | ||
| 172 | - "sampleRate" : int32_t, | ||
| 173 | - "numThreads" : int32_t, | ||
| 174 | - "provider" : cstring, | ||
| 175 | - "debug" : int32_t, | ||
| 176 | -}); | ||
| 177 | - | ||
| 178 | -const SherpaOnnxSpeechSegment = StructType({ | ||
| 179 | - "start" : int32_t, | ||
| 180 | - "samples" : FloatArray, | ||
| 181 | - "n" : int32_t, | ||
| 182 | -}); | ||
| 183 | - | ||
| 184 | -const SherpaOnnxVadModelConfigPtr = ref.refType(SherpaOnnxVadModelConfig); | ||
| 185 | -const SherpaOnnxSpeechSegmentPtr = ref.refType(SherpaOnnxSpeechSegment); | ||
| 186 | -const SherpaOnnxCircularBufferPtr = ref.refType(ref.types.void); | ||
| 187 | -const SherpaOnnxVoiceActivityDetectorPtr = ref.refType(ref.types.void); | ||
| 188 | - | ||
| 189 | -// tts | ||
| 190 | -const SherpaOnnxOfflineTtsVitsModelConfig = StructType({ | ||
| 191 | - "model" : cstring, | ||
| 192 | - "lexicon" : cstring, | ||
| 193 | - "tokens" : cstring, | ||
| 194 | - "dataDir" : cstring, | ||
| 195 | - "noiseScale" : float, | ||
| 196 | - "noiseScaleW" : float, | ||
| 197 | - "lengthScale" : float, | ||
| 198 | -}); | ||
| 199 | - | ||
| 200 | -const SherpaOnnxOfflineTtsModelConfig = StructType({ | ||
| 201 | - "vits" : SherpaOnnxOfflineTtsVitsModelConfig, | ||
| 202 | - "numThreads" : int32_t, | ||
| 203 | - "debug" : int32_t, | ||
| 204 | - "provider" : cstring, | ||
| 205 | -}); | ||
| 206 | - | ||
| 207 | -const SherpaOnnxOfflineTtsConfig = StructType({ | ||
| 208 | - "model" : SherpaOnnxOfflineTtsModelConfig, | ||
| 209 | - "ruleFsts" : cstring, | ||
| 210 | - "maxNumSentences" : int32_t, | ||
| 211 | -}); | ||
| 212 | - | ||
| 213 | -const SherpaOnnxGeneratedAudio = StructType({ | ||
| 214 | - "samples" : FloatArray, | ||
| 215 | - "n" : int32_t, | ||
| 216 | - "sampleRate" : int32_t, | ||
| 217 | -}); | ||
| 218 | - | ||
| 219 | -const SherpaOnnxOfflineTtsVitsModelConfigPtr = | ||
| 220 | - ref.refType(SherpaOnnxOfflineTtsVitsModelConfig); | ||
| 221 | -const SherpaOnnxOfflineTtsConfigPtr = ref.refType(SherpaOnnxOfflineTtsConfig); | ||
| 222 | -const SherpaOnnxGeneratedAudioPtr = ref.refType(SherpaOnnxGeneratedAudio); | ||
| 223 | -const SherpaOnnxOfflineTtsPtr = ref.refType(ref.types.void); | ||
| 224 | - | ||
| 225 | -const SherpaOnnxDisplayPtr = ref.refType(ref.types.void); | ||
| 226 | - | ||
| 227 | -let soname; | ||
| 228 | -if (os.platform() == "win32") { | ||
| 229 | - // see https://nodejs.org/api/process.html#processarch | ||
| 230 | - if (process.arch == "x64") { | ||
| 231 | - let currentPath = process.env.Path; | ||
| 232 | - let dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x64")); | ||
| 233 | - process.env.Path = currentPath + path.delimiter + dllDirectory; | ||
| 234 | - | ||
| 235 | - soname = path.join(__dirname, "lib", "win-x64", "sherpa-onnx-c-api.dll") | ||
| 236 | - } else if (process.arch == "ia32") { | ||
| 237 | - let currentPath = process.env.Path; | ||
| 238 | - let dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x86")); | ||
| 239 | - process.env.Path = currentPath + path.delimiter + dllDirectory; | ||
| 240 | - | ||
| 241 | - soname = path.join(__dirname, "lib", "win-x86", "sherpa-onnx-c-api.dll") | ||
| 242 | - } else { | ||
| 243 | - throw new Error( | ||
| 244 | - `Support only Windows x86 and x64 for now. Given ${process.arch}`); | ||
| 245 | - } | ||
| 246 | -} else if (os.platform() == "darwin") { | ||
| 247 | - if (process.arch == "x64") { | ||
| 248 | - soname = | ||
| 249 | - path.join(__dirname, "lib", "osx-x64", "libsherpa-onnx-c-api.dylib"); | ||
| 250 | - } else if (process.arch == "arm64") { | ||
| 251 | - soname = | ||
| 252 | - path.join(__dirname, "lib", "osx-arm64", "libsherpa-onnx-c-api.dylib"); | ||
| 253 | - } else { | ||
| 254 | - throw new Error( | ||
| 255 | - `Support only macOS x64 and arm64 for now. Given ${process.arch}`); | ||
| 256 | - } | ||
| 257 | -} else if (os.platform() == "linux") { | ||
| 258 | - if (process.arch == "x64") { | ||
| 259 | - soname = | ||
| 260 | - path.join(__dirname, "lib", "linux-x64", "libsherpa-onnx-c-api.so"); | ||
| 261 | - } else { | ||
| 262 | - throw new Error(`Support only Linux x64 for now. Given ${process.arch}`); | ||
| 263 | - } | ||
| 264 | -} else { | ||
| 265 | - throw new Error(`Unsupported platform ${os.platform()}`); | 8 | +function createOnlineRecognizer(config) { |
| 9 | + return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config); | ||
| 266 | } | 10 | } |
| 267 | 11 | ||
| 268 | -if (!fs.existsSync(soname)) { | ||
| 269 | - throw new Error(`Cannot find file ${soname}. Please make sure you have run | ||
| 270 | - ./build.sh`); | 12 | +function createOfflineRecognizer(config) { |
| 13 | + return new sherpa_onnx_asr.OfflineRecognizer(config, wasmModule); | ||
| 271 | } | 14 | } |
| 272 | 15 | ||
| 273 | -debug("soname ", soname) | ||
| 274 | - | ||
| 275 | -const libsherpa_onnx = ffi.Library(soname, { | ||
| 276 | - // online asr | ||
| 277 | - "CreateOnlineRecognizer" : [ | ||
| 278 | - SherpaOnnxOnlineRecognizerPtr, [ SherpaOnnxOnlineRecognizerConfigPtr ] | ||
| 279 | - ], | ||
| 280 | - "DestroyOnlineRecognizer" : [ "void", [ SherpaOnnxOnlineRecognizerPtr ] ], | ||
| 281 | - "CreateOnlineStream" : | ||
| 282 | - [ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr ] ], | ||
| 283 | - "CreateOnlineStreamWithHotwords" : | ||
| 284 | - [ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr, cstring ] ], | ||
| 285 | - "DestroyOnlineStream" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ], | ||
| 286 | - "AcceptWaveform" : | ||
| 287 | - [ "void", [ SherpaOnnxOnlineStreamPtr, int32_t, floatPtr, int32_t ] ], | ||
| 288 | - "IsOnlineStreamReady" : | ||
| 289 | - [ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ], | ||
| 290 | - "DecodeOnlineStream" : | ||
| 291 | - [ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ], | ||
| 292 | - "DecodeMultipleOnlineStreams" : [ | ||
| 293 | - "void", | ||
| 294 | - [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtrPtr, int32_t ] | ||
| 295 | - ], | ||
| 296 | - "GetOnlineStreamResult" : [ | ||
| 297 | - SherpaOnnxOnlineRecognizerResultPtr, | ||
| 298 | - [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] | ||
| 299 | - ], | ||
| 300 | - "DestroyOnlineRecognizerResult" : | ||
| 301 | - [ "void", [ SherpaOnnxOnlineRecognizerResultPtr ] ], | ||
| 302 | - "Reset" : | ||
| 303 | - [ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ], | ||
| 304 | - "InputFinished" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ], | ||
| 305 | - "IsEndpoint" : | ||
| 306 | - [ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ], | ||
| 307 | - | ||
| 308 | - // offline asr | ||
| 309 | - "CreateOfflineRecognizer" : [ | ||
| 310 | - SherpaOnnxOfflineRecognizerPtr, [ SherpaOnnxOfflineRecognizerConfigPtr ] | ||
| 311 | - ], | ||
| 312 | - "DestroyOfflineRecognizer" : [ "void", [ SherpaOnnxOfflineRecognizerPtr ] ], | ||
| 313 | - "CreateOfflineStream" : | ||
| 314 | - [ SherpaOnnxOfflineStreamPtr, [ SherpaOnnxOfflineRecognizerPtr ] ], | ||
| 315 | - "DestroyOfflineStream" : [ "void", [ SherpaOnnxOfflineStreamPtr ] ], | ||
| 316 | - "AcceptWaveformOffline" : | ||
| 317 | - [ "void", [ SherpaOnnxOfflineStreamPtr, int32_t, floatPtr, int32_t ] ], | ||
| 318 | - "DecodeOfflineStream" : [ | ||
| 319 | - "void", [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtr ] | ||
| 320 | - ], | ||
| 321 | - "DecodeMultipleOfflineStreams" : [ | ||
| 322 | - "void", | ||
| 323 | - [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtrPtr, int32_t ] | ||
| 324 | - ], | ||
| 325 | - "GetOfflineStreamResult" : | ||
| 326 | - [ SherpaOnnxOfflineRecognizerResultPtr, [ SherpaOnnxOfflineStreamPtr ] ], | ||
| 327 | - "DestroyOfflineRecognizerResult" : | ||
| 328 | - [ "void", [ SherpaOnnxOfflineRecognizerResultPtr ] ], | ||
| 329 | - | ||
| 330 | - // vad | ||
| 331 | - "SherpaOnnxCreateCircularBuffer" : | ||
| 332 | - [ SherpaOnnxCircularBufferPtr, [ int32_t ] ], | ||
| 333 | - "SherpaOnnxDestroyCircularBuffer" : | ||
| 334 | - [ "void", [ SherpaOnnxCircularBufferPtr ] ], | ||
| 335 | - "SherpaOnnxCircularBufferPush" : | ||
| 336 | - [ "void", [ SherpaOnnxCircularBufferPtr, floatPtr, int32_t ] ], | ||
| 337 | - "SherpaOnnxCircularBufferGet" : | ||
| 338 | - [ FloatArray, [ SherpaOnnxCircularBufferPtr, int32_t, int32_t ] ], | ||
| 339 | - "SherpaOnnxCircularBufferFree" : [ "void", [ FloatArray ] ], | ||
| 340 | - "SherpaOnnxCircularBufferPop" : | ||
| 341 | - [ "void", [ SherpaOnnxCircularBufferPtr, int32_t ] ], | ||
| 342 | - "SherpaOnnxCircularBufferSize" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ], | ||
| 343 | - "SherpaOnnxCircularBufferHead" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ], | ||
| 344 | - "SherpaOnnxCircularBufferReset" : [ "void", [ SherpaOnnxCircularBufferPtr ] ], | ||
| 345 | - "SherpaOnnxCreateVoiceActivityDetector" : [ | ||
| 346 | - SherpaOnnxVoiceActivityDetectorPtr, [ SherpaOnnxVadModelConfigPtr, float ] | ||
| 347 | - ], | ||
| 348 | - "SherpaOnnxDestroyVoiceActivityDetector" : | ||
| 349 | - [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 350 | - "SherpaOnnxVoiceActivityDetectorAcceptWaveform" : | ||
| 351 | - [ "void", [ SherpaOnnxVoiceActivityDetectorPtr, floatPtr, int32_t ] ], | ||
| 352 | - "SherpaOnnxVoiceActivityDetectorEmpty" : | ||
| 353 | - [ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 354 | - "SherpaOnnxVoiceActivityDetectorDetected" : | ||
| 355 | - [ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 356 | - "SherpaOnnxVoiceActivityDetectorPop" : | ||
| 357 | - [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 358 | - "SherpaOnnxVoiceActivityDetectorClear" : | ||
| 359 | - [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 360 | - "SherpaOnnxVoiceActivityDetectorFront" : | ||
| 361 | - [ SherpaOnnxSpeechSegmentPtr, [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 362 | - "SherpaOnnxDestroySpeechSegment" : [ "void", [ SherpaOnnxSpeechSegmentPtr ] ], | ||
| 363 | - "SherpaOnnxVoiceActivityDetectorReset" : | ||
| 364 | - [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 365 | - // tts | ||
| 366 | - "SherpaOnnxCreateOfflineTts" : | ||
| 367 | - [ SherpaOnnxOfflineTtsPtr, [ SherpaOnnxOfflineTtsConfigPtr ] ], | ||
| 368 | - "SherpaOnnxDestroyOfflineTts" : [ "void", [ SherpaOnnxOfflineTtsPtr ] ], | ||
| 369 | - "SherpaOnnxOfflineTtsGenerate" : [ | ||
| 370 | - SherpaOnnxGeneratedAudioPtr, | ||
| 371 | - [ SherpaOnnxOfflineTtsPtr, cstring, int32_t, float ] | ||
| 372 | - ], | ||
| 373 | - "SherpaOnnxDestroyOfflineTtsGeneratedAudio" : | ||
| 374 | - [ "void", [ SherpaOnnxGeneratedAudioPtr ] ], | ||
| 375 | - "SherpaOnnxWriteWave" : [ "void", [ floatPtr, int32_t, int32_t, cstring ] ], | ||
| 376 | - | ||
| 377 | - // display | ||
| 378 | - "CreateDisplay" : [ SherpaOnnxDisplayPtr, [ int32_t ] ], | ||
| 379 | - "DestroyDisplay" : [ "void", [ SherpaOnnxDisplayPtr ] ], | ||
| 380 | - "SherpaOnnxPrint" : [ "void", [ SherpaOnnxDisplayPtr, int32_t, cstring ] ], | ||
| 381 | -}); | ||
| 382 | - | ||
| 383 | -class Display { | ||
| 384 | - constructor(maxWordPerLine) { | ||
| 385 | - this.handle = libsherpa_onnx.CreateDisplay(maxWordPerLine); | ||
| 386 | - } | ||
| 387 | - free() { | ||
| 388 | - if (this.handle) { | ||
| 389 | - libsherpa_onnx.DestroyDisplay(this.handle); | ||
| 390 | - this.handle = null; | ||
| 391 | - } | ||
| 392 | - } | ||
| 393 | - | ||
| 394 | - print(idx, s) { libsherpa_onnx.SherpaOnnxPrint(this.handle, idx, s); } | ||
| 395 | -}; | ||
| 396 | - | ||
| 397 | -class OnlineResult { | ||
| 398 | - constructor(text) { this.text = Buffer.from(text, "utf-8").toString(); } | ||
| 399 | -}; | ||
| 400 | - | ||
| 401 | -class OnlineStream { | ||
| 402 | - constructor(handle) { this.handle = handle } | ||
| 403 | - | ||
| 404 | - free() { | ||
| 405 | - if (this.handle) { | ||
| 406 | - libsherpa_onnx.DestroyOnlineStream(this.handle); | ||
| 407 | - this.handle = null; | ||
| 408 | - } | ||
| 409 | - } | ||
| 410 | - | ||
| 411 | - /** | ||
| 412 | - * @param sampleRate {Number} | ||
| 413 | - * @param samples {Float32Array} Containing samples in the range [-1, 1] | ||
| 414 | - */ | ||
| 415 | - acceptWaveform(sampleRate, samples) { | ||
| 416 | - libsherpa_onnx.AcceptWaveform(this.handle, sampleRate, samples, | ||
| 417 | - samples.length); | ||
| 418 | - } | ||
| 419 | -}; | ||
| 420 | - | ||
| 421 | -class OnlineRecognizer { | ||
| 422 | - constructor(config) { | ||
| 423 | - this.config = config; | ||
| 424 | - this.recognizer_handle = | ||
| 425 | - libsherpa_onnx.CreateOnlineRecognizer(config.ref()); | ||
| 426 | - } | ||
| 427 | - | ||
| 428 | - free() { | ||
| 429 | - if (this.recognizer_handle) { | ||
| 430 | - libsherpa_onnx.DestroyOnlineRecognizer(this.recognizer_handle); | ||
| 431 | - this.recognizer_handle = null; | ||
| 432 | - } | ||
| 433 | - } | ||
| 434 | - | ||
| 435 | - createStream() { | ||
| 436 | - let handle = libsherpa_onnx.CreateOnlineStream(this.recognizer_handle); | ||
| 437 | - return new OnlineStream(handle); | ||
| 438 | - } | ||
| 439 | - | ||
| 440 | - isReady(stream) { | ||
| 441 | - return libsherpa_onnx.IsOnlineStreamReady(this.recognizer_handle, | ||
| 442 | - stream.handle) | ||
| 443 | - } | ||
| 444 | - | ||
| 445 | - isEndpoint(stream) { | ||
| 446 | - return libsherpa_onnx.IsEndpoint(this.recognizer_handle, stream.handle); | ||
| 447 | - } | ||
| 448 | - | ||
| 449 | - reset(stream) { libsherpa_onnx.Reset(this.recognizer_handle, stream.handle); } | ||
| 450 | - | ||
| 451 | - decode(stream) { | ||
| 452 | - libsherpa_onnx.DecodeOnlineStream(this.recognizer_handle, stream.handle) | ||
| 453 | - } | ||
| 454 | - | ||
| 455 | - getResult(stream) { | ||
| 456 | - let handle = libsherpa_onnx.GetOnlineStreamResult(this.recognizer_handle, | ||
| 457 | - stream.handle); | ||
| 458 | - let r = handle.deref(); | ||
| 459 | - let ans = new OnlineResult(r.text); | ||
| 460 | - libsherpa_onnx.DestroyOnlineRecognizerResult(handle); | ||
| 461 | - | ||
| 462 | - return ans | ||
| 463 | - } | ||
| 464 | -}; | ||
| 465 | - | ||
| 466 | -class OfflineResult { | ||
| 467 | - constructor(text) { this.text = Buffer.from(text, "utf-8").toString(); } | ||
| 468 | -}; | ||
| 469 | - | ||
| 470 | -class OfflineStream { | ||
| 471 | - constructor(handle) { this.handle = handle } | ||
| 472 | - | ||
| 473 | - free() { | ||
| 474 | - if (this.handle) { | ||
| 475 | - libsherpa_onnx.DestroyOfflineStream(this.handle); | ||
| 476 | - this.handle = null; | ||
| 477 | - } | ||
| 478 | - } | ||
| 479 | - | ||
| 480 | - /** | ||
| 481 | - * @param sampleRate {Number} | ||
| 482 | - * @param samples {Float32Array} Containing samples in the range [-1, 1] | ||
| 483 | - */ | ||
| 484 | - acceptWaveform(sampleRate, samples) { | ||
| 485 | - libsherpa_onnx.AcceptWaveformOffline(this.handle, sampleRate, samples, | ||
| 486 | - samples.length); | ||
| 487 | - } | ||
| 488 | -}; | ||
| 489 | - | ||
| 490 | -class OfflineRecognizer { | ||
| 491 | - constructor(config) { | ||
| 492 | - this.config = config; | ||
| 493 | - this.recognizer_handle = | ||
| 494 | - libsherpa_onnx.CreateOfflineRecognizer(config.ref()); | ||
| 495 | - } | ||
| 496 | - | ||
| 497 | - free() { | ||
| 498 | - if (this.recognizer_handle) { | ||
| 499 | - libsherpa_onnx.DestroyOfflineRecognizer(this.recognizer_handle); | ||
| 500 | - this.recognizer_handle = null; | ||
| 501 | - } | ||
| 502 | - } | ||
| 503 | - | ||
| 504 | - createStream() { | ||
| 505 | - let handle = libsherpa_onnx.CreateOfflineStream(this.recognizer_handle); | ||
| 506 | - return new OfflineStream(handle); | ||
| 507 | - } | ||
| 508 | - | ||
| 509 | - decode(stream) { | ||
| 510 | - libsherpa_onnx.DecodeOfflineStream(this.recognizer_handle, stream.handle) | ||
| 511 | - } | ||
| 512 | - | ||
| 513 | - getResult(stream) { | ||
| 514 | - let handle = libsherpa_onnx.GetOfflineStreamResult(stream.handle); | ||
| 515 | - let r = handle.deref(); | ||
| 516 | - let ans = new OfflineResult(r.text); | ||
| 517 | - libsherpa_onnx.DestroyOfflineRecognizerResult(handle); | ||
| 518 | - | ||
| 519 | - return ans | ||
| 520 | - } | ||
| 521 | -}; | ||
| 522 | - | ||
| 523 | -class SpeechSegment { | ||
| 524 | - constructor(start, samples) { | ||
| 525 | - this.start = start; | ||
| 526 | - this.samples = samples; | ||
| 527 | - } | ||
| 528 | -}; | ||
| 529 | - | ||
| 530 | -// this buffer holds only float entries. | ||
| 531 | -class CircularBuffer { | ||
| 532 | - /** | ||
| 533 | - * @param capacity {int} The capacity of the circular buffer. | ||
| 534 | - */ | ||
| 535 | - constructor(capacity) { | ||
| 536 | - this.handle = libsherpa_onnx.SherpaOnnxCreateCircularBuffer(capacity); | ||
| 537 | - } | ||
| 538 | - | ||
| 539 | - free() { | ||
| 540 | - if (this.handle) { | ||
| 541 | - libsherpa_onnx.SherpaOnnxDestroyCircularBuffer(this.handle); | ||
| 542 | - this.handle = null; | ||
| 543 | - } | ||
| 544 | - } | ||
| 545 | - | ||
| 546 | - /** | ||
| 547 | - * @param samples {Float32Array} | ||
| 548 | - */ | ||
| 549 | - push(samples) { | ||
| 550 | - libsherpa_onnx.SherpaOnnxCircularBufferPush(this.handle, samples, | ||
| 551 | - samples.length); | ||
| 552 | - } | ||
| 553 | - | ||
| 554 | - get(startIndex, n) { | ||
| 555 | - let data = | ||
| 556 | - libsherpa_onnx.SherpaOnnxCircularBufferGet(this.handle, startIndex, n); | ||
| 557 | - | ||
| 558 | - // https://tootallnate.github.io/ref/#exports-reinterpret | ||
| 559 | - const buffer = data.buffer.reinterpret(n * ref.sizeof.float).buffer; | ||
| 560 | - | ||
| 561 | - // create a copy since we are going to free the buffer at the end | ||
| 562 | - let s = new Float32Array(buffer).slice(0); | ||
| 563 | - libsherpa_onnx.SherpaOnnxCircularBufferFree(data); | ||
| 564 | - return s; | ||
| 565 | - } | ||
| 566 | - | ||
| 567 | - pop(n) { libsherpa_onnx.SherpaOnnxCircularBufferPop(this.handle, n); } | ||
| 568 | - | ||
| 569 | - size() { return libsherpa_onnx.SherpaOnnxCircularBufferSize(this.handle); } | ||
| 570 | - | ||
| 571 | - head() { return libsherpa_onnx.SherpaOnnxCircularBufferHead(this.handle); } | ||
| 572 | - | ||
| 573 | - reset() { libsherpa_onnx.SherpaOnnxCircularBufferReset(this.handle); } | ||
| 574 | -}; | ||
| 575 | - | ||
| 576 | -class VoiceActivityDetector { | ||
| 577 | - constructor(config, bufferSizeInSeconds) { | ||
| 578 | - this.config = config; | ||
| 579 | - this.handle = libsherpa_onnx.SherpaOnnxCreateVoiceActivityDetector( | ||
| 580 | - config.ref(), bufferSizeInSeconds); | ||
| 581 | - } | ||
| 582 | - | ||
| 583 | - free() { | ||
| 584 | - if (this.handle) { | ||
| 585 | - libsherpa_onnx.SherpaOnnxDestroyVoiceActivityDetector(this.handle); | ||
| 586 | - } | ||
| 587 | - } | ||
| 588 | - | ||
| 589 | - acceptWaveform(samples) { | ||
| 590 | - libsherpa_onnx.SherpaOnnxVoiceActivityDetectorAcceptWaveform( | ||
| 591 | - this.handle, samples, samples.length); | ||
| 592 | - } | ||
| 593 | - | ||
| 594 | - isEmpty() { | ||
| 595 | - return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorEmpty(this.handle); | ||
| 596 | - } | ||
| 597 | - | ||
| 598 | - isDetected() { | ||
| 599 | - return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorDetected(this.handle); | ||
| 600 | - } | ||
| 601 | - pop() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorPop(this.handle); } | ||
| 602 | - | ||
| 603 | - clear() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorClear(this.handle); } | ||
| 604 | - | ||
| 605 | - reset() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorReset(this.handle); } | ||
| 606 | - | ||
| 607 | - front() { | ||
| 608 | - let segment = | ||
| 609 | - libsherpa_onnx.SherpaOnnxVoiceActivityDetectorFront(this.handle); | ||
| 610 | - | ||
| 611 | - let buffer = | ||
| 612 | - segment.deref() | ||
| 613 | - .samples.buffer.reinterpret(segment.deref().n * ref.sizeof.float) | ||
| 614 | - .buffer; | ||
| 615 | - | ||
| 616 | - let samples = new Float32Array(buffer).slice(0); | ||
| 617 | - let ans = new SpeechSegment(segment.deref().start, samples); | ||
| 618 | - | ||
| 619 | - libsherpa_onnx.SherpaOnnxDestroySpeechSegment(segment); | ||
| 620 | - return ans; | ||
| 621 | - } | ||
| 622 | -}; | ||
| 623 | - | ||
| 624 | -class GeneratedAudio { | ||
| 625 | - constructor(sampleRate, samples) { | ||
| 626 | - this.sampleRate = sampleRate; | ||
| 627 | - this.samples = samples; | ||
| 628 | - } | ||
| 629 | - save(filename) { | ||
| 630 | - libsherpa_onnx.SherpaOnnxWriteWave(this.samples, this.samples.length, | ||
| 631 | - this.sampleRate, filename); | ||
| 632 | - } | ||
| 633 | -}; | ||
| 634 | - | ||
| 635 | -class OfflineTts { | ||
| 636 | - constructor(config) { | ||
| 637 | - this.config = config; | ||
| 638 | - this.handle = libsherpa_onnx.SherpaOnnxCreateOfflineTts(config.ref()); | ||
| 639 | - } | ||
| 640 | - | ||
| 641 | - free() { | ||
| 642 | - if (this.handle) { | ||
| 643 | - libsherpa_onnx.SherpaOnnxDestroyOfflineTts(this.handle); | ||
| 644 | - this.handle = null; | ||
| 645 | - } | ||
| 646 | - } | ||
| 647 | - generate(text, sid, speed) { | ||
| 648 | - let r = libsherpa_onnx.SherpaOnnxOfflineTtsGenerate(this.handle, text, sid, | ||
| 649 | - speed); | ||
| 650 | - const buffer = | ||
| 651 | - r.deref() | ||
| 652 | - .samples.buffer.reinterpret(r.deref().n * ref.sizeof.float) | ||
| 653 | - .buffer; | ||
| 654 | - let samples = new Float32Array(buffer).slice(0); | ||
| 655 | - let sampleRate = r.deref().sampleRate; | ||
| 656 | - | ||
| 657 | - let generatedAudio = new GeneratedAudio(sampleRate, samples); | ||
| 658 | - | ||
| 659 | - libsherpa_onnx.SherpaOnnxDestroyOfflineTtsGeneratedAudio(r); | ||
| 660 | - | ||
| 661 | - return generatedAudio; | ||
| 662 | - } | ||
| 663 | -}; | ||
| 664 | - | ||
| 665 | -// online asr | ||
| 666 | -const OnlineTransducerModelConfig = SherpaOnnxOnlineTransducerModelConfig; | ||
| 667 | -const OnlineModelConfig = SherpaOnnxOnlineModelConfig; | ||
| 668 | -const FeatureConfig = SherpaOnnxFeatureConfig; | ||
| 669 | -const OnlineRecognizerConfig = SherpaOnnxOnlineRecognizerConfig; | ||
| 670 | -const OnlineParaformerModelConfig = SherpaOnnxOnlineParaformerModelConfig; | ||
| 671 | -const OnlineZipformer2CtcModelConfig = SherpaOnnxOnlineZipformer2CtcModelConfig; | ||
| 672 | - | ||
| 673 | -// offline asr | ||
| 674 | -const OfflineTransducerModelConfig = SherpaOnnxOfflineTransducerModelConfig; | ||
| 675 | -const OfflineModelConfig = SherpaOnnxOfflineModelConfig; | ||
| 676 | -const OfflineRecognizerConfig = SherpaOnnxOfflineRecognizerConfig; | ||
| 677 | -const OfflineParaformerModelConfig = SherpaOnnxOfflineParaformerModelConfig; | ||
| 678 | -const OfflineWhisperModelConfig = SherpaOnnxOfflineWhisperModelConfig; | ||
| 679 | -const OfflineNemoEncDecCtcModelConfig = | ||
| 680 | - SherpaOnnxOfflineNemoEncDecCtcModelConfig; | ||
| 681 | -const OfflineTdnnModelConfig = SherpaOnnxOfflineTdnnModelConfig; | ||
| 682 | - | ||
| 683 | -// vad | ||
| 684 | -const SileroVadModelConfig = SherpaOnnxSileroVadModelConfig; | ||
| 685 | -const VadModelConfig = SherpaOnnxVadModelConfig; | ||
| 686 | - | ||
| 687 | -// tts | ||
| 688 | -const OfflineTtsVitsModelConfig = SherpaOnnxOfflineTtsVitsModelConfig; | ||
| 689 | -const OfflineTtsModelConfig = SherpaOnnxOfflineTtsModelConfig; | ||
| 690 | -const OfflineTtsConfig = SherpaOnnxOfflineTtsConfig; | 16 | +function createOfflineTts(config) { |
| 17 | + return sherpa_onnx_tts.createOfflineTts(wasmModule, config); | ||
| 18 | +} | ||
| 691 | 19 | ||
| 20 | +// Note: online means streaming and offline means non-streaming here. | ||
| 21 | +// Both of them don't require internet connection. | ||
| 692 | module.exports = { | 22 | module.exports = { |
| 693 | - // online asr | ||
| 694 | - OnlineTransducerModelConfig, | ||
| 695 | - OnlineModelConfig, | ||
| 696 | - FeatureConfig, | ||
| 697 | - OnlineRecognizerConfig, | ||
| 698 | - OnlineRecognizer, | ||
| 699 | - OnlineStream, | ||
| 700 | - OnlineParaformerModelConfig, | ||
| 701 | - OnlineZipformer2CtcModelConfig, | ||
| 702 | - | ||
| 703 | - // offline asr | ||
| 704 | - OfflineRecognizer, | ||
| 705 | - OfflineStream, | ||
| 706 | - OfflineTransducerModelConfig, | ||
| 707 | - OfflineModelConfig, | ||
| 708 | - OfflineRecognizerConfig, | ||
| 709 | - OfflineParaformerModelConfig, | ||
| 710 | - OfflineWhisperModelConfig, | ||
| 711 | - OfflineNemoEncDecCtcModelConfig, | ||
| 712 | - OfflineTdnnModelConfig, | ||
| 713 | - // vad | ||
| 714 | - SileroVadModelConfig, | ||
| 715 | - VadModelConfig, | ||
| 716 | - CircularBuffer, | ||
| 717 | - VoiceActivityDetector, | ||
| 718 | - // tts | ||
| 719 | - OfflineTtsVitsModelConfig, | ||
| 720 | - OfflineTtsModelConfig, | ||
| 721 | - OfflineTtsConfig, | ||
| 722 | - OfflineTts, | ||
| 723 | - | ||
| 724 | - // | ||
| 725 | - Display, | 23 | + createOnlineRecognizer, |
| 24 | + createOfflineRecognizer, | ||
| 25 | + createOfflineTts, | ||
| 726 | }; | 26 | }; |
| 1 | { | 1 | { |
| 2 | - "name": "sherpa-onnx2", | ||
| 3 | - "version": "1.8.10", | ||
| 4 | - "description": "Real-time speech recognition with Next-gen Kaldi", | 2 | + "name": "sherpa-onnx", |
| 3 | + "version": "SHERPA_ONNX_VERSION", | ||
| 4 | + "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection", | ||
| 5 | "main": "index.js", | 5 | "main": "index.js", |
| 6 | "scripts": { | 6 | "scripts": { |
| 7 | "test": "echo \"Error: no test specified\" && exit 1" | 7 | "test": "echo \"Error: no test specified\" && exit 1" |
| @@ -11,15 +11,30 @@ | @@ -11,15 +11,30 @@ | ||
| 11 | "url": "git+https://github.com/k2-fsa/sherpa-onnx.git" | 11 | "url": "git+https://github.com/k2-fsa/sherpa-onnx.git" |
| 12 | }, | 12 | }, |
| 13 | "keywords": [ | 13 | "keywords": [ |
| 14 | - "speech-to-text", | ||
| 15 | - "text-to-speech", | 14 | + "speech to text", |
| 15 | + "text to speech", | ||
| 16 | + "transcription", | ||
| 16 | "real-time speech recognition", | 17 | "real-time speech recognition", |
| 17 | - "without internet connect", | 18 | + "without internet connection", |
| 18 | "embedded systems", | 19 | "embedded systems", |
| 19 | "open source", | 20 | "open source", |
| 20 | "zipformer", | 21 | "zipformer", |
| 21 | "asr", | 22 | "asr", |
| 22 | - "speech" | 23 | + "tts", |
| 24 | + "stt", | ||
| 25 | + "c++", | ||
| 26 | + "onnxruntime", | ||
| 27 | + "onnx", | ||
| 28 | + "ai", | ||
| 29 | + "next-gen kaldi", | ||
| 30 | + "offline", | ||
| 31 | + "privacy", | ||
| 32 | + "open source", | ||
| 33 | + "streaming speech recognition", | ||
| 34 | + "speech", | ||
| 35 | + "recognition", | ||
| 36 | + "WebAssembly", | ||
| 37 | + "wasm" | ||
| 23 | ], | 38 | ], |
| 24 | "author": "The next-gen Kaldi team", | 39 | "author": "The next-gen Kaldi team", |
| 25 | "license": "Apache-2.0", | 40 | "license": "Apache-2.0", |
| @@ -28,10 +43,5 @@ | @@ -28,10 +43,5 @@ | ||
| 28 | }, | 43 | }, |
| 29 | "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme", | 44 | "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme", |
| 30 | "dependencies": { | 45 | "dependencies": { |
| 31 | - "ffi-napi": "^4.0.3", | ||
| 32 | - "npm": "^6.14.18", | ||
| 33 | - "ref-array-napi": "^1.2.2", | ||
| 34 | - "ref-napi": "^3.0.3", | ||
| 35 | - "ref-struct-napi": "^1.1.1" | ||
| 36 | } | 46 | } |
| 37 | } | 47 | } |
scripts/nodejs/package.json.in
已删除
100644 → 0
| 1 | -{ | ||
| 2 | - "name": "sherpa-onnx", | ||
| 3 | - "version": "SHERPA_ONNX_VERSION", | ||
| 4 | - "description": "Real-time speech recognition with Next-gen Kaldi", | ||
| 5 | - "main": "index.js", | ||
| 6 | - "scripts": { | ||
| 7 | - "test": "echo \"Error: no test specified\" && exit 1" | ||
| 8 | - }, | ||
| 9 | - "repository": { | ||
| 10 | - "type": "git", | ||
| 11 | - "url": "git+https://github.com/k2-fsa/sherpa-onnx.git" | ||
| 12 | - }, | ||
| 13 | - "keywords": [ | ||
| 14 | - "speech to text", | ||
| 15 | - "text to speech", | ||
| 16 | - "transcription", | ||
| 17 | - "real-time speech recognition", | ||
| 18 | - "without internet connect", | ||
| 19 | - "embedded systems", | ||
| 20 | - "open source", | ||
| 21 | - "zipformer", | ||
| 22 | - "asr", | ||
| 23 | - "tts", | ||
| 24 | - "stt", | ||
| 25 | - "c++", | ||
| 26 | - "onnxruntime", | ||
| 27 | - "onnx", | ||
| 28 | - "ai", | ||
| 29 | - "next-gen kaldi", | ||
| 30 | - "offline", | ||
| 31 | - "privacy", | ||
| 32 | - "open source", | ||
| 33 | - "streaming speech recognition", | ||
| 34 | - "speech", | ||
| 35 | - "recognition" | ||
| 36 | - ], | ||
| 37 | - "author": "The next-gen Kaldi team", | ||
| 38 | - "license": "Apache-2.0", | ||
| 39 | - "bugs": { | ||
| 40 | - "url": "https://github.com/k2-fsa/sherpa-onnx/issues" | ||
| 41 | - }, | ||
| 42 | - "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme", | ||
| 43 | - "dependencies": { | ||
| 44 | - "ffi-napi": "^4.0.3", | ||
| 45 | - "npm": "^6.14.18", | ||
| 46 | - "ref-array-napi": "^1.2.2", | ||
| 47 | - "ref-napi": "^3.0.3", | ||
| 48 | - "ref-struct-napi": "^1.1.1" | ||
| 49 | - } | ||
| 50 | -} |
scripts/nodejs/run.sh
已删除
100755 → 0
| 1 | -#!/usr/bin/env bash | ||
| 2 | -set -ex | ||
| 3 | - | ||
| 4 | -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 5 | -SHERPA_ONNX_DIR=$(realpath $SCRIPT_DIR/../..) | ||
| 6 | -echo "SCRIPT_DIR: $SCRIPT_DIR" | ||
| 7 | -echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 8 | - | ||
| 9 | -SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | ||
| 10 | - | ||
| 11 | -echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" | ||
| 12 | -sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g ./package.json.in | ||
| 13 | - | ||
| 14 | -cp package.json.in package.json | ||
| 15 | -rm package.json.in | ||
| 16 | -rm package.json.in.bak | ||
| 17 | -rm .clang-format | ||
| 18 | - | ||
| 19 | -function windows_x64() { | ||
| 20 | - echo "Process Windows (x64)" | ||
| 21 | - mkdir -p lib/win-x64 | ||
| 22 | - dst=$(realpath lib/win-x64) | ||
| 23 | - mkdir t | ||
| 24 | - cd t | ||
| 25 | - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl | ||
| 26 | - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl | ||
| 27 | - | ||
| 28 | - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst | ||
| 29 | - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst | ||
| 30 | - rm -fv $dst/sherpa-onnx-portaudio.dll | ||
| 31 | - | ||
| 32 | - cd .. | ||
| 33 | - rm -rf t | ||
| 34 | -} | ||
| 35 | - | ||
| 36 | -function windows_x86() { | ||
| 37 | - echo "Process Windows (x86)" | ||
| 38 | - mkdir -p lib/win-x86 | ||
| 39 | - dst=$(realpath lib/win-x86) | ||
| 40 | - mkdir t | ||
| 41 | - cd t | ||
| 42 | - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl | ||
| 43 | - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl | ||
| 44 | - | ||
| 45 | - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst | ||
| 46 | - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst | ||
| 47 | - rm -fv $dst/sherpa-onnx-portaudio.dll | ||
| 48 | - | ||
| 49 | - cd .. | ||
| 50 | - rm -rf t | ||
| 51 | -} | ||
| 52 | - | ||
| 53 | -function linux_x64() { | ||
| 54 | - echo "Process Linux (x64)" | ||
| 55 | - mkdir -p lib/linux-x64 | ||
| 56 | - dst=$(realpath lib/linux-x64) | ||
| 57 | - mkdir t | ||
| 58 | - cd t | ||
| 59 | - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_28_x86_64.whl | ||
| 60 | - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_28_x86_64.whl | ||
| 61 | - | ||
| 62 | - cp -v sherpa_onnx/lib/*.so* $dst | ||
| 63 | - rm -v $dst/libcargs.so | ||
| 64 | - rm -v $dst/libsherpa-onnx-portaudio.so | ||
| 65 | - rm -v $dst/libsherpa-onnx-fst.so | ||
| 66 | - rm -v $dst/libonnxruntime.so | ||
| 67 | - | ||
| 68 | - cd .. | ||
| 69 | - rm -rf t | ||
| 70 | -} | ||
| 71 | - | ||
| 72 | -function osx_x64() { | ||
| 73 | - echo "Process osx-x64" | ||
| 74 | - mkdir -p lib/osx-x64 | ||
| 75 | - dst=$(realpath lib/osx-x64) | ||
| 76 | - mkdir t | ||
| 77 | - cd t | ||
| 78 | - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl | ||
| 79 | - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl | ||
| 80 | - | ||
| 81 | - cp -v sherpa_onnx/lib/*.dylib $dst/ | ||
| 82 | - rm -v $dst/libonnxruntime.dylib | ||
| 83 | - rm -v $dst/libcargs.dylib | ||
| 84 | - rm -v $dst/libsherpa-onnx-fst.dylib | ||
| 85 | - rm -v $dst/libsherpa-onnx-portaudio.dylib | ||
| 86 | - | ||
| 87 | - cd .. | ||
| 88 | - rm -rf t | ||
| 89 | -} | ||
| 90 | - | ||
| 91 | -function osx_arm64() { | ||
| 92 | - echo "Process osx-arm64" | ||
| 93 | - mkdir -p lib/osx-arm64 | ||
| 94 | - dst=$(realpath lib/osx-arm64) | ||
| 95 | - mkdir t | ||
| 96 | - cd t | ||
| 97 | - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl | ||
| 98 | - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl | ||
| 99 | - | ||
| 100 | - cp -v sherpa_onnx/lib/*.dylib $dst/ | ||
| 101 | - rm -v $dst/libonnxruntime.dylib | ||
| 102 | - rm -v $dst/libcargs.dylib | ||
| 103 | - rm -v $dst/libsherpa-onnx-fst.dylib | ||
| 104 | - rm -v $dst/libsherpa-onnx-portaudio.dylib | ||
| 105 | - | ||
| 106 | - cd .. | ||
| 107 | - rm -rf t | ||
| 108 | -} | ||
| 109 | - | ||
| 110 | -windows_x64 | ||
| 111 | -ls -lh lib/win-x64 | ||
| 112 | - | ||
| 113 | -windows_x86 | ||
| 114 | -ls -lh lib/win-x86 | ||
| 115 | - | ||
| 116 | -linux_x64 | ||
| 117 | -ls -lh lib/linux-x64 | ||
| 118 | - | ||
| 119 | -osx_x64 | ||
| 120 | -ls -lh lib/osx-x64 | ||
| 121 | - | ||
| 122 | -osx_arm64 | ||
| 123 | -ls -lh lib/osx-arm64 |
| @@ -94,6 +94,11 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( | @@ -94,6 +94,11 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( | ||
| 94 | SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str()); | 94 | SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str()); |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | + if (!recognizer_config.Validate()) { | ||
| 98 | + SHERPA_ONNX_LOGE("Errors in config!"); | ||
| 99 | + return nullptr; | ||
| 100 | + } | ||
| 101 | + | ||
| 97 | SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer; | 102 | SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer; |
| 98 | 103 | ||
| 99 | recognizer->impl = | 104 | recognizer->impl = |
| @@ -324,6 +329,11 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( | @@ -324,6 +329,11 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( | ||
| 324 | SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str()); | 329 | SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str()); |
| 325 | } | 330 | } |
| 326 | 331 | ||
| 332 | + if (!recognizer_config.Validate()) { | ||
| 333 | + SHERPA_ONNX_LOGE("Errors in config"); | ||
| 334 | + return nullptr; | ||
| 335 | + } | ||
| 336 | + | ||
| 327 | SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer; | 337 | SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer; |
| 328 | 338 | ||
| 329 | recognizer->impl = | 339 | recognizer->impl = |
| @@ -480,6 +490,11 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( | @@ -480,6 +490,11 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( | ||
| 480 | SHERPA_ONNX_LOGE("%s", vad_config.ToString().c_str()); | 490 | SHERPA_ONNX_LOGE("%s", vad_config.ToString().c_str()); |
| 481 | } | 491 | } |
| 482 | 492 | ||
| 493 | + if (!vad_config.Validate()) { | ||
| 494 | + SHERPA_ONNX_LOGE("Errors in config"); | ||
| 495 | + return nullptr; | ||
| 496 | + } | ||
| 497 | + | ||
| 483 | SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector; | 498 | SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector; |
| 484 | p->impl = std::make_unique<sherpa_onnx::VoiceActivityDetector>( | 499 | p->impl = std::make_unique<sherpa_onnx::VoiceActivityDetector>( |
| 485 | vad_config, buffer_size_in_seconds); | 500 | vad_config, buffer_size_in_seconds); |
| @@ -570,6 +585,11 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | @@ -570,6 +585,11 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | ||
| 570 | SHERPA_ONNX_LOGE("%s\n", tts_config.ToString().c_str()); | 585 | SHERPA_ONNX_LOGE("%s\n", tts_config.ToString().c_str()); |
| 571 | } | 586 | } |
| 572 | 587 | ||
| 588 | + if (!tts_config.Validate()) { | ||
| 589 | + SHERPA_ONNX_LOGE("Errors in config"); | ||
| 590 | + return nullptr; | ||
| 591 | + } | ||
| 592 | + | ||
| 573 | SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts; | 593 | SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts; |
| 574 | 594 | ||
| 575 | tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config); | 595 | tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config); |
| @@ -45,7 +45,7 @@ Module.onRuntimeInitialized = function() { | @@ -45,7 +45,7 @@ Module.onRuntimeInitialized = function() { | ||
| 45 | 45 | ||
| 46 | startBtn.disabled = false; | 46 | startBtn.disabled = false; |
| 47 | 47 | ||
| 48 | - recognizer = createRecognizer(); | 48 | + recognizer = createOnlineRecognizer(Module); |
| 49 | console.log('recognizer is created!', recognizer); | 49 | console.log('recognizer is created!', recognizer); |
| 50 | }; | 50 | }; |
| 51 | 51 |
/**
 * Recursively releases the WASM heap memory owned by a config object
 * produced by one of the initSherpaOnnx*Config() helpers.
 *
 * @param config {Object} Owns `ptr` (struct memory), optionally `buffer`
 *     (packed string storage) and nested sub-config objects.
 * @param Module {Object} The Emscripten module providing _free().
 */
function freeConfig(config, Module) {
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  // Nested sub-configs are released before the struct memory itself.
  const children = [
    'config', 'transducer', 'paraformer', 'ctc', 'feat', 'model',
    'nemoCtc', 'whisper', 'tdnn', 'lm'
  ];
  for (const key of children) {
    if (key in config) {
      freeConfig(config[key], Module);
    }
  }

  Module._free(config.ptr);
}
| 32 | 48 | ||
| 33 | // The user should free the returned pointers | 49 | // The user should free the returned pointers |
/**
 * Serializes an online transducer model config into WASM heap memory.
 *
 * Struct layout (wasm32): three consecutive char* fields pointing at the
 * encoder, decoder and joiner file names, which are packed NUL-terminated
 * into one auxiliary buffer.
 *
 * @param config {Object} Has string fields encoder, decoder, joiner.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len}; the caller frees via freeConfig().
 */
function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
  // Byte lengths including the trailing NUL.
  const nEncoder = Module.lengthBytesUTF8(config.encoder) + 1;
  const nDecoder = Module.lengthBytesUTF8(config.decoder) + 1;
  const nJoiner = Module.lengthBytesUTF8(config.joiner) + 1;

  const buffer = Module._malloc(nEncoder + nDecoder + nJoiner);

  const len = 3 * 4;  // 3 pointers, 4 bytes each in wasm32
  const ptr = Module._malloc(len);

  // Copy the strings back-to-back into `buffer`.
  Module.stringToUTF8(config.encoder, buffer, nEncoder);
  Module.stringToUTF8(config.decoder, buffer + nEncoder, nDecoder);
  Module.stringToUTF8(config.joiner, buffer + nEncoder + nDecoder, nJoiner);

  // Point the struct fields at the packed strings.
  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, buffer + nEncoder, 'i8*');
  Module.setValue(ptr + 8, buffer + nEncoder + nDecoder, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
| 68 | 84 | ||
/**
 * Serializes an online paraformer model config into WASM heap memory.
 *
 * Struct layout (wasm32): two consecutive char* fields (encoder, decoder),
 * both pointing into one packed string buffer.
 *
 * @param config {Object} Has string fields encoder and decoder.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len}; the caller frees via freeConfig().
 */
function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
  const nEncoder = Module.lengthBytesUTF8(config.encoder) + 1;
  const nDecoder = Module.lengthBytesUTF8(config.decoder) + 1;

  const buffer = Module._malloc(nEncoder + nDecoder);

  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.encoder, buffer, nEncoder);
  Module.stringToUTF8(config.decoder, buffer + nEncoder, nDecoder);

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, buffer + nEncoder, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
| 95 | 111 | ||
/**
 * Serializes an online zipformer2 CTC model config into WASM heap memory.
 *
 * Struct layout (wasm32): a single char* field pointing at the model
 * file name.
 *
 * @param config {Object} Has a string field `model`.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len}; the caller frees via freeConfig().
 */
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model) + 1;
  const buffer = Module._malloc(modelLen);

  const len = 1 * 4;  // one 4-byte pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
| 111 | 127 | ||
| 112 | -function initSherpaOnnxOnlineModelConfig(config) { | ||
| 113 | - let transducer = initSherpaOnnxOnlineTransducerModelConfig(config.transducer); | ||
| 114 | - let paraformer = initSherpaOnnxOnlineParaformerModelConfig(config.paraformer); | ||
| 115 | - let ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(config.zipformer2Ctc); | 128 | +function initSherpaOnnxOnlineModelConfig(config, Module) { |
| 129 | + const transducer = | ||
| 130 | + initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module); | ||
| 131 | + const paraformer = | ||
| 132 | + initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module); | ||
| 133 | + const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig( | ||
| 134 | + config.zipformer2Ctc, Module); | ||
| 116 | 135 | ||
| 117 | - let len = transducer.len + paraformer.len + ctc.len + 5 * 4; | ||
| 118 | - let ptr = _malloc(len); | 136 | + const len = transducer.len + paraformer.len + ctc.len + 5 * 4; |
| 137 | + const ptr = Module._malloc(len); | ||
| 119 | 138 | ||
| 120 | let offset = 0; | 139 | let offset = 0; |
| 121 | - _CopyHeap(transducer.ptr, transducer.len, ptr + offset); | 140 | + Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset); |
| 122 | offset += transducer.len; | 141 | offset += transducer.len; |
| 123 | 142 | ||
| 124 | - _CopyHeap(paraformer.ptr, paraformer.len, ptr + offset); | 143 | + Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset); |
| 125 | offset += paraformer.len; | 144 | offset += paraformer.len; |
| 126 | 145 | ||
| 127 | - _CopyHeap(ctc.ptr, ctc.len, ptr + offset); | 146 | + Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset); |
| 128 | offset += ctc.len; | 147 | offset += ctc.len; |
| 129 | 148 | ||
| 130 | - let tokensLen = lengthBytesUTF8(config.tokens) + 1; | ||
| 131 | - let providerLen = lengthBytesUTF8(config.provider) + 1; | ||
| 132 | - let modelTypeLen = lengthBytesUTF8(config.modelType) + 1; | ||
| 133 | - let bufferLen = tokensLen + providerLen + modelTypeLen; | ||
| 134 | - let buffer = _malloc(bufferLen); | 149 | + const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; |
| 150 | + const providerLen = Module.lengthBytesUTF8(config.provider) + 1; | ||
| 151 | + const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1; | ||
| 152 | + const bufferLen = tokensLen + providerLen + modelTypeLen; | ||
| 153 | + const buffer = Module._malloc(bufferLen); | ||
| 135 | 154 | ||
| 136 | offset = 0; | 155 | offset = 0; |
| 137 | - stringToUTF8(config.tokens, buffer, tokensLen); | 156 | + Module.stringToUTF8(config.tokens, buffer, tokensLen); |
| 138 | offset += tokensLen; | 157 | offset += tokensLen; |
| 139 | 158 | ||
| 140 | - stringToUTF8(config.provider, buffer + offset, providerLen); | 159 | + Module.stringToUTF8(config.provider, buffer + offset, providerLen); |
| 141 | offset += providerLen; | 160 | offset += providerLen; |
| 142 | 161 | ||
| 143 | - stringToUTF8(config.modelType, buffer + offset, modelTypeLen); | 162 | + Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen); |
| 144 | 163 | ||
| 145 | offset = transducer.len + paraformer.len + ctc.len; | 164 | offset = transducer.len + paraformer.len + ctc.len; |
| 146 | - setValue(ptr + offset, buffer, 'i8*'); // tokens | 165 | + Module.setValue(ptr + offset, buffer, 'i8*'); // tokens |
| 147 | offset += 4; | 166 | offset += 4; |
| 148 | 167 | ||
| 149 | - setValue(ptr + offset, config.numThreads, 'i32'); | 168 | + Module.setValue(ptr + offset, config.numThreads, 'i32'); |
| 150 | offset += 4; | 169 | offset += 4; |
| 151 | 170 | ||
| 152 | - setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider | 171 | + Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider |
| 153 | offset += 4; | 172 | offset += 4; |
| 154 | 173 | ||
| 155 | - setValue(ptr + offset, config.debug, 'i32'); | 174 | + Module.setValue(ptr + offset, config.debug, 'i32'); |
| 156 | offset += 4; | 175 | offset += 4; |
| 157 | 176 | ||
| 158 | - setValue(ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType | 177 | + Module.setValue( |
| 178 | + ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType | ||
| 159 | offset += 4; | 179 | offset += 4; |
| 160 | 180 | ||
| 161 | return { | 181 | return { |
| @@ -164,63 +184,63 @@ function initSherpaOnnxOnlineModelConfig(config) { | @@ -164,63 +184,63 @@ function initSherpaOnnxOnlineModelConfig(config) { | ||
| 164 | } | 184 | } |
| 165 | } | 185 | } |
| 166 | 186 | ||
/**
 * Serializes a feature extraction config into WASM heap memory.
 *
 * Struct layout (wasm32): int32 sampleRate followed by int32 featureDim.
 *
 * @param config {Object} Has numeric fields sampleRate and featureDim.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {ptr, len}; the caller frees via freeConfig().
 */
function initSherpaOnnxFeatureConfig(config, Module) {
  const len = 2 * 4;  // two int32 fields
  const ptr = Module._malloc(len);

  Module.setValue(ptr, config.sampleRate, 'i32');
  Module.setValue(ptr + 4, config.featureDim, 'i32');

  return {ptr: ptr, len: len};
}
| 175 | 195 | ||
| 176 | -function initSherpaOnnxOnlineRecognizerConfig(config) { | ||
| 177 | - let feat = initSherpaOnnxFeatureConfig(config.featConfig); | ||
| 178 | - let model = initSherpaOnnxOnlineModelConfig(config.modelConfig); | 196 | +function initSherpaOnnxOnlineRecognizerConfig(config, Module) { |
| 197 | + const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module); | ||
| 198 | + const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module); | ||
| 179 | 199 | ||
| 180 | - let len = feat.len + model.len + 8 * 4; | ||
| 181 | - let ptr = _malloc(len); | 200 | + const len = feat.len + model.len + 8 * 4; |
| 201 | + const ptr = Module._malloc(len); | ||
| 182 | 202 | ||
| 183 | let offset = 0; | 203 | let offset = 0; |
| 184 | - _CopyHeap(feat.ptr, feat.len, ptr + offset); | 204 | + Module._CopyHeap(feat.ptr, feat.len, ptr + offset); |
| 185 | offset += feat.len; | 205 | offset += feat.len; |
| 186 | 206 | ||
| 187 | - _CopyHeap(model.ptr, model.len, ptr + offset); | 207 | + Module._CopyHeap(model.ptr, model.len, ptr + offset); |
| 188 | offset += model.len; | 208 | offset += model.len; |
| 189 | 209 | ||
| 190 | - let decodingMethodLen = lengthBytesUTF8(config.decodingMethod) + 1; | ||
| 191 | - let hotwordsFileLen = lengthBytesUTF8(config.hotwordsFile) + 1; | ||
| 192 | - let bufferLen = decodingMethodLen + hotwordsFileLen; | ||
| 193 | - let buffer = _malloc(bufferLen); | 210 | + const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1; |
| 211 | + const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1; | ||
| 212 | + const bufferLen = decodingMethodLen + hotwordsFileLen; | ||
| 213 | + const buffer = Module._malloc(bufferLen); | ||
| 194 | 214 | ||
| 195 | offset = 0; | 215 | offset = 0; |
| 196 | - stringToUTF8(config.decodingMethod, buffer, decodingMethodLen); | 216 | + Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen); |
| 197 | offset += decodingMethodLen; | 217 | offset += decodingMethodLen; |
| 198 | 218 | ||
| 199 | - stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); | 219 | + Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); |
| 200 | 220 | ||
| 201 | offset = feat.len + model.len; | 221 | offset = feat.len + model.len; |
| 202 | - setValue(ptr + offset, buffer, 'i8*'); // decoding method | 222 | + Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method |
| 203 | offset += 4; | 223 | offset += 4; |
| 204 | 224 | ||
| 205 | - setValue(ptr + offset, config.maxActivePaths, 'i32'); | 225 | + Module.setValue(ptr + offset, config.maxActivePaths, 'i32'); |
| 206 | offset += 4; | 226 | offset += 4; |
| 207 | 227 | ||
| 208 | - setValue(ptr + offset, config.enableEndpoint, 'i32'); | 228 | + Module.setValue(ptr + offset, config.enableEndpoint, 'i32'); |
| 209 | offset += 4; | 229 | offset += 4; |
| 210 | 230 | ||
| 211 | - setValue(ptr + offset, config.rule1MinTrailingSilence, 'float'); | 231 | + Module.setValue(ptr + offset, config.rule1MinTrailingSilence, 'float'); |
| 212 | offset += 4; | 232 | offset += 4; |
| 213 | 233 | ||
| 214 | - setValue(ptr + offset, config.rule2MinTrailingSilence, 'float'); | 234 | + Module.setValue(ptr + offset, config.rule2MinTrailingSilence, 'float'); |
| 215 | offset += 4; | 235 | offset += 4; |
| 216 | 236 | ||
| 217 | - setValue(ptr + offset, config.rule3MinUtteranceLength, 'float'); | 237 | + Module.setValue(ptr + offset, config.rule3MinUtteranceLength, 'float'); |
| 218 | offset += 4; | 238 | offset += 4; |
| 219 | 239 | ||
| 220 | - setValue(ptr + offset, buffer + decodingMethodLen, 'i8*'); | 240 | + Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*'); |
| 221 | offset += 4; | 241 | offset += 4; |
| 222 | 242 | ||
| 223 | - setValue(ptr + offset, config.hotwordsScore, 'float'); | 243 | + Module.setValue(ptr + offset, config.hotwordsScore, 'float'); |
| 224 | offset += 4; | 244 | offset += 4; |
| 225 | 245 | ||
| 226 | return { | 246 | return { |
| @@ -229,21 +249,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config) { | @@ -229,21 +249,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config) { | ||
| 229 | } | 249 | } |
| 230 | 250 | ||
| 231 | 251 | ||
| 232 | -function createRecognizer() { | ||
| 233 | - let onlineTransducerModelConfig = { | 252 | +function createOnlineRecognizer(Module, myConfig) { |
| 253 | + const onlineTransducerModelConfig = { | ||
| 234 | encoder: '', | 254 | encoder: '', |
| 235 | decoder: '', | 255 | decoder: '', |
| 236 | joiner: '', | 256 | joiner: '', |
| 237 | - } | 257 | + }; |
| 238 | 258 | ||
| 239 | - let onlineParaformerModelConfig = { | 259 | + const onlineParaformerModelConfig = { |
| 240 | encoder: '', | 260 | encoder: '', |
| 241 | decoder: '', | 261 | decoder: '', |
| 242 | - } | 262 | + }; |
| 243 | 263 | ||
| 244 | - let onlineZipformer2CtcModelConfig = { | 264 | + const onlineZipformer2CtcModelConfig = { |
| 245 | model: '', | 265 | model: '', |
| 246 | - } | 266 | + }; |
| 247 | 267 | ||
| 248 | let type = 0; | 268 | let type = 0; |
| 249 | 269 | ||
| @@ -266,7 +286,7 @@ function createRecognizer() { | @@ -266,7 +286,7 @@ function createRecognizer() { | ||
| 266 | } | 286 | } |
| 267 | 287 | ||
| 268 | 288 | ||
| 269 | - let onlineModelConfig = { | 289 | + const onlineModelConfig = { |
| 270 | transducer: onlineTransducerModelConfig, | 290 | transducer: onlineTransducerModelConfig, |
| 271 | paraformer: onlineParaformerModelConfig, | 291 | paraformer: onlineParaformerModelConfig, |
| 272 | zipformer2Ctc: onlineZipformer2CtcModelConfig, | 292 | zipformer2Ctc: onlineZipformer2CtcModelConfig, |
| @@ -275,12 +295,12 @@ function createRecognizer() { | @@ -275,12 +295,12 @@ function createRecognizer() { | ||
| 275 | provider: 'cpu', | 295 | provider: 'cpu', |
| 276 | debug: 1, | 296 | debug: 1, |
| 277 | modelType: '', | 297 | modelType: '', |
| 278 | - } | 298 | + }; |
| 279 | 299 | ||
| 280 | - let featureConfig = { | 300 | + const featureConfig = { |
| 281 | sampleRate: 16000, | 301 | sampleRate: 16000, |
| 282 | featureDim: 80, | 302 | featureDim: 80, |
| 283 | - } | 303 | + }; |
| 284 | 304 | ||
| 285 | let recognizerConfig = { | 305 | let recognizerConfig = { |
| 286 | featConfig: featureConfig, | 306 | featConfig: featureConfig, |
| @@ -293,23 +313,336 @@ function createRecognizer() { | @@ -293,23 +313,336 @@ function createRecognizer() { | ||
| 293 | rule3MinUtteranceLength: 20, | 313 | rule3MinUtteranceLength: 20, |
| 294 | hotwordsFile: '', | 314 | hotwordsFile: '', |
| 295 | hotwordsScore: 1.5, | 315 | hotwordsScore: 1.5, |
| 316 | + }; | ||
| 317 | + if (myConfig) { | ||
| 318 | + recognizerConfig = myConfig; | ||
| 319 | + } | ||
| 320 | + | ||
| 321 | + return new OnlineRecognizer(recognizerConfig, Module); | ||
| 322 | +} | ||
| 323 | + | ||
/**
 * Serializes an offline transducer model config into WASM heap memory.
 *
 * Struct layout (wasm32): three consecutive char* fields (encoder, decoder,
 * joiner) pointing into one packed NUL-terminated string buffer.
 *
 * @param config {Object} Has string fields encoder, decoder, joiner.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len}; the caller frees via freeConfig().
 */
function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
  const nEncoder = Module.lengthBytesUTF8(config.encoder) + 1;
  const nDecoder = Module.lengthBytesUTF8(config.decoder) + 1;
  const nJoiner = Module.lengthBytesUTF8(config.joiner) + 1;

  const buffer = Module._malloc(nEncoder + nDecoder + nJoiner);

  const len = 3 * 4;  // 3 pointers
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.encoder, buffer, nEncoder);
  Module.stringToUTF8(config.decoder, buffer + nEncoder, nDecoder);
  Module.stringToUTF8(config.joiner, buffer + nEncoder + nDecoder, nJoiner);

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, buffer + nEncoder, 'i8*');
  Module.setValue(ptr + 8, buffer + nEncoder + nDecoder, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
| 358 | + | ||
/**
 * Serializes an offline paraformer model config into WASM heap memory.
 *
 * Struct layout (wasm32): a single char* field pointing at the model
 * file name.
 *
 * @param config {Object} Has a string field `model`.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len}; the caller frees via freeConfig().
 */
function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model) + 1;
  const buffer = Module._malloc(modelLen);

  const len = 1 * 4;  // one pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
| 375 | + | ||
/**
 * Serializes an offline NeMo EncDecCtc model config into WASM heap memory.
 *
 * Struct layout (wasm32): a single char* field pointing at the model
 * file name.
 *
 * @param config {Object} Has a string field `model`.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len}; the caller frees via freeConfig().
 */
function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model) + 1;
  const buffer = Module._malloc(modelLen);

  const len = 1 * 4;  // one pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
| 392 | + | ||
/**
 * Serializes an offline Whisper model config into WASM heap memory.
 *
 * Struct layout (wasm32): two consecutive char* fields (encoder, decoder),
 * both pointing into one packed string buffer.
 *
 * @param config {Object} Has string fields encoder and decoder.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len}; the caller frees via freeConfig().
 */
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
  const nEncoder = Module.lengthBytesUTF8(config.encoder) + 1;
  const nDecoder = Module.lengthBytesUTF8(config.decoder) + 1;

  const buffer = Module._malloc(nEncoder + nDecoder);

  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.encoder, buffer, nEncoder);
  Module.stringToUTF8(config.decoder, buffer + nEncoder, nDecoder);

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, buffer + nEncoder, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
| 419 | + | ||
/**
 * Serializes an offline TDNN model config into WASM heap memory.
 *
 * Struct layout (wasm32): a single char* field pointing at the model
 * file name.
 *
 * @param config {Object} Has a string field `model`.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len}; the caller frees via freeConfig().
 */
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model) + 1;
  const buffer = Module._malloc(modelLen);

  const len = 1 * 4;  // one pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
| 435 | + | ||
/**
 * Serializes an offline language-model config into WASM heap memory.
 *
 * Struct layout (wasm32): char* model followed by float scale.
 *
 * @param config {Object} Has a string field `model` and a numeric `scale`.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len}; the caller frees via freeConfig().
 */
function initSherpaOnnxOfflineLMConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model) + 1;
  const buffer = Module._malloc(modelLen);

  const len = 2 * 4;  // char* model + float scale
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.scale, 'float');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
| 451 | + | ||
/**
 * Serializes an offline model config into WASM heap memory.
 *
 * Layout (wasm32): the five sub-config structs back-to-back, followed by
 * char* tokens, int32 numThreads, int32 debug, char* provider and
 * char* modelType. NOTE(review): field order differs from the online
 * variant (debug precedes provider here) — presumably matching the C
 * struct; confirm against the C API header before reordering.
 *
 * @param config {Object} Has sub-configs transducer, paraformer, nemoCtc,
 *     whisper, tdnn plus tokens, numThreads, debug, provider, modelType.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len} plus the owned sub-config objects;
 *     release everything with freeConfig().
 */
function initSherpaOnnxOfflineModelConfig(config, Module) {
  const transducer =
      initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
  const paraformer =
      initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);
  const nemoCtc =
      initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);
  const whisper =
      initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);
  const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);

  const subConfigs = [transducer, paraformer, nemoCtc, whisper, tdnn];
  const subLen = subConfigs.reduce((acc, c) => acc + c.len, 0);

  const len = subLen + 5 * 4;  // + 3 pointers and 2 int32 scalars
  const ptr = Module._malloc(len);

  // Copy each sub-config struct to the head of this struct, in order.
  let off = 0;
  for (const c of subConfigs) {
    Module._CopyHeap(c.ptr, c.len, ptr + off);
    off += c.len;
  }

  // Pack the three strings into one auxiliary buffer.
  const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
  const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
  const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
  const buffer = Module._malloc(tokensLen + providerLen + modelTypeLen);

  Module.stringToUTF8(config.tokens, buffer, tokensLen);
  Module.stringToUTF8(config.provider, buffer + tokensLen, providerLen);
  Module.stringToUTF8(
      config.modelType, buffer + tokensLen + providerLen, modelTypeLen);

  // Scalar / pointer fields follow the sub-config structs.
  off = subLen;
  Module.setValue(ptr + off, buffer, 'i8*');  // tokens
  off += 4;

  Module.setValue(ptr + off, config.numThreads, 'i32');
  off += 4;

  Module.setValue(ptr + off, config.debug, 'i32');
  off += 4;

  Module.setValue(ptr + off, buffer + tokensLen, 'i8*');  // provider
  off += 4;

  Module.setValue(
      ptr + off, buffer + tokensLen + providerLen, 'i8*');  // modelType

  return {
    buffer: buffer, ptr: ptr, len: len, transducer: transducer,
    paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn
  }
}
| 521 | + | ||
/**
 * Serializes an offline recognizer config into WASM heap memory.
 *
 * Layout (wasm32): feat, model and lm structs back-to-back, followed by
 * char* decodingMethod, int32 maxActivePaths, char* hotwordsFile and
 * float hotwordsScore.
 *
 * @param config {Object} Has featConfig, modelConfig, lmConfig plus
 *     decodingMethod, maxActivePaths, hotwordsFile, hotwordsScore.
 * @param Module {Object} The Emscripten module.
 * @returns {Object} {buffer, ptr, len} plus the owned feat/model/lm
 *     sub-configs; release everything with freeConfig().
 */
function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
  const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);

  const len = feat.len + model.len + lm.len + 4 * 4;
  const ptr = Module._malloc(len);

  // Copy the sub-config structs to the head of this struct, in order.
  let off = 0;
  for (const sub of [feat, model, lm]) {
    Module._CopyHeap(sub.ptr, sub.len, ptr + off);
    off += sub.len;
  }

  // Pack both strings into one auxiliary buffer.
  const dmLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
  const hwLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
  const buffer = Module._malloc(dmLen + hwLen);

  Module.stringToUTF8(config.decodingMethod, buffer, dmLen);
  Module.stringToUTF8(config.hotwordsFile, buffer + dmLen, hwLen);

  off = feat.len + model.len + lm.len;

  Module.setValue(ptr + off, buffer, 'i8*');  // decoding method
  off += 4;

  Module.setValue(ptr + off, config.maxActivePaths, 'i32');
  off += 4;

  Module.setValue(ptr + off, buffer + dmLen, 'i8*');  // hotwords file
  off += 4;

  Module.setValue(ptr + off, config.hotwordsScore, 'float');

  return {
    buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
  }
}
| 300 | 569 | ||
/**
 * Thin wrapper around a native offline (non-streaming) stream handle.
 */
class OfflineStream {
  /**
   * @param handle {Number} Native stream handle from the WASM module.
   * @param Module {Object} The Emscripten module.
   */
  constructor(handle, Module) {
    this.handle = handle;
    this.Module = Module;
  }

  /** Destroys the native stream. Safe to call more than once. */
  free() {
    if (!this.handle) {
      return;
    }
    this.Module._DestroyOfflineStream(this.handle);
    this.handle = null;
  }

  /**
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    const nBytes = samples.length * samples.BYTES_PER_ELEMENT;
    const ptr = this.Module._malloc(nBytes);
    // HEAPF32 is indexed in 4-byte elements, hence the division.
    this.Module.HEAPF32.set(samples, ptr / samples.BYTES_PER_ELEMENT);
    this.Module._AcceptWaveformOffline(
        this.handle, sampleRate, ptr, samples.length);
    this.Module._free(ptr);
  }
};
| 596 | + | ||
/**
 * Wraps a native offline (non-streaming) recognizer.
 */
class OfflineRecognizer {
  /**
   * @param configObj {Object} Config consumed by
   *     initSherpaOnnxOfflineRecognizerConfig().
   * @param Module {Object} The Emscripten module.
   */
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxOfflineRecognizerConfig(configObj, Module);
    this.handle = Module._CreateOfflineRecognizer(config.ptr);
    // The native side copies the config, so release it immediately.
    freeConfig(config, Module);
    this.Module = Module;
  }

  /** Destroys the native recognizer. */
  free() {
    this.Module._DestroyOfflineRecognizer(this.handle);
    this.handle = 0
  }

  /** @returns {OfflineStream} A fresh stream bound to this recognizer. */
  createStream() {
    const streamHandle = this.Module._CreateOfflineStream(this.handle);
    return new OfflineStream(streamHandle, this.Module);
  }

  /** Runs recognition on the given stream. */
  decode(stream) {
    this.Module._DecodeOfflineStream(this.handle, stream.handle);
  }

  /**
   * @param stream {OfflineStream} A stream that has been decoded.
   * @returns {String} The recognized text.
   */
  getResult(stream) {
    const resultPtr = this.Module._GetOfflineStreamResult(stream.handle);

    // First struct field is `const char *text`.
    const textPtr = this.Module.getValue(resultPtr, 'i8*');
    const text = this.Module.UTF8ToString(textPtr);

    this.Module._DestroyOfflineRecognizerResult(resultPtr);
    return text;
  }
};
| 632 | + | ||
| 301 | class OnlineStream { | 633 | class OnlineStream { |
| 302 | - constructor(handle) { | 634 | + constructor(handle, Module) { |
| 303 | this.handle = handle; | 635 | this.handle = handle; |
| 304 | this.pointer = null; // buffer | 636 | this.pointer = null; // buffer |
| 305 | this.n = 0; // buffer size | 637 | this.n = 0; // buffer size |
| 638 | + this.Module = Module; | ||
| 306 | } | 639 | } |
| 307 | 640 | ||
| 308 | free() { | 641 | free() { |
| 309 | if (this.handle) { | 642 | if (this.handle) { |
| 310 | - _DestroyOnlineStream(this.handle); | 643 | + this.Module._DestroyOnlineStream(this.handle); |
| 311 | this.handle = null; | 644 | this.handle = null; |
| 312 | - _free(this.pointer); | 645 | + this.Module._free(this.pointer); |
| 313 | this.pointer = null; | 646 | this.pointer = null; |
| 314 | this.n = 0; | 647 | this.n = 0; |
| 315 | } | 648 | } |
| @@ -321,61 +654,73 @@ class OnlineStream { | @@ -321,61 +654,73 @@ class OnlineStream { | ||
| 321 | */ | 654 | */ |
| 322 | acceptWaveform(sampleRate, samples) { | 655 | acceptWaveform(sampleRate, samples) { |
| 323 | if (this.n < samples.length) { | 656 | if (this.n < samples.length) { |
| 324 | - _free(this.pointer); | ||
| 325 | - this.pointer = _malloc(samples.length * samples.BYTES_PER_ELEMENT); | 657 | + this.Module._free(this.pointer); |
| 658 | + this.pointer = | ||
| 659 | + this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); | ||
| 326 | this.n = samples.length | 660 | this.n = samples.length |
| 327 | } | 661 | } |
| 328 | 662 | ||
| 329 | - Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT); | ||
| 330 | - _AcceptWaveform(this.handle, sampleRate, this.pointer, samples.length); | 663 | + this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT); |
| 664 | + this.Module._AcceptWaveform( | ||
| 665 | + this.handle, sampleRate, this.pointer, samples.length); | ||
| 331 | } | 666 | } |
| 332 | 667 | ||
| 333 | inputFinished() { | 668 | inputFinished() { |
| 334 | - _InputFinished(this.handle); | 669 | + this.Module._InputFinished(this.handle); |
| 335 | } | 670 | } |
| 336 | }; | 671 | }; |
| 337 | 672 | ||
| 338 | class OnlineRecognizer { | 673 | class OnlineRecognizer { |
| 339 | - constructor(configObj) { | ||
| 340 | - let config = initSherpaOnnxOnlineRecognizerConfig(configObj) | ||
| 341 | - let handle = _CreateOnlineRecognizer(config.ptr); | 674 | + constructor(configObj, Module) { |
| 675 | + this.config = configObj; | ||
| 676 | + const config = initSherpaOnnxOnlineRecognizerConfig(configObj, Module) | ||
| 677 | + const handle = Module._CreateOnlineRecognizer(config.ptr); | ||
| 342 | 678 | ||
| 343 | - freeConfig(config); | 679 | + freeConfig(config, Module); |
| 344 | 680 | ||
| 345 | this.handle = handle; | 681 | this.handle = handle; |
| 682 | + this.Module = Module; | ||
| 346 | } | 683 | } |
| 347 | 684 | ||
| 348 | free() { | 685 | free() { |
| 349 | - _DestroyOnlineRecognizer(this.handle); | 686 | + this.Module._DestroyOnlineRecognizer(this.handle); |
| 350 | this.handle = 0 | 687 | this.handle = 0 |
| 351 | } | 688 | } |
| 352 | 689 | ||
| 353 | createStream() { | 690 | createStream() { |
| 354 | - let handle = _CreateOnlineStream(this.handle); | ||
| 355 | - return new OnlineStream(handle); | 691 | + const handle = this.Module._CreateOnlineStream(this.handle); |
| 692 | + return new OnlineStream(handle, this.Module); | ||
| 356 | } | 693 | } |
| 357 | 694 | ||
| 358 | isReady(stream) { | 695 | isReady(stream) { |
| 359 | - return _IsOnlineStreamReady(this.handle, stream.handle) == 1; | 696 | + return this.Module._IsOnlineStreamReady(this.handle, stream.handle) == 1; |
| 360 | } | 697 | } |
| 361 | 698 | ||
| 362 | decode(stream) { | 699 | decode(stream) { |
| 363 | - return _DecodeOnlineStream(this.handle, stream.handle); | 700 | + this.Module._DecodeOnlineStream(this.handle, stream.handle); |
| 364 | } | 701 | } |
| 365 | 702 | ||
| 366 | isEndpoint(stream) { | 703 | isEndpoint(stream) { |
| 367 | - return _IsEndpoint(this.handle, stream.handle) == 1; | 704 | + return this.Module._IsEndpoint(this.handle, stream.handle) == 1; |
| 368 | } | 705 | } |
| 369 | 706 | ||
| 370 | reset(stream) { | 707 | reset(stream) { |
| 371 | - _Reset(this.handle, stream.handle); | 708 | + this.Module._Reset(this.handle, stream.handle); |
| 372 | } | 709 | } |
| 373 | 710 | ||
| 374 | getResult(stream) { | 711 | getResult(stream) { |
| 375 | - let r = _GetOnlineStreamResult(this.handle, stream.handle); | ||
| 376 | - let textPtr = getValue(r, 'i8*'); | ||
| 377 | - let text = UTF8ToString(textPtr); | ||
| 378 | - _DestroyOnlineRecognizerResult(r); | 712 | + const r = this.Module._GetOnlineStreamResult(this.handle, stream.handle); |
| 713 | + const textPtr = this.Module.getValue(r, 'i8*'); | ||
| 714 | + const text = this.Module.UTF8ToString(textPtr); | ||
| 715 | + this.Module._DestroyOnlineRecognizerResult(r); | ||
| 379 | return text; | 716 | return text; |
| 380 | } | 717 | } |
| 381 | } | 718 | } |
| 719 | + | ||
| 720 | +if (typeof process == 'object' && typeof process.versions == 'object' && | ||
| 721 | + typeof process.versions.node == 'string') { | ||
| 722 | + module.exports = { | ||
| 723 | + createOnlineRecognizer, | ||
| 724 | + OfflineRecognizer, | ||
| 725 | + }; | ||
| 726 | +} |
wasm/nodejs/CMakeLists.txt
0 → 100644
| 1 | +if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}) | ||
| 2 | + message(FATAL_ERROR "Please use ./build-wasm-simd-nodejs.sh to build for wasm NodeJS") | ||
| 3 | +endif() | ||
| 4 | + | ||
| 5 | +set(exported_functions | ||
| 6 | + #tts | ||
| 7 | + PrintOfflineTtsConfig | ||
| 8 | + SherpaOnnxCreateOfflineTts | ||
| 9 | + SherpaOnnxDestroyOfflineTts | ||
| 10 | + SherpaOnnxDestroyOfflineTtsGeneratedAudio | ||
| 11 | + SherpaOnnxOfflineTtsGenerate | ||
| 12 | + SherpaOnnxOfflineTtsGenerateWithCallback | ||
| 13 | + SherpaOnnxOfflineTtsNumSpeakers | ||
| 14 | + SherpaOnnxOfflineTtsSampleRate | ||
| 15 | + SherpaOnnxWriteWave | ||
| 16 | + # streaming asr | ||
| 17 | + AcceptWaveform | ||
| 18 | + CreateOnlineRecognizer | ||
| 19 | + CreateOnlineStream | ||
| 20 | + DecodeOnlineStream | ||
| 21 | + DestroyOnlineRecognizer | ||
| 22 | + DestroyOnlineRecognizerResult | ||
| 23 | + DestroyOnlineStream | ||
| 24 | + GetOnlineStreamResult | ||
| 25 | + InputFinished | ||
| 26 | + IsEndpoint | ||
| 27 | + IsOnlineStreamReady | ||
| 28 | + Reset | ||
| 29 | + # non-streaming ASR | ||
| 30 | + PrintOfflineRecognizerConfig | ||
| 31 | + CreateOfflineRecognizer | ||
| 32 | + DestroyOfflineRecognizer | ||
| 33 | + CreateOfflineStream | ||
| 34 | + DestroyOfflineStream | ||
| 35 | + AcceptWaveformOffline | ||
| 36 | + DecodeOfflineStream | ||
| 37 | + DecodeMultipleOfflineStreams | ||
| 38 | + GetOfflineStreamResult | ||
| 39 | + DestroyOfflineRecognizerResult | ||
| 40 | +) | ||
| 41 | + | ||
| 42 | + | ||
| 43 | +set(mangled_exported_functions) | ||
| 44 | +foreach(x IN LISTS exported_functions) | ||
| 45 | + list(APPEND mangled_exported_functions "_${x}") | ||
| 46 | +endforeach() | ||
| 47 | +list(JOIN mangled_exported_functions "," all_exported_functions) | ||
| 48 | + | ||
| 49 | +include_directories(${CMAKE_SOURCE_DIR}) | ||
| 50 | +set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1") | ||
| 51 | +string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB | ||
| 52 | +string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ") | ||
| 53 | +string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ") | ||
| 54 | +string(APPEND MY_FLAGS " -sNODERAWFS=1 ") | ||
| 55 | +# NOTE: keep EXPORTED_RUNTIME_METHODS in a single setting; a repeated -sEXPORTED_RUNTIME_METHODS flag is overridden by the last occurrence. | ||
| 56 | +string(APPEND MY_FLAGS " -sMODULARIZE=1 -sWASM_ASYNC_COMPILATION=0 ") | ||
| 57 | + | ||
| 58 | +message(STATUS "MY_FLAGS: ${MY_FLAGS}") | ||
| 59 | + | ||
| 60 | +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") | ||
| 61 | +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}") | ||
| 62 | +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}") | ||
| 63 | + | ||
| 64 | +add_executable(sherpa-onnx-wasm-nodejs sherpa-onnx-wasm-nodejs.cc) | ||
| 65 | +target_link_libraries(sherpa-onnx-wasm-nodejs sherpa-onnx-core sherpa-onnx-c-api) | ||
| 66 | +install(TARGETS sherpa-onnx-wasm-nodejs DESTINATION bin/wasm/nodejs) | ||
| 67 | + | ||
| 68 | +install( | ||
| 69 | + FILES | ||
| 70 | + ${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js | ||
| 71 | + ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js | ||
| 72 | + "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js" | ||
| 73 | + "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm" | ||
| 74 | + DESTINATION | ||
| 75 | + bin/wasm/nodejs | ||
| 76 | +) |
wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
0 → 100644
| 1 | +// wasm/nodejs/sherpa-onnx-wasm-nodejs.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 4 | +#include <stdio.h> | ||
| 5 | + | ||
| 6 | +#include <algorithm> | ||
| 7 | +#include <memory> | ||
| 8 | + | ||
| 9 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 10 | + | ||
| 11 | +extern "C" { | ||
| 12 | + | ||
| 13 | +static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, ""); | ||
| 14 | +static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, ""); | ||
| 15 | + | ||
| 16 | +static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, ""); | ||
| 17 | +static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 2 * 4, ""); | ||
| 18 | +static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, ""); | ||
| 19 | +static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, ""); | ||
| 20 | + | ||
| 21 | +static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | ||
| 22 | + sizeof(SherpaOnnxOfflineTransducerModelConfig) + | ||
| 23 | + sizeof(SherpaOnnxOfflineParaformerModelConfig) + | ||
| 24 | + sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) + | ||
| 25 | + sizeof(SherpaOnnxOfflineWhisperModelConfig) + | ||
| 26 | + sizeof(SherpaOnnxOfflineTdnnModelConfig) + 5 * 4, | ||
| 27 | + ""); | ||
| 28 | +static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); | ||
| 29 | +static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) == | ||
| 30 | + sizeof(SherpaOnnxFeatureConfig) + | ||
| 31 | + sizeof(SherpaOnnxOfflineLMConfig) + | ||
| 32 | + sizeof(SherpaOnnxOfflineModelConfig) + 4 * 4, | ||
| 33 | + ""); | ||
| 34 | + | ||
| 35 | +void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) { | ||
| 36 | + auto tts_model_config = &tts_config->model; | ||
| 37 | + auto vits_model_config = &tts_model_config->vits; | ||
| 38 | + fprintf(stdout, "----------vits model config----------\n"); | ||
| 39 | + fprintf(stdout, "model: %s\n", vits_model_config->model); | ||
| 40 | + fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon); | ||
| 41 | + fprintf(stdout, "tokens: %s\n", vits_model_config->tokens); | ||
| 42 | + fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir); | ||
| 43 | + fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale); | ||
| 44 | + fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w); | ||
| 45 | + fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale); | ||
| 46 | + | ||
| 47 | + fprintf(stdout, "----------tts model config----------\n"); | ||
| 48 | + fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads); | ||
| 49 | + fprintf(stdout, "debug: %d\n", tts_model_config->debug); | ||
| 50 | + fprintf(stdout, "provider: %s\n", tts_model_config->provider); | ||
| 51 | + | ||
| 52 | + fprintf(stdout, "----------tts config----------\n"); | ||
| 53 | + fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts); | ||
| 54 | + fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences); | ||
| 55 | +} | ||
| 56 | + | ||
| 57 | +void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | ||
| 58 | + auto model_config = &config->model_config; | ||
| 59 | + auto feat = &config->feat_config; | ||
| 60 | + auto transducer = &model_config->transducer; | ||
| 61 | + auto paraformer = &model_config->paraformer; | ||
| 62 | + auto nemo_ctc = &model_config->nemo_ctc; | ||
| 63 | + auto whisper = &model_config->whisper; | ||
| 64 | + auto tdnn = &model_config->tdnn; | ||
| 65 | + | ||
| 66 | + fprintf(stdout, "----------offline transducer model config----------\n"); | ||
| 67 | + fprintf(stdout, "encoder: %s\n", transducer->encoder); | ||
| 68 | + fprintf(stdout, "decoder: %s\n", transducer->decoder); | ||
| 69 | + fprintf(stdout, "joiner: %s\n", transducer->joiner); | ||
| 70 | + | ||
| 71 | + fprintf(stdout, "----------offline paraformer model config----------\n"); | ||
| 72 | + fprintf(stdout, "model: %s\n", paraformer->model); | ||
| 73 | + | ||
| 74 | + fprintf(stdout, "----------offline nemo_ctc model config----------\n"); | ||
| 75 | + fprintf(stdout, "model: %s\n", nemo_ctc->model); | ||
| 76 | + | ||
| 77 | + fprintf(stdout, "----------offline whisper model config----------\n"); | ||
| 78 | + fprintf(stdout, "encoder: %s\n", whisper->encoder); | ||
| 79 | + fprintf(stdout, "decoder: %s\n", whisper->decoder); | ||
| 80 | + | ||
| 81 | + fprintf(stdout, "----------offline tdnn model config----------\n"); | ||
| 82 | + fprintf(stdout, "model: %s\n", tdnn->model); | ||
| 83 | + | ||
| 84 | + fprintf(stdout, "tokens: %s\n", model_config->tokens); | ||
| 85 | + fprintf(stdout, "num_threads: %d\n", model_config->num_threads); | ||
| 86 | + fprintf(stdout, "provider: %s\n", model_config->provider); | ||
| 87 | + fprintf(stdout, "debug: %d\n", model_config->debug); | ||
| 88 | + fprintf(stdout, "model type: %s\n", model_config->model_type); | ||
| 89 | + | ||
| 90 | + fprintf(stdout, "----------feat config----------\n"); | ||
| 91 | + fprintf(stdout, "sample rate: %d\n", feat->sample_rate); | ||
| 92 | + fprintf(stdout, "feat dim: %d\n", feat->feature_dim); | ||
| 93 | + | ||
| 94 | + fprintf(stdout, "----------recognizer config----------\n"); | ||
| 95 | + fprintf(stdout, "decoding method: %s\n", config->decoding_method); | ||
| 96 | + fprintf(stdout, "max active paths: %d\n", config->max_active_paths); | ||
| 97 | + fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file); | ||
| 98 | + fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score); | ||
| 99 | +} | ||
| 100 | + | ||
| 101 | +void CopyHeap(const char *src, int32_t num_bytes, char *dst) { | ||
| 102 | + std::copy(src, src + num_bytes, dst); | ||
| 103 | +} | ||
| 104 | +} |
| @@ -22,7 +22,7 @@ Module.onRuntimeInitialized = function() { | @@ -22,7 +22,7 @@ Module.onRuntimeInitialized = function() { | ||
| 22 | console.log('Model files downloaded!'); | 22 | console.log('Model files downloaded!'); |
| 23 | 23 | ||
| 24 | console.log('Initializing tts ......'); | 24 | console.log('Initializing tts ......'); |
| 25 | - tts = initSherpaOnnxOfflineTts() | 25 | + tts = createOfflineTts(Module) |
| 26 | if (tts.numSpeakers > 1) { | 26 | if (tts.numSpeakers > 1) { |
| 27 | speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`; | 27 | speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`; |
| 28 | } | 28 | } |
| 1 | 1 | ||
| 2 | -function freeConfig(config) { | 2 | +function freeConfig(config, Module) { |
| 3 | if ('buffer' in config) { | 3 | if ('buffer' in config) { |
| 4 | - _free(config.buffer); | 4 | + Module._free(config.buffer); |
| 5 | } | 5 | } |
| 6 | 6 | ||
| 7 | if ('config' in config) { | 7 | if ('config' in config) { |
| 8 | - freeConfig(config.config) | 8 | + freeConfig(config.config, Module) |
| 9 | } | 9 | } |
| 10 | 10 | ||
| 11 | - _free(config.ptr); | 11 | + Module._free(config.ptr); |
| 12 | } | 12 | } |
| 13 | 13 | ||
| 14 | // The user should free the returned pointers | 14 | // The user should free the returned pointers |
| 15 | -function initSherpaOnnxOfflineTtsVitsModelConfig(config) { | ||
| 16 | - let modelLen = lengthBytesUTF8(config.model) + 1; | ||
| 17 | - let lexiconLen = lengthBytesUTF8(config.lexicon) + 1; | ||
| 18 | - let tokensLen = lengthBytesUTF8(config.tokens) + 1; | ||
| 19 | - let dataDirLen = lengthBytesUTF8(config.dataDir) + 1; | 15 | +function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { |
| 16 | + const modelLen = Module.lengthBytesUTF8(config.model) + 1; | ||
| 17 | + const lexiconLen = Module.lengthBytesUTF8(config.lexicon) + 1; | ||
| 18 | + const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; | ||
| 19 | + const dataDirLen = Module.lengthBytesUTF8(config.dataDir) + 1; | ||
| 20 | 20 | ||
| 21 | - let n = modelLen + lexiconLen + tokensLen + dataDirLen; | 21 | + const n = modelLen + lexiconLen + tokensLen + dataDirLen; |
| 22 | 22 | ||
| 23 | - let buffer = _malloc(n); | 23 | + const buffer = Module._malloc(n); |
| 24 | 24 | ||
| 25 | - let len = 7 * 4; | ||
| 26 | - let ptr = _malloc(len); | 25 | + const len = 7 * 4; |
| 26 | + const ptr = Module._malloc(len); | ||
| 27 | 27 | ||
| 28 | let offset = 0; | 28 | let offset = 0; |
| 29 | - stringToUTF8(config.model, buffer + offset, modelLen); | 29 | + Module.stringToUTF8(config.model, buffer + offset, modelLen); |
| 30 | offset += modelLen; | 30 | offset += modelLen; |
| 31 | 31 | ||
| 32 | - stringToUTF8(config.lexicon, buffer + offset, lexiconLen); | 32 | + Module.stringToUTF8(config.lexicon, buffer + offset, lexiconLen); |
| 33 | offset += lexiconLen; | 33 | offset += lexiconLen; |
| 34 | 34 | ||
| 35 | - stringToUTF8(config.tokens, buffer + offset, tokensLen); | 35 | + Module.stringToUTF8(config.tokens, buffer + offset, tokensLen); |
| 36 | offset += tokensLen; | 36 | offset += tokensLen; |
| 37 | 37 | ||
| 38 | - stringToUTF8(config.dataDir, buffer + offset, dataDirLen); | 38 | + Module.stringToUTF8(config.dataDir, buffer + offset, dataDirLen); |
| 39 | offset += dataDirLen; | 39 | offset += dataDirLen; |
| 40 | 40 | ||
| 41 | offset = 0; | 41 | offset = 0; |
| 42 | - setValue(ptr, buffer + offset, 'i8*'); | 42 | + Module.setValue(ptr, buffer + offset, 'i8*'); |
| 43 | offset += modelLen; | 43 | offset += modelLen; |
| 44 | 44 | ||
| 45 | - setValue(ptr + 4, buffer + offset, 'i8*'); | 45 | + Module.setValue(ptr + 4, buffer + offset, 'i8*'); |
| 46 | offset += lexiconLen; | 46 | offset += lexiconLen; |
| 47 | 47 | ||
| 48 | - setValue(ptr + 8, buffer + offset, 'i8*'); | 48 | + Module.setValue(ptr + 8, buffer + offset, 'i8*'); |
| 49 | offset += tokensLen; | 49 | offset += tokensLen; |
| 50 | 50 | ||
| 51 | - setValue(ptr + 12, buffer + offset, 'i8*'); | 51 | + Module.setValue(ptr + 12, buffer + offset, 'i8*'); |
| 52 | offset += dataDirLen; | 52 | offset += dataDirLen; |
| 53 | 53 | ||
| 54 | - setValue(ptr + 16, config.noiseScale, 'float'); | ||
| 55 | - setValue(ptr + 20, config.noiseScaleW, 'float'); | ||
| 56 | - setValue(ptr + 24, config.lengthScale, 'float'); | 54 | + Module.setValue(ptr + 16, config.noiseScale, 'float'); |
| 55 | + Module.setValue(ptr + 20, config.noiseScaleW, 'float'); | ||
| 56 | + Module.setValue(ptr + 24, config.lengthScale, 'float'); | ||
| 57 | 57 | ||
| 58 | return { | 58 | return { |
| 59 | buffer: buffer, ptr: ptr, len: len, | 59 | buffer: buffer, ptr: ptr, len: len, |
| 60 | } | 60 | } |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | -function initSherpaOnnxOfflineTtsModelConfig(config) { | ||
| 64 | - let vitsModelConfig = | ||
| 65 | - initSherpaOnnxOfflineTtsVitsModelConfig(config.offlineTtsVitsModelConfig); | 63 | +function initSherpaOnnxOfflineTtsModelConfig(config, Module) { |
| 64 | + const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig( | ||
| 65 | + config.offlineTtsVitsModelConfig, Module); | ||
| 66 | 66 | ||
| 67 | - let len = vitsModelConfig.len + 3 * 4; | ||
| 68 | - let ptr = _malloc(len); | 67 | + const len = vitsModelConfig.len + 3 * 4; |
| 68 | + const ptr = Module._malloc(len); | ||
| 69 | 69 | ||
| 70 | let offset = 0; | 70 | let offset = 0; |
| 71 | - _CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset); | 71 | + Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset); |
| 72 | offset += vitsModelConfig.len; | 72 | offset += vitsModelConfig.len; |
| 73 | 73 | ||
| 74 | - setValue(ptr + offset, config.numThreads, 'i32'); | 74 | + Module.setValue(ptr + offset, config.numThreads, 'i32'); |
| 75 | offset += 4; | 75 | offset += 4; |
| 76 | 76 | ||
| 77 | - setValue(ptr + offset, config.debug, 'i32'); | 77 | + Module.setValue(ptr + offset, config.debug, 'i32'); |
| 78 | offset += 4; | 78 | offset += 4; |
| 79 | 79 | ||
| 80 | - let providerLen = lengthBytesUTF8(config.provider) + 1; | ||
| 81 | - let buffer = _malloc(providerLen); | ||
| 82 | - stringToUTF8(config.provider, buffer, providerLen); | ||
| 83 | - setValue(ptr + offset, buffer, 'i8*'); | 80 | + const providerLen = Module.lengthBytesUTF8(config.provider) + 1; |
| 81 | + const buffer = Module._malloc(providerLen); | ||
| 82 | + Module.stringToUTF8(config.provider, buffer, providerLen); | ||
| 83 | + Module.setValue(ptr + offset, buffer, 'i8*'); | ||
| 84 | 84 | ||
| 85 | return { | 85 | return { |
| 86 | buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig, | 86 | buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig, |
| 87 | } | 87 | } |
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | -function initSherpaOnnxOfflineTtsConfig(config) { | ||
| 91 | - let modelConfig = | ||
| 92 | - initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig); | ||
| 93 | - let len = modelConfig.len + 2 * 4; | ||
| 94 | - let ptr = _malloc(len); | 90 | +function initSherpaOnnxOfflineTtsConfig(config, Module) { |
| 91 | + const modelConfig = | ||
| 92 | + initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module); | ||
| 93 | + const len = modelConfig.len + 2 * 4; | ||
| 94 | + const ptr = Module._malloc(len); | ||
| 95 | 95 | ||
| 96 | let offset = 0; | 96 | let offset = 0; |
| 97 | - _CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); | 97 | + Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); |
| 98 | offset += modelConfig.len; | 98 | offset += modelConfig.len; |
| 99 | 99 | ||
| 100 | - let ruleFstsLen = lengthBytesUTF8(config.ruleFsts) + 1; | ||
| 101 | - let buffer = _malloc(ruleFstsLen); | ||
| 102 | - stringToUTF8(config.ruleFsts, buffer, ruleFstsLen); | ||
| 103 | - setValue(ptr + offset, buffer, 'i8*'); | 100 | + const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1; |
| 101 | + const buffer = Module._malloc(ruleFstsLen); | ||
| 102 | + Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen); | ||
| 103 | + Module.setValue(ptr + offset, buffer, 'i8*'); | ||
| 104 | offset += 4; | 104 | offset += 4; |
| 105 | 105 | ||
| 106 | - setValue(ptr + offset, config.maxNumSentences, 'i32'); | 106 | + Module.setValue(ptr + offset, config.maxNumSentences, 'i32'); |
| 107 | 107 | ||
| 108 | return { | 108 | return { |
| 109 | buffer: buffer, ptr: ptr, len: len, config: modelConfig, | 109 | buffer: buffer, ptr: ptr, len: len, config: modelConfig, |
| @@ -111,19 +111,21 @@ function initSherpaOnnxOfflineTtsConfig(config) { | @@ -111,19 +111,21 @@ function initSherpaOnnxOfflineTtsConfig(config) { | ||
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | class OfflineTts { | 113 | class OfflineTts { |
| 114 | - constructor(configObj) { | ||
| 115 | - let config = initSherpaOnnxOfflineTtsConfig(configObj) | ||
| 116 | - let handle = _SherpaOnnxCreateOfflineTts(config.ptr); | 114 | + constructor(configObj, Module) { |
| 115 | + console.log(configObj) | ||
| 116 | + const config = initSherpaOnnxOfflineTtsConfig(configObj, Module) | ||
| 117 | + const handle = Module._SherpaOnnxCreateOfflineTts(config.ptr); | ||
| 117 | 118 | ||
| 118 | - freeConfig(config); | 119 | + freeConfig(config, Module); |
| 119 | 120 | ||
| 120 | this.handle = handle; | 121 | this.handle = handle; |
| 121 | - this.sampleRate = _SherpaOnnxOfflineTtsSampleRate(this.handle); | ||
| 122 | - this.numSpeakers = _SherpaOnnxOfflineTtsNumSpeakers(this.handle); | 122 | + this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle); |
| 123 | + this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle); | ||
| 124 | + this.Module = Module | ||
| 123 | } | 125 | } |
| 124 | 126 | ||
| 125 | free() { | 127 | free() { |
| 126 | - _SherpaOnnxDestroyOfflineTts(this.handle); | 128 | + this.Module._SherpaOnnxDestroyOfflineTts(this.handle); |
| 127 | this.handle = 0 | 129 | this.handle = 0 |
| 128 | } | 130 | } |
| 129 | 131 | ||
| @@ -133,29 +135,44 @@ class OfflineTts { | @@ -133,29 +135,44 @@ class OfflineTts { | ||
| 133 | // speed: 1.0 | 135 | // speed: 1.0 |
| 134 | // } | 136 | // } |
| 135 | generate(config) { | 137 | generate(config) { |
| 136 | - let textLen = lengthBytesUTF8(config.text) + 1; | ||
| 137 | - let textPtr = _malloc(textLen); | ||
| 138 | - stringToUTF8(config.text, textPtr, textLen); | 138 | + const textLen = this.Module.lengthBytesUTF8(config.text) + 1; |
| 139 | + const textPtr = this.Module._malloc(textLen); | ||
| 140 | + this.Module.stringToUTF8(config.text, textPtr, textLen); | ||
| 139 | 141 | ||
| 140 | - let h = _SherpaOnnxOfflineTtsGenerate( | 142 | + const h = this.Module._SherpaOnnxOfflineTtsGenerate( |
| 141 | this.handle, textPtr, config.sid, config.speed); | 143 | this.handle, textPtr, config.sid, config.speed); |
| 142 | 144 | ||
| 143 | - let numSamples = HEAP32[h / 4 + 1]; | ||
| 144 | - let sampleRate = HEAP32[h / 4 + 2]; | 145 | + const numSamples = this.Module.HEAP32[h / 4 + 1]; |
| 146 | + const sampleRate = this.Module.HEAP32[h / 4 + 2]; | ||
| 145 | 147 | ||
| 146 | - let samplesPtr = HEAP32[h / 4] / 4; | ||
| 147 | - let samples = new Float32Array(numSamples); | 148 | + const samplesPtr = this.Module.HEAP32[h / 4] / 4; |
| 149 | + const samples = new Float32Array(numSamples); | ||
| 148 | for (let i = 0; i < numSamples; i++) { | 150 | for (let i = 0; i < numSamples; i++) { |
| 149 | - samples[i] = HEAPF32[samplesPtr + i]; | 151 | + samples[i] = this.Module.HEAPF32[samplesPtr + i]; |
| 150 | } | 152 | } |
| 151 | 153 | ||
| 152 | - _SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); | 154 | + this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); |
| 153 | return {samples: samples, sampleRate: sampleRate}; | 155 | return {samples: samples, sampleRate: sampleRate}; |
| 154 | } | 156 | } |
| 157 | + save(filename, audio) { | ||
| 158 | + const samples = audio.samples; | ||
| 159 | + const sampleRate = audio.sampleRate; | ||
| 160 | + const ptr = this.Module._malloc(samples.length * 4); | ||
| 161 | + for (let i = 0; i < samples.length; i++) { | ||
| 162 | + this.Module.HEAPF32[ptr / 4 + i] = samples[i]; | ||
| 163 | + } | ||
| 164 | + | ||
| 165 | + const filenameLen = this.Module.lengthBytesUTF8(filename) + 1; | ||
| 166 | + const buffer = this.Module._malloc(filenameLen); | ||
| 167 | + this.Module.stringToUTF8(filename, buffer, filenameLen); | ||
| 168 | + this.Module._SherpaOnnxWriteWave(ptr, samples.length, sampleRate, buffer); | ||
| 169 | + this.Module._free(buffer); | ||
| 170 | + this.Module._free(ptr); | ||
| 171 | + } | ||
| 155 | } | 172 | } |
| 156 | 173 | ||
| 157 | -function initSherpaOnnxOfflineTts() { | ||
| 158 | - let offlineTtsVitsModelConfig = { | 174 | +function createOfflineTts(Module, myConfig) { |
| 175 | + const offlineTtsVitsModelConfig = { | ||
| 159 | model: './model.onnx', | 176 | model: './model.onnx', |
| 160 | lexicon: '', | 177 | lexicon: '', |
| 161 | tokens: './tokens.txt', | 178 | tokens: './tokens.txt', |
| @@ -164,7 +181,7 @@ function initSherpaOnnxOfflineTts() { | @@ -164,7 +181,7 @@ function initSherpaOnnxOfflineTts() { | ||
| 164 | noiseScaleW: 0.8, | 181 | noiseScaleW: 0.8, |
| 165 | lengthScale: 1.0, | 182 | lengthScale: 1.0, |
| 166 | }; | 183 | }; |
| 167 | - let offlineTtsModelConfig = { | 184 | + const offlineTtsModelConfig = { |
| 168 | offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, | 185 | offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, |
| 169 | numThreads: 1, | 186 | numThreads: 1, |
| 170 | debug: 1, | 187 | debug: 1, |
| @@ -176,5 +193,16 @@ function initSherpaOnnxOfflineTts() { | @@ -176,5 +193,16 @@ function initSherpaOnnxOfflineTts() { | ||
| 176 | maxNumSentences: 1, | 193 | maxNumSentences: 1, |
| 177 | } | 194 | } |
| 178 | 195 | ||
| 179 | - return new OfflineTts(offlineTtsConfig); | 196 | + if (myConfig) { |
| 197 | + offlineTtsConfig = myConfig; // NOTE(review): if offlineTtsConfig is declared with const, this reassignment throws — confirm the declaration above | ||
| 198 | + } | ||
| 199 | + | ||
| 200 | + return new OfflineTts(offlineTtsConfig, Module); | ||
| 201 | +} | ||
| 202 | + | ||
| 203 | +if (typeof process == 'object' && typeof process.versions == 'object' && | ||
| 204 | + typeof process.versions.node == 'string') { | ||
| 205 | + module.exports = { | ||
| 206 | + createOfflineTts, | ||
| 207 | + }; | ||
| 180 | } | 208 | } |
-
请 注册 或 登录 后发表评论