Showing 34 changed files with 2704 additions and 4 deletions
.github/scripts/test-nodejs-npm.sh
0 → 100755
#!/usr/bin/env bash

set -ex

echo "dir: $d"
cd $d
npm install
git status
ls -lh
ls -lh node_modules

# offline asr

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
rm sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
node ./test-offline-nemo-ctc.js
rm -rf sherpa-onnx-nemo-ctc-en-conformer-small

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
node ./test-offline-paraformer.js
rm -rf sherpa-onnx-paraformer-zh-2023-03-28

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
rm sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
node ./test-offline-transducer.js
rm -rf sherpa-onnx-zipformer-en-2023-06-26

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test-offline-whisper.js
rm -rf sherpa-onnx-whisper-tiny.en

# online asr
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer.js
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node ./test-online-transducer.js
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20

# offline tts
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
tar xvf vits-vctk.tar.bz2
rm vits-vctk.tar.bz2
node ./test-offline-tts-en.js
rm -rf vits-vctk

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xvf vits-zh-aishell3.tar.bz2
rm vits-zh-aishell3.tar.bz2
node ./test-offline-tts-zh.js
rm -rf vits-zh-aishell3
.github/workflows/npm.yaml
0 → 100644
name: npm

on:
  workflow_dispatch:

concurrency:
  group: npm-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  nodejs:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - uses: actions/setup-node@v3
        with:
          node-version: 13
          registry-url: 'https://registry.npmjs.org'

      - name: Display node version
        shell: bash
        run: |
          node --version
          npm --version
          cd nodejs-examples

          npm install npm@6.14.4 -g
          npm install npm@6.14.4
          npm --version

      - name: Build nodejs package
        shell: bash
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
        run: |
          cd scripts/nodejs
          ./run.sh
          npm install
          rm run.sh
          npm ci
          npm publish --provenance --access public
.github/workflows/test-nodejs-npm.yaml
0 → 100644
name: test-nodejs-npm

on:
  workflow_dispatch:

  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 23:50 UTC time every day
    - cron: "50 23 * * *"

concurrency:
  group: test-nodejs-npm-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test-nodejs-npm:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest] #, windows-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - uses: actions/setup-node@v3
        with:
          node-version: 13
          registry-url: 'https://registry.npmjs.org'

      - name: Display node version
        shell: bash
        run: |
          node --version
          npm --version

      - name: Run tests
        shell: bash
        run: |
          node --version
          npm --version

          export d=nodejs-examples
          ./.github/scripts/test-nodejs-npm.sh
.github/workflows/test-nodejs.yaml
0 → 100644
name: test-nodejs

on:
  push:
    branches:
      - master

  pull_request:
    branches:
      - master

  workflow_dispatch:

concurrency:
  group: test-nodejs-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test-nodejs:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest] #, windows-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-Release-ON

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build
          cmake -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install ..
          make -j2
          make install
          ls -lh install/lib

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Copy files
        shell: bash
        run: |
          os=${{ matrix.os }}
          if [[ $os == 'ubuntu-latest' ]]; then
            mkdir -p scripts/nodejs/lib/linux-x64
            dst=scripts/nodejs/lib/linux-x64
          elif [[ $os == 'macos-latest' ]]; then
            mkdir -p scripts/nodejs/lib/osx-x64
            dst=scripts/nodejs/lib/osx-x64
          fi
          cp -v build/install/lib/* $dst/

      - name: replace files
        shell: bash
        run: |
          cd nodejs-examples
          files=$(ls -1 *.js)
          for f in ${files[@]}; do
            echo $f
            sed -i.bak s%\'sherpa-onnx\'%\'./index.js\'% $f
            git status
          done
          git diff
          cp *.js ../scripts/nodejs

      - uses: actions/setup-node@v3
        with:
          node-version: 13
          registry-url: 'https://registry.npmjs.org'

      - name: Display node version
        shell: bash
        run: |
          node --version
          npm --version

      - name: Run tests
        shell: bash
        run: |
          node --version
          npm --version
          export d=scripts/nodejs

          pushd $d
          npm install
          npm install wav
          popd

          ./.github/scripts/test-nodejs-npm.sh
nodejs-examples/.gitignore
0 → 100644
nodejs-examples/README.md
0 → 100644
# Introduction

This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).

Before you continue, please first install the npm package `sherpa-onnx` by

```bash
npm install sherpa-onnx
```

In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)
for text-to-speech and speech-to-text.

**Caution**: If you get the following error:
```
/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67
  if (match = err.match(/^(([^ \t()])+\.so([^ \t:()])*):([ \t])*/)) {
                  ^

TypeError: Cannot read properties of null (reading 'match')
    at new DynamicLibrary (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67:21)
    at Object.Library (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/library.js:47:10)
    at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/sherpa-onnx3/index.js:268:28)
    at Module._compile (node:internal/modules/cjs/loader:1376:14)
    at Module._extensions..js (node:internal/modules/cjs/loader:1435:10)
    at Module.load (node:internal/modules/cjs/loader:1207:32)
    at Module._load (node:internal/modules/cjs/loader:1023:12)
    at Module.require (node:internal/modules/cjs/loader:1235:19)
    at require (node:internal/modules/helpers:176:18)
    at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/test-offline-tts-zh.js:3:21)
```

please downgrade your Node.js to version v13.14.0. See also
https://github.com/node-ffi-napi/node-ffi-napi/issues/244
and
https://github.com/node-ffi-napi/node-ffi-napi/issues/97 .

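Since this error is raised the moment the package is first `require`d (see the
stack trace above), it can help to fail fast before running any example. Below
is a minimal, hypothetical check script; it assumes only what this README
already states, namely that v13.14.0 is a known-good Node.js version:

```js
// check-install.js (hypothetical helper): verify that sherpa-onnx loads
console.log('Node.js version:', process.version);

try {
  require('sherpa-onnx');
  console.log('sherpa-onnx loaded successfully');
} catch (e) {
  console.error('Failed to load sherpa-onnx:', e.message);
  console.error('If you see the ffi-napi error above, try Node.js v13.14.0');
  process.exit(1);
}
```
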
# Text-to-speech

In the following, we demonstrate how to run text-to-speech.

## ./test-offline-tts-en.js

[./test-offline-tts-en.js](./test-offline-tts-en.js) shows how to use
a VITS pretrained model
[VCTK](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers)
for text-to-speech.

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
tar xvf vits-vctk.tar.bz2
node ./test-offline-tts-en.js
```

## ./test-offline-tts-zh.js

[./test-offline-tts-zh.js](./test-offline-tts-zh.js) shows how to use
a VITS pretrained model
[aishell3](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3)
for text-to-speech.

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xvf vits-zh-aishell3.tar.bz2
node ./test-offline-tts-zh.js
```

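Both scripts use the same small API surface. The sketch below is a condensed
version of [./test-offline-tts-en.js](./test-offline-tts-en.js); the model
paths, text, speaker ID, and speed are placeholders you can adapt:

```js
const sherpa_onnx = require('sherpa-onnx');

// point the VITS config at the files extracted from the model archive
const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
vits.model = './vits-vctk/vits-vctk.onnx';
vits.lexicon = './vits-vctk/lexicon.txt';
vits.tokens = './vits-vctk/tokens.txt';

const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
modelConfig.vits = vits;

const config = new sherpa_onnx.OfflineTtsConfig();
config.model = modelConfig;

const tts = new sherpa_onnx.OfflineTts(config);

// generate(text, speakerId, speed) synthesizes one utterance
const audio = tts.generate('Good morning. How are you doing?', 99, 1.0);
audio.save('./test-en.wav');

tts.free();  // release the underlying native resources
```
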
# Speech-to-text

In the following, we demonstrate how to decode files and how to perform
speech recognition from a microphone with `nodejs`. We need to install two
additional npm packages:

```bash
npm install wav naudiodon2
```

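All of the file-decoding examples below share one pattern: read a 16-bit,
single-channel PCM wav file with the `wav` package, convert the `Int16Array`
samples to a `Float32Array` in the range `[-1, 1)`, and hand the float samples
to a recognizer stream. Here is that shared pattern, condensed from the
examples in this directory into a stand-alone helper (the recognizer itself
is not involved here):

```js
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

// collect all samples of a 16-bit PCM wav file as one Float32Array
// in [-1, 1), which is the format the recognizer streams expect
function readWave(filename, callback) {
  const reader = new wav.Reader();
  const readable = new Readable().wrap(reader);
  const buf = [];

  readable.on('readable', () => {
    let chunk;
    while ((chunk = readable.read()) != null) {
      // reinterpret the raw bytes as 16-bit signed integers
      const int16Samples = new Int16Array(
          chunk.buffer, chunk.byteOffset,
          chunk.length / Int16Array.BYTES_PER_ELEMENT);

      const floatSamples = new Float32Array(int16Samples.length);
      for (let i = 0; i < floatSamples.length; i++) {
        floatSamples[i] = int16Samples[i] / 32768.0;
      }
      buf.push(floatSamples);
    }
  });

  fs.createReadStream(filename, {highWaterMark: 4096})
      .pipe(reader)
      .on('finish', () => {
        // flatten the per-chunk arrays into one array
        const n = buf.reduce((sum, b) => sum + b.length, 0);
        const all = new Float32Array(n);
        let offset = 0;
        for (const b of buf) {
          all.set(b, offset);
          offset += b.length;
        }
        callback(all);
      });
}

// usage: readWave('./test_wavs/0.wav', (samples) => console.log(samples.length));
```
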
## ./test-offline-nemo-ctc.js

[./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates
how to decode a file with a NeMo CTC model. In the code we use
[stt_en_conformer_ctc_small](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/english.html#stt-en-conformer-ctc-small).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
node ./test-offline-nemo-ctc.js
```

## ./test-offline-paraformer.js

[./test-offline-paraformer.js](./test-offline-paraformer.js) demonstrates
how to decode a file with a non-streaming Paraformer model. In the code we use
[sherpa-onnx-paraformer-zh-2023-03-28](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
node ./test-offline-paraformer.js
```

## ./test-offline-transducer.js

[./test-offline-transducer.js](./test-offline-transducer.js) demonstrates
how to decode a file with a non-streaming transducer model. In the code we use
[sherpa-onnx-zipformer-en-2023-06-26](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-zipformer-en-2023-06-26-english).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
node ./test-offline-transducer.js
```

## ./test-offline-whisper.js
[./test-offline-whisper.js](./test-offline-whisper.js) demonstrates
how to decode a file with a Whisper model. In the code we use
[sherpa-onnx-whisper-tiny.en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test-offline-whisper.js
```

## ./test-online-paraformer-microphone.js
[./test-online-paraformer-microphone.js](./test-online-paraformer-microphone.js)
demonstrates how to do real-time speech recognition from a microphone
with a streaming Paraformer model. In the code we use
[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer-microphone.js
```

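The microphone examples enable endpoint detection
(`recognizerConfig.enableEndpoint = 1`) so that continuous speech is split into
segments. Condensed into a single helper, the per-chunk logic they run looks
like this (a sketch based on the example files, not a separate API):

```js
// feed one chunk of microphone samples (Float32Array) to a streaming
// recognizer and report finished segments; condensed from
// ./test-online-paraformer-microphone.js
function processChunk(recognizer, stream, samples, onSegment) {
  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  // decode every frame that is ready
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const text = recognizer.getResult(stream).text;

  // an endpoint (a pause in speech) closes the current segment
  if (recognizer.isEndpoint(stream)) {
    if (text.length > 0) {
      onSegment(text);
    }
    recognizer.reset(stream);
  }

  return text;  // the partial result for the current segment
}
```
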
## ./test-online-paraformer.js
[./test-online-paraformer.js](./test-online-paraformer.js) demonstrates
how to decode a file using a streaming Paraformer model. In the code we use
[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer.js
```

## ./test-online-transducer-microphone.js
[./test-online-transducer-microphone.js](./test-online-transducer-microphone.js)
demonstrates how to do real-time speech recognition from a microphone using a
streaming transducer model. In the code
we use [sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node ./test-online-transducer-microphone.js
```

## ./test-online-transducer.js
[./test-online-transducer.js](./test-online-transducer.js) demonstrates
how to decode a file using a streaming transducer model. In the code
we use [sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node ./test-online-transducer.js
```

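Note how the streaming (online) examples differ from the offline ones above:
an offline recognizer receives the whole utterance and decodes once, while an
online recognizer decodes incrementally as chunks arrive. The two helpers
below sketch both call patterns, using only API calls that appear in the
example files:

```js
// offline: buffer the whole utterance, then decode in one shot
function decodeOffline(recognizer, allSamples) {
  const stream = recognizer.createStream();
  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, allSamples);
  recognizer.decode(stream);
  const text = recognizer.getResult(stream).text;
  stream.free();
  return text;
}

// online: feed one chunk at a time and decode whatever frames are ready
function decodeOnlineChunk(recognizer, stream, chunk) {
  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, chunk);
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }
  return recognizer.getResult(stream).text;  // partial result so far
}
```
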
## ./test-vad-microphone-offline-paraformer.js

[./test-vad-microphone-offline-paraformer.js](./test-vad-microphone-offline-paraformer.js)
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
with a non-streaming Paraformer model for speech recognition from a microphone.

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
node ./test-vad-microphone-offline-paraformer.js
```

## ./test-vad-microphone-offline-transducer.js

[./test-vad-microphone-offline-transducer.js](./test-vad-microphone-offline-transducer.js)
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
with a non-streaming transducer model for speech recognition from a microphone.

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
node ./test-vad-microphone-offline-transducer.js
```

## ./test-vad-microphone-offline-whisper.js

[./test-vad-microphone-offline-whisper.js](./test-vad-microphone-offline-whisper.js)
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
with a Whisper model for speech recognition from a microphone.

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test-vad-microphone-offline-whisper.js
```

## ./test-vad-microphone.js

[./test-vad-microphone.js](./test-vad-microphone.js)
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
node ./test-vad-microphone.js
```
nodejs-examples/package.json
0 → 100644
nodejs-examples/test-offline-nemo-ctc.js
0 → 100644
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createRecognizer() {
  const featConfig = new sherpa_onnx.FeatureConfig();
  featConfig.sampleRate = 16000;
  featConfig.featureDim = 80;

  // configure the offline NeMo CTC model
  const nemoCtc = new sherpa_onnx.OfflineNemoEncDecCtcModelConfig();
  nemoCtc.model = './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx';
  const tokens = './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt';

  const modelConfig = new sherpa_onnx.OfflineModelConfig();
  modelConfig.nemoCtc = nemoCtc;
  modelConfig.tokens = tokens;
  modelConfig.modelType = 'nemo_ctc';

  const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  recognizerConfig.featConfig = featConfig;
  recognizerConfig.modelConfig = modelConfig;
  recognizerConfig.decodingMethod = 'greedy_search';

  const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  return recognizer;
}

const recognizer = createRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
const buf = [];

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {highWaterMark: 4096})
    .pipe(reader)
    .on('finish', function() {
      // tail padding
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);

      buf.push(floatSamples);
      const flattened =
          Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));

      stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
      recognizer.decode(stream);
      const r = recognizer.getResult(stream);
      console.log(r.text);

      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);

    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    buf.push(floatSamples);
  }
});
nodejs-examples/test-offline-paraformer.js
0 → 100644
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)

const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createRecognizer() {
  const featConfig = new sherpa_onnx.FeatureConfig();
  featConfig.sampleRate = 16000;
  featConfig.featureDim = 80;

  // configure the offline Paraformer model
  const paraformer = new sherpa_onnx.OfflineParaformerModelConfig();
  paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.onnx';
  const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt';

  const modelConfig = new sherpa_onnx.OfflineModelConfig();
  modelConfig.paraformer = paraformer;
  modelConfig.tokens = tokens;
  modelConfig.modelType = 'paraformer';

  const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  recognizerConfig.featConfig = featConfig;
  recognizerConfig.modelConfig = modelConfig;
  recognizerConfig.decodingMethod = 'greedy_search';

  const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  return recognizer;
}

const recognizer = createRecognizer();
const stream = recognizer.createStream();

const waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
const buf = [];

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {highWaterMark: 4096})
    .pipe(reader)
    .on('finish', function() {
      // tail padding
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);

      buf.push(floatSamples);
      const flattened =
          Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));

      stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
      recognizer.decode(stream);
      const r = recognizer.getResult(stream);
      console.log(r.text);

      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);
    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    buf.push(floatSamples);
  }
});
nodejs-examples/test-offline-transducer.js
0 → 100644
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createRecognizer() {
  const featConfig = new sherpa_onnx.FeatureConfig();
  featConfig.sampleRate = 16000;
  featConfig.featureDim = 80;

  // configure the offline transducer model
  const transducer = new sherpa_onnx.OfflineTransducerModelConfig();
  transducer.encoder =
      './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx';
  transducer.decoder =
      './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx';
  transducer.joiner =
      './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx';
  const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt';

  const modelConfig = new sherpa_onnx.OfflineModelConfig();
  modelConfig.transducer = transducer;
  modelConfig.tokens = tokens;
  modelConfig.modelType = 'transducer';

  const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  recognizerConfig.featConfig = featConfig;
  recognizerConfig.modelConfig = modelConfig;
  recognizerConfig.decodingMethod = 'greedy_search';

  const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  return recognizer;
}

const recognizer = createRecognizer();
const stream = recognizer.createStream();

const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
const buf = [];

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {highWaterMark: 4096})
    .pipe(reader)
    .on('finish', function() {
      // tail padding
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);

      buf.push(floatSamples);
      const flattened =
          Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));

      stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
      recognizer.decode(stream);
      const r = recognizer.getResult(stream);
      console.log(r.text);

      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);
    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    buf.push(floatSamples);
  }
});
nodejs-examples/test-offline-tts-en.js
0 → 100644
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineTts() {
  const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
  vits.model = './vits-vctk/vits-vctk.onnx';
  vits.lexicon = './vits-vctk/lexicon.txt';
  vits.tokens = './vits-vctk/tokens.txt';

  const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
  modelConfig.vits = vits;

  const config = new sherpa_onnx.OfflineTtsConfig();
  config.model = modelConfig;

  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();
const speakerId = 99;
const speed = 1.0;
const audio =
    tts.generate('Good morning. How are you doing?', speakerId, speed);
audio.save('./test-en.wav');
console.log('Saved to test-en.wav successfully.');
tts.free();
nodejs-examples/test-offline-tts-zh.js
0 → 100644
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineTts() {
  const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
  vits.model = './vits-zh-aishell3/vits-aishell3.onnx';
  vits.lexicon = './vits-zh-aishell3/lexicon.txt';
  vits.tokens = './vits-zh-aishell3/tokens.txt';

  const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
  modelConfig.vits = vits;

  const config = new sherpa_onnx.OfflineTtsConfig();
  config.model = modelConfig;
  config.ruleFsts = './vits-zh-aishell3/rule.fst';

  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();
const speakerId = 66;
const speed = 1.0;
const audio = tts.generate('3年前中国总人口是1411778724人', speakerId, speed);
audio.save('./test-zh.wav');
console.log('Saved to test-zh.wav successfully.');
tts.free();
nodejs-examples/test-offline-whisper.js
0 → 100644
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createRecognizer() {
  const featConfig = new sherpa_onnx.FeatureConfig();
  featConfig.sampleRate = 16000;
  featConfig.featureDim = 80;

  // configure the offline Whisper model
  const whisper = new sherpa_onnx.OfflineWhisperModelConfig();
  whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
  whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';
  const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';

  const modelConfig = new sherpa_onnx.OfflineModelConfig();
  modelConfig.whisper = whisper;
  modelConfig.tokens = tokens;
  modelConfig.modelType = 'whisper';

  const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  recognizerConfig.featConfig = featConfig;
  recognizerConfig.modelConfig = modelConfig;
  recognizerConfig.decodingMethod = 'greedy_search';

  const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  return recognizer;
}

const recognizer = createRecognizer();
const stream = recognizer.createStream();

const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
const buf = [];

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {highWaterMark: 4096})
    .pipe(reader)
    .on('finish', function() {
      // tail padding
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);

      buf.push(floatSamples);
      const flattened =
          Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));

      stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
      recognizer.decode(stream);
      const r = recognizer.getResult(stream);
      console.log(r.text);

      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);

    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    buf.push(floatSamples);
  }
});
nodejs-examples/test-online-paraformer-microphone.js
0 → 100644
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx');

function createRecognizer() {
  const featConfig = new sherpa_onnx.FeatureConfig();
  featConfig.sampleRate = 16000;
  featConfig.featureDim = 80;

  const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();
  paraformer.encoder =
      './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx';
  paraformer.decoder =
      './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx';
  const tokens =
      './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';

  const modelConfig = new sherpa_onnx.OnlineModelConfig();
  modelConfig.paraformer = paraformer;
  modelConfig.tokens = tokens;
  modelConfig.modelType = 'paraformer';

  const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
  recognizerConfig.featConfig = featConfig;
  recognizerConfig.modelConfig = modelConfig;
  recognizerConfig.decodingMethod = 'greedy_search';
  recognizerConfig.enableEndpoint = 1;

  const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
  return recognizer;
}
const recognizer = createRecognizer();
const stream = recognizer.createStream();

const display = new sherpa_onnx.Display(50);

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected; if
                         // set to false, just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.on('close', () => {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak');
nodejs-examples/test-online-paraformer.js
0 → 100644
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createRecognizer() {
  const featConfig = new sherpa_onnx.FeatureConfig();
  featConfig.sampleRate = 16000;
  featConfig.featureDim = 80;

  const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();
  paraformer.encoder =
      './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx';
  paraformer.decoder =
      './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx';
  const tokens =
      './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';

  const modelConfig = new sherpa_onnx.OnlineModelConfig();
  modelConfig.paraformer = paraformer;
  modelConfig.tokens = tokens;
  modelConfig.modelType = 'paraformer';

  const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
  recognizerConfig.featConfig = featConfig;
  recognizerConfig.modelConfig = modelConfig;
  recognizerConfig.decodingMethod = 'greedy_search';

  const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
  return recognizer;
}
const recognizer = createRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);

function decode(samples) {
  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }
  const r = recognizer.getResult(stream);
  console.log(r.text);
}

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {highWaterMark: 4096})
    .pipe(reader)
    .on('finish', function() {
      // tail padding
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
      decode(floatSamples);
      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);

    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    decode(floatSamples);
  }
});
nodejs-examples/test-online-transducer-microphone.js
0 → 100644
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx');

function createRecognizer() {
  const featConfig = new sherpa_onnx.FeatureConfig();
  featConfig.sampleRate = 16000;
  featConfig.featureDim = 80;

  // configure the online (streaming) transducer model
  const transducer = new sherpa_onnx.OnlineTransducerModelConfig();
  transducer.encoder =
      './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
  transducer.decoder =
      './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
  transducer.joiner =
      './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx';
  const tokens =
      './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';

  const modelConfig = new sherpa_onnx.OnlineModelConfig();
  modelConfig.transducer = transducer;
  modelConfig.tokens = tokens;
  modelConfig.modelType = 'zipformer';

  const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
  recognizerConfig.featConfig = featConfig;
  recognizerConfig.modelConfig = modelConfig;
  recognizerConfig.decodingMethod = 'greedy_search';
  recognizerConfig.enableEndpoint = 1;

  const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
  return recognizer;
}
const recognizer = createRecognizer();
const stream = recognizer.createStream();
const display = new sherpa_onnx.Display(50);

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected; if
                         // set to false, just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.on('close', () => {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak');
nodejs-examples/test-online-transducer.js
0 → 100644
| 1 | +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +const fs = require('fs'); | ||
| 4 | +const {Readable} = require('stream'); | ||
| 5 | +const wav = require('wav'); | ||
| 6 | + | ||
| 7 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 8 | + | ||
| 9 | +function createRecognizer() { | ||
| 10 | + const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 11 | + featConfig.sampleRate = 16000; | ||
| 12 | + featConfig.featureDim = 80; | ||
| 13 | + | ||
| 14 | + // test online recognizer | ||
| 15 | + const transducer = new sherpa_onnx.OnlineTransducerModelConfig(); | ||
| 16 | + transducer.encoder = | ||
| 17 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx'; | ||
| 18 | + transducer.decoder = | ||
| 19 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx'; | ||
| 20 | + transducer.joiner = | ||
| 21 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx'; | ||
| 22 | + const tokens = | ||
| 23 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt'; | ||
| 24 | + | ||
| 25 | + const modelConfig = new sherpa_onnx.OnlineModelConfig(); | ||
| 26 | + modelConfig.transducer = transducer; | ||
| 27 | + modelConfig.tokens = tokens; | ||
| 28 | + modelConfig.modelType = 'zipformer'; | ||
| 29 | + | ||
| 30 | + const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); | ||
| 31 | + recognizerConfig.featConfig = featConfig; | ||
| 32 | + recognizerConfig.modelConfig = modelConfig; | ||
| 33 | + recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 34 | + | ||
| 35 | + recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); | ||
| 36 | + return recognizer; | ||
| 37 | +} | ||
| 38 | +recognizer = createRecognizer(); | ||
| 39 | +stream = recognizer.createStream(); | ||
| 40 | + | ||
| 41 | +const waveFilename = | ||
| 42 | + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav'; | ||
| 43 | + | ||
| 44 | +const reader = new wav.Reader(); | ||
| 45 | +const readable = new Readable().wrap(reader); | ||
| 46 | + | ||
| 47 | +function decode(samples) { | ||
| 48 | + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples); | ||
| 49 | + | ||
| 50 | + while (recognizer.isReady(stream)) { | ||
| 51 | + recognizer.decode(stream); | ||
| 52 | + } | ||
| 53 | + const r = recognizer.getResult(stream); | ||
| 54 | + console.log(r.text); | ||
| 55 | +} | ||
| 56 | + | ||
| 57 | +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { | ||
| 58 | + if (sampleRate != recognizer.config.featConfig.sampleRate) { | ||
| 59 | + throw new Error(`Only sampleRate ${ | ||
| 60 | + recognizer.config.featConfig.sampleRate} is supported. Given ${sampleRate}`); | ||
| 61 | + } | ||
| 62 | + | ||
| 63 | + if (audioFormat != 1) { | ||
| 64 | + throw new Error(`Only PCM format is supported. Given ${audioFormat}`); | ||
| 65 | + } | ||
| 66 | + | ||
| 67 | + if (channels != 1) { | ||
| 68 | + throw new Error(`Only a single channel is supported. Given ${channels}`); | ||
| 69 | + } | ||
| 70 | + | ||
| 71 | + if (bitDepth != 16) { | ||
| 72 | + throw new Error(`Only 16-bit samples are supported. Given ${bitDepth}`); | ||
| 73 | + } | ||
| 74 | +}); | ||
| 75 | + | ||
| 76 | +fs.createReadStream(waveFilename, {'highWaterMark': 4096}) | ||
| 77 | + .pipe(reader) | ||
| 78 | + .on('finish', function() { | ||
| 79 | + // tail padding | ||
| 80 | + const floatSamples = | ||
| 81 | + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5); | ||
| 82 | + decode(floatSamples); | ||
| 83 | + stream.free(); | ||
| 84 | + recognizer.free(); | ||
| 85 | + }); | ||
| 86 | + | ||
| 87 | +readable.on('readable', function() { | ||
| 88 | + let chunk; | ||
| 89 | + while ((chunk = readable.read()) != null) { | ||
| 90 | + const int16Samples = new Int16Array( | ||
| 91 | + chunk.buffer, chunk.byteOffset, | ||
| 92 | + chunk.length / Int16Array.BYTES_PER_ELEMENT); | ||
| 93 | + | ||
| 94 | + const floatSamples = new Float32Array(int16Samples.length); | ||
| 95 | + | ||
| 96 | + for (let i = 0; i < floatSamples.length; i++) { | ||
| 97 | + floatSamples[i] = int16Samples[i] / 32768.0; | ||
| 98 | + } | ||
| 99 | + | ||
| 100 | + decode(floatSamples); | ||
| 101 | + } | ||
| 102 | +}); |
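A note on the tail padding above: the FFI table in `scripts/nodejs/index.js` declares the C API's `InputFinished`, but `OnlineStream` does not expose it, so this example presumably flushes the model's last buffered frames by feeding half a second of silence instead. Spelled out with the values configured above:

    const sampleRate = 16000;                            // featConfig.sampleRate
    const padding = new Float32Array(sampleRate * 0.5);  // 8000 samples = 0.5 s
    // Float32Array is zero-initialized, so this is pure silence; decoding it
    // lets the streaming model emit tokens for the final real frames.
    decode(padding);  // decode() is the helper defined in this example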
| 1 | +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | +const portAudio = require('naudiodon2'); | ||
| 5 | +console.log(portAudio.getDevices()); | ||
| 6 | + | ||
| 7 | +function createOfflineRecognizer() { | ||
| 8 | + const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 9 | + featConfig.sampleRate = 16000; | ||
| 10 | + featConfig.featureDim = 80; | ||
| 11 | + | ||
| 12 | + // test offline recognizer | ||
| 13 | + const paraformer = new sherpa_onnx.OfflineParaformerModelConfig(); | ||
| 14 | + paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx'; | ||
| 15 | + const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt'; | ||
| 16 | + | ||
| 17 | + const modelConfig = new sherpa_onnx.OfflineModelConfig(); | ||
| 18 | + modelConfig.paraformer = paraformer; | ||
| 19 | + modelConfig.tokens = tokens; | ||
| 20 | + modelConfig.modelType = 'paraformer'; | ||
| 21 | + | ||
| 22 | + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); | ||
| 23 | + recognizerConfig.featConfig = featConfig; | ||
| 24 | + recognizerConfig.modelConfig = modelConfig; | ||
| 25 | + recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 26 | + | ||
| 27 | + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); | ||
| 28 | + return recognizer; | ||
| 29 | +} | ||
| 30 | + | ||
| 31 | +function createVad() { | ||
| 32 | + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); | ||
| 33 | + sileroVadModelConfig.model = './silero_vad.onnx'; | ||
| 34 | + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds | ||
| 35 | + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds | ||
| 36 | + sileroVadModelConfig.windowSize = 512; | ||
| 37 | + | ||
| 38 | + const vadModelConfig = new sherpa_onnx.VadModelConfig(); | ||
| 39 | + vadModelConfig.sileroVad = sileroVadModelConfig; | ||
| 40 | + vadModelConfig.sampleRate = 16000; | ||
| 41 | + | ||
| 42 | + const bufferSizeInSeconds = 60; | ||
| 43 | + const vad = new sherpa_onnx.VoiceActivityDetector( | ||
| 44 | + vadModelConfig, bufferSizeInSeconds); | ||
| 45 | + return vad; | ||
| 46 | +} | ||
| 47 | + | ||
| 48 | +const recognizer = createOfflineRecognizer(); | ||
| 49 | +const vad = createVad(); | ||
| 50 | + | ||
| 51 | +const bufferSizeInSeconds = 30; | ||
| 52 | +const buffer = | ||
| 53 | + new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); | ||
| 54 | + | ||
| 55 | +const ai = new portAudio.AudioIO({ | ||
| 56 | + inOptions: { | ||
| 57 | + channelCount: 1, | ||
| 58 | + sampleFormat: portAudio.SampleFormatFloat32, | ||
| 59 | + sampleRate: vad.config.sampleRate, | ||
| 60 | + deviceId: -1, // Use -1 or omit the deviceId to select the default device | ||
| 61 | + closeOnError: true // Close the stream if an audio error is detected; if | ||
| 62 | + // set to false, just log the error | ||
| 63 | + } | ||
| 64 | +}); | ||
| 65 | + | ||
| 66 | +let printed = false; | ||
| 67 | +let index = 0; | ||
| 68 | +ai.on('data', data => { | ||
| 69 | + const windowSize = vad.config.sileroVad.windowSize; | ||
| 70 | + buffer.push(new Float32Array(data.buffer)); | ||
| 71 | + while (buffer.size() > windowSize) { | ||
| 72 | + const samples = buffer.get(buffer.head(), windowSize); | ||
| 73 | + buffer.pop(windowSize); | ||
| 74 | + vad.acceptWaveform(samples); | ||
| 75 | + } | ||
| 76 | + | ||
| 77 | + while (!vad.isEmpty()) { | ||
| 78 | + const segment = vad.front(); | ||
| 79 | + vad.pop(); | ||
| 80 | + const stream = recognizer.createStream(); | ||
| 81 | + stream.acceptWaveform( | ||
| 82 | + recognizer.config.featConfig.sampleRate, segment.samples); | ||
| 83 | + recognizer.decode(stream); | ||
| 84 | + const r = recognizer.getResult(stream); | ||
| 85 | + stream.free(); | ||
| 86 | + if (r.text.length > 0) { | ||
| 87 | + console.log(`${index}: ${r.text}`); | ||
| 88 | + index += 1; | ||
| 89 | + } | ||
| 90 | + } | ||
| 91 | +}); | ||
| 92 | + | ||
| 93 | +ai.on('close', () => { | ||
| 94 | + console.log('Free resources'); | ||
| 95 | + recognizer.free(); | ||
| 96 | + vad.free(); | ||
| 97 | + buffer.free(); | ||
| 98 | +}); | ||
| 99 | + | ||
| 100 | +ai.start(); | ||
| 101 | +console.log('Started! Please speak'); |
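A note on the drain loop in the `data` callback: naudiodon2 delivers chunks of whatever size the host audio API chooses, while the Silero VAD consumes fixed windows of exactly `windowSize` samples, so the `CircularBuffer` bridges the two. With the values configured above:

    const sampleRate = 16000;  // vadModelConfig.sampleRate
    const windowSize = 512;    // sileroVadModelConfig.windowSize
    console.log((windowSize / sampleRate) * 1000);  // 32 ms of audio per VAD call
    // The `while (buffer.size() > windowSize)` loop therefore runs zero or more
    // times per microphone callback, and any partial window simply stays
    // buffered until the next chunk arrives.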
| 1 | +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | +const portAudio = require('naudiodon2'); | ||
| 5 | +console.log(portAudio.getDevices()); | ||
| 6 | + | ||
| 7 | +function createOfflineRecognizer() { | ||
| 8 | + const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 9 | + featConfig.sampleRate = 16000; | ||
| 10 | + featConfig.featureDim = 80; | ||
| 11 | + | ||
| 12 | + // test offline recognizer | ||
| 13 | + const transducer = new sherpa_onnx.OfflineTransducerModelConfig(); | ||
| 14 | + transducer.encoder = | ||
| 15 | + './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx'; | ||
| 16 | + transducer.decoder = | ||
| 17 | + './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx'; | ||
| 18 | + transducer.joiner = | ||
| 19 | + './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx'; | ||
| 20 | + const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt'; | ||
| 21 | + | ||
| 22 | + const modelConfig = new sherpa_onnx.OfflineModelConfig(); | ||
| 23 | + modelConfig.transducer = transducer; | ||
| 24 | + modelConfig.tokens = tokens; | ||
| 25 | + modelConfig.modelType = 'transducer'; | ||
| 26 | + | ||
| 27 | + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); | ||
| 28 | + recognizerConfig.featConfig = featConfig; | ||
| 29 | + recognizerConfig.modelConfig = modelConfig; | ||
| 30 | + recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 31 | + | ||
| 32 | + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); | ||
| 33 | + return recognizer; | ||
| 34 | +} | ||
| 35 | + | ||
| 36 | +function createVad() { | ||
| 37 | + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); | ||
| 38 | + sileroVadModelConfig.model = './silero_vad.onnx'; | ||
| 39 | + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds | ||
| 40 | + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds | ||
| 41 | + sileroVadModelConfig.windowSize = 512; | ||
| 42 | + | ||
| 43 | + const vadModelConfig = new sherpa_onnx.VadModelConfig(); | ||
| 44 | + vadModelConfig.sileroVad = sileroVadModelConfig; | ||
| 45 | + vadModelConfig.sampleRate = 16000; | ||
| 46 | + | ||
| 47 | + const bufferSizeInSeconds = 60; | ||
| 48 | + const vad = new sherpa_onnx.VoiceActivityDetector( | ||
| 49 | + vadModelConfig, bufferSizeInSeconds); | ||
| 50 | + return vad; | ||
| 51 | +} | ||
| 52 | + | ||
| 53 | +const recognizer = createOfflineRecognizer(); | ||
| 54 | +const vad = createVad(); | ||
| 55 | + | ||
| 56 | +const bufferSizeInSeconds = 30; | ||
| 57 | +const buffer = | ||
| 58 | + new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); | ||
| 59 | + | ||
| 60 | +const ai = new portAudio.AudioIO({ | ||
| 61 | + inOptions: { | ||
| 62 | + channelCount: 1, | ||
| 63 | + closeOnError: true, // Close the stream if an audio error is detected; if | ||
| 64 | + // set to false, just log the error | ||
| 65 | + deviceId: -1, // Use -1 or omit the deviceId to select the default device | ||
| 66 | + sampleFormat: portAudio.SampleFormatFloat32, | ||
| 67 | + sampleRate: vad.config.sampleRate | ||
| 68 | + } | ||
| 69 | +}); | ||
| 70 | + | ||
| 71 | +let printed = false; | ||
| 72 | +let index = 0; | ||
| 73 | +ai.on('data', data => { | ||
| 74 | + const windowSize = vad.config.sileroVad.windowSize; | ||
| 75 | + buffer.push(new Float32Array(data.buffer)); | ||
| 76 | + while (buffer.size() > windowSize) { | ||
| 77 | + const samples = buffer.get(buffer.head(), windowSize); | ||
| 78 | + buffer.pop(windowSize); | ||
| 79 | + vad.acceptWaveform(samples); | ||
| 80 | + } | ||
| 81 | + | ||
| 82 | + while (!vad.isEmpty()) { | ||
| 83 | + const segment = vad.front(); | ||
| 84 | + vad.pop(); | ||
| 85 | + const stream = recognizer.createStream(); | ||
| 86 | + stream.acceptWaveform( | ||
| 87 | + recognizer.config.featConfig.sampleRate, segment.samples); | ||
| 88 | + recognizer.decode(stream); | ||
| 89 | + const r = recognizer.getResult(stream); | ||
| 90 | + stream.free(); | ||
| 91 | + if (r.text.length > 0) { | ||
| 92 | + console.log(`${index}: ${r.text}`); | ||
| 93 | + index += 1; | ||
| 94 | + } | ||
| 95 | + } | ||
| 96 | +}); | ||
| 97 | + | ||
| 98 | +ai.on('close', () => { | ||
| 99 | + console.log('Free resources'); | ||
| 100 | + recognizer.free(); | ||
| 101 | + vad.free(); | ||
| 102 | + buffer.free(); | ||
| 103 | +}); | ||
| 104 | + | ||
| 105 | +ai.start(); | ||
| 106 | +console.log('Started! Please speak'); |
| 1 | +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | +const portAudio = require('naudiodon2'); | ||
| 5 | +console.log(portAudio.getDevices()); | ||
| 6 | + | ||
| 7 | +function createOfflineRecognizer() { | ||
| 8 | + const featConfig = new sherpa_onnx.FeatureConfig(); | ||
| 9 | + featConfig.sampleRate = 16000; | ||
| 10 | + featConfig.featureDim = 80; | ||
| 11 | + | ||
| 12 | + // test offline recognizer | ||
| 13 | + const whisper = new sherpa_onnx.OfflineWhisperModelConfig(); | ||
| 14 | + whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx'; | ||
| 15 | + whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx'; | ||
| 16 | + const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt'; | ||
| 17 | + | ||
| 18 | + const modelConfig = new sherpa_onnx.OfflineModelConfig(); | ||
| 19 | + modelConfig.whisper = whisper; | ||
| 20 | + modelConfig.tokens = tokens; | ||
| 21 | + modelConfig.modelType = 'whisper'; | ||
| 22 | + | ||
| 23 | + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); | ||
| 24 | + recognizerConfig.featConfig = featConfig; | ||
| 25 | + recognizerConfig.modelConfig = modelConfig; | ||
| 26 | + recognizerConfig.decodingMethod = 'greedy_search'; | ||
| 27 | + | ||
| 28 | + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); | ||
| 29 | + return recognizer; | ||
| 30 | +} | ||
| 31 | + | ||
| 32 | +function createVad() { | ||
| 33 | + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); | ||
| 34 | + sileroVadModelConfig.model = './silero_vad.onnx'; | ||
| 35 | + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds | ||
| 36 | + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds | ||
| 37 | + sileroVadModelConfig.windowSize = 512; | ||
| 38 | + | ||
| 39 | + const vadModelConfig = new sherpa_onnx.VadModelConfig(); | ||
| 40 | + vadModelConfig.sileroVad = sileroVadModelConfig; | ||
| 41 | + vadModelConfig.sampleRate = 16000; | ||
| 42 | + | ||
| 43 | + const bufferSizeInSeconds = 60; | ||
| 44 | + const vad = new sherpa_onnx.VoiceActivityDetector( | ||
| 45 | + vadModelConfig, bufferSizeInSeconds); | ||
| 46 | + return vad; | ||
| 47 | +} | ||
| 48 | + | ||
| 49 | +const recognizer = createOfflineRecognizer(); | ||
| 50 | +const vad = createVad(); | ||
| 51 | + | ||
| 52 | +const bufferSizeInSeconds = 30; | ||
| 53 | +const buffer = | ||
| 54 | + new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); | ||
| 55 | + | ||
| 56 | +const ai = new portAudio.AudioIO({ | ||
| 57 | + inOptions: { | ||
| 58 | + channelCount: 1, | ||
| 59 | + closeOnError: true, // Close the stream if an audio error is detected; if | ||
| 60 | + // set to false, just log the error | ||
| 61 | + deviceId: -1, // Use -1 or omit the deviceId to select the default device | ||
| 62 | + sampleFormat: portAudio.SampleFormatFloat32, | ||
| 63 | + sampleRate: vad.config.sampleRate | ||
| 64 | + } | ||
| 65 | +}); | ||
| 66 | + | ||
| 67 | +let printed = false; | ||
| 68 | +let index = 0; | ||
| 69 | +ai.on('data', data => { | ||
| 70 | + const windowSize = vad.config.sileroVad.windowSize; | ||
| 71 | + buffer.push(new Float32Array(data.buffer)); | ||
| 72 | + while (buffer.size() > windowSize) { | ||
| 73 | + const samples = buffer.get(buffer.head(), windowSize); | ||
| 74 | + buffer.pop(windowSize); | ||
| 75 | + vad.acceptWaveform(samples); | ||
| 76 | + } | ||
| 77 | + | ||
| 78 | + while (!vad.isEmpty()) { | ||
| 79 | + const segment = vad.front(); | ||
| 80 | + vad.pop(); | ||
| 81 | + const stream = recognizer.createStream(); | ||
| 82 | + stream.acceptWaveform( | ||
| 83 | + recognizer.config.featConfig.sampleRate, segment.samples); | ||
| 84 | + recognizer.decode(stream); | ||
| 85 | + const r = recognizer.getResult(stream); | ||
| 86 | + stream.free(); | ||
| 87 | + if (r.text.length > 0) { | ||
| 88 | + console.log(`${index}: ${r.text}`); | ||
| 89 | + index += 1; | ||
| 90 | + } | ||
| 91 | + } | ||
| 92 | +}); | ||
| 93 | + | ||
| 94 | +ai.on('close', () => { | ||
| 95 | + console.log('Free resources'); | ||
| 96 | + recognizer.free(); | ||
| 97 | + vad.free(); | ||
| 98 | + buffer.free(); | ||
| 99 | +}); | ||
| 100 | + | ||
| 101 | +ai.start(); | ||
| 102 | +console.log('Started! Please speak'); |
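The three VAD-plus-offline-recognizer examples above share the same per-segment lifecycle, which is worth making explicit: offline streams are one-shot, so each detected segment gets a fresh stream that is freed right after decoding. A sketch that factors the shared code into a helper, using only the wrapper methods shown above:

    function recognizeSegment(recognizer, segment) {
      const stream = recognizer.createStream();  // one stream per utterance
      stream.acceptWaveform(
          recognizer.config.featConfig.sampleRate, segment.samples);
      recognizer.decode(stream);
      const text = recognizer.getResult(stream).text;
      stream.free();  // offline streams are not reusable; free after each segment
      return text;
    }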
nodejs-examples/test-vad-microphone.js
0 → 100644
| 1 | +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | + | ||
| 3 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | +const portAudio = require('naudiodon2'); | ||
| 5 | +console.log(portAudio.getDevices()); | ||
| 6 | + | ||
| 7 | +function createVad() { | ||
| 8 | + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); | ||
| 9 | + sileroVadModelConfig.model = './silero_vad.onnx'; | ||
| 10 | + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds | ||
| 11 | + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds | ||
| 12 | + sileroVadModelConfig.windowSize = 512; | ||
| 13 | + | ||
| 14 | + const vadModelConfig = new sherpa_onnx.VadModelConfig(); | ||
| 15 | + vadModelConfig.sileroVad = sileroVadModelConfig; | ||
| 16 | + vadModelConfig.sampleRate = 16000; | ||
| 17 | + | ||
| 18 | + const bufferSizeInSeconds = 60; | ||
| 19 | + const vad = new sherpa_onnx.VoiceActivityDetector( | ||
| 20 | + vadModelConfig, bufferSizeInSeconds); | ||
| 21 | + return vad; | ||
| 22 | +} | ||
| 23 | +const vad = createVad(); | ||
| 24 | +const bufferSizeInSeconds = 30; | ||
| 25 | +const buffer = | ||
| 26 | + new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); | ||
| 27 | + | ||
| 28 | +const ai = new portAudio.AudioIO({ | ||
| 29 | + inOptions: { | ||
| 30 | + channelCount: 1, | ||
| 31 | + closeOnError: true, // Close the stream if an audio error is detected; if | ||
| 32 | + // set to false, just log the error | ||
| 33 | + deviceId: -1, // Use -1 or omit the deviceId to select the default device | ||
| 34 | + sampleFormat: portAudio.SampleFormatFloat32, | ||
| 35 | + sampleRate: vad.config.sampleRate | ||
| 36 | + } | ||
| 37 | +}); | ||
| 38 | + | ||
| 39 | +let printed = false; | ||
| 40 | +let index = 0; | ||
| 41 | +ai.on('data', data => { | ||
| 42 | + const windowSize = vad.config.sileroVad.windowSize; | ||
| 43 | + buffer.push(new Float32Array(data.buffer)); | ||
| 44 | + while (buffer.size() > windowSize) { | ||
| 45 | + const samples = buffer.get(buffer.head(), windowSize); | ||
| 46 | + buffer.pop(windowSize); | ||
| 47 | + vad.acceptWaveform(samples); | ||
| 48 | + if (vad.isDetected() && !printed) { | ||
| 49 | + console.log(`${index}: Detected speech`) | ||
| 50 | + printed = true; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + if (!vad.isDetected()) { | ||
| 54 | + printed = false; | ||
| 55 | + } | ||
| 56 | + | ||
| 57 | + while (!vad.isEmpty()) { | ||
| 58 | + const segment = vad.front(); | ||
| 59 | + vad.pop(); | ||
| 60 | + const duration = segment.samples.length / vad.config.sampleRate; | ||
| 61 | + console.log(`${index}: End of speech. Duration: ${duration} seconds`); | ||
| 62 | + index += 1; | ||
| 63 | + } | ||
| 64 | + } | ||
| 65 | +}); | ||
| 66 | + | ||
| 67 | +ai.on('close', () => { | ||
| 68 | + console.log('Free resources'); | ||
| 69 | + vad.free(); | ||
| 70 | + buffer.free(); | ||
| 71 | +}); | ||
| 72 | + | ||
| 73 | +ai.start(); | ||
| 74 | +console.log('Started! Please speak'); |
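If you want to keep the detected segments rather than only log their durations, one option is the `wav` package that the file-based examples in this patch already depend on. A sketch (the `FileWriter` options follow that package's documented API, the Int16 conversion mirrors `test-online-transducer.js`, and the helper and file names are made up):

    const wav = require('wav');

    function saveSegment(segment, sampleRate, filename) {
      // Convert [-1, 1] floats back to 16-bit PCM.
      const int16 = new Int16Array(segment.samples.length);
      for (let i = 0; i < int16.length; i++) {
        const s = Math.max(-1, Math.min(1, segment.samples[i]));
        int16[i] = Math.round(s * 32767);
      }

      const writer = new wav.FileWriter(filename, {
        channels: 1,
        sampleRate: sampleRate,
        bitDepth: 16,
      });
      writer.write(Buffer.from(int16.buffer));
      writer.end();
    }

    // e.g. inside the `while (!vad.isEmpty())` loop above:
    //   saveSegment(segment, vad.config.sampleRate, `segment-${index}.wav`);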
scripts/nodejs/.clang-format
0 → 100644
scripts/nodejs/.gitignore
0 → 100644
scripts/nodejs/README.md
0 → 100644
scripts/nodejs/index.js
0 → 100644
| 1 | +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +// Please use | ||
| 4 | +// | ||
| 5 | +// npm install ffi-napi ref-napi ref-array-napi ref-struct-napi | ||
| 6 | +// | ||
| 7 | +// before you use this file | ||
| 8 | +// | ||
| 9 | +// | ||
| 10 | +// Please use Node.js 13. Node.js 16, 18, 20, and 21 are known not to work. | ||
| 11 | +// See also | ||
| 12 | +// https://github.com/node-ffi-napi/node-ffi-napi/issues/244 | ||
| 13 | +// and | ||
| 14 | +// https://github.com/node-ffi-napi/node-ffi-napi/issues/97 | ||
| 15 | +"use strict" | ||
| 16 | + | ||
| 17 | +const debug = require("debug")("sherpa-onnx"); | ||
| 18 | +const os = require("os"); | ||
| 19 | +const path = require("path"); | ||
| 20 | +const ffi = require("ffi-napi"); | ||
| 21 | +const ref = require("ref-napi"); | ||
| 22 | +const fs = require("fs"); | ||
| 23 | +const ArrayType = require("ref-array-napi"); | ||
| 24 | + | ||
| 25 | +const FloatArray = ArrayType(ref.types.float); | ||
| 26 | +const StructType = require("ref-struct-napi"); | ||
| 27 | +const cstring = ref.types.CString; | ||
| 28 | +const cstringPtr = ref.refType(cstring); | ||
| 29 | +const int32_t = ref.types.int32; | ||
| 30 | +const float = ref.types.float; | ||
| 31 | +const floatPtr = ref.refType(float); | ||
| 32 | + | ||
| 33 | +const SherpaOnnxOnlineTransducerModelConfig = StructType({ | ||
| 34 | + "encoder" : cstring, | ||
| 35 | + "decoder" : cstring, | ||
| 36 | + "joiner" : cstring, | ||
| 37 | +}); | ||
| 38 | + | ||
| 39 | +const SherpaOnnxOnlineParaformerModelConfig = StructType({ | ||
| 40 | + "encoder" : cstring, | ||
| 41 | + "decoder" : cstring, | ||
| 42 | +}); | ||
| 43 | + | ||
| 44 | +const SherpaOnnxOnlineModelConfig = StructType({ | ||
| 45 | + "transducer" : SherpaOnnxOnlineTransducerModelConfig, | ||
| 46 | + "paraformer" : SherpaOnnxOnlineParaformerModelConfig, | ||
| 47 | + "tokens" : cstring, | ||
| 48 | + "numThreads" : int32_t, | ||
| 49 | + "provider" : cstring, | ||
| 50 | + "debug" : int32_t, | ||
| 51 | + "modelType" : cstring, | ||
| 52 | +}); | ||
| 53 | + | ||
| 54 | +const SherpaOnnxFeatureConfig = StructType({ | ||
| 55 | + "sampleRate" : int32_t, | ||
| 56 | + "featureDim" : int32_t, | ||
| 57 | +}); | ||
| 58 | + | ||
| 59 | +const SherpaOnnxOnlineRecognizerConfig = StructType({ | ||
| 60 | + "featConfig" : SherpaOnnxFeatureConfig, | ||
| 61 | + "modelConfig" : SherpaOnnxOnlineModelConfig, | ||
| 62 | + "decodingMethod" : cstring, | ||
| 63 | + "maxActivePaths" : int32_t, | ||
| 64 | + "enableEndpoint" : int32_t, | ||
| 65 | + "rule1MinTrailingSilence" : float, | ||
| 66 | + "rule2MinTrailingSilence" : float, | ||
| 67 | + "rule3MinUtteranceLength" : float, | ||
| 68 | + "hotwordsFile" : cstring, | ||
| 69 | + "hotwordsScore" : float, | ||
| 70 | +}); | ||
| 71 | + | ||
| 72 | +const SherpaOnnxOnlineRecognizerResult = StructType({ | ||
| 73 | + "text" : cstring, | ||
| 74 | + "tokens" : cstring, | ||
| 75 | + "tokensArr" : cstringPtr, | ||
| 76 | + "timestamps" : floatPtr, | ||
| 77 | + "count" : int32_t, | ||
| 78 | + "json" : cstring, | ||
| 79 | +}); | ||
| 80 | + | ||
| 81 | +const SherpaOnnxOnlineRecognizerPtr = ref.refType(ref.types.void); | ||
| 82 | +const SherpaOnnxOnlineStreamPtr = ref.refType(ref.types.void); | ||
| 83 | +const SherpaOnnxOnlineStreamPtrPtr = ref.refType(SherpaOnnxOnlineStreamPtr); | ||
| 84 | +const SherpaOnnxOnlineRecognizerResultPtr = | ||
| 85 | + ref.refType(SherpaOnnxOnlineRecognizerResult); | ||
| 86 | + | ||
| 87 | +const SherpaOnnxOnlineRecognizerConfigPtr = | ||
| 88 | + ref.refType(SherpaOnnxOnlineRecognizerConfig); | ||
| 89 | + | ||
| 90 | +const SherpaOnnxOfflineTransducerModelConfig = StructType({ | ||
| 91 | + "encoder" : cstring, | ||
| 92 | + "decoder" : cstring, | ||
| 93 | + "joiner" : cstring, | ||
| 94 | +}); | ||
| 95 | + | ||
| 96 | +const SherpaOnnxOfflineParaformerModelConfig = StructType({ | ||
| 97 | + "model" : cstring, | ||
| 98 | +}); | ||
| 99 | + | ||
| 100 | +const SherpaOnnxOfflineNemoEncDecCtcModelConfig = StructType({ | ||
| 101 | + "model" : cstring, | ||
| 102 | +}); | ||
| 103 | + | ||
| 104 | +const SherpaOnnxOfflineWhisperModelConfig = StructType({ | ||
| 105 | + "encoder" : cstring, | ||
| 106 | + "decoder" : cstring, | ||
| 107 | +}); | ||
| 108 | + | ||
| 109 | +const SherpaOnnxOfflineTdnnModelConfig = StructType({ | ||
| 110 | + "model" : cstring, | ||
| 111 | +}); | ||
| 112 | + | ||
| 113 | +const SherpaOnnxOfflineLMConfig = StructType({ | ||
| 114 | + "model" : cstring, | ||
| 115 | + "scale" : float, | ||
| 116 | +}); | ||
| 117 | + | ||
| 118 | +const SherpaOnnxOfflineModelConfig = StructType({ | ||
| 119 | + "transducer" : SherpaOnnxOfflineTransducerModelConfig, | ||
| 120 | + "paraformer" : SherpaOnnxOfflineParaformerModelConfig, | ||
| 121 | + "nemoCtc" : SherpaOnnxOfflineNemoEncDecCtcModelConfig, | ||
| 122 | + "whisper" : SherpaOnnxOfflineWhisperModelConfig, | ||
| 123 | + "tdnn" : SherpaOnnxOfflineTdnnModelConfig, | ||
| 124 | + "tokens" : cstring, | ||
| 125 | + "numThreads" : int32_t, | ||
| 126 | + "debug" : int32_t, | ||
| 127 | + "provider" : cstring, | ||
| 128 | + "modelType" : cstring, | ||
| 129 | +}); | ||
| 130 | + | ||
| 131 | +const SherpaOnnxOfflineRecognizerConfig = StructType({ | ||
| 132 | + "featConfig" : SherpaOnnxFeatureConfig, | ||
| 133 | + "modelConfig" : SherpaOnnxOfflineModelConfig, | ||
| 134 | + "lmConfig" : SherpaOnnxOfflineLMConfig, | ||
| 135 | + "decodingMethod" : cstring, | ||
| 136 | + "maxActivePaths" : int32_t, | ||
| 137 | + "hotwordsFile" : cstring, | ||
| 138 | + "hotwordsScore" : float, | ||
| 139 | +}); | ||
| 140 | + | ||
| 141 | +const SherpaOnnxOfflineRecognizerResult = StructType({ | ||
| 142 | + "text" : cstring, | ||
| 143 | + "timestamps" : floatPtr, | ||
| 144 | + "count" : int32_t, | ||
| 145 | +}); | ||
| 146 | + | ||
| 147 | +const SherpaOnnxOfflineRecognizerPtr = ref.refType(ref.types.void); | ||
| 148 | +const SherpaOnnxOfflineStreamPtr = ref.refType(ref.types.void); | ||
| 149 | +const SherpaOnnxOfflineStreamPtrPtr = ref.refType(SherpaOnnxOfflineStreamPtr); | ||
| 150 | +const SherpaOnnxOfflineRecognizerResultPtr = | ||
| 151 | + ref.refType(SherpaOnnxOfflineRecognizerResult); | ||
| 152 | + | ||
| 153 | +const SherpaOnnxOfflineRecognizerConfigPtr = | ||
| 154 | + ref.refType(SherpaOnnxOfflineRecognizerConfig); | ||
| 155 | + | ||
| 156 | +// vad | ||
| 157 | +const SherpaOnnxSileroVadModelConfig = StructType({ | ||
| 158 | + "model" : cstring, | ||
| 159 | + "threshold" : float, | ||
| 160 | + "minSilenceDuration" : float, | ||
| 161 | + "minSpeechDuration" : float, | ||
| 162 | + "windowSize" : int32_t, | ||
| 163 | +}); | ||
| 164 | + | ||
| 165 | +const SherpaOnnxVadModelConfig = StructType({ | ||
| 166 | + "sileroVad" : SherpaOnnxSileroVadModelConfig, | ||
| 167 | + "sampleRate" : int32_t, | ||
| 168 | + "numThreads" : int32_t, | ||
| 169 | + "provider" : cstring, | ||
| 170 | + "debug" : int32_t, | ||
| 171 | +}); | ||
| 172 | + | ||
| 173 | +const SherpaOnnxSpeechSegment = StructType({ | ||
| 174 | + "start" : int32_t, | ||
| 175 | + "samples" : FloatArray, | ||
| 176 | + "n" : int32_t, | ||
| 177 | +}); | ||
| 178 | + | ||
| 179 | +const SherpaOnnxVadModelConfigPtr = ref.refType(SherpaOnnxVadModelConfig); | ||
| 180 | +const SherpaOnnxSpeechSegmentPtr = ref.refType(SherpaOnnxSpeechSegment); | ||
| 181 | +const SherpaOnnxCircularBufferPtr = ref.refType(ref.types.void); | ||
| 182 | +const SherpaOnnxVoiceActivityDetectorPtr = ref.refType(ref.types.void); | ||
| 183 | + | ||
| 184 | +// tts | ||
| 185 | +const SherpaOnnxOfflineTtsVitsModelConfig = StructType({ | ||
| 186 | + "model" : cstring, | ||
| 187 | + "lexicon" : cstring, | ||
| 188 | + "tokens" : cstring, | ||
| 189 | + "noiseScale" : float, | ||
| 190 | + "noiseScaleW" : float, | ||
| 191 | + "lengthScale" : float, | ||
| 192 | +}); | ||
| 193 | + | ||
| 194 | +const SherpaOnnxOfflineTtsModelConfig = StructType({ | ||
| 195 | + "vits" : SherpaOnnxOfflineTtsVitsModelConfig, | ||
| 196 | + "numThreads" : int32_t, | ||
| 197 | + "debug" : int32_t, | ||
| 198 | + "provider" : cstring, | ||
| 199 | +}); | ||
| 200 | + | ||
| 201 | +const SherpaOnnxOfflineTtsConfig = StructType({ | ||
| 202 | + "model" : SherpaOnnxOfflineTtsModelConfig, | ||
| 203 | + "ruleFsts" : cstring, | ||
| 204 | +}); | ||
| 205 | + | ||
| 206 | +const SherpaOnnxGeneratedAudio = StructType({ | ||
| 207 | + "samples" : FloatArray, | ||
| 208 | + "n" : int32_t, | ||
| 209 | + "sampleRate" : int32_t, | ||
| 210 | +}); | ||
| 211 | + | ||
| 212 | +const SherpaOnnxOfflineTtsVitsModelConfigPtr = | ||
| 213 | + ref.refType(SherpaOnnxOfflineTtsVitsModelConfig); | ||
| 214 | +const SherpaOnnxOfflineTtsConfigPtr = ref.refType(SherpaOnnxOfflineTtsConfig); | ||
| 215 | +const SherpaOnnxGeneratedAudioPtr = ref.refType(SherpaOnnxGeneratedAudio); | ||
| 216 | +const SherpaOnnxOfflineTtsPtr = ref.refType(ref.types.void); | ||
| 217 | + | ||
| 218 | +const SherpaOnnxDisplayPtr = ref.refType(ref.types.void); | ||
| 219 | + | ||
| 220 | +let soname; | ||
| 221 | +if (os.platform() == "win32") { | ||
| 222 | + // see https://nodejs.org/api/process.html#processarch | ||
| 223 | + if (process.arch == "x64") { | ||
| 224 | + let currentPath = process.env.Path; | ||
| 225 | + let dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x64")); | ||
| 226 | + process.env.Path = currentPath + path.delimiter + dllDirectory; | ||
| 227 | + | ||
| 228 | + soname = path.join(__dirname, "lib", "win-x64", "sherpa-onnx-c-api.dll") | ||
| 229 | + } else if (process.arch == "ia32") { | ||
| 230 | + let currentPath = process.env.Path; | ||
| 231 | + let dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x86")); | ||
| 232 | + process.env.Path = currentPath + path.delimiter + dllDirectory; | ||
| 233 | + | ||
| 234 | + soname = path.join(__dirname, "lib", "win-x86", "sherpa-onnx-c-api.dll") | ||
| 235 | + } else { | ||
| 236 | + throw new Error( | ||
| 237 | + `Support only Windows x86 and x64 for now. Given ${process.arch}`); | ||
| 238 | + } | ||
| 239 | +} else if (os.platform() == "darwin") { | ||
| 240 | + if (process.arch == "x64") { | ||
| 241 | + soname = | ||
| 242 | + path.join(__dirname, "lib", "osx-x64", "libsherpa-onnx-c-api.dylib"); | ||
| 243 | + } else if (process.arch == "arm64") { | ||
| 244 | + soname = | ||
| 245 | + path.join(__dirname, "lib", "osx-arm64", "libsherpa-onnx-c-api.dylib"); | ||
| 246 | + } else { | ||
| 247 | + throw new Error( | ||
| 248 | + `Support only macOS x64 and arm64 for now. Given ${process.arch}`); | ||
| 249 | + } | ||
| 250 | +} else if (os.platform() == "linux") { | ||
| 251 | + if (process.arch == "x64") { | ||
| 252 | + soname = | ||
| 253 | + path.join(__dirname, "lib", "linux-x64", "libsherpa-onnx-c-api.so"); | ||
| 254 | + } else { | ||
| 255 | + throw new Error(`Support only Linux x64 for now. Given ${process.arch}`); | ||
| 256 | + } | ||
| 257 | +} else { | ||
| 258 | + throw new Error(`Unsupported platform ${os.platform()}`); | ||
| 259 | +} | ||
| 260 | + | ||
| 261 | +if (!fs.existsSync(soname)) { | ||
| 262 | + throw new Error(`Cannot find file ${soname}. Please make sure you have run | ||
| 263 | + ./run.sh`); | ||
| 264 | +} | ||
| 265 | + | ||
| 266 | +debug("soname ", soname) | ||
| 267 | + | ||
| 268 | +const libsherpa_onnx = ffi.Library(soname, { | ||
| 269 | + // online asr | ||
| 270 | + "CreateOnlineRecognizer" : [ | ||
| 271 | + SherpaOnnxOnlineRecognizerPtr, [ SherpaOnnxOnlineRecognizerConfigPtr ] | ||
| 272 | + ], | ||
| 273 | + "DestroyOnlineRecognizer" : [ "void", [ SherpaOnnxOnlineRecognizerPtr ] ], | ||
| 274 | + "CreateOnlineStream" : | ||
| 275 | + [ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr ] ], | ||
| 276 | + "CreateOnlineStreamWithHotwords" : | ||
| 277 | + [ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr, cstring ] ], | ||
| 278 | + "DestroyOnlineStream" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ], | ||
| 279 | + "AcceptWaveform" : | ||
| 280 | + [ "void", [ SherpaOnnxOnlineStreamPtr, int32_t, floatPtr, int32_t ] ], | ||
| 281 | + "IsOnlineStreamReady" : | ||
| 282 | + [ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ], | ||
| 283 | + "DecodeOnlineStream" : | ||
| 284 | + [ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ], | ||
| 285 | + "DecodeMultipleOnlineStreams" : [ | ||
| 286 | + "void", | ||
| 287 | + [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtrPtr, int32_t ] | ||
| 288 | + ], | ||
| 289 | + "GetOnlineStreamResult" : [ | ||
| 290 | + SherpaOnnxOnlineRecognizerResultPtr, | ||
| 291 | + [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] | ||
| 292 | + ], | ||
| 293 | + "DestroyOnlineRecognizerResult" : | ||
| 294 | + [ "void", [ SherpaOnnxOnlineRecognizerResultPtr ] ], | ||
| 295 | + "Reset" : | ||
| 296 | + [ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ], | ||
| 297 | + "InputFinished" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ], | ||
| 298 | + "IsEndpoint" : | ||
| 299 | + [ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ], | ||
| 300 | + | ||
| 301 | + // offline asr | ||
| 302 | + "CreateOfflineRecognizer" : [ | ||
| 303 | + SherpaOnnxOfflineRecognizerPtr, [ SherpaOnnxOfflineRecognizerConfigPtr ] | ||
| 304 | + ], | ||
| 305 | + "DestroyOfflineRecognizer" : [ "void", [ SherpaOnnxOfflineRecognizerPtr ] ], | ||
| 306 | + "CreateOfflineStream" : | ||
| 307 | + [ SherpaOnnxOfflineStreamPtr, [ SherpaOnnxOfflineRecognizerPtr ] ], | ||
| 308 | + "DestroyOfflineStream" : [ "void", [ SherpaOnnxOfflineStreamPtr ] ], | ||
| 309 | + "AcceptWaveformOffline" : | ||
| 310 | + [ "void", [ SherpaOnnxOfflineStreamPtr, int32_t, floatPtr, int32_t ] ], | ||
| 311 | + "DecodeOfflineStream" : [ | ||
| 312 | + "void", [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtr ] | ||
| 313 | + ], | ||
| 314 | + "DecodeMultipleOfflineStreams" : [ | ||
| 315 | + "void", | ||
| 316 | + [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtrPtr, int32_t ] | ||
| 317 | + ], | ||
| 318 | + "GetOfflineStreamResult" : | ||
| 319 | + [ SherpaOnnxOfflineRecognizerResultPtr, [ SherpaOnnxOfflineStreamPtr ] ], | ||
| 320 | + "DestroyOfflineRecognizerResult" : | ||
| 321 | + [ "void", [ SherpaOnnxOfflineRecognizerResultPtr ] ], | ||
| 322 | + | ||
| 323 | + // vad | ||
| 324 | + "SherpaOnnxCreateCircularBuffer" : | ||
| 325 | + [ SherpaOnnxCircularBufferPtr, [ int32_t ] ], | ||
| 326 | + "SherpaOnnxDestroyCircularBuffer" : | ||
| 327 | + [ "void", [ SherpaOnnxCircularBufferPtr ] ], | ||
| 328 | + "SherpaOnnxCircularBufferPush" : | ||
| 329 | + [ "void", [ SherpaOnnxCircularBufferPtr, floatPtr, int32_t ] ], | ||
| 330 | + "SherpaOnnxCircularBufferGet" : | ||
| 331 | + [ FloatArray, [ SherpaOnnxCircularBufferPtr, int32_t, int32_t ] ], | ||
| 332 | + "SherpaOnnxCircularBufferFree" : [ "void", [ FloatArray ] ], | ||
| 333 | + "SherpaOnnxCircularBufferPop" : | ||
| 334 | + [ "void", [ SherpaOnnxCircularBufferPtr, int32_t ] ], | ||
| 335 | + "SherpaOnnxCircularBufferSize" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ], | ||
| 336 | + "SherpaOnnxCircularBufferHead" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ], | ||
| 337 | + "SherpaOnnxCircularBufferReset" : [ "void", [ SherpaOnnxCircularBufferPtr ] ], | ||
| 338 | + "SherpaOnnxCreateVoiceActivityDetector" : [ | ||
| 339 | + SherpaOnnxVoiceActivityDetectorPtr, [ SherpaOnnxVadModelConfigPtr, float ] | ||
| 340 | + ], | ||
| 341 | + "SherpaOnnxDestroyVoiceActivityDetector" : | ||
| 342 | + [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 343 | + "SherpaOnnxVoiceActivityDetectorAcceptWaveform" : | ||
| 344 | + [ "void", [ SherpaOnnxVoiceActivityDetectorPtr, floatPtr, int32_t ] ], | ||
| 345 | + "SherpaOnnxVoiceActivityDetectorEmpty" : | ||
| 346 | + [ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 347 | + "SherpaOnnxVoiceActivityDetectorDetected" : | ||
| 348 | + [ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 349 | + "SherpaOnnxVoiceActivityDetectorPop" : | ||
| 350 | + [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 351 | + "SherpaOnnxVoiceActivityDetectorClear" : | ||
| 352 | + [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 353 | + "SherpaOnnxVoiceActivityDetectorFront" : | ||
| 354 | + [ SherpaOnnxSpeechSegmentPtr, [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 355 | + "SherpaOnnxDestroySpeechSegment" : [ "void", [ SherpaOnnxSpeechSegmentPtr ] ], | ||
| 356 | + "SherpaOnnxVoiceActivityDetectorReset" : | ||
| 357 | + [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ], | ||
| 358 | + // tts | ||
| 359 | + "SherpaOnnxCreateOfflineTts" : | ||
| 360 | + [ SherpaOnnxOfflineTtsPtr, [ SherpaOnnxOfflineTtsConfigPtr ] ], | ||
| 361 | + "SherpaOnnxDestroyOfflineTts" : [ "void", [ SherpaOnnxOfflineTtsPtr ] ], | ||
| 362 | + "SherpaOnnxOfflineTtsGenerate" : [ | ||
| 363 | + SherpaOnnxGeneratedAudioPtr, | ||
| 364 | + [ SherpaOnnxOfflineTtsPtr, cstring, int32_t, float ] | ||
| 365 | + ], | ||
| 366 | + "SherpaOnnxDestroyOfflineTtsGeneratedAudio" : | ||
| 367 | + [ "void", [ SherpaOnnxGeneratedAudioPtr ] ], | ||
| 368 | + "SherpaOnnxWriteWave" : [ "void", [ floatPtr, int32_t, int32_t, cstring ] ], | ||
| 369 | + | ||
| 370 | + // display | ||
| 371 | + "CreateDisplay" : [ SherpaOnnxDisplayPtr, [ int32_t ] ], | ||
| 372 | + "DestroyDisplay" : [ "void", [ SherpaOnnxDisplayPtr ] ], | ||
| 373 | + "SherpaOnnxPrint" : [ "void", [ SherpaOnnxDisplayPtr, int32_t, cstring ] ], | ||
| 374 | +}); | ||
| 375 | + | ||
| 376 | +class Display { | ||
| 377 | + constructor(maxWordPerLine) { | ||
| 378 | + this.handle = libsherpa_onnx.CreateDisplay(maxWordPerLine); | ||
| 379 | + } | ||
| 380 | + free() { | ||
| 381 | + if (this.handle) { | ||
| 382 | + libsherpa_onnx.DestroyDisplay(this.handle); | ||
| 383 | + this.handle = null; | ||
| 384 | + } | ||
| 385 | + } | ||
| 386 | + | ||
| 387 | + print(idx, s) { libsherpa_onnx.SherpaOnnxPrint(this.handle, idx, s); } | ||
| 388 | +}; | ||
| 389 | + | ||
| 390 | +class OnlineResult { | ||
| 391 | + constructor(text) { this.text = Buffer.from(text, "utf-8").toString(); } | ||
| 392 | +}; | ||
| 393 | + | ||
| 394 | +class OnlineStream { | ||
| 395 | + constructor(handle) { this.handle = handle; } | ||
| 396 | + | ||
| 397 | + free() { | ||
| 398 | + if (this.handle) { | ||
| 399 | + libsherpa_onnx.DestroyOnlineStream(this.handle); | ||
| 400 | + this.handle = null; | ||
| 401 | + } | ||
| 402 | + } | ||
| 403 | + | ||
| 404 | + /** | ||
| 405 | + * @param sampleRate {Number} | ||
| 406 | + * @param samples {Float32Array} Containing samples in the range [-1, 1] | ||
| 407 | + */ | ||
| 408 | + acceptWaveform(sampleRate, samples) { | ||
| 409 | + libsherpa_onnx.AcceptWaveform(this.handle, sampleRate, samples, | ||
| 410 | + samples.length); | ||
| 411 | + } | ||
| 412 | +}; | ||
| 413 | + | ||
| 414 | +class OnlineRecognizer { | ||
| 415 | + constructor(config) { | ||
| 416 | + this.config = config; | ||
| 417 | + this.recognizer_handle = | ||
| 418 | + libsherpa_onnx.CreateOnlineRecognizer(config.ref()); | ||
| 419 | + } | ||
| 420 | + | ||
| 421 | + free() { | ||
| 422 | + if (this.recognizer_handle) { | ||
| 423 | + libsherpa_onnx.DestroyOnlineRecognizer(this.recognizer_handle); | ||
| 424 | + this.recognizer_handle = null; | ||
| 425 | + } | ||
| 426 | + } | ||
| 427 | + | ||
| 428 | + createStream() { | ||
| 429 | + let handle = libsherpa_onnx.CreateOnlineStream(this.recognizer_handle); | ||
| 430 | + return new OnlineStream(handle); | ||
| 431 | + } | ||
| 432 | + | ||
| 433 | + isReady(stream) { | ||
| 434 | + return libsherpa_onnx.IsOnlineStreamReady(this.recognizer_handle, | ||
| 435 | + stream.handle); | ||
| 436 | + } | ||
| 437 | + | ||
| 438 | + isEndpoint(stream) { | ||
| 439 | + return libsherpa_onnx.IsEndpoint(this.recognizer_handle, stream.handle); | ||
| 440 | + } | ||
| 441 | + | ||
| 442 | + reset(stream) { libsherpa_onnx.Reset(this.recognizer_handle, stream.handle); } | ||
| 443 | + | ||
| 444 | + decode(stream) { | ||
| 445 | + libsherpa_onnx.DecodeOnlineStream(this.recognizer_handle, stream.handle); | ||
| 446 | + } | ||
| 447 | + | ||
| 448 | + getResult(stream) { | ||
| 449 | + let handle = libsherpa_onnx.GetOnlineStreamResult(this.recognizer_handle, | ||
| 450 | + stream.handle); | ||
| 451 | + let r = handle.deref(); | ||
| 452 | + let ans = new OnlineResult(r.text); | ||
| 453 | + libsherpa_onnx.DestroyOnlineRecognizerResult(handle); | ||
| 454 | + | ||
| 455 | + return ans; | ||
| 456 | + } | ||
| 457 | +}; | ||
| 458 | + | ||
| 459 | +class OfflineResult { | ||
| 460 | + constructor(text) { this.text = Buffer.from(text, "utf-8").toString(); } | ||
| 461 | +}; | ||
| 462 | + | ||
| 463 | +class OfflineStream { | ||
| 464 | + constructor(handle) { this.handle = handle; } | ||
| 465 | + | ||
| 466 | + free() { | ||
| 467 | + if (this.handle) { | ||
| 468 | + libsherpa_onnx.DestroyOfflineStream(this.handle); | ||
| 469 | + this.handle = null; | ||
| 470 | + } | ||
| 471 | + } | ||
| 472 | + | ||
| 473 | + /** | ||
| 474 | + * @param sampleRate {Number} | ||
| 475 | + * @param samples {Float32Array} Containing samples in the range [-1, 1] | ||
| 476 | + */ | ||
| 477 | + acceptWaveform(sampleRate, samples) { | ||
| 478 | + libsherpa_onnx.AcceptWaveformOffline(this.handle, sampleRate, samples, | ||
| 479 | + samples.length); | ||
| 480 | + } | ||
| 481 | +}; | ||
| 482 | + | ||
| 483 | +class OfflineRecognizer { | ||
| 484 | + constructor(config) { | ||
| 485 | + this.config = config; | ||
| 486 | + this.recognizer_handle = | ||
| 487 | + libsherpa_onnx.CreateOfflineRecognizer(config.ref()); | ||
| 488 | + } | ||
| 489 | + | ||
| 490 | + free() { | ||
| 491 | + if (this.recognizer_handle) { | ||
| 492 | + libsherpa_onnx.DestroyOfflineRecognizer(this.recognizer_handle); | ||
| 493 | + this.recognizer_handle = null; | ||
| 494 | + } | ||
| 495 | + } | ||
| 496 | + | ||
| 497 | + createStream() { | ||
| 498 | + let handle = libsherpa_onnx.CreateOfflineStream(this.recognizer_handle); | ||
| 499 | + return new OfflineStream(handle); | ||
| 500 | + } | ||
| 501 | + | ||
| 502 | + decode(stream) { | ||
| 503 | + libsherpa_onnx.DecodeOfflineStream(this.recognizer_handle, stream.handle); | ||
| 504 | + } | ||
| 505 | + | ||
| 506 | + getResult(stream) { | ||
| 507 | + let handle = libsherpa_onnx.GetOfflineStreamResult(stream.handle); | ||
| 508 | + let r = handle.deref(); | ||
| 509 | + let ans = new OfflineResult(r.text); | ||
| 510 | + libsherpa_onnx.DestroyOfflineRecognizerResult(handle); | ||
| 511 | + | ||
| 512 | + return ans; | ||
| 513 | + } | ||
| 514 | +}; | ||
| 515 | + | ||
| 516 | +class SpeechSegment { | ||
| 517 | + constructor(start, samples) { | ||
| 518 | + this.start = start; | ||
| 519 | + this.samples = samples; | ||
| 520 | + } | ||
| 521 | +}; | ||
| 522 | + | ||
| 523 | +// this buffer holds only float entries. | ||
| 524 | +class CircularBuffer { | ||
| 525 | + /** | ||
| 526 | + * @param capacity {int} The capacity of the circular buffer. | ||
| 527 | + */ | ||
| 528 | + constructor(capacity) { | ||
| 529 | + this.handle = libsherpa_onnx.SherpaOnnxCreateCircularBuffer(capacity); | ||
| 530 | + } | ||
| 531 | + | ||
| 532 | + free() { | ||
| 533 | + if (this.handle) { | ||
| 534 | + libsherpa_onnx.SherpaOnnxDestroyCircularBuffer(this.handle); | ||
| 535 | + this.handle = null; | ||
| 536 | + } | ||
| 537 | + } | ||
| 538 | + | ||
| 539 | + /** | ||
| 540 | + * @param samples {Float32Array} | ||
| 541 | + */ | ||
| 542 | + push(samples) { | ||
| 543 | + libsherpa_onnx.SherpaOnnxCircularBufferPush(this.handle, samples, | ||
| 544 | + samples.length); | ||
| 545 | + } | ||
| 546 | + | ||
| 547 | + get(startIndex, n) { | ||
| 548 | + let data = | ||
| 549 | + libsherpa_onnx.SherpaOnnxCircularBufferGet(this.handle, startIndex, n); | ||
| 550 | + | ||
| 551 | + // https://tootallnate.github.io/ref/#exports-reinterpret | ||
| 552 | + const buffer = data.buffer.reinterpret(n * ref.sizeof.float).buffer; | ||
| 553 | + | ||
| 554 | + // create a copy since we are going to free the buffer at the end | ||
| 555 | + let s = new Float32Array(buffer).slice(0); | ||
| 556 | + libsherpa_onnx.SherpaOnnxCircularBufferFree(data); | ||
| 557 | + return s; | ||
| 558 | + } | ||
| 559 | + | ||
| 560 | + pop(n) { libsherpa_onnx.SherpaOnnxCircularBufferPop(this.handle, n); } | ||
| 561 | + | ||
| 562 | + size() { return libsherpa_onnx.SherpaOnnxCircularBufferSize(this.handle); } | ||
| 563 | + | ||
| 564 | + head() { return libsherpa_onnx.SherpaOnnxCircularBufferHead(this.handle); } | ||
| 565 | + | ||
| 566 | + reset() { libsherpa_onnx.SherpaOnnxCircularBufferReset(this.handle); } | ||
| 567 | +}; | ||
| 568 | + | ||
| 569 | +class VoiceActivityDetector { | ||
| 570 | + constructor(config, bufferSizeInSeconds) { | ||
| 571 | + this.config = config; | ||
| 572 | + this.handle = libsherpa_onnx.SherpaOnnxCreateVoiceActivityDetector( | ||
| 573 | + config.ref(), bufferSizeInSeconds); | ||
| 574 | + } | ||
| 575 | + | ||
| 576 | + free() { | ||
| 577 | + if (!this.handle) return; | ||
| 578 | + libsherpa_onnx.SherpaOnnxDestroyVoiceActivityDetector(this.handle); | ||
| 579 | + this.handle = null; // prevent a double free, matching the other free() methods | ||
| 580 | + } | ||
| 581 | + | ||
| 582 | + acceptWaveform(samples) { | ||
| 583 | + libsherpa_onnx.SherpaOnnxVoiceActivityDetectorAcceptWaveform( | ||
| 584 | + this.handle, samples, samples.length); | ||
| 585 | + } | ||
| 586 | + | ||
| 587 | + isEmpty() { | ||
| 588 | + return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorEmpty(this.handle); | ||
| 589 | + } | ||
| 590 | + | ||
| 591 | + isDetected() { | ||
| 592 | + return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorDetected(this.handle); | ||
| 593 | + } | ||
| 594 | + pop() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorPop(this.handle); } | ||
| 595 | + | ||
| 596 | + clear() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorClear(this.handle); } | ||
| 597 | + | ||
| 598 | + reset() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorReset(this.handle); } | ||
| 599 | + | ||
| 600 | + front() { | ||
| 601 | + let segment = | ||
| 602 | + libsherpa_onnx.SherpaOnnxVoiceActivityDetectorFront(this.handle); | ||
| 603 | + | ||
| 604 | + let buffer = | ||
| 605 | + segment.deref() | ||
| 606 | + .samples.buffer.reinterpret(segment.deref().n * ref.sizeof.float) | ||
| 607 | + .buffer; | ||
| 608 | + | ||
| 609 | + let samples = new Float32Array(buffer).slice(0); | ||
| 610 | + let ans = new SpeechSegment(segment.deref().start, samples); | ||
| 611 | + | ||
| 612 | + libsherpa_onnx.SherpaOnnxDestroySpeechSegment(segment); | ||
| 613 | + return ans; | ||
| 614 | + } | ||
| 615 | +}; | ||
| 616 | + | ||
| 617 | +class GeneratedAudio { | ||
| 618 | + constructor(sampleRate, samples) { | ||
| 619 | + this.sampleRate = sampleRate; | ||
| 620 | + this.samples = samples; | ||
| 621 | + } | ||
| 622 | + save(filename) { | ||
| 623 | + libsherpa_onnx.SherpaOnnxWriteWave(this.samples, this.samples.length, | ||
| 624 | + this.sampleRate, filename); | ||
| 625 | + } | ||
| 626 | +}; | ||
| 627 | + | ||
| 628 | +class OfflineTts { | ||
| 629 | + constructor(config) { | ||
| 630 | + this.config = config; | ||
| 631 | + this.handle = libsherpa_onnx.SherpaOnnxCreateOfflineTts(config.ref()); | ||
| 632 | + } | ||
| 633 | + | ||
| 634 | + free() { | ||
| 635 | + if (this.handle) { | ||
| 636 | + libsherpa_onnx.SherpaOnnxDestroyOfflineTts(this.handle); | ||
| 637 | + this.handle = null; | ||
| 638 | + } | ||
| 639 | + } | ||
| 640 | + generate(text, sid, speed) { | ||
| 641 | + let r = libsherpa_onnx.SherpaOnnxOfflineTtsGenerate(this.handle, text, sid, | ||
| 642 | + speed); | ||
| 643 | + const buffer = | ||
| 644 | + r.deref() | ||
| 645 | + .samples.buffer.reinterpret(r.deref().n * ref.sizeof.float) | ||
| 646 | + .buffer; | ||
| 647 | + let samples = new Float32Array(buffer).slice(0); | ||
| 648 | + let sampleRate = r.deref().sampleRate; | ||
| 649 | + | ||
| 650 | + let generatedAudio = new GeneratedAudio(sampleRate, samples); | ||
| 651 | + | ||
| 652 | + libsherpa_onnx.SherpaOnnxDestroyOfflineTtsGeneratedAudio(r); | ||
| 653 | + | ||
| 654 | + return generatedAudio; | ||
| 655 | + } | ||
| 656 | +}; | ||
| 657 | + | ||
| 658 | +// online asr | ||
| 659 | +const OnlineTransducerModelConfig = SherpaOnnxOnlineTransducerModelConfig; | ||
| 660 | +const OnlineModelConfig = SherpaOnnxOnlineModelConfig; | ||
| 661 | +const FeatureConfig = SherpaOnnxFeatureConfig; | ||
| 662 | +const OnlineRecognizerConfig = SherpaOnnxOnlineRecognizerConfig; | ||
| 663 | +const OnlineParaformerModelConfig = SherpaOnnxOnlineParaformerModelConfig; | ||
| 664 | + | ||
| 665 | +// offline asr | ||
| 666 | +const OfflineTransducerModelConfig = SherpaOnnxOfflineTransducerModelConfig; | ||
| 667 | +const OfflineModelConfig = SherpaOnnxOfflineModelConfig; | ||
| 668 | +const OfflineRecognizerConfig = SherpaOnnxOfflineRecognizerConfig; | ||
| 669 | +const OfflineParaformerModelConfig = SherpaOnnxOfflineParaformerModelConfig; | ||
| 670 | +const OfflineWhisperModelConfig = SherpaOnnxOfflineWhisperModelConfig; | ||
| 671 | +const OfflineNemoEncDecCtcModelConfig = | ||
| 672 | + SherpaOnnxOfflineNemoEncDecCtcModelConfig; | ||
| 673 | +const OfflineTdnnModelConfig = SherpaOnnxOfflineTdnnModelConfig; | ||
| 674 | + | ||
| 675 | +// vad | ||
| 676 | +const SileroVadModelConfig = SherpaOnnxSileroVadModelConfig; | ||
| 677 | +const VadModelConfig = SherpaOnnxVadModelConfig; | ||
| 678 | + | ||
| 679 | +// tts | ||
| 680 | +const OfflineTtsVitsModelConfig = SherpaOnnxOfflineTtsVitsModelConfig; | ||
| 681 | +const OfflineTtsModelConfig = SherpaOnnxOfflineTtsModelConfig; | ||
| 682 | +const OfflineTtsConfig = SherpaOnnxOfflineTtsConfig; | ||
| 683 | + | ||
| 684 | +module.exports = { | ||
| 685 | + // online asr | ||
| 686 | + OnlineTransducerModelConfig, | ||
| 687 | + OnlineModelConfig, | ||
| 688 | + FeatureConfig, | ||
| 689 | + OnlineRecognizerConfig, | ||
| 690 | + OnlineRecognizer, | ||
| 691 | + OnlineStream, | ||
| 692 | + OnlineParaformerModelConfig, | ||
| 693 | + | ||
| 694 | + // offline asr | ||
| 695 | + OfflineRecognizer, | ||
| 696 | + OfflineStream, | ||
| 697 | + OfflineTransducerModelConfig, | ||
| 698 | + OfflineModelConfig, | ||
| 699 | + OfflineRecognizerConfig, | ||
| 700 | + OfflineParaformerModelConfig, | ||
| 701 | + OfflineWhisperModelConfig, | ||
| 702 | + OfflineNemoEncDecCtcModelConfig, | ||
| 703 | + OfflineTdnnModelConfig, | ||
| 704 | + // vad | ||
| 705 | + SileroVadModelConfig, | ||
| 706 | + VadModelConfig, | ||
| 707 | + CircularBuffer, | ||
| 708 | + VoiceActivityDetector, | ||
| 709 | + // tts | ||
| 710 | + OfflineTtsVitsModelConfig, | ||
| 711 | + OfflineTtsModelConfig, | ||
| 712 | + OfflineTtsConfig, | ||
| 713 | + OfflineTts, | ||
| 714 | + | ||
| 715 | + // | ||
| 716 | + Display, | ||
| 717 | +}; |
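A quick usage sketch of the `CircularBuffer` wrapper defined above. The `head()` semantics — an absolute index of the oldest buffered sample that advances on `pop()` — are inferred from how the VAD examples call `buffer.get(buffer.head(), windowSize)`, so treat that part as an assumption:

    const sherpa_onnx = require('sherpa-onnx');

    const buffer = new sherpa_onnx.CircularBuffer(16);  // capacity in floats
    buffer.push(new Float32Array([0.1, -0.2, 0.3, 0.4]));
    console.log(buffer.size());                   // 4 samples buffered

    const oldest = buffer.get(buffer.head(), 2);  // copies the two oldest samples
    console.log(oldest);                          // Float32Array, ≈ [0.1, -0.2]

    buffer.pop(2);                                // discard what was consumed
    console.log(buffer.size());                   // 2
    console.log(buffer.head());                   // advanced by 2 (assumed semantics)

    buffer.free();                                // release the native buffer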
scripts/nodejs/package.json
0 → 100644
| 1 | +{ | ||
| 2 | + "name": "sherpa-onnx2", | ||
| 3 | + "version": "1.8.10", | ||
| 4 | + "description": "Real-time speech recognition with Next-gen Kaldi", | ||
| 5 | + "main": "index.js", | ||
| 6 | + "scripts": { | ||
| 7 | + "test": "echo \"Error: no test specified\" && exit 1" | ||
| 8 | + }, | ||
| 9 | + "repository": { | ||
| 10 | + "type": "git", | ||
| 11 | + "url": "git+https://github.com/k2-fsa/sherpa-onnx.git" | ||
| 12 | + }, | ||
| 13 | + "keywords": [ | ||
| 14 | + "speech-to-text", | ||
| 15 | + "text-to-speech", | ||
| 16 | + "real-time speech recognition", | ||
| 17 | + "without internet connect", | ||
| 18 | + "embedded systems", | ||
| 19 | + "open source", | ||
| 20 | + "zipformer", | ||
| 21 | + "asr", | ||
| 22 | + "speech" | ||
| 23 | + ], | ||
| 24 | + "author": "The next-gen Kaldi team", | ||
| 25 | + "license": "Apache-2.0", | ||
| 26 | + "bugs": { | ||
| 27 | + "url": "https://github.com/k2-fsa/sherpa-onnx/issues" | ||
| 28 | + }, | ||
| 29 | + "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme", | ||
| 30 | + "dependencies": { | ||
| 31 | + "ffi-napi": "^4.0.3", | ||
| 32 | + "npm": "^6.14.18", | ||
| 33 | + "ref-array-napi": "^1.2.2", | ||
| 34 | + "ref-napi": "^3.0.3", | ||
| 35 | + "ref-struct-napi": "^1.1.1" | ||
| 36 | + } | ||
| 37 | +} |
scripts/nodejs/package.json.in
0 → 100644
| 1 | +{ | ||
| 2 | + "name": "sherpa-onnx", | ||
| 3 | + "version": "SHERPA_ONNX_VERSION", | ||
| 4 | + "description": "Real-time speech recognition with Next-gen Kaldi", | ||
| 5 | + "main": "index.js", | ||
| 6 | + "scripts": { | ||
| 7 | + "test": "echo \"Error: no test specified\" && exit 1" | ||
| 8 | + }, | ||
| 9 | + "repository": { | ||
| 10 | + "type": "git", | ||
| 11 | + "url": "git+https://github.com/k2-fsa/sherpa-onnx.git" | ||
| 12 | + }, | ||
| 13 | + "keywords": [ | ||
| 14 | + "speech to text", | ||
| 15 | + "text to speech", | ||
| 16 | + "transcription", | ||
| 17 | + "real-time speech recognition", | ||
| 18 | + "without internet connect", | ||
| 19 | + "embedded systems", | ||
| 20 | + "open source", | ||
| 21 | + "zipformer", | ||
| 22 | + "asr", | ||
| 23 | + "tts", | ||
| 24 | + "stt", | ||
| 25 | + "c++", | ||
| 26 | + "onnxruntime", | ||
| 27 | + "onnx", | ||
| 28 | + "ai", | ||
| 29 | + "next-gen kaldi", | ||
| 30 | + "offline", | ||
| 31 | + "privacy", | ||
| 32 | + "open source", | ||
| 33 | + "streaming speech recognition", | ||
| 34 | + "speech", | ||
| 35 | + "recognition" | ||
| 36 | + ], | ||
| 37 | + "author": "The next-gen Kaldi team", | ||
| 38 | + "license": "Apache-2.0", | ||
| 39 | + "bugs": { | ||
| 40 | + "url": "https://github.com/k2-fsa/sherpa-onnx/issues" | ||
| 41 | + }, | ||
| 42 | + "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme", | ||
| 43 | + "dependencies": { | ||
| 44 | + "ffi-napi": "^4.0.3", | ||
| 45 | + "npm": "^6.14.18", | ||
| 46 | + "ref-array-napi": "^1.2.2", | ||
| 47 | + "ref-napi": "^3.0.3", | ||
| 48 | + "ref-struct-napi": "^1.1.1" | ||
| 49 | + } | ||
| 50 | +} |
scripts/nodejs/run.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | +set -ex | ||
| 3 | + | ||
| 4 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 5 | +SHERPA_ONNX_DIR=$(realpath $SCRIPT_DIR/../..) | ||
| 6 | +echo "SCRIPT_DIR: $SCRIPT_DIR" | ||
| 7 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 8 | + | ||
| 9 | +SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | ||
| 10 | + | ||
| 11 | +echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" | ||
| 12 | +sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g ./package.json.in | ||
| 13 | + | ||
| 14 | +cp package.json.in package.json | ||
| 15 | +rm package.json.in | ||
| 16 | +rm package.json.in.bak | ||
| 17 | +rm .clang-format | ||
| 18 | + | ||
| 19 | +function windows_x64() { | ||
| 20 | + echo "Process Windows (x64)" | ||
| 21 | + mkdir -p lib/win-x64 # index.js loads Windows x64 libs from lib/win-x64 | ||
| 22 | + dst=$(realpath lib/win-x64) | ||
| 23 | + mkdir t | ||
| 24 | + cd t | ||
| 25 | + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl | ||
| 26 | + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl | ||
| 27 | + | ||
| 28 | + cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst | ||
| 29 | + cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst | ||
| 30 | + rm -fv $dst/sherpa-onnx-portaudio.dll | ||
| 31 | + | ||
| 32 | + cd .. | ||
| 33 | + rm -rf t | ||
| 34 | +} | ||
| 35 | + | ||
| 36 | +function windows_x86() { | ||
| 37 | + echo "Process Windows (x86)" | ||
| 38 | + mkdir -p lib/win-x86 # index.js loads Windows x86 libs from lib/win-x86 | ||
| 39 | + dst=$(realpath lib/win-x86) | ||
| 40 | + mkdir t | ||
| 41 | + cd t | ||
| 42 | + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl | ||
| 43 | + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl | ||
| 44 | + | ||
| 45 | + cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst | ||
| 46 | + cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst | ||
| 47 | + rm -fv $dst/sherpa-onnx-portaudio.dll | ||
| 48 | + | ||
| 49 | + cd .. | ||
| 50 | + rm -rf t | ||
| 51 | +} | ||
| 52 | + | ||
| 53 | +function linux_x64() { | ||
| 54 | + echo "Process Linux (x64)" | ||
| 55 | + mkdir -p lib/linux-x64 | ||
| 56 | + dst=$(realpath lib/linux-x64) | ||
| 57 | + mkdir t | ||
| 58 | + cd t | ||
| 59 | + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl | ||
| 60 | + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl | ||
| 61 | + | ||
| 62 | + cp -v sherpa_onnx/lib/*.so* $dst | ||
| 63 | + rm -v $dst/libcargs.so | ||
| 64 | + rm -v $dst/libsherpa-onnx-portaudio.so | ||
| 65 | + rm -v $dst/libsherpa-onnx-fst.so | ||
| 66 | + rm -v $dst/libonnxruntime.so | ||
| 67 | + | ||
| 68 | + cd .. | ||
| 69 | + rm -rf t | ||
| 70 | +} | ||
| 71 | + | ||
| 72 | +function osx_x64() { | ||
| 73 | + echo "Process osx-x64" | ||
| 74 | + mkdir -p lib/osx-x64 | ||
| 75 | + dst=$(realpath lib/osx-x64) | ||
| 76 | + mkdir t | ||
| 77 | + cd t | ||
| 78 | + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_10_14_x86_64.whl | ||
| 79 | + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_10_14_x86_64.whl | ||
| 80 | + | ||
| 81 | + cp -v sherpa_onnx/lib/*.dylib $dst/ | ||
| 82 | + rm -v $dst/libonnxruntime.dylib | ||
| 83 | + rm -v $dst/libcargs.dylib | ||
| 84 | + rm -v $dst/libsherpa-onnx-fst.dylib | ||
| 85 | + rm -v $dst/libsherpa-onnx-portaudio.dylib | ||
| 86 | + | ||
| 87 | + cd .. | ||
| 88 | + rm -rf t | ||
| 89 | +} | ||
| 90 | + | ||
| 91 | +function osx_arm64() { | ||
| 92 | + echo "Process osx-arm64" | ||
| 93 | + mkdir -p lib/osx-arm64 | ||
| 94 | + dst=$(realpath lib/osx-arm64) | ||
| 95 | + mkdir t | ||
| 96 | + cd t | ||
| 97 | + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl | ||
| 98 | + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl | ||
| 99 | + | ||
| 100 | + cp -v sherpa_onnx/lib/*.dylib $dst/ | ||
| 101 | + rm -v $dst/libonnxruntime.dylib | ||
| 102 | + rm -v $dst/libcargs.dylib | ||
| 103 | + rm -v $dst/libsherpa-onnx-fst.dylib | ||
| 104 | + rm -v $dst/libsherpa-onnx-portaudio.dylib | ||
| 105 | + | ||
| 106 | + cd .. | ||
| 107 | + rm -rf t | ||
| 108 | +} | ||
| 109 | + | ||
| 110 | +windows_x64 | ||
| 111 | +ls -lh lib/windows-x64 | ||
| 112 | + | ||
| 113 | +windows_x86 | ||
| 114 | +ls -lh lib/windows-x86 | ||
| 115 | + | ||
| 116 | +linux_x64 | ||
| 117 | +ls -lh lib/linux-x64 | ||
| 118 | + | ||
| 119 | +osx_x64 | ||
| 120 | +ls -lh lib/osx-x64 | ||
| 121 | + | ||
| 122 | +osx_arm64 | ||
| 123 | +ls -lh lib/osx-arm64 |
| @@ -438,6 +438,10 @@ int32_t SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer) { | ||
| 438 | return buffer->impl->Size(); | ||
| 439 | } | ||
| 440 | | ||
| 441 | +int32_t SherpaOnnxCircularBufferHead(SherpaOnnxCircularBuffer *buffer) { | ||
| 442 | + return buffer->impl->Head(); | ||
| 443 | +} | ||
| 444 | + | ||
| 445 | void SherpaOnnxCircularBufferReset(SherpaOnnxCircularBuffer *buffer) { | ||
| 446 | buffer->impl->Reset(); | ||
| 447 | } | ||
| @@ -553,6 +557,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | ||
| 557 | tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); | ||
| 558 | tts_config.model.debug = config->model.debug; | ||
| 559 | tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); | ||
| 560 | + tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); | ||
| 561 | | ||
| 562 | if (tts_config.model.debug) { | ||
| 563 | fprintf(stderr, "%s\n", tts_config.ToString().c_str()); | ||
| @@ -130,10 +130,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult { | ||
| 130 | const char *text; | ||
| 131 | | ||
| 132 | // Pointer to continuous memory which holds string based tokens | ||
| 133 | - // which are seperated by \0 | ||
| 133 | + // which are separated by \0 | ||
| 134 | const char *tokens; | ||
| 135 | | ||
| 136 | - // a pointer array contains the address of the first item in tokens | ||
| 136 | + // a pointer array containing the address of the first item in tokens | ||
| 137 | const char *const *tokens_arr; | ||
| 138 | | ||
| 139 | // Pointer to continuous memory which holds timestamps | ||
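The two corrected comments describe a single layout: all tokens live in one contiguous block separated by \0, and tokens_arr[i] points at the start of the i-th token, so each entry is usable as an ordinary C string. A minimal sketch of consuming that layout (not part of the patch) is below; the token count is passed in as a parameter so the sketch stays independent of how a caller obtains it.

#include <stdint.h>
#include <stdio.h>

#include "sherpa-onnx/c-api/c-api.h"

// Walk the token list of a recognition result. `num_tokens` is supplied
// by the caller to keep this sketch self-contained.
static void PrintTokens(const SherpaOnnxOnlineRecognizerResult *r,
                        int32_t num_tokens) {
  for (int32_t i = 0; i != num_tokens; ++i) {
    // tokens_arr[i] points into the contiguous \0-separated `tokens`
    // block, so each entry is a regular NUL-terminated C string.
    fprintf(stderr, "token %d: %s\n", i, r->tokens_arr[i]);
  }
}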
| @@ -532,6 +532,11 @@ SHERPA_ONNX_API void SherpaOnnxCircularBufferPop( | ||
| 532 | SHERPA_ONNX_API int32_t | ||
| 533 | SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer); | ||
| 534 | | ||
| 535 | +// Return the head of the buffer. It's always non-decreasing until you | ||
| 536 | +// invoke SherpaOnnxCircularBufferReset(), which resets the head to 0. | ||
| 537 | +SHERPA_ONNX_API int32_t | ||
| 538 | +SherpaOnnxCircularBufferHead(SherpaOnnxCircularBuffer *buffer); | ||
| 539 | + | ||
| 540 | // Clear all elements in the buffer | ||
| 541 | SHERPA_ONNX_API void SherpaOnnxCircularBufferReset( | ||
| 542 | SherpaOnnxCircularBuffer *buffer); | ||
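The new accessor is easiest to read next to the rest of the circular-buffer API. Below is a minimal sketch using the existing create/push/pop/get calls from this header; the concrete head and size values in the comments are assumptions that follow the new doc comment, on the reading that Pop() is what advances the head.

#include <stdint.h>

#include "sherpa-onnx/c-api/c-api.h"

int main() {
  SherpaOnnxCircularBuffer *buf = SherpaOnnxCreateCircularBuffer(16);

  float samples[8] = {0.0f};
  SherpaOnnxCircularBufferPush(buf, samples, 8);

  // Discard the 3 oldest samples; the head moves forward and never
  // goes back until Reset() is called.
  SherpaOnnxCircularBufferPop(buf, 3);

  int32_t head = SherpaOnnxCircularBufferHead(buf);  // 3 (popped so far)
  int32_t size = SherpaOnnxCircularBufferSize(buf);  // 5 (still stored)

  // The head is an absolute, non-decreasing index, so it can serve as
  // the start index when reading the remaining samples back out.
  const float *p = SherpaOnnxCircularBufferGet(buf, head, size);
  SherpaOnnxCircularBufferFree(p);

  SherpaOnnxCircularBufferReset(buf);  // clears data; head goes back to 0
  SherpaOnnxDestroyCircularBuffer(buf);
  return 0;
}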
| @@ -617,6 +622,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { | ||
| 622 | | ||
| 623 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { | ||
| 624 | SherpaOnnxOfflineTtsModelConfig model; | ||
| 625 | + const char *rule_fsts; | ||
| 626 | } SherpaOnnxOfflineTtsConfig; | ||
| 627 | | ||
| 628 | SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio { | ||
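The new rule_fsts field threads rule FSTs for text normalization into the TTS pipeline; the matching line in c-api.cc above defaults it to an empty string. Here is a hedged sketch of setting it: the model and FST paths are placeholders, and the comma-separated format for multiple FSTs is an assumption carried over from how rule_fsts behaves elsewhere in sherpa-onnx.

#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int main() {
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));

  // Placeholder paths; substitute a real VITS model directory.
  config.model.vits.model = "./vits-zh-aishell3/vits-aishell3.onnx";
  config.model.vits.lexicon = "./vits-zh-aishell3/lexicon.txt";
  config.model.vits.tokens = "./vits-zh-aishell3/tokens.txt";

  // New in this patch: rule FSTs applied to the input text before
  // synthesis (e.g. reading "2023" out as words). Multiple paths are
  // assumed to be given as a comma-separated list.
  config.rule_fsts = "./vits-zh-aishell3/rule.fst";

  SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerate(tts, "今天是2023年10月1日", 0, 1.0f);

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);
  return 0;
}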
| @@ -457,7 +457,7 @@ class OnlineRecognizerParaformerImpl : public OnlineRecognizerImpl { | ||
| 457 | // (61 - 7) / 6 + 1 = 10 | ||
| 458 | | ||
| 459 | int32_t left_chunk_size_ = 5; | ||
| 460 | - int32_t right_chunk_size_ = 5; | ||
| 460 | + int32_t right_chunk_size_ = 2; | ||
| 461 | }; | ||
| 462 | | ||
| 463 | } // namespace sherpa_onnx | ||
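Separately from the API additions, this last hunk shrinks the right context of the streaming Paraformer from 5 chunks to 2. The arithmetic in the surviving comment is ordinary sliding-window output-length math; the small sketch below only restates it, with window 7 and stride 6 read off the comment (no claim is made about other parts of the model).

#include <stdint.h>
#include <stdio.h>

// Output-frame count for a sliding window over a feature sequence,
// matching the "(61 - 7) / 6 + 1 = 10" comment above.
static int32_t NumOutputFrames(int32_t num_input, int32_t window,
                               int32_t stride) {
  return (num_input - window) / stride + 1;
}

int main() {
  printf("%d\n", NumOutputFrames(61, 7, 6));  // prints 10
  return 0;
}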