Fangjun Kuang
Committed by GitHub

support nodejs (#438)

  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +echo "dir: $d"
  6 +cd "$d"
  7 +npm install
  8 +git status
  9 +ls -lh
  10 +ls -lh node_modules
  11 +
  12 +# offline asr
  13 +
  14 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
  15 +tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
  16 +rm sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
  17 +node ./test-offline-nemo-ctc.js
  18 +rm -rf sherpa-onnx-nemo-ctc-en-conformer-small
  19 +
  20 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  21 +tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  22 +rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  23 +node ./test-offline-paraformer.js
  24 +rm -rf sherpa-onnx-paraformer-zh-2023-03-28
  25 +
  26 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
  27 +tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
  28 +rm sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
  29 +node ./test-offline-transducer.js
  30 +rm -rf sherpa-onnx-zipformer-en-2023-06-26
  31 +
  32 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  33 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  34 +rm sherpa-onnx-whisper-tiny.en.tar.bz2
  35 +node ./test-offline-whisper.js
  36 +rm -rf sherpa-onnx-whisper-tiny.en
  37 +
  38 +# online asr
  39 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  40 +tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  41 +rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  42 +node ./test-online-paraformer.js
  43 +rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
  44 +
  45 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  46 +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  47 +rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  48 +node ./test-online-transducer.js
  49 +rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  50 +
  51 +# offline tts
  52 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
  53 +tar xvf vits-vctk.tar.bz2
  54 +rm vits-vctk.tar.bz2
  55 +node ./test-offline-tts-en.js
  56 +rm -rf vits-vctk
  57 +
  58 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
  59 +tar xvf vits-zh-aishell3.tar.bz2
  60 +rm vits-zh-aishell3.tar.bz2
  61 +node ./test-offline-tts-zh.js
  62 +rm -rf vits-zh-aishell3
@@ -4,6 +4,7 @@ on:
4   push:
5     branches:
6       - dot-net
  7 +      - fix-dot-net
8     tags:
9       - '*'
10
  1 +name: npm
  2 +
  3 +on:
  4 + workflow_dispatch:
  5 +
  6 +concurrency:
  7 + group: npm-${{ github.ref }}
  8 + cancel-in-progress: true
  9 +
  10 +permissions:
  11 + contents: read
  12 +
  13 +jobs:
  14 + nodejs:
  15 + runs-on: ${{ matrix.os }}
  16 + strategy:
  17 + fail-fast: false
  18 + matrix:
  19 + os: [ubuntu-latest]
  20 + python-version: ["3.8"]
  21 +
  22 + steps:
  23 + - uses: actions/checkout@v2
  24 + with:
  25 + fetch-depth: 0
  26 +
  27 + - name: Setup Python ${{ matrix.python-version }}
  28 + uses: actions/setup-python@v2
  29 + with:
  30 + python-version: ${{ matrix.python-version }}
  31 +
  32 + - uses: actions/setup-node@v3
  33 + with:
  34 + node-version: 13
  35 + registry-url: 'https://registry.npmjs.org'
  36 +
  37 + - name: Display node version
  38 + shell: bash
  39 + run: |
  40 + node --version
  41 + npm --version
  42 + cd nodejs-examples
  43 +
  44 + npm install npm@6.14.4 -g
  45 + npm install npm@6.14.4
  46 + npm --version
  47 +
  48 + - name: Build nodejs package
  49 + shell: bash
  50 + env:
  51 + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
  52 + run: |
  53 + cd scripts/nodejs
  54 + ./run.sh
  55 + npm install
  56 + rm run.sh
  57 + npm ci
  58 + npm publish --provenance --access public
  1 +name: test-nodejs-npm
  2 +
  3 +on:
  4 + workflow_dispatch:
  5 +
  6 + schedule:
  7 + # minute (0-59)
  8 + # hour (0-23)
  9 + # day of the month (1-31)
  10 + # month (1-12)
  11 + # day of the week (0-6)
  12 + # nightly build at 23:50 UTC time every day
  13 + - cron: "50 23 * * *"
  14 +
  15 +concurrency:
  16 + group: test-nodejs-npm-${{ github.ref }}
  17 + cancel-in-progress: true
  18 +
  19 +permissions:
  20 + contents: read
  21 +
  22 +jobs:
  23 + test-nodejs-npm:
  24 + runs-on: ${{ matrix.os }}
  25 + strategy:
  26 + fail-fast: false
  27 + matrix:
  28 + os: [ubuntu-latest, macos-latest] #, windows-latest]
  29 + python-version: ["3.8"]
  30 +
  31 + steps:
  32 + - uses: actions/checkout@v4
  33 + with:
  34 + fetch-depth: 0
  35 +
  36 + - name: Setup Python ${{ matrix.python-version }}
  37 + uses: actions/setup-python@v2
  38 + with:
  39 + python-version: ${{ matrix.python-version }}
  40 +
  41 + - uses: actions/setup-node@v3
  42 + with:
  43 + node-version: 13
  44 + registry-url: 'https://registry.npmjs.org'
  45 +
  46 + - name: Display node version
  47 + shell: bash
  48 + run: |
  49 + node --version
  50 + npm --version
  51 +
  52 + - name: Run tests
  53 + shell: bash
  54 + run: |
  55 + node --version
  56 + npm --version
  57 +
  58 + export d=nodejs-examples
  59 + ./.github/scripts/test-nodejs-npm.sh
  1 +name: test-nodejs
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - master
  7 +
  8 + pull_request:
  9 + branches:
  10 + - master
  11 +
  12 + workflow_dispatch:
  13 +
  14 +concurrency:
  15 + group: test-nodejs-${{ github.ref }}
  16 + cancel-in-progress: true
  17 +
  18 +permissions:
  19 + contents: read
  20 +
  21 +jobs:
  22 + test-nodejs:
  23 + runs-on: ${{ matrix.os }}
  24 + strategy:
  25 + fail-fast: false
  26 + matrix:
  27 + os: [ubuntu-latest, macos-latest] #, windows-latest]
  28 + python-version: ["3.8"]
  29 +
  30 + steps:
  31 + - uses: actions/checkout@v4
  32 + with:
  33 + fetch-depth: 0
  34 +
  35 + - name: ccache
  36 + uses: hendrikmuhs/ccache-action@v1.2
  37 + with:
  38 + key: ${{ matrix.os }}-Release-ON
  39 +
  40 + - name: Configure CMake
  41 + shell: bash
  42 + run: |
  43 + export CMAKE_CXX_COMPILER_LAUNCHER=ccache
  44 + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
  45 + cmake --version
  46 +
  47 + mkdir build
  48 + cd build
  49 + cmake -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install ..
  50 + make -j2
  51 + make install
  52 + ls -lh install/lib
  53 +
  54 + - name: Setup Python ${{ matrix.python-version }}
  55 + uses: actions/setup-python@v2
  56 + with:
  57 + python-version: ${{ matrix.python-version }}
  58 +
  59 + - name: Copy files
  60 + shell: bash
  61 + run: |
  62 + os=${{ matrix.os }}
  63 + if [[ $os == 'ubuntu-latest' ]]; then
  64 + mkdir -p scripts/nodejs/lib/linux-x64
  65 + dst=scripts/nodejs/lib/linux-x64
  66 + elif [[ $os == 'macos-latest' ]]; then
  67 + mkdir -p scripts/nodejs/lib/osx-x64
  68 + dst=scripts/nodejs/lib/osx-x64
  69 + fi
  70 + cp -v build/install/lib/* $dst/
  71 +
  72 + - name: replace files
  73 + shell: bash
  74 + run: |
  75 + cd nodejs-examples
  76 + files=$(ls -1 *.js)
  77 + for f in ${files[@]}; do
  78 + echo $f
  79 + sed -i.bak s%\'sherpa-onnx\'%\'./index.js\'% $f
  80 + git status
  81 + done
  82 + git diff
  83 + cp *.js ../scripts/nodejs
  84 +
  85 + - uses: actions/setup-node@v3
  86 + with:
  87 + node-version: 13
  88 + registry-url: 'https://registry.npmjs.org'
  89 +
  90 + - name: Display node version
  91 + shell: bash
  92 + run: |
  93 + node --version
  94 + npm --version
  95 +
  96 + - name: Run tests
  97 + shell: bash
  98 + run: |
  99 + node --version
  100 + npm --version
  101 + export d=scripts/nodejs
  102 +
  103 + pushd $d
  104 + npm install
  105 + npm install wav
  106 + popd
  107 +
  108 + ./.github/scripts/test-nodejs-npm.sh
@@ -67,3 +67,6 @@ swift-api-examples/k2fsa-*
67 run-*.sh
68 two-pass-*.sh
69 build-*
  70 +vits-vctk
  71 +vits-zh-aishell3
  72 +jslint.mjs
1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2 project(sherpa-onnx)
3
  4 -set(SHERPA_ONNX_VERSION "1.8.10")
  4 +set(SHERPA_ONNX_VERSION "1.8.11")
5
6 # Disable warning about
7 #
  1 +node_modules
  2 +package-lock.json
  1 +# Introduction
  2 +
  3 +This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
  4 +
  5 +Before you continue, please first install the npm package `sherpa-onnx` by
  6 +
  7 +```bash
  8 +npm install sherpa-onnx
  9 +```
  10 +
  11 +In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)
  12 +for text-to-speech and speech-to-text.
  13 +
  14 +**Caution**: If you get the following error:
  15 +```
  16 +/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67
  17 + if (match = err.match(/^(([^ \t()])+\.so([^ \t:()])*):([ \t])*/)) {
  18 + ^
  19 +
  20 +TypeError: Cannot read properties of null (reading 'match')
  21 + at new DynamicLibrary (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67:21)
  22 + at Object.Library (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/library.js:47:10)
  23 + at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/sherpa-onnx3/index.js:268:28)
  24 + at Module._compile (node:internal/modules/cjs/loader:1376:14)
  25 + at Module._extensions..js (node:internal/modules/cjs/loader:1435:10)
  26 + at Module.load (node:internal/modules/cjs/loader:1207:32)
  27 + at Module._load (node:internal/modules/cjs/loader:1023:12)
  28 + at Module.require (node:internal/modules/cjs/loader:1235:19)
  29 + at require (node:internal/modules/helpers:176:18)
  30 + at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/test-offline-tts-zh.js:3:21)
  31 +```
  32 +
  33 +Please downgrade Node.js to version v13.14.0. See also
  34 +https://github.com/node-ffi-napi/node-ffi-napi/issues/244
  35 +and
  36 +https://github.com/node-ffi-napi/node-ffi-napi/issues/97 .
  37 +
  38 +# Text-to-speech
  39 +
  40 +In the following, we demonstrate how to run text-to-speech.
  41 +
  42 +## ./test-offline-tts-en.js
  43 +
  44 +[./test-offline-tts-en.js](./test-offline-tts-en.js) shows how to use
  45 +a VITS pretrained model
  46 +[VCTK](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers)
  47 +for text-to-speech.
  48 +
  49 +You can use the following command to run it:
  50 +
  51 +```bash
  52 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
  53 +tar xvf vits-vctk.tar.bz2
  54 +node ./test-offline-tts-en.js
  55 +```
  56 +
  57 +## ./test-offline-tts-zh.js
  58 +
  59 +[./test-offline-tts-zh.js](./test-offline-tts-zh.js) shows how to use
  60 +a VITS pretrained model
  61 +[aishell3](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3)
  62 +for text-to-speech.
  63 +
  64 +You can use the following command to run it:
  65 +
  66 +```bash
  67 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
  68 +tar xvf vits-zh-aishell3.tar.bz2
  69 +node ./test-offline-tts-zh.js
  70 +```
  71 +
  72 +# Speech-to-text
  73 +
  74 +In the following, we demonstrate how to decode files and how to perform
  75 +speech recognition from a microphone using `nodejs`. We need to install two
  76 +additional npm packages:
  77 +
  78 +
  79 +```bash
  80 +npm install wav naudiodon2
  81 +```
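
All of the file-decoding examples below share the same input pipeline: the
wave file is piped through `wav.Reader`, each 16-bit PCM chunk is converted
to a `Float32Array` normalized to `[-1, 1]`, and the samples are fed to the
recognizer. For reference, this is the conversion step used by the examples:

```js
// Convert a 16-bit PCM chunk (a Buffer emitted by wav.Reader) into a
// Float32Array with samples normalized to [-1, 1].
function toFloat32(chunk) {
  const int16Samples = new Int16Array(
      chunk.buffer, chunk.byteOffset,
      chunk.length / Int16Array.BYTES_PER_ELEMENT);

  const floatSamples = new Float32Array(int16Samples.length);
  for (let i = 0; i < floatSamples.length; i++) {
    floatSamples[i] = int16Samples[i] / 32768.0;
  }
  return floatSamples;
}
```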
  82 +
  83 +## ./test-offline-nemo-ctc.js
  84 +
  85 +[./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates
  86 +how to decode a file with a NeMo CTC model. In the code we use
  87 +[stt_en_conformer_ctc_small](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/english.html#stt-en-conformer-ctc-small).
  88 +
  89 +You can use the following command to run it:
  90 +
  91 +```bash
  92 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
  93 +tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
  94 +node ./test-offline-nemo-ctc.js
  95 +```
  96 +
  97 +## ./test-offline-paraformer.js
  98 +
  99 +[./test-offline-paraformer.js](./test-offline-paraformer.js) demonstrates
  100 +how to decode a file with a non-streaming Paraformer model. In the code we use
  101 +[sherpa-onnx-paraformer-zh-2023-03-28](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese).
  102 +
  103 +You can use the following command to run it:
  104 +
  105 +```bash
  106 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  107 +tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  108 +node ./test-offline-paraformer.js
  109 +```
  110 +
  111 +## ./test-offline-transducer.js
  112 +
  113 +[./test-offline-transducer.js](./test-offline-transducer.js) demonstrates
  114 +how to decode a file with a non-streaming transducer model. In the code we use
  115 +[sherpa-onnx-zipformer-en-2023-06-26](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-zipformer-en-2023-06-26-english).
  116 +
  117 +You can use the following command to run it:
  118 +
  119 +```bash
  120 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
  121 +tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
  122 +node ./test-offline-transducer.js
  123 +```
  124 +
  125 +## ./test-offline-whisper.js
  126 +[./test-offline-whisper.js](./test-offline-whisper.js) demonstrates
  127 +how to decode a file with a Whisper model. In the code we use
  128 +[sherpa-onnx-whisper-tiny.en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html).
  129 +
  130 +You can use the following command to run it:
  131 +
  132 +```bash
  133 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  134 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  135 +node ./test-offline-whisper.js
  136 +```
  137 +
  138 +## ./test-online-paraformer-microphone.js
  139 +[./test-online-paraformer-microphone.js](./test-online-paraformer-microphone.js)
  140 +demonstrates how to do real-time speech recognition from a microphone
  141 +with a streaming Paraformer model. In the code we use
  142 +[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).
  143 +
  144 +You can use the following command to run it:
  145 +
  146 +```bash
  147 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  148 +tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  149 +node ./test-online-paraformer-microphone.js
  150 +```
  151 +
  152 +## ./test-online-paraformer.js
  153 +[./test-online-paraformer.js](./test-online-paraformer.js) demonstrates
  154 +how to decode a file using a streaming Paraformer model. In the code we use
  155 +[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).
  156 +
  157 +You can use the following command to run it:
  158 +
  159 +```bash
  160 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  161 +tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  162 +node ./test-online-paraformer.js
  163 +```
  164 +
  165 +## ./test-online-transducer-microphone.js
  166 +[./test-online-transducer-microphone.js](./test-online-transducer-microphone.js)
  167 +demonstrates how to do real-time speech recognition with a microphone using a streaming transducer model. In the code
  168 +we use [sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english).
  169 +
  170 +
  171 +You can use the following command to run it:
  172 +
  173 +```bash
  174 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  175 +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  176 +node ./test-online-transducer-microphone.js
  177 +```
  178 +
  179 +## ./test-online-transducer.js
  180 +[./test-online-transducer.js](./test-online-transducer.js) demonstrates
  181 +how to decode a file using a streaming transducer model. In the code
  182 +we use [sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english).
  183 +
  184 +You can use the following command to run it:
  185 +
  186 +```bash
  187 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  188 +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  189 +node ./test-online-transducer.js
  190 +```
  191 +
  192 +## ./test-vad-microphone-offline-paraformer.js
  193 +
  194 +[./test-vad-microphone-offline-paraformer.js](./test-vad-microphone-offline-paraformer.js)
  195 +demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
  196 +with a non-streaming Paraformer model for speech recognition from a microphone.
  197 +
  198 +You can use the following command to run it:
  199 +
  200 +```bash
  201 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  202 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  203 +tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  204 +node ./test-vad-microphone-offline-paraformer.js
  205 +```
  206 +
  207 +## ./test-vad-microphone-offline-transducer.js
  208 +
  209 +[./test-vad-microphone-offline-transducer.js](./test-vad-microphone-offline-transducer.js)
  210 +demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
  211 +with a non-streaming transducer model for speech recognition from a microphone.
  212 +
  213 +You can use the following command to run it:
  214 +
  215 +```bash
  216 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  217 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
  218 +tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
  219 +node ./test-vad-microphone-offline-transducer.js
  220 +```
  221 +
  222 +## ./test-vad-microphone-offline-whisper.js
  223 +
  224 +[./test-vad-microphone-offline-whisper.js](./test-vad-microphone-offline-whisper.js)
  225 +demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
  226 +with Whisper for speech recognition from a microphone.
  227 +
  228 +You can use the following command to run it:
  229 +
  230 +```bash
  231 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  232 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  233 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  234 +node ./test-vad-microphone-offline-whisper.js
  235 +```
  236 +
  237 +## ./test-vad-microphone.js
  238 +
  239 +[./test-vad-microphone.js](./test-vad-microphone.js)
  240 +demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad).
  241 +
  242 +You can use the following command to run it:
  243 +
  244 +```bash
  245 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  246 +node ./test-vad-microphone.js
  247 +```
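
All the VAD examples above share the same processing loop: microphone
samples are pushed into a `CircularBuffer`, fixed-size windows are fed to
the detector, and completed speech segments are drained from its queue.
Below is a sketch of that loop extracted from the examples; the
`onSegment` callback is hypothetical and stands in for whatever you do with
a detected utterance, e.g. running a recognizer on `segment.samples`:

```js
// Push one chunk of microphone data through the VAD and report any
// completed speech segments. `vad` is a sherpa_onnx.VoiceActivityDetector
// and `buffer` a sherpa_onnx.CircularBuffer, created as in the examples.
function processChunk(vad, buffer, data, onSegment) {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));

  // The detector consumes fixed-size windows.
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  // Drain segments that the VAD has finished detecting.
  while (!vad.isEmpty()) {
    const segment = vad.front();  // segment.samples holds one utterance
    vad.pop();
    onSegment(segment);
  }
}
```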
  1 +{
  2 + "dependencies": {
  3 + "naudiodon2": "^2.4.0",
  4 + "sherpa-onnx": "^1.8.11",
  5 + "wav": "^1.0.2"
  6 + }
  7 +}
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createRecognizer() {
  10 + const featConfig = new sherpa_onnx.FeatureConfig();
  11 + featConfig.sampleRate = 16000;
  12 + featConfig.featureDim = 80;
  13 +
  14 + // test offline recognizer
  15 + const nemoCtc = new sherpa_onnx.OfflineNemoEncDecCtcModelConfig();
  16 + nemoCtc.model = './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx';
  17 + const tokens = './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt';
  18 +
  19 + const modelConfig = new sherpa_onnx.OfflineModelConfig();
  20 + modelConfig.nemoCtc = nemoCtc;
  21 + modelConfig.tokens = tokens;
  22 + modelConfig.modelType = 'nemo_ctc';
  23 +
  24 + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  25 + recognizerConfig.featConfig = featConfig;
  26 + recognizerConfig.modelConfig = modelConfig;
  27 + recognizerConfig.decodingMethod = 'greedy_search';
  28 +
  29 + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  30 + return recognizer;
  31 +}
  32 +
  33 +const recognizer = createRecognizer();
  34 +const stream = recognizer.createStream();
  35 +
  36 +const waveFilename =
  37 + './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';
  38 +
  39 +const reader = new wav.Reader();
  40 +const readable = new Readable().wrap(reader);
  41 +const buf = [];
  42 +
  43 +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  44 + if (sampleRate != recognizer.config.featConfig.sampleRate) {
  45 + throw new Error(`Only support sampleRate ${
  46 + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  47 + }
  48 +
  49 + if (audioFormat != 1) {
  50 + throw new Error(`Only support PCM format. Given ${audioFormat}`);
  51 + }
  52 +
  53 + if (channels != 1) {
  54 + throw new Error(`Only a single channel is supported. Given ${channels}`);
  55 + }
  56 +
  57 + if (bitDepth != 16) {
  58 + throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  59 + }
  60 +});
  61 +
  62 +fs.createReadStream(waveFilename, {highWaterMark: 4096})
  63 + .pipe(reader)
  64 + .on('finish', function(err) {
  65 + // tail padding
  66 + const floatSamples =
  67 + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
  68 +
  69 + buf.push(floatSamples);
  70 + const flattened =
  71 + Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
  72 +
  73 + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
  74 + recognizer.decode(stream);
  75 + const r = recognizer.getResult(stream);
  76 + console.log(r.text);
  77 +
  78 + stream.free();
  79 + recognizer.free();
  80 + });
  81 +
  82 +readable.on('readable', function() {
  83 + let chunk;
  84 + while ((chunk = readable.read()) != null) {
  85 + const int16Samples = new Int16Array(
  86 + chunk.buffer, chunk.byteOffset,
  87 + chunk.length / Int16Array.BYTES_PER_ELEMENT);
  88 +
  89 + const floatSamples = new Float32Array(int16Samples.length);
  90 +
  91 + for (let i = 0; i < floatSamples.length; i++) {
  92 + floatSamples[i] = int16Samples[i] / 32768.0;
  93 + }
  94 +
  95 + buf.push(floatSamples);
  96 + }
  97 +});
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createRecognizer() {
  10 + const featConfig = new sherpa_onnx.FeatureConfig();
  11 + featConfig.sampleRate = 16000;
  12 + featConfig.featureDim = 80;
  13 +
  14 + // test offline recognizer
  15 + const paraformer = new sherpa_onnx.OfflineParaformerModelConfig();
  16 + paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.onnx';
  17 + const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt';
  18 +
  19 + const modelConfig = new sherpa_onnx.OfflineModelConfig();
  20 + modelConfig.paraformer = paraformer;
  21 + modelConfig.tokens = tokens;
  22 + modelConfig.modelType = 'paraformer';
  23 +
  24 + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  25 + recognizerConfig.featConfig = featConfig;
  26 + recognizerConfig.modelConfig = modelConfig;
  27 + recognizerConfig.decodingMethod = 'greedy_search';
  28 +
  29 + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  30 + return recognizer;
  31 +}
  32 +
  33 +const recognizer = createRecognizer();
  34 +const stream = recognizer.createStream();
  35 +
  36 +const waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav';
  37 +
  38 +const reader = new wav.Reader();
  39 +const readable = new Readable().wrap(reader);
  40 +const buf = [];
  41 +
  42 +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  43 + if (sampleRate != recognizer.config.featConfig.sampleRate) {
  44 + throw new Error(`Only support sampleRate ${
  45 + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  46 + }
  47 +
  48 + if (audioFormat != 1) {
  49 + throw new Error(`Only support PCM format. Given ${audioFormat}`);
  50 + }
  51 +
  52 + if (channels != 1) {
  53 + throw new Error(`Only a single channel is supported. Given ${channels}`);
  54 + }
  55 +
  56 + if (bitDepth != 16) {
  57 + throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  58 + }
  59 +});
  60 +
  61 +fs.createReadStream(waveFilename, {'highWaterMark': 4096})
  62 + .pipe(reader)
  63 + .on('finish', function(err) {
  64 + // tail padding
  65 + const floatSamples =
  66 + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
  67 +
  68 + buf.push(floatSamples);
  69 + const flattened =
  70 + Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
  71 +
  72 + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
  73 + recognizer.decode(stream);
  74 + const r = recognizer.getResult(stream);
  75 + console.log(r.text);
  76 +
  77 + stream.free();
  78 + recognizer.free();
  79 + });
  80 +
  81 +readable.on('readable', function() {
  82 + let chunk;
  83 + while ((chunk = readable.read()) != null) {
  84 + const int16Samples = new Int16Array(
  85 + chunk.buffer, chunk.byteOffset,
  86 + chunk.length / Int16Array.BYTES_PER_ELEMENT);
  87 +
  88 + const floatSamples = new Float32Array(int16Samples.length);
  89 + for (let i = 0; i < floatSamples.length; i++) {
  90 + floatSamples[i] = int16Samples[i] / 32768.0;
  91 + }
  92 +
  93 + buf.push(floatSamples);
  94 + }
  95 +});
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createRecognizer() {
  10 + const featConfig = new sherpa_onnx.FeatureConfig();
  11 + featConfig.sampleRate = 16000;
  12 + featConfig.featureDim = 80;
  13 +
  14 + // test offline recognizer
  15 + const transducer = new sherpa_onnx.OfflineTransducerModelConfig();
  16 + transducer.encoder =
  17 + './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx';
  18 + transducer.decoder =
  19 + './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx';
  20 + transducer.joiner =
  21 + './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx';
  22 + const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt';
  23 +
  24 + const modelConfig = new sherpa_onnx.OfflineModelConfig();
  25 + modelConfig.transducer = transducer;
  26 + modelConfig.tokens = tokens;
  27 + modelConfig.modelType = 'transducer';
  28 +
  29 + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  30 + recognizerConfig.featConfig = featConfig;
  31 + recognizerConfig.modelConfig = modelConfig;
  32 + recognizerConfig.decodingMethod = 'greedy_search';
  33 +
  34 + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  35 + return recognizer;
  36 +}
  37 +
  38 +const recognizer = createRecognizer();
  39 +const stream = recognizer.createStream();
  40 +
  41 +const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
  42 +
  43 +const reader = new wav.Reader();
  44 +const readable = new Readable().wrap(reader);
  45 +const buf = [];
  46 +
  47 +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  48 + if (sampleRate != recognizer.config.featConfig.sampleRate) {
  49 + throw new Error(`Only support sampleRate ${
  50 + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  51 + }
  52 +
  53 + if (audioFormat != 1) {
  54 + throw new Error(`Only support PCM format. Given ${audioFormat}`);
  55 + }
  56 +
  57 + if (channels != 1) {
  58 + throw new Error(`Only a single channel is supported. Given ${channels}`);
  59 + }
  60 +
  61 + if (bitDepth != 16) {
  62 + throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  63 + }
  64 +});
  65 +
  66 +fs.createReadStream(waveFilename, {'highWaterMark': 4096})
  67 + .pipe(reader)
  68 + .on('finish', function(err) {
  69 + // tail padding
  70 + const floatSamples =
  71 + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
  72 +
  73 + buf.push(floatSamples);
  74 + const flattened =
  75 + Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
  76 +
  77 + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
  78 + recognizer.decode(stream);
  79 + const r = recognizer.getResult(stream);
  80 + console.log(r.text);
  81 +
  82 + stream.free();
  83 + recognizer.free();
  84 + });
  85 +
  86 +readable.on('readable', function() {
  87 + let chunk;
  88 + while ((chunk = readable.read()) != null) {
  89 + const int16Samples = new Int16Array(
  90 + chunk.buffer, chunk.byteOffset,
  91 + chunk.length / Int16Array.BYTES_PER_ELEMENT);
  92 +
  93 + const floatSamples = new Float32Array(int16Samples.length);
  94 + for (let i = 0; i < floatSamples.length; i++) {
  95 + floatSamples[i] = int16Samples[i] / 32768.0;
  96 + }
  97 +
  98 + buf.push(floatSamples);
  99 + }
  100 +});
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const sherpa_onnx = require('sherpa-onnx');
  4 +
  5 +function createOfflineTts() {
  6 + const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
  7 + vits.model = './vits-vctk/vits-vctk.onnx';
  8 + vits.lexicon = './vits-vctk/lexicon.txt';
  9 + vits.tokens = './vits-vctk/tokens.txt';
  10 +
  11 + const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
  12 + modelConfig.vits = vits;
  13 +
  14 + const config = new sherpa_onnx.OfflineTtsConfig();
  15 + config.model = modelConfig;
  16 +
  17 + return new sherpa_onnx.OfflineTts(config);
  18 +}
  19 +
  20 +const tts = createOfflineTts();
  21 +const speakerId = 99;
  22 +const speed = 1.0;
  23 +const audio =
  24 + tts.generate('Good morning. How are you doing?', speakerId, speed);
  25 +audio.save('./test-en.wav');
  26 +console.log('Saved to test-en.wav successfully.');
  27 +tts.free();
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const sherpa_onnx = require('sherpa-onnx');
  4 +
  5 +function createOfflineTts() {
  6 + const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
  7 + vits.model = './vits-zh-aishell3/vits-aishell3.onnx';
  8 + vits.lexicon = './vits-zh-aishell3/lexicon.txt';
  9 + vits.tokens = './vits-zh-aishell3/tokens.txt';
  10 +
  11 + const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
  12 + modelConfig.vits = vits;
  13 +
  14 + const config = new sherpa_onnx.OfflineTtsConfig();
  15 + config.model = modelConfig;
  16 + config.ruleFsts = './vits-zh-aishell3/rule.fst';
  17 +
  18 + return new sherpa_onnx.OfflineTts(config);
  19 +}
  20 +
  21 +const tts = createOfflineTts();
  22 +const speakerId = 66;
  23 +const speed = 1.0;
  24 +const audio = tts.generate('3年前中国总人口是1411778724人', speakerId, speed);
  25 +audio.save('./test-zh.wav');
  26 +console.log('Saved to test-zh.wav successfully.');
  27 +tts.free();
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createRecognizer() {
  10 + const featConfig = new sherpa_onnx.FeatureConfig();
  11 + featConfig.sampleRate = 16000;
  12 + featConfig.featureDim = 80;
  13 +
  14 + // test offline recognizer
  15 + const whisper = new sherpa_onnx.OfflineWhisperModelConfig();
  16 + whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
  17 + whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';
  18 + const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
  19 +
  20 + const modelConfig = new sherpa_onnx.OfflineModelConfig();
  21 + modelConfig.whisper = whisper;
  22 + modelConfig.tokens = tokens;
  23 + modelConfig.modelType = 'whisper';
  24 +
  25 + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  26 + recognizerConfig.featConfig = featConfig;
  27 + recognizerConfig.modelConfig = modelConfig;
  28 + recognizerConfig.decodingMethod = 'greedy_search';
  29 +
  30 + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  31 + return recognizer;
  32 +}
  33 +
  34 +const recognizer = createRecognizer();
  35 +const stream = recognizer.createStream();
  36 +
  37 +const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
  38 +
  39 +const reader = new wav.Reader();
  40 +const readable = new Readable().wrap(reader);
  41 +const buf = [];
  42 +
  43 +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  44 + if (sampleRate != recognizer.config.featConfig.sampleRate) {
  45 + throw new Error(`Only support sampleRate ${
  46 + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  47 + }
  48 +
  49 + if (audioFormat != 1) {
  50 + throw new Error(`Only support PCM format. Given ${audioFormat}`);
  51 + }
  52 +
  53 + if (channels != 1) {
  54 + throw new Error(`Only a single channel is supported. Given ${channels}`);
  55 + }
  56 +
  57 + if (bitDepth != 16) {
  58 + throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  59 + }
  60 +});
  61 +
  62 +fs.createReadStream(waveFilename, {'highWaterMark': 4096})
  63 + .pipe(reader)
  64 + .on('finish', function(err) {
  65 + // tail padding
  66 + const floatSamples =
  67 + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
  68 +
  69 + buf.push(floatSamples);
  70 + const flattened =
  71 + Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
  72 +
  73 + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
  74 + recognizer.decode(stream);
  75 + const r = recognizer.getResult(stream);
  76 + console.log(r.text);
  77 +
  78 + stream.free();
  79 + recognizer.free();
  80 + });
  81 +
  82 +readable.on('readable', function() {
  83 + let chunk;
  84 + while ((chunk = readable.read()) != null) {
  85 + const int16Samples = new Int16Array(
  86 + chunk.buffer, chunk.byteOffset,
  87 + chunk.length / Int16Array.BYTES_PER_ELEMENT);
  88 +
  89 + const floatSamples = new Float32Array(int16Samples.length);
  90 +
  91 + for (let i = 0; i < floatSamples.length; i++) {
  92 + floatSamples[i] = int16Samples[i] / 32768.0;
  93 + }
  94 +
  95 + buf.push(floatSamples);
  96 + }
  97 +});
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const portAudio = require('naudiodon2');
  4 +console.log(portAudio.getDevices());
  5 +
  6 +const sherpa_onnx = require('sherpa-onnx');
  7 +
  8 +function createRecognizer() {
  9 + const featConfig = new sherpa_onnx.FeatureConfig();
  10 + featConfig.sampleRate = 16000;
  11 + featConfig.featureDim = 80;
  12 +
  13 + const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();
  14 + paraformer.encoder =
  15 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx';
  16 + paraformer.decoder =
  17 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx';
  18 + const tokens =
  19 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
  20 +
  21 + const modelConfig = new sherpa_onnx.OnlineModelConfig();
  22 + modelConfig.paraformer = paraformer;
  23 + modelConfig.tokens = tokens;
  24 + modelConfig.modelType = 'paraformer';
  25 +
  26 + const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
  27 + recognizerConfig.featConfig = featConfig;
  28 + recognizerConfig.modelConfig = modelConfig;
  29 + recognizerConfig.decodingMethod = 'greedy_search';
  30 + recognizerConfig.enableEndpoint = 1;
  31 +
  32 + const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
  33 + return recognizer;
  34 +}
  35 +const recognizer = createRecognizer();
  36 +const stream = recognizer.createStream();
  37 +
  38 +const display = new sherpa_onnx.Display(50);
  39 +
  40 +let lastText = '';
  41 +let segmentIndex = 0;
  42 +
  43 +const ai = new portAudio.AudioIO({
  44 + inOptions: {
  45 + channelCount: 1,
  46 + closeOnError: true, // Close the stream if an audio error is detected, if
  47 + // set false then just log the error
  48 + deviceId: -1, // Use -1 or omit the deviceId to select the default device
  49 + sampleFormat: portAudio.SampleFormatFloat32,
  50 + sampleRate: recognizer.config.featConfig.sampleRate
  51 + }
  52 +});
  53 +
  54 +ai.on('data', data => {
  55 + const samples = new Float32Array(data.buffer);
  56 +
  57 + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);
  58 +
  59 + while (recognizer.isReady(stream)) {
  60 + recognizer.decode(stream);
  61 + }
  62 +
  63 + const isEndpoint = recognizer.isEndpoint(stream);
  64 + const text = recognizer.getResult(stream).text;
  65 +
  66 + if (text.length > 0 && lastText != text) {
  67 + lastText = text;
  68 + display.print(segmentIndex, lastText);
  69 + }
  70 + if (isEndpoint) {
  71 + if (text.length > 0) {
  72 + lastText = text;
  73 + segmentIndex += 1;
  74 + }
  75 + recognizer.reset(stream);
  76 + }
  77 +});
  78 +
  79 +ai.on('close', () => {
  80 + console.log('Free resources');
  81 + stream.free();
  82 + recognizer.free();
  83 +});
  84 +
  85 +ai.start();
  86 +console.log('Started! Please speak');
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createRecognizer() {
  10 + const featConfig = new sherpa_onnx.FeatureConfig();
  11 + featConfig.sampleRate = 16000;
  12 + featConfig.featureDim = 80;
  13 +
  14 + const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();
  15 + paraformer.encoder =
  16 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx';
  17 + paraformer.decoder =
  18 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx';
  19 + const tokens =
  20 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
  21 +
  22 + const modelConfig = new sherpa_onnx.OnlineModelConfig();
  23 + modelConfig.paraformer = paraformer;
  24 + modelConfig.tokens = tokens;
  25 + modelConfig.modelType = 'paraformer';
  26 +
  27 + const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
  28 + recognizerConfig.featConfig = featConfig;
  29 + recognizerConfig.modelConfig = modelConfig;
  30 + recognizerConfig.decodingMethod = 'greedy_search';
  31 +
  32 + const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
  33 + return recognizer;
  34 +}
  35 +const recognizer = createRecognizer();
  36 +const stream = recognizer.createStream();
  37 +
  38 +const waveFilename =
  39 + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav';
  40 +
  41 +const reader = new wav.Reader();
  42 +const readable = new Readable().wrap(reader);
  43 +
  44 +function decode(samples) {
  45 + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);
  46 +
  47 + while (recognizer.isReady(stream)) {
  48 + recognizer.decode(stream);
  49 + }
  50 + const r = recognizer.getResult(stream);
  51 + console.log(r.text);
  52 +}
  53 +
  54 +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  55 + if (sampleRate != recognizer.config.featConfig.sampleRate) {
  56 + throw new Error(`Only support sampleRate ${
  57 + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  58 + }
  59 +
  60 + if (audioFormat != 1) {
  61 + throw new Error(`Only support PCM format. Given ${audioFormat}`);
  62 + }
  63 +
  64 + if (channels != 1) {
  65 + throw new Error(`Only a single channel is supported. Given ${channels}`);
  66 + }
  67 +
  68 + if (bitDepth != 16) {
  69 + throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  70 + }
  71 +});
  72 +
  73 +fs.createReadStream(waveFilename, {'highWaterMark': 4096})
  74 + .pipe(reader)
  75 + .on('finish', function(err) {
  76 + // tail padding
  77 + const floatSamples =
  78 + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
  79 + decode(floatSamples);
  80 + stream.free();
  81 + recognizer.free();
  82 + });
  83 +
  84 +readable.on('readable', function() {
  85 + let chunk;
  86 + while ((chunk = readable.read()) != null) {
  87 + const int16Samples = new Int16Array(
  88 + chunk.buffer, chunk.byteOffset,
  89 + chunk.length / Int16Array.BYTES_PER_ELEMENT);
  90 +
  91 + const floatSamples = new Float32Array(int16Samples.length);
  92 +
  93 + for (let i = 0; i < floatSamples.length; i++) {
  94 + floatSamples[i] = int16Samples[i] / 32768.0;
  95 + }
  96 +
  97 + decode(floatSamples);
  98 + }
  99 +});
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const portAudio = require('naudiodon2');
  4 +// console.log(portAudio.getDevices());
  5 +
  6 +const sherpa_onnx = require('sherpa-onnx');
  7 +
  8 +function createRecognizer() {
  9 + const featConfig = new sherpa_onnx.FeatureConfig();
  10 + featConfig.sampleRate = 16000;
  11 + featConfig.featureDim = 80;
  12 +
  13 + // test online recognizer
  14 + const transducer = new sherpa_onnx.OnlineTransducerModelConfig();
  15 + transducer.encoder =
  16 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
  17 + transducer.decoder =
  18 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
  19 + transducer.joiner =
  20 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx';
  21 + const tokens =
  22 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';
  23 +
  24 + const modelConfig = new sherpa_onnx.OnlineModelConfig();
  25 + modelConfig.transducer = transducer;
  26 + modelConfig.tokens = tokens;
  27 + modelConfig.modelType = 'zipformer';
  28 +
  29 + const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
  30 + recognizerConfig.featConfig = featConfig;
  31 + recognizerConfig.modelConfig = modelConfig;
  32 + recognizerConfig.decodingMethod = 'greedy_search';
  33 + recognizerConfig.enableEndpoint = 1;
  34 +
  35 + const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
  36 + return recognizer;
  37 +}
  38 +const recognizer = createRecognizer();
  39 +const stream = recognizer.createStream();
  40 +const display = new sherpa_onnx.Display(50);
  41 +
  42 +let lastText = '';
  43 +let segmentIndex = 0;
  44 +
  45 +const ai = new portAudio.AudioIO({
  46 + inOptions: {
  47 + channelCount: 1,
  48 + closeOnError: true, // Close the stream if an audio error is detected, if
  49 + // set false then just log the error
  50 + deviceId: -1, // Use -1 or omit the deviceId to select the default device
  51 + sampleFormat: portAudio.SampleFormatFloat32,
  52 + sampleRate: recognizer.config.featConfig.sampleRate
  53 + }
  54 +});
  55 +
  56 +ai.on('data', data => {
  57 + const samples = new Float32Array(data.buffer);
  58 +
  59 + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);
  60 +
  61 + while (recognizer.isReady(stream)) {
  62 + recognizer.decode(stream);
  63 + }
  64 +
  65 + const isEndpoint = recognizer.isEndpoint(stream);
  66 + const text = recognizer.getResult(stream).text;
  67 +
  68 + if (text.length > 0 && lastText != text) {
  69 + lastText = text;
  70 + display.print(segmentIndex, lastText);
  71 + }
  72 + if (isEndpoint) {
  73 + if (text.length > 0) {
  74 + lastText = text;
  75 + segmentIndex += 1;
  76 + }
  77 + recognizer.reset(stream);
  78 + }
  79 +});
  80 +
  81 +ai.on('close', () => {
  82 + console.log('Free resources');
  83 + stream.free();
  84 + recognizer.free();
  85 +});
  86 +
  87 +ai.start();
  88 +console.log('Started! Please speak');
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createRecognizer() {
  10 + const featConfig = new sherpa_onnx.FeatureConfig();
  11 + featConfig.sampleRate = 16000;
  12 + featConfig.featureDim = 80;
  13 +
  14 + // test online recognizer
  15 + const transducer = new sherpa_onnx.OnlineTransducerModelConfig();
  16 + transducer.encoder =
  17 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
  18 + transducer.decoder =
  19 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
  20 + transducer.joiner =
  21 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx';
  22 + const tokens =
  23 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';
  24 +
  25 + const modelConfig = new sherpa_onnx.OnlineModelConfig();
  26 + modelConfig.transducer = transducer;
  27 + modelConfig.tokens = tokens;
  28 + modelConfig.modelType = 'zipformer';
  29 +
  30 + const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
  31 + recognizerConfig.featConfig = featConfig;
  32 + recognizerConfig.modelConfig = modelConfig;
  33 + recognizerConfig.decodingMethod = 'greedy_search';
  34 +
  35 + const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
  36 + return recognizer;
  37 +}
  38 +const recognizer = createRecognizer();
  39 +const stream = recognizer.createStream();
  40 +
  41 +const waveFilename =
  42 + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';
  43 +
  44 +const reader = new wav.Reader();
  45 +const readable = new Readable().wrap(reader);
  46 +
  47 +function decode(samples) {
  48 + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);
  49 +
  50 + while (recognizer.isReady(stream)) {
  51 + recognizer.decode(stream);
  52 + }
  53 + const r = recognizer.getResult(stream);
  54 + console.log(r.text);
  55 +}
  56 +
  57 +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  58 + if (sampleRate != recognizer.config.featConfig.sampleRate) {
  59 + throw new Error(`Only support sampleRate ${
  60 + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  61 + }
  62 +
  63 + if (audioFormat != 1) {
  64 + throw new Error(`Only support PCM format. Given ${audioFormat}`);
  65 + }
  66 +
  67 + if (channels != 1) {
  68 + throw new Error(`Only a single channel is supported. Given ${channels}`);
  69 + }
  70 +
  71 + if (bitDepth != 16) {
  72 + throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  73 + }
  74 +});
  75 +
  76 +fs.createReadStream(waveFilename, {'highWaterMark': 4096})
  77 + .pipe(reader)
  78 + .on('finish', function(err) {
  79 + // tail padding
  80 + const floatSamples =
  81 + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
  82 + decode(floatSamples);
  83 + stream.free();
  84 + recognizer.free();
  85 + });
  86 +
  87 +readable.on('readable', function() {
  88 + let chunk;
  89 + while ((chunk = readable.read()) != null) {
  90 + const int16Samples = new Int16Array(
  91 + chunk.buffer, chunk.byteOffset,
  92 + chunk.length / Int16Array.BYTES_PER_ELEMENT);
  93 +
  94 + const floatSamples = new Float32Array(int16Samples.length);
  95 +
  96 + for (let i = 0; i < floatSamples.length; i++) {
  97 + floatSamples[i] = int16Samples[i] / 32768.0;
  98 + }
  99 +
  100 + decode(floatSamples);
  101 + }
  102 +});
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const sherpa_onnx = require('sherpa-onnx');
  4 +const portAudio = require('naudiodon2');
  5 +console.log(portAudio.getDevices());
  6 +
  7 +function createOfflineRecognizer() {
  8 + const featConfig = new sherpa_onnx.FeatureConfig();
  9 + featConfig.sampleRate = 16000;
  10 + featConfig.featureDim = 80;
  11 +
  12 + // test offline recognizer
  13 + const paraformer = new sherpa_onnx.OfflineParaformerModelConfig();
  14 + paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx';
  15 + const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt';
  16 +
  17 + const modelConfig = new sherpa_onnx.OfflineModelConfig();
  18 + modelConfig.paraformer = paraformer;
  19 + modelConfig.tokens = tokens;
  20 + modelConfig.modelType = 'paraformer';
  21 +
  22 + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  23 + recognizerConfig.featConfig = featConfig;
  24 + recognizerConfig.modelConfig = modelConfig;
  25 + recognizerConfig.decodingMethod = 'greedy_search';
  26 +
  27 + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  28 + return recognizer;
  29 +}
  30 +
  31 +function createVad() {
  32 + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();
  33 + sileroVadModelConfig.model = './silero_vad.onnx';
  34 + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds
  35 + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds
  36 + sileroVadModelConfig.windowSize = 512;
  37 +
  38 + const vadModelConfig = new sherpa_onnx.VadModelConfig();
  39 + vadModelConfig.sileroVad = sileroVadModelConfig;
  40 + vadModelConfig.sampleRate = 16000;
  41 +
  42 + const bufferSizeInSeconds = 60;
  43 + const vad = new sherpa_onnx.VoiceActivityDetector(
  44 + vadModelConfig, bufferSizeInSeconds);
  45 + return vad;
  46 +}
  47 +
  48 +const recognizer = createOfflineRecognizer();
  49 +const vad = createVad();
  50 +
  51 +const bufferSizeInSeconds = 30;
  52 +const buffer =
  53 + new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
  54 +
  55 +const ai = new portAudio.AudioIO({
  56 + inOptions: {
  57 + channelCount: 1,
  58 + sampleFormat: portAudio.SampleFormatFloat32,
  59 + sampleRate: vad.config.sampleRate,
  60 + deviceId: -1, // Use -1 or omit the deviceId to select the default device
  61 + closeOnError: true // Close the stream if an audio error is detected, if
  62 + // set false then just log the error
  63 + }
  64 +});
  65 +
  66 +let printed = false;
  67 +let index = 0;
  68 +ai.on('data', data => {
  69 + const windowSize = vad.config.sileroVad.windowSize;
  70 + buffer.push(new Float32Array(data.buffer));
  71 + while (buffer.size() > windowSize) {
  72 + const samples = buffer.get(buffer.head(), windowSize);
  73 + buffer.pop(windowSize);
  74 + vad.acceptWaveform(samples);
  75 + }
  76 +
  77 + while (!vad.isEmpty()) {
  78 + const segment = vad.front();
  79 + vad.pop();
  80 + const stream = recognizer.createStream();
  81 + stream.acceptWaveform(
  82 + recognizer.config.featConfig.sampleRate, segment.samples);
  83 + recognizer.decode(stream);
  84 + const r = recognizer.getResult(stream);
  85 + stream.free();
  86 + if (r.text.length > 0) {
  87 + console.log(`${index}: ${r.text}`);
  88 + index += 1;
  89 + }
  90 + }
  91 +});
  92 +
  93 +ai.on('close', () => {
  94 + console.log('Free resources');
  95 + recognizer.free();
  96 + vad.free();
  97 + buffer.free();
  98 +});
  99 +
  100 +ai.start();
  101 +console.log('Started! Please speak');
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const sherpa_onnx = require('sherpa-onnx');
  4 +const portAudio = require('naudiodon2');
  5 +console.log(portAudio.getDevices());
  6 +
  7 +function createOfflineRecognizer() {
  8 + const featConfig = new sherpa_onnx.FeatureConfig();
  9 + featConfig.sampleRate = 16000;
  10 + featConfig.featureDim = 80;
  11 +
  12 + // test offline recognizer
  13 + const transducer = new sherpa_onnx.OfflineTransducerModelConfig();
  14 + transducer.encoder =
  15 + './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx';
  16 + transducer.decoder =
  17 + './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx';
  18 + transducer.joiner =
  19 + './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx';
  20 + const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt';
  21 +
  22 + const modelConfig = new sherpa_onnx.OfflineModelConfig();
  23 + modelConfig.transducer = transducer;
  24 + modelConfig.tokens = tokens;
  25 + modelConfig.modelType = 'transducer';
  26 +
  27 + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  28 + recognizerConfig.featConfig = featConfig;
  29 + recognizerConfig.modelConfig = modelConfig;
  30 + recognizerConfig.decodingMethod = 'greedy_search';
  31 +
  32 + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  33 + return recognizer;
  34 +}
  35 +
  36 +function createVad() {
  37 + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();
  38 + sileroVadModelConfig.model = './silero_vad.onnx';
  39 + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds
  40 + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds
  41 + sileroVadModelConfig.windowSize = 512;
  42 +
  43 + const vadModelConfig = new sherpa_onnx.VadModelConfig();
  44 + vadModelConfig.sileroVad = sileroVadModelConfig;
  45 + vadModelConfig.sampleRate = 16000;
  46 +
  47 + const bufferSizeInSeconds = 60;
  48 + const vad = new sherpa_onnx.VoiceActivityDetector(
  49 + vadModelConfig, bufferSizeInSeconds);
  50 + return vad;
  51 +}
  52 +
  53 +const recognizer = createOfflineRecognizer();
  54 +const vad = createVad();
  55 +
  56 +const bufferSizeInSeconds = 30;
  57 +const buffer =
  58 + new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
  59 +
  60 +const ai = new portAudio.AudioIO({
  61 + inOptions: {
  62 + channelCount: 1,
  63 + closeOnError: true, // Close the stream if an audio error is detected, if
  64 + // set false then just log the error
  65 + deviceId: -1, // Use -1 or omit the deviceId to select the default device
  66 + sampleFormat: portAudio.SampleFormatFloat32,
  67 + sampleRate: vad.config.sampleRate
  68 + }
  69 +});
  70 +
  71 +let printed = false;
  72 +let index = 0;
  73 +ai.on('data', data => {
  74 + const windowSize = vad.config.sileroVad.windowSize;
  75 + buffer.push(new Float32Array(data.buffer));
  76 + while (buffer.size() > windowSize) {
  77 + const samples = buffer.get(buffer.head(), windowSize);
  78 + buffer.pop(windowSize);
  79 + vad.acceptWaveform(samples);
  80 + }
  81 +
  82 + while (!vad.isEmpty()) {
  83 + const segment = vad.front();
  84 + vad.pop();
  85 + const stream = recognizer.createStream();
  86 + stream.acceptWaveform(
  87 + recognizer.config.featConfig.sampleRate, segment.samples);
  88 + recognizer.decode(stream);
  89 + const r = recognizer.getResult(stream);
  90 + stream.free();
  91 + if (r.text.length > 0) {
  92 + console.log(`${index}: ${r.text}`);
  93 + index += 1;
  94 + }
  95 + }
  96 +});
  97 +
  98 +ai.on('close', () => {
  99 + console.log('Free resources');
  100 + recognizer.free();
  101 + vad.free();
  102 + buffer.free();
  103 +});
  104 +
  105 +ai.start();
  106 +console.log('Started! Please speak');
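
A note on the `data` callback used in these microphone examples: naudiodon2 hands the callback a Node.js Buffer, and because the stream is opened with `SampleFormatFloat32`, its bytes can be viewed directly as 32-bit float samples. Below is a minimal sketch of that conversion; the `fakeChunk` values are made up for illustration, and passing `byteOffset` plus a length is the defensive form of the `new Float32Array(data.buffer)` call above:

```js
// Sketch: how a Float32 PCM Buffer maps onto a Float32Array.
const fakeChunk = Buffer.alloc(16);  // room for 4 float samples, zero-filled
fakeChunk.writeFloatLE(0.25, 4);     // pretend the 2nd sample is 0.25

const samples = new Float32Array(
    fakeChunk.buffer, fakeChunk.byteOffset, fakeChunk.length / 4);
console.log(samples);  // Float32Array(4) [ 0, 0.25, 0, 0 ]
```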
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const sherpa_onnx = require('sherpa-onnx');
  4 +const portAudio = require('naudiodon2');
  5 +console.log(portAudio.getDevices());
  6 +
  7 +function createOfflineRecognizer() {
  8 + const featConfig = new sherpa_onnx.FeatureConfig();
  9 + featConfig.sampleRate = 16000;
  10 + featConfig.featureDim = 80;
  11 +
  12 + // offline whisper model config
  13 + const whisper = new sherpa_onnx.OfflineWhisperModelConfig();
  14 + whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
  15 + whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';
  16 + const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
  17 +
  18 + const modelConfig = new sherpa_onnx.OfflineModelConfig();
  19 + modelConfig.whisper = whisper;
  20 + modelConfig.tokens = tokens;
  21 + modelConfig.modelType = 'whisper';
  22 +
  23 + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
  24 + recognizerConfig.featConfig = featConfig;
  25 + recognizerConfig.modelConfig = modelConfig;
  26 + recognizerConfig.decodingMethod = 'greedy_search';
  27 +
  28 + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
  29 + return recognizer;
  30 +}
  31 +
  32 +function createVad() {
  33 + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();
  34 + sileroVadModelConfig.model = './silero_vad.onnx';
  35 + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds
  36 + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds
  37 + sileroVadModelConfig.windowSize = 512;
  38 +
  39 + const vadModelConfig = new sherpa_onnx.VadModelConfig();
  40 + vadModelConfig.sileroVad = sileroVadModelConfig;
  41 + vadModelConfig.sampleRate = 16000;
  42 +
  43 + const bufferSizeInSeconds = 60;
  44 + const vad = new sherpa_onnx.VoiceActivityDetector(
  45 + vadModelConfig, bufferSizeInSeconds);
  46 + return vad;
  47 +}
  48 +
  49 +const recognizer = createOfflineRecognizer();
  50 +const vad = createVad();
  51 +
  52 +const bufferSizeInSeconds = 30;
  53 +const buffer =
  54 + new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
  55 +
  56 +const ai = new portAudio.AudioIO({
  57 + inOptions: {
  58 + channelCount: 1,
  59 + closeOnError: true, // Close the stream if an audio error is detected;
  60 + // if set to false, just log the error
  61 + deviceId: -1, // Use -1 or omit the deviceId to select the default device
  62 + sampleFormat: portAudio.SampleFormatFloat32,
  63 + sampleRate: vad.config.sampleRate
  64 + }
  65 +});
  66 +
  67 +let printed = false;
  68 +let index = 0;
  69 +ai.on('data', data => {
  70 + const windowSize = vad.config.sileroVad.windowSize;
  71 + buffer.push(new Float32Array(data.buffer));
  72 + while (buffer.size() > windowSize) {
  73 + const samples = buffer.get(buffer.head(), windowSize);
  74 + buffer.pop(windowSize);
  75 + vad.acceptWaveform(samples);
  76 + }
  77 +
  78 + while (!vad.isEmpty()) {
  79 + const segment = vad.front();
  80 + vad.pop();
  81 + const stream = recognizer.createStream();
  82 + stream.acceptWaveform(
  83 + recognizer.config.featConfig.sampleRate, segment.samples);
  84 + recognizer.decode(stream);
  85 + const r = recognizer.getResult(stream);
  86 + stream.free();
  87 + if (r.text.length > 0) {
  88 + console.log(`${index}: ${r.text}`);
  89 + index += 1;
  90 + }
  91 + }
  92 +});
  93 +
  94 +ai.on('close', () => {
  95 + console.log('Free resources');
  96 + recognizer.free();
  97 + vad.free();
  98 + buffer.free();
  99 +});
  100 +
  101 +ai.start();
  102 +console.log('Started! Please speak');
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const sherpa_onnx = require('sherpa-onnx');
  4 +const portAudio = require('naudiodon2');
  5 +console.log(portAudio.getDevices());
  6 +
  7 +function createVad() {
  8 + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();
  9 + sileroVadModelConfig.model = './silero_vad.onnx';
  10 + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds
  11 + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds
  12 + sileroVadModelConfig.windowSize = 512;
  13 +
  14 + const vadModelConfig = new sherpa_onnx.VadModelConfig();
  15 + vadModelConfig.sileroVad = sileroVadModelConfig;
  16 + vadModelConfig.sampleRate = 16000;
  17 +
  18 + const bufferSizeInSeconds = 60;
  19 + const vad = new sherpa_onnx.VoiceActivityDetector(
  20 + vadModelConfig, bufferSizeInSeconds);
  21 + return vad;
  22 +}
  23 +const vad = createVad();
  24 +const bufferSizeInSeconds = 30;
  25 +const buffer =
  26 + new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
  27 +
  28 +const ai = new portAudio.AudioIO({
  29 + inOptions: {
  30 + channelCount: 1,
  31 + closeOnError: true, // Close the stream if an audio error is detected;
  32 + // if set to false, just log the error
  33 + deviceId: -1, // Use -1 or omit the deviceId to select the default device
  34 + sampleFormat: portAudio.SampleFormatFloat32,
  35 + sampleRate: vad.config.sampleRate
  36 + }
  37 +});
  38 +
  39 +let printed = false;
  40 +let index = 0;
  41 +ai.on('data', data => {
  42 + const windowSize = vad.config.sileroVad.windowSize;
  43 + buffer.push(new Float32Array(data.buffer));
  44 + while (buffer.size() > windowSize) {
  45 + const samples = buffer.get(buffer.head(), windowSize);
  46 + buffer.pop(windowSize);
  47 + vad.acceptWaveform(samples);
  48 + if (vad.isDetected() && !printed) {
  49 + console.log(`${index}: Detected speech`)
  50 + printed = true;
  51 + }
  52 +
  53 + if (!vad.isDetected()) {
  54 + printed = false;
  55 + }
  56 +
  57 + while (!vad.isEmpty()) {
  58 + const segment = vad.front();
  59 + vad.pop();
  60 + const duration = segment.samples.length / vad.config.sampleRate;
  61 + console.log(`${index} End of speech. Duration: ${duration} seconds`);
  62 + index += 1;
  63 + }
  64 + }
  65 +});
  66 +
  67 +ai.on('close', () => {
  68 + console.log('Free resources');
  69 + vad.free();
  70 + buffer.free();
  71 +});
  72 +
  73 +ai.start();
  74 +console.log('Started! Please speak');
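
For reference, the Silero VAD numbers repeated across all of these examples imply: a 512-sample window at 16 kHz is 32 ms, and the two duration thresholds are 300 ms each. A throwaway check of that arithmetic:

```js
// VAD window/threshold arithmetic for the config used above.
const sampleRate = 16000;  // vadModelConfig.sampleRate
const windowSize = 512;    // sileroVadModelConfig.windowSize
console.log(`${windowSize / sampleRate * 1000} ms per VAD window`);  // 32 ms
console.log(`${0.3 * 1000} ms min speech / min silence duration`);   // 300 ms
```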
  1 +Language: JavaScript
  2 +JavaScriptQuotes: Double
  3 +
  1 +node_modules
  2 +jslint.mjs
  1 +# Introduction
  2 +
  3 +Text-to-speech and speech-to-text with [Next-gen Kaldi](https://github.com/k2-fsa/).
  4 +
  5 +It processes everything locally without accessing the Internet.
  6 +
  7 +Please refer to
  8 +https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples
  9 +for examples.
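
To give a flavor of the API before following that link, here is a minimal offline TTS sketch. The vits-vctk file names are placeholders based on the test scripts in this PR, and the noise/length scales are commonly used VITS defaults rather than values mandated by sherpa-onnx:

```js
const sherpa_onnx = require('sherpa-onnx');

// Placeholder paths: download and extract a VITS model (e.g. vits-vctk) first.
const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
vits.model = './vits-vctk/vits-vctk.onnx';
vits.lexicon = './vits-vctk/lexicon.txt';
vits.tokens = './vits-vctk/tokens.txt';
vits.noiseScale = 0.667;  // typical VITS defaults (assumption)
vits.noiseScaleW = 0.8;
vits.lengthScale = 1.0;

const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
modelConfig.vits = vits;

const config = new sherpa_onnx.OfflineTtsConfig();
config.model = modelConfig;

const tts = new sherpa_onnx.OfflineTts(config);
const audio = tts.generate('Hello from sherpa-onnx', 0 /* speaker id */, 1.0 /* speed */);
audio.save('./hello.wav');
tts.free();
```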
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +// Please use
  4 +//
  5 +// npm install debug ffi-napi ref-napi ref-array-napi ref-struct-napi
  6 +//
  7 +// before you use this file
  8 +//
  9 +//
  10 +// Please use Node.js 13. Node.js 16, 18, 20, and 21 are known not to work.
  11 +// See also
  12 +// https://github.com/node-ffi-napi/node-ffi-napi/issues/244
  13 +// and
  14 +// https://github.com/node-ffi-napi/node-ffi-napi/issues/97
  15 +"use strict"
  16 +
  17 +const debug = require("debug")("sherpa-onnx");
  18 +const os = require("os");
  19 +const path = require("path");
  20 +const ffi = require("ffi-napi");
  21 +const ref = require("ref-napi");
  22 +const fs = require("fs");
  23 +const ArrayType = require("ref-array-napi");
  24 +
  25 +const FloatArray = ArrayType(ref.types.float);
  26 +const StructType = require("ref-struct-napi");
  27 +const cstring = ref.types.CString;
  28 +const cstringPtr = ref.refType(cstring);
  29 +const int32_t = ref.types.int32;
  30 +const float = ref.types.float;
  31 +const floatPtr = ref.refType(float);
  32 +
  33 +const SherpaOnnxOnlineTransducerModelConfig = StructType({
  34 + "encoder" : cstring,
  35 + "decoder" : cstring,
  36 + "joiner" : cstring,
  37 +});
  38 +
  39 +const SherpaOnnxOnlineParaformerModelConfig = StructType({
  40 + "encoder" : cstring,
  41 + "decoder" : cstring,
  42 +});
  43 +
  44 +const SherpaOnnxOnlineModelConfig = StructType({
  45 + "transducer" : SherpaOnnxOnlineTransducerModelConfig,
  46 + "paraformer" : SherpaOnnxOnlineParaformerModelConfig,
  47 + "tokens" : cstring,
  48 + "numThreads" : int32_t,
  49 + "provider" : cstring,
  50 + "debug" : int32_t,
  51 + "modelType" : cstring,
  52 +});
  53 +
  54 +const SherpaOnnxFeatureConfig = StructType({
  55 + "sampleRate" : int32_t,
  56 + "featureDim" : int32_t,
  57 +});
  58 +
  59 +const SherpaOnnxOnlineRecognizerConfig = StructType({
  60 + "featConfig" : SherpaOnnxFeatureConfig,
  61 + "modelConfig" : SherpaOnnxOnlineModelConfig,
  62 + "decodingMethod" : cstring,
  63 + "maxActivePaths" : int32_t,
  64 + "enableEndpoint" : int32_t,
  65 + "rule1MinTrailingSilence" : float,
  66 + "rule2MinTrailingSilence" : float,
  67 + "rule3MinUtteranceLength" : float,
  68 + "hotwordsFile" : cstring,
  69 + "hotwordsScore" : float,
  70 +});
  71 +
  72 +const SherpaOnnxOnlineRecognizerResult = StructType({
  73 + "text" : cstring,
  74 + "tokens" : cstring,
  75 + "tokensArr" : cstringPtr,
  76 + "timestamps" : floatPtr,
  77 + "count" : int32_t,
  78 + "json" : cstring,
  79 +});
  80 +
  81 +const SherpaOnnxOnlineRecognizerPtr = ref.refType(ref.types.void);
  82 +const SherpaOnnxOnlineStreamPtr = ref.refType(ref.types.void);
  83 +const SherpaOnnxOnlineStreamPtrPtr = ref.refType(SherpaOnnxOnlineStreamPtr);
  84 +const SherpaOnnxOnlineRecognizerResultPtr =
  85 + ref.refType(SherpaOnnxOnlineRecognizerResult);
  86 +
  87 +const SherpaOnnxOnlineRecognizerConfigPtr =
  88 + ref.refType(SherpaOnnxOnlineRecognizerConfig);
  89 +
  90 +const SherpaOnnxOfflineTransducerModelConfig = StructType({
  91 + "encoder" : cstring,
  92 + "decoder" : cstring,
  93 + "joiner" : cstring,
  94 +});
  95 +
  96 +const SherpaOnnxOfflineParaformerModelConfig = StructType({
  97 + "model" : cstring,
  98 +});
  99 +
  100 +const SherpaOnnxOfflineNemoEncDecCtcModelConfig = StructType({
  101 + "model" : cstring,
  102 +});
  103 +
  104 +const SherpaOnnxOfflineWhisperModelConfig = StructType({
  105 + "encoder" : cstring,
  106 + "decoder" : cstring,
  107 +});
  108 +
  109 +const SherpaOnnxOfflineTdnnModelConfig = StructType({
  110 + "model" : cstring,
  111 +});
  112 +
  113 +const SherpaOnnxOfflineLMConfig = StructType({
  114 + "model" : cstring,
  115 + "scale" : float,
  116 +});
  117 +
  118 +const SherpaOnnxOfflineModelConfig = StructType({
  119 + "transducer" : SherpaOnnxOfflineTransducerModelConfig,
  120 + "paraformer" : SherpaOnnxOfflineParaformerModelConfig,
  121 + "nemoCtc" : SherpaOnnxOfflineNemoEncDecCtcModelConfig,
  122 + "whisper" : SherpaOnnxOfflineWhisperModelConfig,
  123 + "tdnn" : SherpaOnnxOfflineTdnnModelConfig,
  124 + "tokens" : cstring,
  125 + "numThreads" : int32_t,
  126 + "debug" : int32_t,
  127 + "provider" : cstring,
  128 + "modelType" : cstring,
  129 +});
  130 +
  131 +const SherpaOnnxOfflineRecognizerConfig = StructType({
  132 + "featConfig" : SherpaOnnxFeatureConfig,
  133 + "modelConfig" : SherpaOnnxOfflineModelConfig,
  134 + "lmConfig" : SherpaOnnxOfflineLMConfig,
  135 + "decodingMethod" : cstring,
  136 + "maxActivePaths" : int32_t,
  137 + "hotwordsFile" : cstring,
  138 + "hotwordsScore" : float,
  139 +});
  140 +
  141 +const SherpaOnnxOfflineRecognizerResult = StructType({
  142 + "text" : cstring,
  143 + "timestamps" : floatPtr,
  144 + "count" : int32_t,
  145 +});
  146 +
  147 +const SherpaOnnxOfflineRecognizerPtr = ref.refType(ref.types.void);
  148 +const SherpaOnnxOfflineStreamPtr = ref.refType(ref.types.void);
  149 +const SherpaOnnxOfflineStreamPtrPtr = ref.refType(SherpaOnnxOfflineStreamPtr);
  150 +const SherpaOnnxOfflineRecognizerResultPtr =
  151 + ref.refType(SherpaOnnxOfflineRecognizerResult);
  152 +
  153 +const SherpaOnnxOfflineRecognizerConfigPtr =
  154 + ref.refType(SherpaOnnxOfflineRecognizerConfig);
  155 +
  156 +// vad
  157 +const SherpaOnnxSileroVadModelConfig = StructType({
  158 + "model" : cstring,
  159 + "threshold" : float,
  160 + "minSilenceDuration" : float,
  161 + "minSpeechDuration" : float,
  162 + "windowSize" : int32_t,
  163 +});
  164 +
  165 +const SherpaOnnxVadModelConfig = StructType({
  166 + "sileroVad" : SherpaOnnxSileroVadModelConfig,
  167 + "sampleRate" : int32_t,
  168 + "numThreads" : int32_t,
  169 + "provider" : cstring,
  170 + "debug" : int32_t,
  171 +});
  172 +
  173 +const SherpaOnnxSpeechSegment = StructType({
  174 + "start" : int32_t,
  175 + "samples" : FloatArray,
  176 + "n" : int32_t,
  177 +});
  178 +
  179 +const SherpaOnnxVadModelConfigPtr = ref.refType(SherpaOnnxVadModelConfig);
  180 +const SherpaOnnxSpeechSegmentPtr = ref.refType(SherpaOnnxSpeechSegment);
  181 +const SherpaOnnxCircularBufferPtr = ref.refType(ref.types.void);
  182 +const SherpaOnnxVoiceActivityDetectorPtr = ref.refType(ref.types.void);
  183 +
  184 +// tts
  185 +const SherpaOnnxOfflineTtsVitsModelConfig = StructType({
  186 + "model" : cstring,
  187 + "lexicon" : cstring,
  188 + "tokens" : cstring,
  189 + "noiseScale" : float,
  190 + "noiseScaleW" : float,
  191 + "lengthScale" : float,
  192 +});
  193 +
  194 +const SherpaOnnxOfflineTtsModelConfig = StructType({
  195 + "vits" : SherpaOnnxOfflineTtsVitsModelConfig,
  196 + "numThreads" : int32_t,
  197 + "debug" : int32_t,
  198 + "provider" : cstring,
  199 +});
  200 +
  201 +const SherpaOnnxOfflineTtsConfig = StructType({
  202 + "model" : SherpaOnnxOfflineTtsModelConfig,
  203 + "ruleFsts" : cstring,
  204 +});
  205 +
  206 +const SherpaOnnxGeneratedAudio = StructType({
  207 + "samples" : FloatArray,
  208 + "n" : int32_t,
  209 + "sampleRate" : int32_t,
  210 +});
  211 +
  212 +const SherpaOnnxOfflineTtsVitsModelConfigPtr =
  213 + ref.refType(SherpaOnnxOfflineTtsVitsModelConfig);
  214 +const SherpaOnnxOfflineTtsConfigPtr = ref.refType(SherpaOnnxOfflineTtsConfig);
  215 +const SherpaOnnxGeneratedAudioPtr = ref.refType(SherpaOnnxGeneratedAudio);
  216 +const SherpaOnnxOfflineTtsPtr = ref.refType(ref.types.void);
  217 +
  218 +const SherpaOnnxDisplayPtr = ref.refType(ref.types.void);
  219 +
  220 +let soname;
  221 +if (os.platform() == "win32") {
  222 + // see https://nodejs.org/api/process.html#processarch
  223 + if (process.arch == "x64") {
  224 + let currentPath = process.env.Path;
  225 + let dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x64"));
  226 + process.env.Path = currentPath + path.delimiter + dllDirectory;
  227 +
  228 + soname = path.join(__dirname, "lib", "win-x64", "sherpa-onnx-c-api.dll");
  229 + } else if (process.arch == "ia32") {
  230 + let currentPath = process.env.Path;
  231 + let dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x86"));
  232 + process.env.Path = currentPath + path.delimiter + dllDirectory;
  233 +
  234 + soname = path.join(__dirname, "lib", "win-x86", "sherpa-onnx-c-api.dll");
  235 + } else {
  236 + throw new Error(
  237 + `Support only Windows x86 and x64 for now. Given ${process.arch}`);
  238 + }
  239 +} else if (os.platform() == "darwin") {
  240 + if (process.arch == "x64") {
  241 + soname =
  242 + path.join(__dirname, "lib", "osx-x64", "libsherpa-onnx-c-api.dylib");
  243 + } else if (process.arch == "arm64") {
  244 + soname =
  245 + path.join(__dirname, "lib", "osx-arm64", "libsherpa-onnx-c-api.dylib");
  246 + } else {
  247 + throw new Error(
  248 + `Support only macOS x64 and arm64 for now. Given ${process.arch}`);
  249 + }
  250 +} else if (os.platform() == "linux") {
  251 + if (process.arch == "x64") {
  252 + soname =
  253 + path.join(__dirname, "lib", "linux-x64", "libsherpa-onnx-c-api.so");
  254 + } else {
  255 + throw new Error(`Support only Linux x64 for now. Given ${process.arch}`);
  256 + }
  257 +} else {
  258 + throw new Error(`Unsupported platform ${os.platform()}`);
  259 +}
  260 +
  261 +if (!fs.existsSync(soname)) {
  262 + throw new Error(`Cannot find file ${soname}. Please make sure you have run
  263 + ./build.sh`);
  264 +}
  265 +
  266 +debug("soname ", soname)
  267 +
  268 +const libsherpa_onnx = ffi.Library(soname, {
  269 + // online asr
  270 + "CreateOnlineRecognizer" : [
  271 + SherpaOnnxOnlineRecognizerPtr, [ SherpaOnnxOnlineRecognizerConfigPtr ]
  272 + ],
  273 + "DestroyOnlineRecognizer" : [ "void", [ SherpaOnnxOnlineRecognizerPtr ] ],
  274 + "CreateOnlineStream" :
  275 + [ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr ] ],
  276 + "CreateOnlineStreamWithHotwords" :
  277 + [ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr, cstring ] ],
  278 + "DestroyOnlineStream" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ],
  279 + "AcceptWaveform" :
  280 + [ "void", [ SherpaOnnxOnlineStreamPtr, int32_t, floatPtr, int32_t ] ],
  281 + "IsOnlineStreamReady" :
  282 + [ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
  283 + "DecodeOnlineStream" :
  284 + [ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
  285 + "DecodeMultipleOnlineStreams" : [
  286 + "void",
  287 + [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtrPtr, int32_t ]
  288 + ],
  289 + "GetOnlineStreamResult" : [
  290 + SherpaOnnxOnlineRecognizerResultPtr,
  291 + [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ]
  292 + ],
  293 + "DestroyOnlineRecognizerResult" :
  294 + [ "void", [ SherpaOnnxOnlineRecognizerResultPtr ] ],
  295 + "Reset" :
  296 + [ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
  297 + "InputFinished" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ],
  298 + "IsEndpoint" :
  299 + [ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
  300 +
  301 + // offline asr
  302 + "CreateOfflineRecognizer" : [
  303 + SherpaOnnxOfflineRecognizerPtr, [ SherpaOnnxOfflineRecognizerConfigPtr ]
  304 + ],
  305 + "DestroyOfflineRecognizer" : [ "void", [ SherpaOnnxOfflineRecognizerPtr ] ],
  306 + "CreateOfflineStream" :
  307 + [ SherpaOnnxOfflineStreamPtr, [ SherpaOnnxOfflineRecognizerPtr ] ],
  308 + "DestroyOfflineStream" : [ "void", [ SherpaOnnxOfflineStreamPtr ] ],
  309 + "AcceptWaveformOffline" :
  310 + [ "void", [ SherpaOnnxOfflineStreamPtr, int32_t, floatPtr, int32_t ] ],
  311 + "DecodeOfflineStream" : [
  312 + "void", [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtr ]
  313 + ],
  314 + "DecodeMultipleOfflineStreams" : [
  315 + "void",
  316 + [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtrPtr, int32_t ]
  317 + ],
  318 + "GetOfflineStreamResult" :
  319 + [ SherpaOnnxOfflineRecognizerResultPtr, [ SherpaOnnxOfflineStreamPtr ] ],
  320 + "DestroyOfflineRecognizerResult" :
  321 + [ "void", [ SherpaOnnxOfflineRecognizerResultPtr ] ],
  322 +
  323 + // vad
  324 + "SherpaOnnxCreateCircularBuffer" :
  325 + [ SherpaOnnxCircularBufferPtr, [ int32_t ] ],
  326 + "SherpaOnnxDestroyCircularBuffer" :
  327 + [ "void", [ SherpaOnnxCircularBufferPtr ] ],
  328 + "SherpaOnnxCircularBufferPush" :
  329 + [ "void", [ SherpaOnnxCircularBufferPtr, floatPtr, int32_t ] ],
  330 + "SherpaOnnxCircularBufferGet" :
  331 + [ FloatArray, [ SherpaOnnxCircularBufferPtr, int32_t, int32_t ] ],
  332 + "SherpaOnnxCircularBufferFree" : [ "void", [ FloatArray ] ],
  333 + "SherpaOnnxCircularBufferPop" :
  334 + [ "void", [ SherpaOnnxCircularBufferPtr, int32_t ] ],
  335 + "SherpaOnnxCircularBufferSize" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ],
  336 + "SherpaOnnxCircularBufferHead" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ],
  337 + "SherpaOnnxCircularBufferReset" : [ "void", [ SherpaOnnxCircularBufferPtr ] ],
  338 + "SherpaOnnxCreateVoiceActivityDetector" : [
  339 + SherpaOnnxVoiceActivityDetectorPtr, [ SherpaOnnxVadModelConfigPtr, float ]
  340 + ],
  341 + "SherpaOnnxDestroyVoiceActivityDetector" :
  342 + [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  343 + "SherpaOnnxVoiceActivityDetectorAcceptWaveform" :
  344 + [ "void", [ SherpaOnnxVoiceActivityDetectorPtr, floatPtr, int32_t ] ],
  345 + "SherpaOnnxVoiceActivityDetectorEmpty" :
  346 + [ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  347 + "SherpaOnnxVoiceActivityDetectorDetected" :
  348 + [ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  349 + "SherpaOnnxVoiceActivityDetectorPop" :
  350 + [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  351 + "SherpaOnnxVoiceActivityDetectorClear" :
  352 + [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  353 + "SherpaOnnxVoiceActivityDetectorFront" :
  354 + [ SherpaOnnxSpeechSegmentPtr, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  355 + "SherpaOnnxDestroySpeechSegment" : [ "void", [ SherpaOnnxSpeechSegmentPtr ] ],
  356 + "SherpaOnnxVoiceActivityDetectorReset" :
  357 + [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  358 + // tts
  359 + "SherpaOnnxCreateOfflineTts" :
  360 + [ SherpaOnnxOfflineTtsPtr, [ SherpaOnnxOfflineTtsConfigPtr ] ],
  361 + "SherpaOnnxDestroyOfflineTts" : [ "void", [ SherpaOnnxOfflineTtsPtr ] ],
  362 + "SherpaOnnxOfflineTtsGenerate" : [
  363 + SherpaOnnxGeneratedAudioPtr,
  364 + [ SherpaOnnxOfflineTtsPtr, cstring, int32_t, float ]
  365 + ],
  366 + "SherpaOnnxDestroyOfflineTtsGeneratedAudio" :
  367 + [ "void", [ SherpaOnnxGeneratedAudioPtr ] ],
  368 + "SherpaOnnxWriteWave" : [ "void", [ floatPtr, int32_t, int32_t, cstring ] ],
  369 +
  370 + // display
  371 + "CreateDisplay" : [ SherpaOnnxDisplayPtr, [ int32_t ] ],
  372 + "DestroyDisplay" : [ "void", [ SherpaOnnxDisplayPtr ] ],
  373 + "SherpaOnnxPrint" : [ "void", [ SherpaOnnxDisplayPtr, int32_t, cstring ] ],
  374 +});
  375 +
  376 +class Display {
  377 + constructor(maxWordPerLine) {
  378 + this.handle = libsherpa_onnx.CreateDisplay(maxWordPerLine);
  379 + }
  380 + free() {
  381 + if (this.handle) {
  382 + libsherpa_onnx.DestroyDisplay(this.handle);
  383 + this.handle = null;
  384 + }
  385 + }
  386 +
  387 + print(idx, s) { libsherpa_onnx.SherpaOnnxPrint(this.handle, idx, s); }
  388 +};
  389 +
  390 +class OnlineResult {
  391 + constructor(text) { this.text = Buffer.from(text, "utf-8").toString(); }
  392 +};
  393 +
  394 +class OnlineStream {
  395 + constructor(handle) { this.handle = handle }
  396 +
  397 + free() {
  398 + if (this.handle) {
  399 + libsherpa_onnx.DestroyOnlineStream(this.handle);
  400 + this.handle = null;
  401 + }
  402 + }
  403 +
  404 + /**
  405 + * @param sampleRate {Number}
  406 + * @param samples {Float32Array} Containing samples in the range [-1, 1]
  407 + */
  408 + acceptWaveform(sampleRate, samples) {
  409 + libsherpa_onnx.AcceptWaveform(this.handle, sampleRate, samples,
  410 + samples.length);
  411 + }
  412 +};
  413 +
  414 +class OnlineRecognizer {
  415 + constructor(config) {
  416 + this.config = config;
  417 + this.recognizer_handle =
  418 + libsherpa_onnx.CreateOnlineRecognizer(config.ref());
  419 + }
  420 +
  421 + free() {
  422 + if (this.recognizer_handle) {
  423 + libsherpa_onnx.DestroyOnlineRecognizer(this.recognizer_handle);
  424 + this.recognizer_handle = null;
  425 + }
  426 + }
  427 +
  428 + createStream() {
  429 + let handle = libsherpa_onnx.CreateOnlineStream(this.recognizer_handle);
  430 + return new OnlineStream(handle);
  431 + }
  432 +
  433 + isReady(stream) {
  434 + return libsherpa_onnx.IsOnlineStreamReady(this.recognizer_handle,
  435 + stream.handle)
  436 + }
  437 +
  438 + isEndpoint(stream) {
  439 + return libsherpa_onnx.IsEndpoint(this.recognizer_handle, stream.handle);
  440 + }
  441 +
  442 + reset(stream) { libsherpa_onnx.Reset(this.recognizer_handle, stream.handle); }
  443 +
  444 + decode(stream) {
  445 + libsherpa_onnx.DecodeOnlineStream(this.recognizer_handle, stream.handle)
  446 + }
  447 +
  448 + getResult(stream) {
  449 + let handle = libsherpa_onnx.GetOnlineStreamResult(this.recognizer_handle,
  450 + stream.handle);
  451 + let r = handle.deref();
  452 + let ans = new OnlineResult(r.text);
  453 + libsherpa_onnx.DestroyOnlineRecognizerResult(handle);
  454 +
  455 + return ans;
  456 + }
  457 +};
  458 +
  459 +class OfflineResult {
  460 + constructor(text) { this.text = Buffer.from(text, "utf-8").toString(); }
  461 +};
  462 +
  463 +class OfflineStream {
  464 + constructor(handle) { this.handle = handle }
  465 +
  466 + free() {
  467 + if (this.handle) {
  468 + libsherpa_onnx.DestroyOfflineStream(this.handle);
  469 + this.handle = null;
  470 + }
  471 + }
  472 +
  473 + /**
  474 + * @param sampleRate {Number}
  475 + * @param samples {Float32Array} Containing samples in the range [-1, 1]
  476 + */
  477 + acceptWaveform(sampleRate, samples) {
  478 + libsherpa_onnx.AcceptWaveformOffline(this.handle, sampleRate, samples,
  479 + samples.length);
  480 + }
  481 +};
  482 +
  483 +class OfflineRecognizer {
  484 + constructor(config) {
  485 + this.config = config;
  486 + this.recognizer_handle =
  487 + libsherpa_onnx.CreateOfflineRecognizer(config.ref());
  488 + }
  489 +
  490 + free() {
  491 + if (this.recognizer_handle) {
  492 + libsherpa_onnx.DestroyOfflineRecognizer(this.recognizer_handle);
  493 + this.recognizer_handle = null;
  494 + }
  495 + }
  496 +
  497 + createStream() {
  498 + let handle = libsherpa_onnx.CreateOfflineStream(this.recognizer_handle);
  499 + return new OfflineStream(handle);
  500 + }
  501 +
  502 + decode(stream) {
  503 + libsherpa_onnx.DecodeOfflineStream(this.recognizer_handle, stream.handle)
  504 + }
  505 +
  506 + getResult(stream) {
  507 + let handle = libsherpa_onnx.GetOfflineStreamResult(stream.handle);
  508 + let r = handle.deref();
  509 + let ans = new OfflineResult(r.text);
  510 + libsherpa_onnx.DestroyOfflineRecognizerResult(handle);
  511 +
  512 + return ans;
  513 + }
  514 +};
  515 +
  516 +class SpeechSegment {
  517 + constructor(start, samples) {
  518 + this.start = start;
  519 + this.samples = samples;
  520 + }
  521 +};
  522 +
  523 +// this buffer holds only float entries.
  524 +class CircularBuffer {
  525 + /**
  526 + * @param capacity {int} The capacity of the circular buffer.
  527 + */
  528 + constructor(capacity) {
  529 + this.handle = libsherpa_onnx.SherpaOnnxCreateCircularBuffer(capacity);
  530 + }
  531 +
  532 + free() {
  533 + if (this.handle) {
  534 + libsherpa_onnx.SherpaOnnxDestroyCircularBuffer(this.handle);
  535 + this.handle = null;
  536 + }
  537 + }
  538 +
  539 + /**
  540 + * @param samples {Float32Array}
  541 + */
  542 + push(samples) {
  543 + libsherpa_onnx.SherpaOnnxCircularBufferPush(this.handle, samples,
  544 + samples.length);
  545 + }
  546 +
  547 + get(startIndex, n) {
  548 + let data =
  549 + libsherpa_onnx.SherpaOnnxCircularBufferGet(this.handle, startIndex, n);
  550 +
  551 + // https://tootallnate.github.io/ref/#exports-reinterpret
  552 + const buffer = data.buffer.reinterpret(n * ref.sizeof.float).buffer;
  553 +
  554 + // create a copy since we are going to free the buffer at the end
  555 + let s = new Float32Array(buffer).slice(0);
  556 + libsherpa_onnx.SherpaOnnxCircularBufferFree(data);
  557 + return s;
  558 + }
  559 +
  560 + pop(n) { libsherpa_onnx.SherpaOnnxCircularBufferPop(this.handle, n); }
  561 +
  562 + size() { return libsherpa_onnx.SherpaOnnxCircularBufferSize(this.handle); }
  563 +
  564 + head() { return libsherpa_onnx.SherpaOnnxCircularBufferHead(this.handle); }
  565 +
  566 + reset() { libsherpa_onnx.SherpaOnnxCircularBufferReset(this.handle); }
  567 +};
  568 +
  569 +class VoiceActivityDetector {
  570 + constructor(config, bufferSizeInSeconds) {
  571 + this.config = config;
  572 + this.handle = libsherpa_onnx.SherpaOnnxCreateVoiceActivityDetector(
  573 + config.ref(), bufferSizeInSeconds);
  574 + }
  575 +
  576 + free() {
  577 + if (this.handle) {
  578 + libsherpa_onnx.SherpaOnnxDestroyVoiceActivityDetector(this.handle);
  579 + this.handle = null;
  580 + }
  581 + }
  581 +
  582 + acceptWaveform(samples) {
  583 + libsherpa_onnx.SherpaOnnxVoiceActivityDetectorAcceptWaveform(
  584 + this.handle, samples, samples.length);
  585 + }
  586 +
  587 + isEmpty() {
  588 + return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorEmpty(this.handle);
  589 + }
  590 +
  591 + isDetected() {
  592 + return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorDetected(this.handle);
  593 + }
  594 + pop() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorPop(this.handle); }
  595 +
  596 + clear() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorClear(this.handle); }
  597 +
  598 + reset() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorReset(this.handle); }
  599 +
  600 + front() {
  601 + let segment =
  602 + libsherpa_onnx.SherpaOnnxVoiceActivityDetectorFront(this.handle);
  603 +
  604 + let buffer =
  605 + segment.deref()
  606 + .samples.buffer.reinterpret(segment.deref().n * ref.sizeof.float)
  607 + .buffer;
  608 +
  609 + let samples = new Float32Array(buffer).slice(0);
  610 + let ans = new SpeechSegment(segment.deref().start, samples);
  611 +
  612 + libsherpa_onnx.SherpaOnnxDestroySpeechSegment(segment);
  613 + return ans;
  614 + }
  615 +};
  616 +
  617 +class GeneratedAudio {
  618 + constructor(sampleRate, samples) {
  619 + this.sampleRate = sampleRate;
  620 + this.samples = samples;
  621 + }
  622 + save(filename) {
  623 + libsherpa_onnx.SherpaOnnxWriteWave(this.samples, this.samples.length,
  624 + this.sampleRate, filename);
  625 + }
  626 +};
  627 +
  628 +class OfflineTts {
  629 + constructor(config) {
  630 + this.config = config;
  631 + this.handle = libsherpa_onnx.SherpaOnnxCreateOfflineTts(config.ref());
  632 + }
  633 +
  634 + free() {
  635 + if (this.handle) {
  636 + libsherpa_onnx.SherpaOnnxDestroyOfflineTts(this.handle);
  637 + this.handle = null;
  638 + }
  639 + }
  640 + generate(text, sid, speed) {
  641 + let r = libsherpa_onnx.SherpaOnnxOfflineTtsGenerate(this.handle, text, sid,
  642 + speed);
  643 + const buffer =
  644 + r.deref()
  645 + .samples.buffer.reinterpret(r.deref().n * ref.sizeof.float)
  646 + .buffer;
  647 + let samples = new Float32Array(buffer).slice(0);
  648 + let sampleRate = r.deref().sampleRate;
  649 +
  650 + let generatedAudio = new GeneratedAudio(sampleRate, samples);
  651 +
  652 + libsherpa_onnx.SherpaOnnxDestroyOfflineTtsGeneratedAudio(r);
  653 +
  654 + return generatedAudio;
  655 + }
  656 +};
  657 +
  658 +// online asr
  659 +const OnlineTransducerModelConfig = SherpaOnnxOnlineTransducerModelConfig;
  660 +const OnlineModelConfig = SherpaOnnxOnlineModelConfig;
  661 +const FeatureConfig = SherpaOnnxFeatureConfig;
  662 +const OnlineRecognizerConfig = SherpaOnnxOnlineRecognizerConfig;
  663 +const OnlineParaformerModelConfig = SherpaOnnxOnlineParaformerModelConfig;
  664 +
  665 +// offline asr
  666 +const OfflineTransducerModelConfig = SherpaOnnxOfflineTransducerModelConfig;
  667 +const OfflineModelConfig = SherpaOnnxOfflineModelConfig;
  668 +const OfflineRecognizerConfig = SherpaOnnxOfflineRecognizerConfig;
  669 +const OfflineParaformerModelConfig = SherpaOnnxOfflineParaformerModelConfig;
  670 +const OfflineWhisperModelConfig = SherpaOnnxOfflineWhisperModelConfig;
  671 +const OfflineNemoEncDecCtcModelConfig =
  672 + SherpaOnnxOfflineNemoEncDecCtcModelConfig;
  673 +const OfflineTdnnModelConfig = SherpaOnnxOfflineTdnnModelConfig;
  674 +
  675 +// vad
  676 +const SileroVadModelConfig = SherpaOnnxSileroVadModelConfig;
  677 +const VadModelConfig = SherpaOnnxVadModelConfig;
  678 +
  679 +// tts
  680 +const OfflineTtsVitsModelConfig = SherpaOnnxOfflineTtsVitsModelConfig;
  681 +const OfflineTtsModelConfig = SherpaOnnxOfflineTtsModelConfig;
  682 +const OfflineTtsConfig = SherpaOnnxOfflineTtsConfig;
  683 +
  684 +module.exports = {
  685 + // online asr
  686 + OnlineTransducerModelConfig,
  687 + OnlineModelConfig,
  688 + FeatureConfig,
  689 + OnlineRecognizerConfig,
  690 + OnlineRecognizer,
  691 + OnlineStream,
  692 + OnlineParaformerModelConfig,
  693 +
  694 + // offline asr
  695 + OfflineRecognizer,
  696 + OfflineStream,
  697 + OfflineTransducerModelConfig,
  698 + OfflineModelConfig,
  699 + OfflineRecognizerConfig,
  700 + OfflineParaformerModelConfig,
  701 + OfflineWhisperModelConfig,
  702 + OfflineNemoEncDecCtcModelConfig,
  703 + OfflineTdnnModelConfig,
  704 + // vad
  705 + SileroVadModelConfig,
  706 + VadModelConfig,
  707 + CircularBuffer,
  708 + VoiceActivityDetector,
  709 + // tts
  710 + OfflineTtsVitsModelConfig,
  711 + OfflineTtsModelConfig,
  712 + OfflineTtsConfig,
  713 + OfflineTts,
  714 +
  715 + // display
  716 + Display,
  717 +};
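
Putting the exported pieces together, a compact sketch of the online (streaming) API flow follows. The model paths are placeholders, `modelType` is an assumption that must match the model you download, and the audio is one second of synthetic silence just to exercise the calls:

```js
const sherpa_onnx = require('sherpa-onnx');

const transducer = new sherpa_onnx.OnlineTransducerModelConfig();
transducer.encoder = './encoder.onnx';  // placeholder paths
transducer.decoder = './decoder.onnx';
transducer.joiner = './joiner.onnx';

const modelConfig = new sherpa_onnx.OnlineModelConfig();
modelConfig.transducer = transducer;
modelConfig.tokens = './tokens.txt';
modelConfig.numThreads = 1;
modelConfig.modelType = 'zipformer';  // assumption: match your model

const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;

const config = new sherpa_onnx.OnlineRecognizerConfig();
config.featConfig = featConfig;
config.modelConfig = modelConfig;
config.decodingMethod = 'greedy_search';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
const stream = recognizer.createStream();

stream.acceptWaveform(16000, new Float32Array(16000));  // 1 s of silence
while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
console.log(recognizer.getResult(stream).text);

stream.free();
recognizer.free();
```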
  1 +{
  2 + "name": "sherpa-onnx2",
  3 + "version": "1.8.10",
  4 + "description": "Real-time speech recognition with Next-gen Kaldi",
  5 + "main": "index.js",
  6 + "scripts": {
  7 + "test": "echo \"Error: no test specified\" && exit 1"
  8 + },
  9 + "repository": {
  10 + "type": "git",
  11 + "url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
  12 + },
  13 + "keywords": [
  14 + "speech-to-text",
  15 + "text-to-speech",
  16 + "real-time speech recognition",
  17 + "without internet connect",
  18 + "embedded systems",
  19 + "open source",
  20 + "zipformer",
  21 + "asr",
  22 + "speech"
  23 + ],
  24 + "author": "The next-gen Kaldi team",
  25 + "license": "Apache-2.0",
  26 + "bugs": {
  27 + "url": "https://github.com/k2-fsa/sherpa-onnx/issues"
  28 + },
  29 + "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
  30 + "dependencies": {
  31 + "ffi-napi": "^4.0.3",
  32 + "npm": "^6.14.18",
  33 + "ref-array-napi": "^1.2.2",
  34 + "ref-napi": "^3.0.3",
  35 + "ref-struct-napi": "^1.1.1"
  36 + }
  37 +}
  1 +{
  2 + "name": "sherpa-onnx",
  3 + "version": "SHERPA_ONNX_VERSION",
  4 + "description": "Real-time speech recognition with Next-gen Kaldi",
  5 + "main": "index.js",
  6 + "scripts": {
  7 + "test": "echo \"Error: no test specified\" && exit 1"
  8 + },
  9 + "repository": {
  10 + "type": "git",
  11 + "url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
  12 + },
  13 + "keywords": [
  14 + "speech to text",
  15 + "text to speech",
  16 + "transcription",
  17 + "real-time speech recognition",
  18 + "without internet connect",
  19 + "embedded systems",
  20 + "open source",
  21 + "zipformer",
  22 + "asr",
  23 + "tts",
  24 + "stt",
  25 + "c++",
  26 + "onnxruntime",
  27 + "onnx",
  28 + "ai",
  29 + "next-gen kaldi",
  30 + "offline",
  31 + "privacy",
  32 + "open source",
  33 + "streaming speech recognition",
  34 + "speech",
  35 + "recognition"
  36 + ],
  37 + "author": "The next-gen Kaldi team",
  38 + "license": "Apache-2.0",
  39 + "bugs": {
  40 + "url": "https://github.com/k2-fsa/sherpa-onnx/issues"
  41 + },
  42 + "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
  43 + "dependencies": {
  44 + "ffi-napi": "^4.0.3",
  45 + "npm": "^6.14.18",
  46 + "ref-array-napi": "^1.2.2",
  47 + "ref-napi": "^3.0.3",
  48 + "ref-struct-napi": "^1.1.1"
  49 + }
  50 +}
  1 +#!/usr/bin/env bash
  2 +set -ex
  3 +
  4 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  5 +SHERPA_ONNX_DIR=$(realpath $SCRIPT_DIR/../..)
  6 +echo "SCRIPT_DIR: $SCRIPT_DIR"
  7 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  8 +
  9 +SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
  10 +
  11 +echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
  12 +sed -i.bak "s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g" ./package.json.in
  13 +
  14 +cp package.json.in package.json
  15 +rm package.json.in
  16 +rm package.json.in.bak
  17 +rm .clang-format
  18 +
  19 +function windows_x64() {
  20 + echo "Process Windows (x64)"
  21 + mkdir -p lib/win-x64
  22 + dst=$(realpath lib/win-x64)
  23 + mkdir t
  24 + cd t
  25 + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
  26 + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
  27 +
  28 + cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst
  29 + cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst
  30 + rm -fv $dst/sherpa-onnx-portaudio.dll
  31 +
  32 + cd ..
  33 + rm -rf t
  34 +}
  35 +
  36 +function windows_x86() {
  37 + echo "Process Windows (x86)"
  38 + mkdir -p lib/win-x86
  39 + dst=$(realpath lib/win-x86)
  40 + mkdir t
  41 + cd t
  42 + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
  43 + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
  44 +
  45 + cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst
  46 + cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst
  47 + rm -fv $dst/sherpa-onnx-portaudio.dll
  48 +
  49 + cd ..
  50 + rm -rf t
  51 +}
  52 +
  53 +function linux_x64() {
  54 + echo "Process Linux (x64)"
  55 + mkdir -p lib/linux-x64
  56 + dst=$(realpath lib/linux-x64)
  57 + mkdir t
  58 + cd t
  59 + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
  60 + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
  61 +
  62 + cp -v sherpa_onnx/lib/*.so* $dst
  63 + rm -v $dst/libcargs.so
  64 + rm -v $dst/libsherpa-onnx-portaudio.so
  65 + rm -v $dst/libsherpa-onnx-fst.so
  66 + rm -v $dst/libonnxruntime.so
  67 +
  68 + cd ..
  69 + rm -rf t
  70 +}
  71 +
  72 +function osx_x64() {
  73 + echo "Process osx-x64"
  74 + mkdir -p lib/osx-x64
  75 + dst=$(realpath lib/osx-x64)
  76 + mkdir t
  77 + cd t
  78 + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_10_14_x86_64.whl
  79 + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_10_14_x86_64.whl
  80 +
  81 + cp -v sherpa_onnx/lib/*.dylib $dst/
  82 + rm -v $dst/libonnxruntime.dylib
  83 + rm -v $dst/libcargs.dylib
  84 + rm -v $dst/libsherpa-onnx-fst.dylib
  85 + rm -v $dst/libsherpa-onnx-portaudio.dylib
  86 +
  87 + cd ..
  88 + rm -rf t
  89 +}
  90 +
  91 +function osx_arm64() {
  92 + echo "Process osx-arm64"
  93 + mkdir -p lib/osx-arm64
  94 + dst=$(realpath lib/osx-arm64)
  95 + mkdir t
  96 + cd t
  97 + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl
  98 + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl
  99 +
  100 + cp -v sherpa_onnx/lib/*.dylib $dst/
  101 + rm -v $dst/libonnxruntime.dylib
  102 + rm -v $dst/libcargs.dylib
  103 + rm -v $dst/libsherpa-onnx-fst.dylib
  104 + rm -v $dst/libsherpa-onnx-portaudio.dylib
  105 +
  106 + cd ..
  107 + rm -rf t
  108 +}
  109 +
  110 +windows_x64
  111 +ls -lh lib/win-x64
  112 +
  113 +windows_x86
  114 +ls -lh lib/win-x86
  115 +
  116 +linux_x64
  117 +ls -lh lib/linux-x64
  118 +
  119 +osx_x64
  120 +ls -lh lib/osx-x64
  121 +
  122 +osx_arm64
  123 +ls -lh lib/osx-arm64
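
After build.sh has populated lib/, a quick (hypothetical) smoke test is simply to load the module, since requiring index.js runs the platform/arch dispatch and the fs.existsSync() check shown earlier:

```js
// smoke-test.js -- loading the module is enough to verify that the
// shared library for the current platform/arch resolves.
const sherpa_onnx = require('./index.js');
console.log('loaded OK, exports:', Object.keys(sherpa_onnx).join(', '));
```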
@@ -438,6 +438,10 @@ int32_t SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer) { @@ -438,6 +438,10 @@ int32_t SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer) {
438 return buffer->impl->Size(); 438 return buffer->impl->Size();
439 } 439 }
440 440
  441 +int32_t SherpaOnnxCircularBufferHead(SherpaOnnxCircularBuffer *buffer) {
  442 + return buffer->impl->Head();
  443 +}
  444 +
441 void SherpaOnnxCircularBufferReset(SherpaOnnxCircularBuffer *buffer) { 445 void SherpaOnnxCircularBufferReset(SherpaOnnxCircularBuffer *buffer) {
442 buffer->impl->Reset(); 446 buffer->impl->Reset();
443 } 447 }
@@ -553,6 +557,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( @@ -553,6 +557,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
553 tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); 557 tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
554 tts_config.model.debug = config->model.debug; 558 tts_config.model.debug = config->model.debug;
555 tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); 559 tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  560 + tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
556 561
557 if (tts_config.model.debug) { 562 if (tts_config.model.debug) {
558 fprintf(stderr, "%s\n", tts_config.ToString().c_str()); 563 fprintf(stderr, "%s\n", tts_config.ToString().c_str());
@@ -130,10 +130,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult { @@ -130,10 +130,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult {
130 const char *text; 130 const char *text;
131 131
132 // Pointer to continuous memory which holds string based tokens 132 // Pointer to continuous memory which holds string based tokens
133 - // which are seperated by \0 133 + // which are separated by \0
134 const char *tokens; 134 const char *tokens;
135 135
136 - // a pointer array contains the address of the first item in tokens 136 + // a pointer array containing the address of the first item in tokens
137 const char *const *tokens_arr; 137 const char *const *tokens_arr;
138 138
139 // Pointer to continuous memory which holds timestamps 139 // Pointer to continuous memory which holds timestamps
@@ -532,6 +532,11 @@ SHERPA_ONNX_API void SherpaOnnxCircularBufferPop( @@ -532,6 +532,11 @@ SHERPA_ONNX_API void SherpaOnnxCircularBufferPop(
532 SHERPA_ONNX_API int32_t 532 SHERPA_ONNX_API int32_t
533 SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer); 533 SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer);
534 534
  535 +// Return the head of the buffer. It is always non-decreasing until you
  536 +// invoke SherpaOnnxCircularBufferReset(), which resets the head to 0.
  537 +SHERPA_ONNX_API int32_t
  538 +SherpaOnnxCircularBufferHead(SherpaOnnxCircularBuffer *buffer);
  539 +
535 // Clear all elements in the buffer 540 // Clear all elements in the buffer
536 SHERPA_ONNX_API void SherpaOnnxCircularBufferReset( 541 SHERPA_ONNX_API void SherpaOnnxCircularBufferReset(
537 SherpaOnnxCircularBuffer *buffer); 542 SherpaOnnxCircularBuffer *buffer);
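
The new SherpaOnnxCircularBufferHead() is what lets the JavaScript examples address absolute positions: the head only grows as samples are popped (until a reset), so get(head(), n) always reads the oldest unconsumed samples. A small sketch of that contract, as read from the header comment above and the JS bindings in this PR:

```js
const sherpa_onnx = require('sherpa-onnx');

const buffer = new sherpa_onnx.CircularBuffer(8);  // capacity: 8 floats
buffer.push(new Float32Array([1, 2, 3, 4]));

console.log(buffer.head());   // 0: nothing consumed yet
const first = buffer.get(buffer.head(), 2);  // Float32Array [ 1, 2 ]
buffer.pop(2);
console.log(buffer.head());   // 2: head advanced past the popped samples
console.log(buffer.size());   // 2: samples 3 and 4 still queued

buffer.reset();               // empties the buffer and resets head to 0
buffer.free();
```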
@@ -617,6 +622,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { @@ -617,6 +622,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
617 622
618 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { 623 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
619 SherpaOnnxOfflineTtsModelConfig model; 624 SherpaOnnxOfflineTtsModelConfig model;
  625 + const char *rule_fsts;
620 } SherpaOnnxOfflineTtsConfig; 626 } SherpaOnnxOfflineTtsConfig;
621 627
622 SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio { 628 SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {
@@ -457,7 +457,7 @@ class OnlineRecognizerParaformerImpl : public OnlineRecognizerImpl { @@ -457,7 +457,7 @@ class OnlineRecognizerParaformerImpl : public OnlineRecognizerImpl {
457 // (61 - 7) / 6 + 1 = 10 457 // (61 - 7) / 6 + 1 = 10
458 458
459 int32_t left_chunk_size_ = 5; 459 int32_t left_chunk_size_ = 5;
460 - int32_t right_chunk_size_ = 5; 460 + int32_t right_chunk_size_ = 2;
461 }; 461 };
462 462
463 } // namespace sherpa_onnx 463 } // namespace sherpa_onnx
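
For context on the chunk sizes touched here: the in-code comment quoted above, (61 - 7) / 6 + 1 = 10, matches the usual frame-count arithmetic for an encoder that subsamples by 6 with 7 frames of context. As a sketch of that formula (my reading; the diff itself only changes right_chunk_size_ from 5 to 2):

$$N_{\text{out}} = \left\lfloor \frac{N_{\text{in}} - 7}{6} \right\rfloor + 1, \qquad \left\lfloor \frac{61 - 7}{6} \right\rfloor + 1 = 10.$$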