Add VAD and keyword spotting for the Node package with WebAssembly (#1286)
Committed by GitHub
Showing 40 changed files, with 456 additions and 524 deletions.
@@ -9,6 +9,28 @@ git status
 ls -lh
 ls -lh node_modules
 
+echo '-----vad+whisper----------'
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+rm sherpa-onnx-whisper-tiny.en.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+node ./test-vad-with-non-streaming-asr-whisper.js
+rm Obama.wav
+rm silero_vad.onnx
+rm -rf sherpa-onnx-whisper-tiny.en
+
+echo "----------keyword spotting----------"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+
+node ./test-keyword-spotter-transducer.js
+rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
+
 # offline asr
 #
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
 name: npm
 
 on:
+  push:
+    branches:
+      - npm
   workflow_dispatch:
 
 concurrency:
@@ -27,6 +30,9 @@ jobs:
 
     - name: Install emsdk
      uses: mymindstorm/setup-emsdk@v14
+      with:
+        version: 3.1.51
+        actions-cache-folder: 'emsdk-cache'
 
    - name: View emsdk version
      shell: bash
@@ -51,8 +57,6 @@ jobs:
 
    - name: Build nodejs package
      shell: bash
-      env:
-        NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
      run: |
        ./build-wasm-simd-nodejs.sh
        cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
@@ -71,6 +75,29 @@ jobs:
 
        rm package.json.bak
 
+    - name: Collect files
+      shell: bash
+      run: |
+        dst=sherpa-onnx-wasm-nodejs
+        mkdir $dst
+        cp -v scripts/nodejs/* $dst
+        tar cvjf $dst.tar.bz2 $dst
+
+        echo "---"
+        ls -h $dst
+
+    - uses: actions/upload-artifact@v4
+      with:
+        name: sherpa-onnx-wasm-nodejs
+        path: ./*.tar.bz2
+
+    - name: Build nodejs package
+      shell: bash
+      env:
+        NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+      run: |
+        cd scripts/nodejs
+
        git diff
 
        npm install
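A note on the workflow restructuring above: the wasm build step no longer receives NODE_AUTH_TOKEN; a new "Collect files" step archives scripts/nodejs and uploads it via actions/upload-artifact@v4, and the npm token is now scoped only to the later publish step that runs inside scripts/nodejs. Pinning mymindstorm/setup-emsdk@v14 to emsdk 3.1.51 with a cache folder also makes the toolchain version reproducible across runs.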
@@ -55,6 +55,9 @@ jobs:
 
    - name: Install emsdk
      uses: mymindstorm/setup-emsdk@v14
+      with:
+        version: 3.1.51
+        actions-cache-folder: 'emsdk-cache'
 
    - name: View emsdk version
      shell: bash
@@ -109,6 +112,7 @@ jobs:
        node --version
        npm --version
        export d=scripts/nodejs
+        cat $d/index.js
 
        pushd $d
        npm install
+## 1.10.23
+
+* flutter: add lang, emotion, event to OfflineRecognizerResult (#1268)
+* Use a separate thread to initialize models for lazarus examples. (#1270)
+* Object pascal examples for recording and playing audio with portaudio. (#1271)
+* Text to speech API for Object Pascal. (#1273)
+* update kotlin api for better release native object and add user-friendly apis. (#1275)
+* Update wave-reader.cc to support 8/16/32-bit waves (#1278)
+* Add WebAssembly for VAD (#1281)
+* WebAssembly example for VAD + Non-streaming ASR (#1284)
+
 ## 1.10.22
 
 * Add Pascal API for reading wave files (#1243)
@@ -11,7 +11,7 @@ project(sherpa-onnx)
 # ./nodejs-addon-examples
 # ./dart-api-examples/
 # ./CHANGELOG.md
-set(SHERPA_ONNX_VERSION "1.10.22")
+set(SHERPA_ONNX_VERSION "1.10.23")
 
 # Disable warning about
 #
@@ -206,6 +206,7 @@ if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
   if(NOT SHERPA_ONNX_ENABLE_WASM)
     message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS")
   endif()
+  add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
 endif()
 
 if(SHERPA_ONNX_ENABLE_WASM)
@@ -9,7 +9,7 @@ environment:
   sdk: ^3.4.0
 
 dependencies:
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   # sherpa_onnx:
   #   path: ../../flutter/sherpa_onnx
   path: ^1.9.0
@@ -5,7 +5,7 @@ description: >
 
 publish_to: 'none'
 
-version: 1.10.22
+version: 1.10.23
 
 topics:
   - speech-recognition
@@ -30,7 +30,7 @@ dependencies:
   record: ^5.1.0
   url_launcher: ^6.2.6
 
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   # sherpa_onnx:
   #   path: ../../flutter/sherpa_onnx
 
@@ -5,7 +5,7 @@ description: >
 
 publish_to: 'none' # Remove this line if you wish to publish to pub.dev
 
-version: 1.10.22
+version: 1.10.23
 
 environment:
   sdk: '>=3.4.0 <4.0.0'
@@ -17,7 +17,7 @@ dependencies:
   cupertino_icons: ^1.0.6
   path_provider: ^2.1.3
   path: ^1.9.0
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   url_launcher: ^6.2.6
   audioplayers: ^5.0.0
 
@@ -17,7 +17,7 @@ topics:
   - voice-activity-detection
 
 # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
-version: 1.10.22
+version: 1.10.23
 
 homepage: https://github.com/k2-fsa/sherpa-onnx
 
@@ -30,23 +30,23 @@ dependencies:
   flutter:
     sdk: flutter
 
-  sherpa_onnx_android: ^1.10.22
+  sherpa_onnx_android: ^1.10.23
   # sherpa_onnx_android:
   #   path: ../sherpa_onnx_android
 
-  sherpa_onnx_macos: ^1.10.22
+  sherpa_onnx_macos: ^1.10.23
   # sherpa_onnx_macos:
   #   path: ../sherpa_onnx_macos
 
-  sherpa_onnx_linux: ^1.10.22
+  sherpa_onnx_linux: ^1.10.23
   # sherpa_onnx_linux:
   #   path: ../sherpa_onnx_linux
   #
-  sherpa_onnx_windows: ^1.10.22
+  sherpa_onnx_windows: ^1.10.23
   # sherpa_onnx_windows:
   #   path: ../sherpa_onnx_windows
 
-  sherpa_onnx_ios: ^1.10.22
+  sherpa_onnx_ios: ^1.10.23
   # sherpa_onnx_ios:
   #   path: ../sherpa_onnx_ios
 
@@ -7,7 +7,7 @@
 # https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
 Pod::Spec.new do |s|
   s.name = 'sherpa_onnx_ios'
-  s.version = '1.10.22'
+  s.version = '1.10.23'
   s.summary = 'A new Flutter FFI plugin project.'
   s.description = <<-DESC
 A new Flutter FFI plugin project.
@@ -4,7 +4,7 @@
 #
 Pod::Spec.new do |s|
   s.name = 'sherpa_onnx_macos'
-  s.version = '1.10.22'
+  s.version = '1.10.23'
   s.summary = 'sherpa-onnx Flutter FFI plugin project.'
   s.description = <<-DESC
 sherpa-onnx Flutter FFI plugin project.
new-release.sh (new file, mode 100755)
+#!/usr/bin/env bash
+
+find flutter -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find dart-api-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find flutter-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find flutter -name *.podspec -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
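One caveat about new-release.sh as committed: the glob patterns in -name *.yaml and -name *.podspec are unquoted, so the shell will expand them before find runs if any matching file exists in the working directory; quoting them (-name '*.yaml') would make the script safe to run from any directory.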
@@ -42,11 +42,11 @@ stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
 
 const detectedKeywords = [];
 while (kws.isReady(stream)) {
+  kws.decode(stream);
   const keyword = kws.getResult(stream).keyword;
   if (keyword != '') {
     detectedKeywords.push(keyword);
   }
-  kws.decode(stream);
 }
 let stop = Date.now();
 
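The reordering in this hunk fixes an off-by-one in the result loop: getResult() reports only what has already been decoded, so the old order read a result that lagged one decode() behind and dropped whatever the final decode() found. The corrected flow, with comments:

while (kws.isReady(stream)) {
  kws.decode(stream);  // decode the pending feature frames first
  // the result now reflects the frames decoded just above
  const keyword = kws.getResult(stream).keyword;
  if (keyword != '') {
    detectedKeywords.push(keyword);
  }
}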
@@ -120,8 +120,8 @@ console.log('Done')
 const elapsed_seconds = (stop - start) / 1000;
 const duration = wave.samples.length / wave.sampleRate;
 const real_time_factor = elapsed_seconds / duration;
-console.log('Wave duration', duration.toFixed(3), 'secodns')
-console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
+console.log('Wave duration', duration.toFixed(3), 'seconds')
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
 console.log(
     `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
     real_time_factor.toFixed(3))
+// Copyright (c) 2024 Xiaomi Corporation
+const sherpa_onnx = require('sherpa-onnx');
+
+function createKeywordSpotter() {
+  // Please download test files from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
+  const config = {
+    'modelConfig': {
+      'transducer': {
+        'encoder':
+            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
+        'decoder':
+            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
+        'joiner':
+            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
+      },
+      'tokens':
+          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
+    },
+    keywords: 'w én s ēn t è k ǎ s uǒ @文森特卡索\n' +
+        'f ǎ g uó @法国'
+  };
+
+  return sherpa_onnx.createKws(config);
+}
+
+const kws = createKeywordSpotter();
+const stream = kws.createStream();
+const waveFilename =
+    './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';
+
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform(wave.sampleRate, wave.samples);
+
+const tailPadding = new Float32Array(wave.sampleRate * 0.4);
+stream.acceptWaveform(kws.config.featConfig.sampleRate, tailPadding);
+
+const detectedKeywords = [];
+while (kws.isReady(stream)) {
+  kws.decode(stream);
+  const keyword = kws.getResult(stream).keyword;
+  if (keyword != '') {
+    detectedKeywords.push(keyword);
+  }
+}
+console.log(detectedKeywords);
+
+stream.free();
+kws.free();
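A note on the keywords field in this new example: each newline-separated entry spells one keyword as modeling-unit tokens from tokens.txt (pinyin initials/finals for this wenetspeech model), and the text after @ is the original phrase that getResult(stream).keyword reports when the keyword fires.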
@@ -7,27 +7,13 @@ const wav = require('wav');
 const sherpa_onnx = require('sherpa-onnx');
 
 function createOfflineRecognizer() {
-  let featConfig = {
-    sampleRate: 16000,
-    featureDim: 80,
-  };
-
-  let modelConfig = {
-    nemoCtc: {
-      model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
-    },
-    tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
-    numThreads: 1,
-    debug: 0,
-    provider: 'cpu',
-    modelType: 'nemo_ctc',
-  };
-
   let config = {
-    featConfig: featConfig,
-    modelConfig: modelConfig,
-    decodingMethod: 'greedy_search',
-    maxActivePaths: 4,
+    modelConfig: {
+      nemoCtc: {
+        model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
+      },
+      tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
+    }
   };
 
   return sherpa_onnx.createOfflineRecognizer(config);
@@ -38,63 +24,12 @@ const stream = recognizer.createStream();
 
 const waveFilename =
     './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform(wave.sampleRate, wave.samples);
 
-const reader = new wav.Reader();
-const readable = new Readable().wrap(reader);
-const buf = [];
-
-reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
-  if (sampleRate != recognizer.config.featConfig.sampleRate) {
-    throw new Error(`Only support sampleRate ${
-        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
-  }
-
-  if (audioFormat != 1) {
-    throw new Error(`Only support PCM format. Given ${audioFormat}`);
-  }
-
-  if (channels != 1) {
-    throw new Error(`Only a single channel. Given ${channel}`);
-  }
-
-  if (bitDepth != 16) {
-    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
-  }
-});
-
-fs.createReadStream(waveFilename, {highWaterMark: 4096})
-    .pipe(reader)
-    .on('finish', function(err) {
-      // tail padding
-      const floatSamples =
-          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
-
-      buf.push(floatSamples);
-      const flattened =
-          Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
-
-      stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
-      recognizer.decode(stream);
-      const text = recognizer.getResult(stream).text;
-      console.log(text);
-
-      stream.free();
-      recognizer.free();
-    });
-
-readable.on('readable', function() {
-  let chunk;
-  while ((chunk = readable.read()) != null) {
-    const int16Samples = new Int16Array(
-        chunk.buffer, chunk.byteOffset,
-        chunk.length / Int16Array.BYTES_PER_ELEMENT);
-
-    const floatSamples = new Float32Array(int16Samples.length);
-
-    for (let i = 0; i < floatSamples.length; i++) {
-      floatSamples[i] = int16Samples[i] / 32768.0;
-    }
+recognizer.decode(stream);
+const text = recognizer.getResult(stream).text;
+console.log(text);
 
-    buf.push(floatSamples);
-  }
-});
+stream.free();
+recognizer.free();
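This rewrite, and the analogous ones for the paraformer, sense-voice, transducer, and whisper examples below, leans on two other changes in this diff: readWave is now exported from the package (see the index.js hunk near the end), and the config initializers fill in defaults (numThreads: 1, provider: 'cpu', a 16 kHz/80-dim feature config) for omitted fields, so each example shrinks to model paths plus a handful of lines. A minimal sketch of the resulting pattern, with placeholder paths:

const sherpa_onnx = require('sherpa-onnx');

// Only the model files are required; numThreads, provider, featConfig,
// decodingMethod, etc. now fall back to defaults inside the wasm binding.
const recognizer = sherpa_onnx.createOfflineRecognizer({
  modelConfig: {
    nemoCtc: {model: './model.int8.onnx'},  // placeholder path
    tokens: './tokens.txt',                 // placeholder path
  },
});

const wave = sherpa_onnx.readWave('./test.wav');  // placeholder path
const stream = recognizer.createStream();
stream.acceptWaveform(wave.sampleRate, wave.samples);
recognizer.decode(stream);
console.log(recognizer.getResult(stream).text);

stream.free();
recognizer.free();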
@@ -7,27 +7,15 @@ const wav = require('wav');
 const sherpa_onnx = require('sherpa-onnx');
 
 function createOfflineRecognizer() {
-  let featConfig = {
-    sampleRate: 16000,
-    featureDim: 80,
-  };
-
   let modelConfig = {
     paraformer: {
       model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
     },
     tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
-    numThreads: 1,
-    debug: 0,
-    provider: 'cpu',
-    modelType: 'paraformer',
   };
 
-
   let config = {
-    featConfig: featConfig,
     modelConfig: modelConfig,
-    decodingMethod: 'greedy_search',
     // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
     ruleFsts: './itn_zh_number.fst',
   };
@@ -41,62 +29,12 @@ const stream = recognizer.createStream();
 
 // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
 const waveFilename = './itn-zh-number.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform(wave.sampleRate, wave.samples);
 
-const reader = new wav.Reader();
-const readable = new Readable().wrap(reader);
-const buf = [];
-
-reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
-  if (sampleRate != recognizer.config.featConfig.sampleRate) {
-    throw new Error(`Only support sampleRate ${
-        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
-  }
-
-  if (audioFormat != 1) {
-    throw new Error(`Only support PCM format. Given ${audioFormat}`);
-  }
-
-  if (channels != 1) {
-    throw new Error(`Only a single channel. Given ${channel}`);
-  }
-
-  if (bitDepth != 16) {
-    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
-  }
-});
-
-fs.createReadStream(waveFilename, {'highWaterMark': 4096})
-    .pipe(reader)
-    .on('finish', function(err) {
-      // tail padding
-      const floatSamples =
-          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
-
-      buf.push(floatSamples);
-      const flattened =
-          Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
-
-      stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
-      recognizer.decode(stream);
-      const text = recognizer.getResult(stream).text;
-      console.log(text);
-
-      stream.free();
-      recognizer.free();
-    });
-
-readable.on('readable', function() {
-  let chunk;
-  while ((chunk = readable.read()) != null) {
-    const int16Samples = new Int16Array(
-        chunk.buffer, chunk.byteOffset,
-        chunk.length / Int16Array.BYTES_PER_ELEMENT);
-
-    const floatSamples = new Float32Array(int16Samples.length);
-    for (let i = 0; i < floatSamples.length; i++) {
-      floatSamples[i] = int16Samples[i] / 32768.0;
-    }
+recognizer.decode(stream);
+const text = recognizer.getResult(stream).text;
+console.log(text);
 
-    buf.push(floatSamples);
-  }
-});
+stream.free();
+recognizer.free();
 // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
 
-const fs = require('fs');
-const {Readable} = require('stream');
-const wav = require('wav');
-
 const sherpa_onnx = require('sherpa-onnx');
 
 function createOfflineRecognizer() {
-  let featConfig = {
-    sampleRate: 16000,
-    featureDim: 80,
-  };
-
   let modelConfig = {
     paraformer: {
       model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
     },
     tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
-    numThreads: 1,
-    debug: 0,
-    provider: 'cpu',
-    modelType: 'paraformer',
   };
 
   let config = {
-    featConfig: featConfig,
     modelConfig: modelConfig,
-    decodingMethod: 'greedy_search',
   };
 
   return sherpa_onnx.createOfflineRecognizer(config);
 }
 
-
 const recognizer = createOfflineRecognizer();
 const stream = recognizer.createStream();
 
 const waveFilename = './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform(wave.sampleRate, wave.samples);
 
-const reader = new wav.Reader();
-const readable = new Readable().wrap(reader);
-const buf = [];
-
-reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
-  if (sampleRate != recognizer.config.featConfig.sampleRate) {
-    throw new Error(`Only support sampleRate ${
-        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
-  }
-
-  if (audioFormat != 1) {
-    throw new Error(`Only support PCM format. Given ${audioFormat}`);
-  }
-
-  if (channels != 1) {
-    throw new Error(`Only a single channel. Given ${channel}`);
-  }
-
-  if (bitDepth != 16) {
-    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
-  }
-});
-
-fs.createReadStream(waveFilename, {'highWaterMark': 4096})
-    .pipe(reader)
-    .on('finish', function(err) {
-      // tail padding
-      const floatSamples =
-          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
-
-      buf.push(floatSamples);
-      const flattened =
-          Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
-
-      stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
-      recognizer.decode(stream);
-      const text = recognizer.getResult(stream).text;
-      console.log(text);
-
-      stream.free();
-      recognizer.free();
-    });
-
-readable.on('readable', function() {
-  let chunk;
-  while ((chunk = readable.read()) != null) {
-    const int16Samples = new Int16Array(
-        chunk.buffer, chunk.byteOffset,
-        chunk.length / Int16Array.BYTES_PER_ELEMENT);
-
-    const floatSamples = new Float32Array(int16Samples.length);
-    for (let i = 0; i < floatSamples.length; i++) {
-      floatSamples[i] = int16Samples[i] / 32768.0;
-    }
+recognizer.decode(stream);
+const text = recognizer.getResult(stream).text;
+console.log(text);
 
-    buf.push(floatSamples);
-  }
-});
+stream.free();
+recognizer.free();
 // Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
 
-const fs = require('fs');
-const {Readable} = require('stream');
-const wav = require('wav');
-
 const sherpa_onnx = require('sherpa-onnx');
 
 function createOfflineRecognizer() {
-  let featConfig = {
-    sampleRate: 16000,
-    featureDim: 80,
-  };
-
   let modelConfig = {
     senseVoice: {
       model:
@@ -20,82 +11,26 @@ function createOfflineRecognizer() {
       useInverseTextNormalization: 1,
     },
     tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
-    numThreads: 1,
-    debug: 0,
-    provider: 'cpu',
   };
 
   let config = {
-    featConfig: featConfig,
     modelConfig: modelConfig,
-    decodingMethod: 'greedy_search',
   };
 
   return sherpa_onnx.createOfflineRecognizer(config);
 }
 
-
 const recognizer = createOfflineRecognizer();
 const stream = recognizer.createStream();
 
 const waveFilename =
     './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform(wave.sampleRate, wave.samples);
 
-const reader = new wav.Reader();
-const readable = new Readable().wrap(reader);
-const buf = [];
-
-reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
-  if (sampleRate != recognizer.config.featConfig.sampleRate) {
-    throw new Error(`Only support sampleRate ${
-        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
-  }
-
-  if (audioFormat != 1) {
-    throw new Error(`Only support PCM format. Given ${audioFormat}`);
-  }
-
-  if (channels != 1) {
-    throw new Error(`Only a single channel. Given ${channel}`);
-  }
-
-  if (bitDepth != 16) {
-    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
-  }
-});
-
-fs.createReadStream(waveFilename, {'highWaterMark': 4096})
-    .pipe(reader)
-    .on('finish', function(err) {
-      // tail padding
-      const floatSamples =
-          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
-
-      buf.push(floatSamples);
-      const flattened =
-          Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
-
-      stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
-      recognizer.decode(stream);
-      const text = recognizer.getResult(stream).text;
-      console.log(text);
-
-      stream.free();
-      recognizer.free();
-    });
-
-readable.on('readable', function() {
-  let chunk;
-  while ((chunk = readable.read()) != null) {
-    const int16Samples = new Int16Array(
-        chunk.buffer, chunk.byteOffset,
-        chunk.length / Int16Array.BYTES_PER_ELEMENT);
-
-    const floatSamples = new Float32Array(int16Samples.length);
-    for (let i = 0; i < floatSamples.length; i++) {
-      floatSamples[i] = int16Samples[i] / 32768.0;
-    }
+recognizer.decode(stream);
+const text = recognizer.getResult(stream).text;
+console.log(text);
 
-    buf.push(floatSamples);
-  }
-});
+stream.free();
+recognizer.free();
 // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
 //
-const fs = require('fs');
-const {Readable} = require('stream');
-const wav = require('wav');
-
 const sherpa_onnx = require('sherpa-onnx');
 
 function createOfflineRecognizer() {
-  let featConfig = {
-    sampleRate: 16000,
-    featureDim: 80,
-  };
-
   let modelConfig = {
     transducer: {
       encoder:
@@ -22,19 +13,11 @@ function createOfflineRecognizer() {
         './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
     },
     tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
-    numThreads: 1,
-    debug: 0,
-    provider: 'cpu',
     modelType: 'transducer',
   };
 
   let config = {
-    featConfig: featConfig,
     modelConfig: modelConfig,
-    decodingMethod: 'greedy_search',
-    maxActivePaths: 4,
-    hotwordsFile: '',
-    hotwordsScore: 1.5,
   };
 
   return sherpa_onnx.createOfflineRecognizer(config);
@@ -43,62 +26,12 @@ const recognizer = createOfflineRecognizer();
 const stream = recognizer.createStream();
 
 const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform(wave.sampleRate, wave.samples);
 
-const reader = new wav.Reader();
-const readable = new Readable().wrap(reader);
-const buf = [];
-
-reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
-  if (sampleRate != recognizer.config.featConfig.sampleRate) {
-    throw new Error(`Only support sampleRate ${
-        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
-  }
-
-  if (audioFormat != 1) {
-    throw new Error(`Only support PCM format. Given ${audioFormat}`);
-  }
-
-  if (channels != 1) {
-    throw new Error(`Only a single channel. Given ${channel}`);
-  }
-
-  if (bitDepth != 16) {
-    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
-  }
-});
-
-fs.createReadStream(waveFilename, {'highWaterMark': 4096})
-    .pipe(reader)
-    .on('finish', function(err) {
-      // tail padding
-      const floatSamples =
-          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
-
-      buf.push(floatSamples);
-      const flattened =
-          Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
-
-      stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
-      recognizer.decode(stream);
-      const text = recognizer.getResult(stream).text;
-      console.log(text);
-
-      stream.free();
-      recognizer.free();
-    });
-
-readable.on('readable', function() {
-  let chunk;
-  while ((chunk = readable.read()) != null) {
-    const int16Samples = new Int16Array(
-        chunk.buffer, chunk.byteOffset,
-        chunk.length / Int16Array.BYTES_PER_ELEMENT);
-
-    const floatSamples = new Float32Array(int16Samples.length);
-    for (let i = 0; i < floatSamples.length; i++) {
-      floatSamples[i] = int16Samples[i] / 32768.0;
-    }
+recognizer.decode(stream);
+const text = recognizer.getResult(stream).text;
+console.log(text);
 
-    buf.push(floatSamples);
-  }
-});
+stream.free();
+recognizer.free();
 // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
 //
-const fs = require('fs');
-const {Readable} = require('stream');
-const wav = require('wav');
-
 const sherpa_onnx = require('sherpa-onnx');
 
 function createOfflineRecognizer() {
-  let featConfig = {
-    sampleRate: 16000,
-    featureDim: 80,
-  };
-
   let modelConfig = {
     whisper: {
       encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
@@ -21,83 +12,25 @@ function createOfflineRecognizer() {
       tailPaddings: -1,
     },
     tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
-    numThreads: 1,
-    debug: 0,
-    provider: 'cpu',
-    modelType: 'whisper',
   };
 
   let config = {
-    featConfig: featConfig,
    modelConfig: modelConfig,
-    decodingMethod: 'greedy_search',
   };
 
   return sherpa_onnx.createOfflineRecognizer(config);
 }
 
-
 recognizer = createOfflineRecognizer();
 stream = recognizer.createStream();
 
 const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform(wave.sampleRate, wave.samples);
 
-const reader = new wav.Reader();
-const readable = new Readable().wrap(reader);
-const buf = [];
-
-reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
-  if (sampleRate != recognizer.config.featConfig.sampleRate) {
-    throw new Error(`Only support sampleRate ${
-        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
-  }
-
-  if (audioFormat != 1) {
-    throw new Error(`Only support PCM format. Given ${audioFormat}`);
-  }
-
-  if (channels != 1) {
-    throw new Error(`Only a single channel. Given ${channel}`);
-  }
-
-  if (bitDepth != 16) {
-    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
-  }
-});
-
-fs.createReadStream(waveFilename, {'highWaterMark': 4096})
-    .pipe(reader)
-    .on('finish', function(err) {
-      // tail padding
-      const floatSamples =
-          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
-
-      buf.push(floatSamples);
-      const flattened =
-          Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
-
-      stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
-      recognizer.decode(stream);
-      const text = recognizer.getResult(stream).text;
-      console.log(text);
-
-      stream.free();
-      recognizer.free();
-    });
-
-readable.on('readable', function() {
-  let chunk;
-  while ((chunk = readable.read()) != null) {
-    const int16Samples = new Int16Array(
-        chunk.buffer, chunk.byteOffset,
-        chunk.length / Int16Array.BYTES_PER_ELEMENT);
-
-    const floatSamples = new Float32Array(int16Samples.length);
-
-    for (let i = 0; i < floatSamples.length; i++) {
-      floatSamples[i] = int16Samples[i] / 32768.0;
-    }
+recognizer.decode(stream);
+const text = recognizer.getResult(stream).text;
+console.log(text);
 
-    buf.push(floatSamples);
-  }
-});
+stream.free();
+recognizer.free();
@@ -16,22 +16,10 @@ function createOnlineRecognizer() {
   let onlineModelConfig = {
     paraformer: onlineParaformerModelConfig,
     tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
-    numThreads: 1,
-    provider: 'cpu',
-    debug: 1,
-    modelType: 'paraformer',
-  };
-
-  let featureConfig = {
-    sampleRate: 16000,
-    featureDim: 80,
   };
 
   let recognizerConfig = {
-    featConfig: featureConfig,
     modelConfig: onlineModelConfig,
-    decodingMethod: 'greedy_search',
-    maxActivePaths: 4,
     enableEndpoint: 1,
     rule1MinTrailingSilence: 2.4,
     rule2MinTrailingSilence: 1.2,
@@ -17,26 +17,10 @@ function createOnlineRecognizer() {
   let onlineModelConfig = {
     paraformer: onlineParaformerModelConfig,
     tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
-    numThreads: 1,
-    provider: 'cpu',
-    debug: 1,
-    modelType: 'paraformer',
-  };
-
-  let featureConfig = {
-    sampleRate: 16000,
-    featureDim: 80,
   };
 
   let recognizerConfig = {
-    featConfig: featureConfig,
     modelConfig: onlineModelConfig,
-    decodingMethod: 'greedy_search',
-    maxActivePaths: 4,
-    enableEndpoint: 1,
-    rule1MinTrailingSilence: 2.4,
-    rule2MinTrailingSilence: 1.2,
-    rule3MinUtteranceLength: 20,
   };
 
   return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
@@ -20,26 +20,10 @@ function createOnlineRecognizer() {
     transducer: onlineTransducerModelConfig,
     tokens:
         './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
-    numThreads: 1,
-    provider: 'cpu',
-    debug: 1,
-    modelType: 'zipformer',
-  };
-
-  let featureConfig = {
-    sampleRate: 16000,
-    featureDim: 80,
   };
 
   let recognizerConfig = {
-    featConfig: featureConfig,
     modelConfig: onlineModelConfig,
-    decodingMethod: 'greedy_search',
-    maxActivePaths: 4,
-    enableEndpoint: 1,
-    rule1MinTrailingSilence: 2.4,
-    rule2MinTrailingSilence: 1.2,
-    rule3MinUtteranceLength: 20,
   };
 
   return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
+// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+const sherpa_onnx = require('sherpa-onnx');
+
+function createRecognizer() {
+  // Please download test files from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+  const config = {
+    'modelConfig': {
+      'whisper': {
+        'encoder': './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
+        'decoder': './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
+        'tailPaddings': 2000,
+      },
+      'tokens': './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
+      'debug': 0,
+    }
+  };
+
+  return sherpa_onnx.createOfflineRecognizer(config);
+}
+
+function createVad() {
+  // please download silero_vad.onnx from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+  const config = {
+    sileroVad: {
+      model: './silero_vad.onnx',
+      threshold: 0.5,
+      minSpeechDuration: 0.25,
+      minSilenceDuration: 0.5,
+      windowSize: 512,
+    },
+    sampleRate: 16000,
+    debug: true,
+    numThreads: 1,
+    bufferSizeInSeconds: 60,
+  };
+
+  return sherpa_onnx.createVad(config);
+}
+
+const recognizer = createRecognizer();
+const vad = createVad();
+
+// please download ./Obama.wav from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+const waveFilename = './Obama.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+
+if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
+  throw new Error(
+      'Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}');
+}
+
+console.log('Started')
+let start = Date.now();
+
+const windowSize = vad.config.sileroVad.windowSize;
+for (let i = 0; i < wave.samples.length; i += windowSize) {
+  const thisWindow = wave.samples.subarray(i, i + windowSize);
+  vad.acceptWaveform(thisWindow);
+
+  while (!vad.isEmpty()) {
+    const segment = vad.front();
+    vad.pop();
+
+    let start_time = segment.start / wave.sampleRate;
+    let end_time = start_time + segment.samples.length / wave.sampleRate;
+
+    start_time = start_time.toFixed(2);
+    end_time = end_time.toFixed(2);
+
+    const stream = recognizer.createStream();
+    stream.acceptWaveform(wave.sampleRate, segment.samples);
+
+    recognizer.decode(stream);
+    const r = recognizer.getResult(stream);
+    if (r.text.length > 0) {
+      const text = r.text.toLowerCase().trim();
+      console.log(`${start_time} -- ${end_time}: ${text}`);
+    }
+
+    stream.free();
+  }
+}
+
+vad.flush();
+
+while (!vad.isEmpty()) {
+  const segment = vad.front();
+  vad.pop();
+
+  let start_time = segment.start / wave.sampleRate;
+  let end_time = start_time + segment.samples.length / wave.sampleRate;
+
+  start_time = start_time.toFixed(2);
+  end_time = end_time.toFixed(2);
+
+  const stream = recognizer.createStream();
+  stream.acceptWaveform(wave.sampleRate, segment.samples);
+
+  recognizer.decode(stream);
+  const r = recognizer.getResult(stream);
+  if (r.text.length > 0) {
+    const text = r.text.toLowerCase().trim();
+    console.log(`${start_time} -- ${end_time}: ${text}`);
+  }
+}
+
+let stop = Date.now();
+console.log('Done')
+
+const elapsed_seconds = (stop - start) / 1000;
+const duration = wave.samples.length / wave.sampleRate;
+const real_time_factor = elapsed_seconds / duration;
+console.log('Wave duration', duration.toFixed(3), 'seconds')
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3))
+
+vad.free();
+recognizer.free();
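Two observations on this new VAD + whisper example. First, the structure: audio is fed to the detector in windowSize-sample chunks, every segment the VAD has closed is drained with front()/pop() and recognized, and vad.flush() at end of input forces out a segment that is still open; any chunked VAD consumer needs all three parts. Second, a nit carried over from the committed source: the sample-rate error message puts ${...} inside a plain single-quoted string, so it prints literally; it would need backticks to interpolate. A condensed sketch of the drain loop:

const windowSize = vad.config.sileroVad.windowSize;  // 512 samples here
for (let i = 0; i < wave.samples.length; i += windowSize) {
  vad.acceptWaveform(wave.samples.subarray(i, i + windowSize));
  while (!vad.isEmpty()) {  // drain each segment the VAD has closed
    const segment = vad.front();
    vad.pop();
    // ... recognize segment.samples as in the full example ...
  }
}
vad.flush();  // emit the segment still open at end of input
// ... then drain and recognize the remaining segments the same way ...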
@@ -4,6 +4,9 @@
 const wasmModule = require('./sherpa-onnx-wasm-nodejs.js')();
 const sherpa_onnx_asr = require('./sherpa-onnx-asr.js');
 const sherpa_onnx_tts = require('./sherpa-onnx-tts.js');
+const sherpa_onnx_kws = require('./sherpa-onnx-kws.js');
+const sherpa_onnx_wave = require('./sherpa-onnx-wave.js');
+const sherpa_onnx_vad = require('./sherpa-onnx-vad.js');
 
 function createOnlineRecognizer(config) {
   return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
@@ -17,10 +20,35 @@ function createOfflineTts(config) {
   return sherpa_onnx_tts.createOfflineTts(wasmModule, config);
 }
 
+function createKws(config) {
+  return sherpa_onnx_kws.createKws(wasmModule, config);
+}
+
+function createCircularBuffer(capacity) {
+  return new sherpa_onnx_vad.CircularBuffer(capacity, wasmModule);
+}
+
+function createVad(config) {
+  return sherpa_onnx_vad.createVad(wasmModule, config);
+}
+
+function readWave(filename) {
+  return sherpa_onnx_wave.readWave(filename, wasmModule);
+}
+
+function writeWave(filename, data) {
+  sherpa_onnx_wave.writeWave(filename, data, wasmModule);
+}
+
 // Note: online means streaming and offline means non-streaming here.
 // Both of them don't require internet connection.
 module.exports = {
   createOnlineRecognizer,
   createOfflineRecognizer,
   createOfflineTts,
+  createKws,
+  readWave,
+  writeWave,
+  createCircularBuffer,
+  createVad,
 };
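For reference, a minimal sketch of the wave helpers this hunk exports; the {samples, sampleRate} shape passed to writeWave is an assumption inferred from how readWave's return value is used in the examples above, and the createCircularBuffer capacity is simply forwarded to the underlying wasm CircularBuffer:

const sherpa_onnx = require('sherpa-onnx');

const wave = sherpa_onnx.readWave('./input.wav');  // placeholder path
console.log(wave.sampleRate, wave.samples.length);

// Assumed payload shape: {samples: Float32Array, sampleRate: number}
sherpa_onnx.writeWave('./copy.wav', {
  samples: wave.samples,
  sampleRate: wave.sampleRate,
});

// Capacity is passed straight through to the wasm CircularBuffer.
const buffer = sherpa_onnx.createCircularBuffer(16000 * 30);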
@@ -546,7 +546,7 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
   Module.setValue(ptr + 12, buffer + offset, 'i8*');
   offset += taskLen;
 
-  Module.setValue(ptr + 16, config.tailPaddings || -1, 'i32');
+  Module.setValue(ptr + 16, config.tailPaddings || 2000, 'i32');
 
   return {
     buffer: buffer, ptr: ptr, len: len,
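This changes the whisper default from tailPaddings: -1 to 2000, matching the value the new VAD example sets explicitly. One JavaScript subtlety worth noting: the || fallback also replaces an explicit tailPaddings: 0 with 2000, since 0 is falsy, so a config that genuinely needs another value must pass a non-zero one.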
| @@ -69,13 +69,14 @@ function initModelConfig(config, Module) { | @@ -69,13 +69,14 @@ function initModelConfig(config, Module) { | ||
| 69 | 69 | ||
| 70 | const len = transducer.len + paraformer_len + ctc_len + 7 * 4; | 70 | const len = transducer.len + paraformer_len + ctc_len + 7 * 4; |
| 71 | const ptr = Module._malloc(len); | 71 | const ptr = Module._malloc(len); |
| 72 | + Module.HEAPU8.fill(0, ptr, ptr + len); | ||
| 72 | 73 | ||
| 73 | let offset = 0; | 74 | let offset = 0; |
| 74 | Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset); | 75 | Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset); |
| 75 | 76 | ||
| 76 | const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; | 77 | const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; |
| 77 | - const providerLen = Module.lengthBytesUTF8(config.provider) + 1; | ||
| 78 | - const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1; | 78 | + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; |
| 79 | + const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1; | ||
| 79 | const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1; | 80 | const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1; |
| 80 | const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1; | 81 | const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1; |
| 81 | const bufferLen = | 82 | const bufferLen = |
| @@ -86,10 +87,10 @@ function initModelConfig(config, Module) { | @@ -86,10 +87,10 @@ function initModelConfig(config, Module) { | ||
| 86 | Module.stringToUTF8(config.tokens, buffer, tokensLen); | 87 | Module.stringToUTF8(config.tokens, buffer, tokensLen); |
| 87 | offset += tokensLen; | 88 | offset += tokensLen; |
| 88 | 89 | ||
| 89 | - Module.stringToUTF8(config.provider, buffer + offset, providerLen); | 90 | + Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen); |
| 90 | offset += providerLen; | 91 | offset += providerLen; |
| 91 | 92 | ||
| 92 | - Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen); | 93 | + Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen); |
| 93 | offset += modelTypeLen; | 94 | offset += modelTypeLen; |
| 94 | 95 | ||
| 95 | Module.stringToUTF8( | 96 | Module.stringToUTF8( |
| @@ -103,7 +104,7 @@ function initModelConfig(config, Module) { | @@ -103,7 +104,7 @@ function initModelConfig(config, Module) { | ||
| 103 | Module.setValue(ptr + offset, buffer, 'i8*'); // tokens | 104 | Module.setValue(ptr + offset, buffer, 'i8*'); // tokens |
| 104 | offset += 4; | 105 | offset += 4; |
| 105 | 106 | ||
| 106 | - Module.setValue(ptr + offset, config.numThreads, 'i32'); | 107 | + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); |
| 107 | offset += 4; | 108 | offset += 4; |
| 108 | 109 | ||
| 109 | Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider | 110 | Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider |
| @@ -134,14 +135,21 @@ function initModelConfig(config, Module) { | @@ -134,14 +135,21 @@ function initModelConfig(config, Module) { | ||
| 134 | 135 | ||
| 135 | function initFeatureExtractorConfig(config, Module) { | 136 | function initFeatureExtractorConfig(config, Module) { |
| 136 | let ptr = Module._malloc(4 * 2); | 137 | let ptr = Module._malloc(4 * 2); |
| 137 | - Module.setValue(ptr, config.samplingRate, 'i32'); | ||
| 138 | - Module.setValue(ptr + 4, config.featureDim, 'i32'); | 138 | + Module.setValue(ptr, config.samplingRate || 16000, 'i32'); |
| 139 | + Module.setValue(ptr + 4, config.featureDim || 80, 'i32'); | ||
| 139 | return { | 140 | return { |
| 140 | ptr: ptr, len: 8, | 141 | ptr: ptr, len: 8, |
| 141 | } | 142 | } |
| 142 | } | 143 | } |
| 143 | 144 | ||
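
These config functions all marshal JS objects by hand: `_malloc` a block sized to the C struct, then `setValue` each field at a fixed byte offset matching the C layout. A minimal sketch of the pattern with a hypothetical two-field struct (the struct and field names are invented for illustration):

// Hypothetical marshalling of `struct Point { int32_t x; float y; }`
// using the same setValue pattern as initFeatureExtractorConfig.
function initPointConfig(config, Module) {
  const len = 4 * 2;                                 // two 4-byte fields
  const ptr = Module._malloc(len);
  Module.setValue(ptr, config.x || 0, 'i32');        // offset 0: int32_t x
  Module.setValue(ptr + 4, config.y || 0, 'float');  // offset 4: float y
  return {ptr: ptr, len: len};  // caller must Module._free(ptr) when done
}

The offsets must track the C struct's layout exactly, which is why the real functions advance `offset += 4` after every field write.
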
| 144 | function initKwsConfig(config, Module) { | 145 | function initKwsConfig(config, Module) { |
| 146 | + if (!('featConfig' in config)) { | ||
| 147 | + config.featConfig = { | ||
| 148 | + sampleRate: 16000, | ||
| 149 | + featureDim: 80, | ||
| 150 | + }; | ||
| 151 | + } | ||
| 152 | + | ||
| 145 | let featConfig = initFeatureExtractorConfig(config.featConfig, Module); | 153 | let featConfig = initFeatureExtractorConfig(config.featConfig, Module); |
| 146 | 154 | ||
| 147 | let modelConfig = initModelConfig(config.modelConfig, Module); | 155 | let modelConfig = initModelConfig(config.modelConfig, Module); |
| @@ -155,16 +163,16 @@ function initKwsConfig(config, Module) { | @@ -155,16 +163,16 @@ function initKwsConfig(config, Module) { | ||
| 155 | Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset) | 163 | Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset) |
| 156 | offset += modelConfig.len; | 164 | offset += modelConfig.len; |
| 157 | 165 | ||
| 158 | - Module.setValue(ptr + offset, config.maxActivePaths, 'i32'); | 166 | + Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32'); |
| 159 | offset += 4; | 167 | offset += 4; |
| 160 | 168 | ||
| 161 | - Module.setValue(ptr + offset, config.numTrailingBlanks, 'i32'); | 169 | + Module.setValue(ptr + offset, config.numTrailingBlanks || 1, 'i32'); |
| 162 | offset += 4; | 170 | offset += 4; |
| 163 | 171 | ||
| 164 | - Module.setValue(ptr + offset, config.keywordsScore, 'float'); | 172 | + Module.setValue(ptr + offset, config.keywordsScore || 1.0, 'float'); |
| 165 | offset += 4; | 173 | offset += 4; |
| 166 | 174 | ||
| 167 | - Module.setValue(ptr + offset, config.keywordsThreshold, 'float'); | 175 | + Module.setValue(ptr + offset, config.keywordsThreshold || 0.25, 'float'); |
| 168 | offset += 4; | 176 | offset += 4; |
| 169 | 177 | ||
| 170 | let keywordsLen = Module.lengthBytesUTF8(config.keywords) + 1; | 178 | let keywordsLen = Module.lengthBytesUTF8(config.keywords) + 1; |
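
With the new fallbacks, a keyword-spotter config only has to supply the model files, tokens, and keywords; everything else defaults (`provider` to 'cpu', `numThreads` to 1, `maxActivePaths` to 4, `numTrailingBlanks` to 1, `keywordsScore` to 1.0, `keywordsThreshold` to 0.25, plus a 16 kHz / 80-dim feature config). Note that the default `featConfig` sets `sampleRate` while `initFeatureExtractorConfig` reads `config.samplingRate`; the two only agree because the `|| 16000` fallback yields the same value. A minimal config sketch, where the paths, the transducer field names, and the keywords format are assumptions for illustration:

// Minimal KWS config relying on the new defaults; paths are placeholders.
const config = {
  modelConfig: {
    transducer: {
      encoder: './encoder.onnx',
      decoder: './decoder.onnx',
      joiner: './joiner.onnx',
    },
    tokens: './tokens.txt',
  },
  keywords: 'x iǎo ài t óng x ué @小爱同学',  // assumed format: tokens + @label
};
const kws = createKws(config);
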
| @@ -49,6 +49,32 @@ set(exported_functions | @@ -49,6 +49,32 @@ set(exported_functions | ||
| 49 | SherpaOnnxDestroyKeywordSpotter | 49 | SherpaOnnxDestroyKeywordSpotter |
| 50 | SherpaOnnxGetKeywordResult | 50 | SherpaOnnxGetKeywordResult |
| 51 | SherpaOnnxIsKeywordStreamReady | 51 | SherpaOnnxIsKeywordStreamReady |
| 52 | + # VAD | ||
| 53 | + SherpaOnnxCreateCircularBuffer | ||
| 54 | + SherpaOnnxDestroyCircularBuffer | ||
| 55 | + SherpaOnnxCircularBufferPush | ||
| 56 | + SherpaOnnxCircularBufferGet | ||
| 57 | + SherpaOnnxCircularBufferFree | ||
| 58 | + SherpaOnnxCircularBufferPop | ||
| 59 | + SherpaOnnxCircularBufferSize | ||
| 60 | + SherpaOnnxCircularBufferHead | ||
| 61 | + SherpaOnnxCircularBufferReset | ||
| 62 | + SherpaOnnxCreateVoiceActivityDetector | ||
| 63 | + SherpaOnnxDestroyVoiceActivityDetector | ||
| 64 | + SherpaOnnxVoiceActivityDetectorAcceptWaveform | ||
| 65 | + SherpaOnnxVoiceActivityDetectorEmpty | ||
| 66 | + SherpaOnnxVoiceActivityDetectorDetected | ||
| 67 | + SherpaOnnxVoiceActivityDetectorPop | ||
| 68 | + SherpaOnnxVoiceActivityDetectorClear | ||
| 69 | + SherpaOnnxVoiceActivityDetectorFront | ||
| 70 | + SherpaOnnxDestroySpeechSegment | ||
| 71 | + SherpaOnnxVoiceActivityDetectorReset | ||
| 72 | + SherpaOnnxVoiceActivityDetectorFlush | ||
| 73 | + # | ||
| 74 | + SherpaOnnxFileExists | ||
| 75 | + SherpaOnnxReadWave | ||
| 76 | + SherpaOnnxFreeWave | ||
| 77 | + SherpaOnnxWriteWave | ||
| 52 | ) | 78 | ) |
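
The newly exported circular-buffer symbols can be driven either through the `CircularBuffer` wrapper or directly via `Module._SherpaOnnx...`. A hedged sketch of direct calls, assuming the C signatures are create(capacity), push(buffer, samplesPtr, n), and size(buffer), which is not confirmed in this diff:

// Direct calls against the exported symbols; signatures are assumptions.
const buf = Module._SherpaOnnxCreateCircularBuffer(16000 * 30);  // ~30 s at 16 kHz

const chunk = new Float32Array(512);        // e.g. one VAD window of samples
const p = Module._malloc(chunk.length * 4);
Module.HEAPF32.set(chunk, p / 4);           // HEAPF32 index = byte pointer / 4
Module._SherpaOnnxCircularBufferPush(buf, p, chunk.length);
Module._free(p);

console.log(Module._SherpaOnnxCircularBufferSize(buf));  // 512
Module._SherpaOnnxDestroyCircularBuffer(buf);
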
| 53 | 79 | ||
| 54 | 80 | ||
| @@ -82,6 +108,8 @@ install( | @@ -82,6 +108,8 @@ install( | ||
| 82 | ${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js | 108 | ${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js |
| 83 | ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js | 109 | ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js |
| 84 | ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js | 110 | ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js |
| 111 | + ${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js | ||
| 112 | + ${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js | ||
| 85 | "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js" | 113 | "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js" |
| 86 | "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm" | 114 | "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm" |
| 87 | DESTINATION | 115 | DESTINATION |
wasm/nodejs/sherpa-onnx-wave.js
0 → 100644
| 1 | +// return an object | ||
| 2 | +// { | ||
| 3 | +// samples: a float32 array | ||
| 4 | +// sampleRate: an integer | ||
| 5 | +// } | ||
| 6 | +function readWave(filename, Module) { | ||
| 7 | + const filenameLen = Module.lengthBytesUTF8(filename) + 1; | ||
| 8 | + const pFilename = Module._malloc(filenameLen); | ||
| 9 | + Module.stringToUTF8(filename, pFilename, filenameLen); | ||
| 10 | + | ||
| 11 | + const w = Module._SherpaOnnxReadWave(pFilename); | ||
| 12 | + Module._free(pFilename); | ||
| 13 | + | ||
| 14 | + | ||
| 15 | + const samplesPtr = Module.HEAP32[w / 4] / 4; | ||
| 16 | + const sampleRate = Module.HEAP32[w / 4 + 1]; | ||
| 17 | + const numSamples = Module.HEAP32[w / 4 + 2]; | ||
| 18 | + | ||
| 19 | + const samples = new Float32Array(numSamples); | ||
| 20 | + for (let i = 0; i < numSamples; i++) { | ||
| 21 | + samples[i] = Module.HEAPF32[samplesPtr + i]; | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + Module._SherpaOnnxFreeWave(w); | ||
| 25 | + | ||
| 26 | + | ||
| 27 | + return {samples: samples, sampleRate: sampleRate}; | ||
| 28 | +} | ||
| 29 | + | ||
| 30 | +// data is an object | ||
| 31 | +// { | ||
| 32 | +// samples: a float32 array | ||
| 33 | +// sampleRate: an integer | ||
| 34 | +// } | ||
| 35 | +function writeWave(filename, data, Module) { | ||
| 36 | + const pSamples = | ||
| 37 | + Module._malloc(data.samples.length * data.samples.BYTES_PER_ELEMENT); | ||
| 38 | + Module.HEAPF32.set(data.samples, pSamples / data.samples.BYTES_PER_ELEMENT); | ||
| 39 | + | ||
| 40 | + const filenameLen = Module.lengthBytesUTF8(filename) + 1; | ||
| 41 | + const pFilename = Module._malloc(filenameLen); | ||
| 42 | + Module.stringToUTF8(filename, pFilename, filenameLen); | ||
| 43 | + | ||
| 44 | + Module._SherpaOnnxWriteWave( | ||
| 45 | + pSamples, data.samples.length, data.sampleRate, pFilename); | ||
| 46 | + | ||
| 47 | + Module._free(pFilename); | ||
| 48 | + Module._free(pSamples); | ||
| 49 | +} | ||
| 50 | + | ||
| 51 | +if (typeof process == 'object' && typeof process.versions == 'object' && | ||
| 52 | + typeof process.versions.node == 'string') { | ||
| 53 | + module.exports = { | ||
| 54 | + readWave, | ||
| 55 | + writeWave, | ||
| 56 | + }; | ||
| 57 | +} |
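
`readWave` copies the samples out of the WASM heap into a fresh `Float32Array` before calling `_SherpaOnnxFreeWave`, so the returned object remains valid after the native wave is released. A round-trip usage sketch (the attenuation step and file paths are just for illustration):

// Round trip: read a wave, halve its amplitude, write it back out.
const wave = readWave('./input.wav', Module);
const quieter = new Float32Array(wave.samples.length);
for (let i = 0; i < wave.samples.length; i++) {
  quieter[i] = wave.samples[i] * 0.5;  // attenuate by 6 dB
}
writeWave('./output.wav', {samples: quieter, sampleRate: wave.sampleRate}, Module);

The per-sample copy loop in readWave could equally be written as
`samples.set(Module.HEAPF32.subarray(samplesPtr, samplesPtr + numSamples))`;
either way the data must be copied before the free.
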