Fangjun Kuang
Committed by GitHub

Add VAD and keyword spotting for the Node package with WebAssembly (#1286)

正在显示 40 个修改的文件 包含 456 行增加524 行删除
@@ -9,6 +9,28 @@ git status @@ -9,6 +9,28 @@ git status
9 ls -lh 9 ls -lh
10 ls -lh node_modules 10 ls -lh node_modules
11 11
  12 +echo '-----vad+whisper----------'
  13 +
  14 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  15 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  16 +rm sherpa-onnx-whisper-tiny.en.tar.bz2
  17 +
  18 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
  19 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  20 +node ./test-vad-with-non-streaming-asr-whisper.js
  21 +rm Obama.wav
  22 +rm silero_vad.onnx
  23 +rm -rf sherpa-onnx-whisper-tiny.en
  24 +
  25 +echo "----------keyword spotting----------"
  26 +
  27 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  28 +tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  29 +rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  30 +
  31 +node ./test-keyword-spotter-transducer.js
  32 +rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
  33 +
12 # offline asr 34 # offline asr
13 # 35 #
14 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 36 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
1 name: npm 1 name: npm
2 2
3 on: 3 on:
  4 + push:
  5 + branches:
  6 + - npm
4 workflow_dispatch: 7 workflow_dispatch:
5 8
6 concurrency: 9 concurrency:
@@ -27,6 +30,9 @@ jobs: @@ -27,6 +30,9 @@ jobs:
27 30
28 - name: Install emsdk 31 - name: Install emsdk
29 uses: mymindstorm/setup-emsdk@v14 32 uses: mymindstorm/setup-emsdk@v14
  33 + with:
  34 + version: 3.1.51
  35 + actions-cache-folder: 'emsdk-cache'
30 36
31 - name: View emsdk version 37 - name: View emsdk version
32 shell: bash 38 shell: bash
@@ -51,8 +57,6 @@ jobs: @@ -51,8 +57,6 @@ jobs:
51 57
52 - name: Build nodejs package 58 - name: Build nodejs package
53 shell: bash 59 shell: bash
54 - env:  
55 - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}  
56 run: | 60 run: |
57 ./build-wasm-simd-nodejs.sh 61 ./build-wasm-simd-nodejs.sh
58 cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/ 62 cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
@@ -71,6 +75,29 @@ jobs: @@ -71,6 +75,29 @@ jobs:
71 75
72 rm package.json.bak 76 rm package.json.bak
73 77
  78 + - name: Collect files
  79 + shell: bash
  80 + run: |
  81 + dst=sherpa-onnx-wasm-nodejs
  82 + mkdir $dst
  83 + cp -v scripts/nodejs/* $dst
  84 + tar cvjf $dst.tar.bz2 $dst
  85 +
  86 + echo "---"
  87 + ls -h $dst
  88 +
  89 + - uses: actions/upload-artifact@v4
  90 + with:
  91 + name: sherpa-onnx-wasm-nodejs
  92 + path: ./*.tar.bz2
  93 +
  94 + - name: Build nodejs package
  95 + shell: bash
  96 + env:
  97 + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
  98 + run: |
  99 + cd scripts/nodejs
  100 +
74 git diff 101 git diff
75 102
76 npm install 103 npm install
@@ -55,6 +55,9 @@ jobs: @@ -55,6 +55,9 @@ jobs:
55 55
56 - name: Install emsdk 56 - name: Install emsdk
57 uses: mymindstorm/setup-emsdk@v14 57 uses: mymindstorm/setup-emsdk@v14
  58 + with:
  59 + version: 3.1.51
  60 + actions-cache-folder: 'emsdk-cache'
58 61
59 - name: View emsdk version 62 - name: View emsdk version
60 shell: bash 63 shell: bash
@@ -109,6 +112,7 @@ jobs: @@ -109,6 +112,7 @@ jobs:
109 node --version 112 node --version
110 npm --version 113 npm --version
111 export d=scripts/nodejs 114 export d=scripts/nodejs
  115 + cat $d/index.js
112 116
113 pushd $d 117 pushd $d
114 npm install 118 npm install
  1 +## 1.10.23
  2 +
  3 +* flutter: add lang, emotion, event to OfflineRecognizerResult (#1268)
  4 +* Use a separate thread to initialize models for lazarus examples. (#1270)
  5 +* Object pascal examples for recording and playing audio with portaudio. (#1271)
  6 +* Text to speech API for Object Pascal. (#1273)
  7 +* update kotlin api for better release native object and add user-friendly apis. (#1275)
  8 +* Update wave-reader.cc to support 8/16/32-bit waves (#1278)
  9 +* Add WebAssembly for VAD (#1281)
  10 +* WebAssembly example for VAD + Non-streaming ASR (#1284)
  11 +
1 ## 1.10.22 12 ## 1.10.22
2 13
3 * Add Pascal API for reading wave files (#1243) 14 * Add Pascal API for reading wave files (#1243)
@@ -11,7 +11,7 @@ project(sherpa-onnx) @@ -11,7 +11,7 @@ project(sherpa-onnx)
11 # ./nodejs-addon-examples 11 # ./nodejs-addon-examples
12 # ./dart-api-examples/ 12 # ./dart-api-examples/
13 # ./CHANGELOG.md 13 # ./CHANGELOG.md
14 -set(SHERPA_ONNX_VERSION "1.10.22") 14 +set(SHERPA_ONNX_VERSION "1.10.23")
15 15
16 # Disable warning about 16 # Disable warning about
17 # 17 #
@@ -206,6 +206,7 @@ if(SHERPA_ONNX_ENABLE_WASM_NODEJS) @@ -206,6 +206,7 @@ if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
206 if(NOT SHERPA_ONNX_ENABLE_WASM) 206 if(NOT SHERPA_ONNX_ENABLE_WASM)
207 message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS") 207 message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS")
208 endif() 208 endif()
  209 + add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
209 endif() 210 endif()
210 211
211 if(SHERPA_ONNX_ENABLE_WASM) 212 if(SHERPA_ONNX_ENABLE_WASM)
@@ -9,7 +9,7 @@ environment: @@ -9,7 +9,7 @@ environment:
9 sdk: ^3.4.0 9 sdk: ^3.4.0
10 10
11 dependencies: 11 dependencies:
12 - sherpa_onnx: ^1.10.22 12 + sherpa_onnx: ^1.10.23
13 path: ^1.9.0 13 path: ^1.9.0
14 args: ^2.5.0 14 args: ^2.5.0
15 15
@@ -9,7 +9,7 @@ environment: @@ -9,7 +9,7 @@ environment:
9 sdk: ^3.4.0 9 sdk: ^3.4.0
10 10
11 dependencies: 11 dependencies:
12 - sherpa_onnx: ^1.10.22 12 + sherpa_onnx: ^1.10.23
13 path: ^1.9.0 13 path: ^1.9.0
14 args: ^2.5.0 14 args: ^2.5.0
15 15
@@ -9,7 +9,7 @@ environment: @@ -9,7 +9,7 @@ environment:
9 sdk: ^3.4.0 9 sdk: ^3.4.0
10 10
11 dependencies: 11 dependencies:
12 - sherpa_onnx: ^1.10.22 12 + sherpa_onnx: ^1.10.23
13 # sherpa_onnx: 13 # sherpa_onnx:
14 # path: ../../flutter/sherpa_onnx 14 # path: ../../flutter/sherpa_onnx
15 path: ^1.9.0 15 path: ^1.9.0
@@ -10,7 +10,7 @@ environment: @@ -10,7 +10,7 @@ environment:
10 10
11 # Add regular dependencies here. 11 # Add regular dependencies here.
12 dependencies: 12 dependencies:
13 - sherpa_onnx: ^1.10.22 13 + sherpa_onnx: ^1.10.23
14 path: ^1.9.0 14 path: ^1.9.0
15 args: ^2.5.0 15 args: ^2.5.0
16 16
@@ -9,7 +9,7 @@ environment: @@ -9,7 +9,7 @@ environment:
9 sdk: ^3.4.0 9 sdk: ^3.4.0
10 10
11 dependencies: 11 dependencies:
12 - sherpa_onnx: ^1.10.22 12 + sherpa_onnx: ^1.10.23
13 path: ^1.9.0 13 path: ^1.9.0
14 args: ^2.5.0 14 args: ^2.5.0
15 15
@@ -11,7 +11,7 @@ environment: @@ -11,7 +11,7 @@ environment:
11 11
12 # Add regular dependencies here. 12 # Add regular dependencies here.
13 dependencies: 13 dependencies:
14 - sherpa_onnx: ^1.10.22 14 + sherpa_onnx: ^1.10.23
15 path: ^1.9.0 15 path: ^1.9.0
16 args: ^2.5.0 16 args: ^2.5.0
17 17
@@ -8,7 +8,7 @@ environment: @@ -8,7 +8,7 @@ environment:
8 8
9 # Add regular dependencies here. 9 # Add regular dependencies here.
10 dependencies: 10 dependencies:
11 - sherpa_onnx: ^1.10.22 11 + sherpa_onnx: ^1.10.23
12 path: ^1.9.0 12 path: ^1.9.0
13 args: ^2.5.0 13 args: ^2.5.0
14 14
@@ -10,7 +10,7 @@ environment: @@ -10,7 +10,7 @@ environment:
10 sdk: ^3.4.0 10 sdk: ^3.4.0
11 11
12 dependencies: 12 dependencies:
13 - sherpa_onnx: ^1.10.22 13 + sherpa_onnx: ^1.10.23
14 path: ^1.9.0 14 path: ^1.9.0
15 args: ^2.5.0 15 args: ^2.5.0
16 16
@@ -9,7 +9,7 @@ environment: @@ -9,7 +9,7 @@ environment:
9 sdk: ^3.4.0 9 sdk: ^3.4.0
10 10
11 dependencies: 11 dependencies:
12 - sherpa_onnx: ^1.10.22 12 + sherpa_onnx: ^1.10.23
13 path: ^1.9.0 13 path: ^1.9.0
14 args: ^2.5.0 14 args: ^2.5.0
15 15
@@ -5,7 +5,7 @@ description: > @@ -5,7 +5,7 @@ description: >
5 5
6 publish_to: 'none' 6 publish_to: 'none'
7 7
8 -version: 1.10.22 8 +version: 1.10.23
9 9
10 topics: 10 topics:
11 - speech-recognition 11 - speech-recognition
@@ -30,7 +30,7 @@ dependencies: @@ -30,7 +30,7 @@ dependencies:
30 record: ^5.1.0 30 record: ^5.1.0
31 url_launcher: ^6.2.6 31 url_launcher: ^6.2.6
32 32
33 - sherpa_onnx: ^1.10.22 33 + sherpa_onnx: ^1.10.23
34 # sherpa_onnx: 34 # sherpa_onnx:
35 # path: ../../flutter/sherpa_onnx 35 # path: ../../flutter/sherpa_onnx
36 36
@@ -5,7 +5,7 @@ description: > @@ -5,7 +5,7 @@ description: >
5 5
6 publish_to: 'none' # Remove this line if you wish to publish to pub.dev 6 publish_to: 'none' # Remove this line if you wish to publish to pub.dev
7 7
8 -version: 1.10.22 8 +version: 1.10.23
9 9
10 environment: 10 environment:
11 sdk: '>=3.4.0 <4.0.0' 11 sdk: '>=3.4.0 <4.0.0'
@@ -17,7 +17,7 @@ dependencies: @@ -17,7 +17,7 @@ dependencies:
17 cupertino_icons: ^1.0.6 17 cupertino_icons: ^1.0.6
18 path_provider: ^2.1.3 18 path_provider: ^2.1.3
19 path: ^1.9.0 19 path: ^1.9.0
20 - sherpa_onnx: ^1.10.22 20 + sherpa_onnx: ^1.10.23
21 url_launcher: ^6.2.6 21 url_launcher: ^6.2.6
22 audioplayers: ^5.0.0 22 audioplayers: ^5.0.0
23 23
@@ -17,7 +17,7 @@ topics: @@ -17,7 +17,7 @@ topics:
17 - voice-activity-detection 17 - voice-activity-detection
18 18
19 # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec 19 # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
20 -version: 1.10.22 20 +version: 1.10.23
21 21
22 homepage: https://github.com/k2-fsa/sherpa-onnx 22 homepage: https://github.com/k2-fsa/sherpa-onnx
23 23
@@ -30,23 +30,23 @@ dependencies: @@ -30,23 +30,23 @@ dependencies:
30 flutter: 30 flutter:
31 sdk: flutter 31 sdk: flutter
32 32
33 - sherpa_onnx_android: ^1.10.22 33 + sherpa_onnx_android: ^1.10.23
34 # sherpa_onnx_android: 34 # sherpa_onnx_android:
35 # path: ../sherpa_onnx_android 35 # path: ../sherpa_onnx_android
36 36
37 - sherpa_onnx_macos: ^1.10.22 37 + sherpa_onnx_macos: ^1.10.23
38 # sherpa_onnx_macos: 38 # sherpa_onnx_macos:
39 # path: ../sherpa_onnx_macos 39 # path: ../sherpa_onnx_macos
40 40
41 - sherpa_onnx_linux: ^1.10.22 41 + sherpa_onnx_linux: ^1.10.23
42 # sherpa_onnx_linux: 42 # sherpa_onnx_linux:
43 # path: ../sherpa_onnx_linux 43 # path: ../sherpa_onnx_linux
44 # 44 #
45 - sherpa_onnx_windows: ^1.10.22 45 + sherpa_onnx_windows: ^1.10.23
46 # sherpa_onnx_windows: 46 # sherpa_onnx_windows:
47 # path: ../sherpa_onnx_windows 47 # path: ../sherpa_onnx_windows
48 48
49 - sherpa_onnx_ios: ^1.10.22 49 + sherpa_onnx_ios: ^1.10.23
50 # sherpa_onnx_ios: 50 # sherpa_onnx_ios:
51 # path: ../sherpa_onnx_ios 51 # path: ../sherpa_onnx_ios
52 52
@@ -7,7 +7,7 @@ @@ -7,7 +7,7 @@
7 # https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c 7 # https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
8 Pod::Spec.new do |s| 8 Pod::Spec.new do |s|
9 s.name = 'sherpa_onnx_ios' 9 s.name = 'sherpa_onnx_ios'
10 - s.version = '1.10.22' 10 + s.version = '1.10.23'
11 s.summary = 'A new Flutter FFI plugin project.' 11 s.summary = 'A new Flutter FFI plugin project.'
12 s.description = <<-DESC 12 s.description = <<-DESC
13 A new Flutter FFI plugin project. 13 A new Flutter FFI plugin project.
@@ -4,7 +4,7 @@ @@ -4,7 +4,7 @@
4 # 4 #
5 Pod::Spec.new do |s| 5 Pod::Spec.new do |s|
6 s.name = 'sherpa_onnx_macos' 6 s.name = 'sherpa_onnx_macos'
7 - s.version = '1.10.22' 7 + s.version = '1.10.23'
8 s.summary = 'sherpa-onnx Flutter FFI plugin project.' 8 s.summary = 'sherpa-onnx Flutter FFI plugin project.'
9 s.description = <<-DESC 9 s.description = <<-DESC
10 sherpa-onnx Flutter FFI plugin project. 10 sherpa-onnx Flutter FFI plugin project.
  1 +#!/usr/bin/env bash
  2 +
  3 +find flutter -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
  4 +find dart-api-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
  5 +find flutter-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
  6 +find flutter -name *.podspec -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
  7 +find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
1 { 1 {
2 "dependencies": { 2 "dependencies": {
3 - "sherpa-onnx-node": "^1.10.22" 3 + "sherpa-onnx-node": "^1.10.23"
4 } 4 }
5 } 5 }
@@ -42,11 +42,11 @@ stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate}); @@ -42,11 +42,11 @@ stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
42 42
43 const detectedKeywords = []; 43 const detectedKeywords = [];
44 while (kws.isReady(stream)) { 44 while (kws.isReady(stream)) {
  45 + kws.decode(stream);
45 const keyword = kws.getResult(stream).keyword; 46 const keyword = kws.getResult(stream).keyword;
46 if (keyword != '') { 47 if (keyword != '') {
47 detectedKeywords.push(keyword); 48 detectedKeywords.push(keyword);
48 } 49 }
49 - kws.decode(stream);  
50 } 50 }
51 let stop = Date.now(); 51 let stop = Date.now();
52 52
@@ -120,8 +120,8 @@ console.log('Done') @@ -120,8 +120,8 @@ console.log('Done')
120 const elapsed_seconds = (stop - start) / 1000; 120 const elapsed_seconds = (stop - start) / 1000;
121 const duration = wave.samples.length / wave.sampleRate; 121 const duration = wave.samples.length / wave.sampleRate;
122 const real_time_factor = elapsed_seconds / duration; 122 const real_time_factor = elapsed_seconds / duration;
123 -console.log('Wave duration', duration.toFixed(3), 'secodns')  
124 -console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns') 123 +console.log('Wave duration', duration.toFixed(3), 'seconds')
  124 +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
125 console.log( 125 console.log(
126 `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`, 126 `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
127 real_time_factor.toFixed(3)) 127 real_time_factor.toFixed(3))
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +const sherpa_onnx = require('sherpa-onnx');
  3 +
  4 +function createKeywordSpotter() {
  5 + // Please download test files from
  6 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
  7 + const config = {
  8 + 'modelConfig': {
  9 + 'transducer': {
  10 + 'encoder':
  11 + './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
  12 + 'decoder':
  13 + './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
  14 + 'joiner':
  15 + './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
  16 + },
  17 + 'tokens':
  18 + './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
  19 + },
  20 + keywords: 'w én s ēn t è k ǎ s uǒ @文森特卡索\n' +
  21 + 'f ǎ g uó @法国'
  22 + };
  23 +
  24 + return sherpa_onnx.createKws(config);
  25 +}
  26 +
  27 +const kws = createKeywordSpotter();
  28 +const stream = kws.createStream();
  29 +const waveFilename =
  30 + './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';
  31 +
  32 +const wave = sherpa_onnx.readWave(waveFilename);
  33 +stream.acceptWaveform(wave.sampleRate, wave.samples);
  34 +
  35 +const tailPadding = new Float32Array(wave.sampleRate * 0.4);
  36 +stream.acceptWaveform(kws.config.featConfig.sampleRate, tailPadding);
  37 +
  38 +const detectedKeywords = [];
  39 +while (kws.isReady(stream)) {
  40 + kws.decode(stream);
  41 + const keyword = kws.getResult(stream).keyword;
  42 + if (keyword != '') {
  43 + detectedKeywords.push(keyword);
  44 + }
  45 +}
  46 +console.log(detectedKeywords);
  47 +
  48 +stream.free();
  49 +kws.free();
@@ -7,27 +7,13 @@ const wav = require('wav'); @@ -7,27 +7,13 @@ const wav = require('wav');
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 function createOfflineRecognizer() { 9 function createOfflineRecognizer() {
10 - let featConfig = {  
11 - sampleRate: 16000,  
12 - featureDim: 80,  
13 - };  
14 -  
15 - let modelConfig = {  
16 - nemoCtc: {  
17 - model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',  
18 - },  
19 - tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',  
20 - numThreads: 1,  
21 - debug: 0,  
22 - provider: 'cpu',  
23 - modelType: 'nemo_ctc',  
24 - };  
25 -  
26 let config = { 10 let config = {
27 - featConfig: featConfig,  
28 - modelConfig: modelConfig,  
29 - decodingMethod: 'greedy_search',  
30 - maxActivePaths: 4, 11 + modelConfig: {
  12 + nemoCtc: {
  13 + model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
  14 + },
  15 + tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
  16 + }
31 }; 17 };
32 18
33 return sherpa_onnx.createOfflineRecognizer(config); 19 return sherpa_onnx.createOfflineRecognizer(config);
@@ -38,63 +24,12 @@ const stream = recognizer.createStream(); @@ -38,63 +24,12 @@ const stream = recognizer.createStream();
38 24
39 const waveFilename = 25 const waveFilename =
40 './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav'; 26 './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';
  27 +const wave = sherpa_onnx.readWave(waveFilename);
  28 +stream.acceptWaveform(wave.sampleRate, wave.samples);
41 29
42 -const reader = new wav.Reader();  
43 -const readable = new Readable().wrap(reader);  
44 -const buf = [];  
45 -  
46 -reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {  
47 - if (sampleRate != recognizer.config.featConfig.sampleRate) {  
48 - throw new Error(`Only support sampleRate ${  
49 - recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);  
50 - }  
51 -  
52 - if (audioFormat != 1) {  
53 - throw new Error(`Only support PCM format. Given ${audioFormat}`);  
54 - }  
55 -  
56 - if (channels != 1) {  
57 - throw new Error(`Only a single channel. Given ${channel}`);  
58 - }  
59 -  
60 - if (bitDepth != 16) {  
61 - throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);  
62 - }  
63 -});  
64 -  
65 -fs.createReadStream(waveFilename, {highWaterMark: 4096})  
66 - .pipe(reader)  
67 - .on('finish', function(err) {  
68 - // tail padding  
69 - const floatSamples =  
70 - new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);  
71 -  
72 - buf.push(floatSamples);  
73 - const flattened =  
74 - Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));  
75 -  
76 - stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);  
77 - recognizer.decode(stream);  
78 - const text = recognizer.getResult(stream).text;  
79 - console.log(text);  
80 -  
81 - stream.free();  
82 - recognizer.free();  
83 - });  
84 -  
85 -readable.on('readable', function() {  
86 - let chunk;  
87 - while ((chunk = readable.read()) != null) {  
88 - const int16Samples = new Int16Array(  
89 - chunk.buffer, chunk.byteOffset,  
90 - chunk.length / Int16Array.BYTES_PER_ELEMENT);  
91 -  
92 - const floatSamples = new Float32Array(int16Samples.length);  
93 -  
94 - for (let i = 0; i < floatSamples.length; i++) {  
95 - floatSamples[i] = int16Samples[i] / 32768.0;  
96 - } 30 +recognizer.decode(stream);
  31 +const text = recognizer.getResult(stream).text;
  32 +console.log(text);
97 33
98 - buf.push(floatSamples);  
99 - }  
100 -}); 34 +stream.free();
  35 +recognizer.free();
@@ -7,27 +7,15 @@ const wav = require('wav'); @@ -7,27 +7,15 @@ const wav = require('wav');
7 const sherpa_onnx = require('sherpa-onnx'); 7 const sherpa_onnx = require('sherpa-onnx');
8 8
9 function createOfflineRecognizer() { 9 function createOfflineRecognizer() {
10 - let featConfig = {  
11 - sampleRate: 16000,  
12 - featureDim: 80,  
13 - };  
14 -  
15 let modelConfig = { 10 let modelConfig = {
16 paraformer: { 11 paraformer: {
17 model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx', 12 model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
18 }, 13 },
19 tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt', 14 tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
20 - numThreads: 1,  
21 - debug: 0,  
22 - provider: 'cpu',  
23 - modelType: 'paraformer',  
24 }; 15 };
25 16
26 -  
27 let config = { 17 let config = {
28 - featConfig: featConfig,  
29 modelConfig: modelConfig, 18 modelConfig: modelConfig,
30 - decodingMethod: 'greedy_search',  
31 // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst 19 // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
32 ruleFsts: './itn_zh_number.fst', 20 ruleFsts: './itn_zh_number.fst',
33 }; 21 };
@@ -41,62 +29,12 @@ const stream = recognizer.createStream(); @@ -41,62 +29,12 @@ const stream = recognizer.createStream();
41 29
42 // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav 30 // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
43 const waveFilename = './itn-zh-number.wav'; 31 const waveFilename = './itn-zh-number.wav';
  32 +const wave = sherpa_onnx.readWave(waveFilename);
  33 +stream.acceptWaveform(wave.sampleRate, wave.samples);
44 34
45 -const reader = new wav.Reader();  
46 -const readable = new Readable().wrap(reader);  
47 -const buf = [];  
48 -  
49 -reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {  
50 - if (sampleRate != recognizer.config.featConfig.sampleRate) {  
51 - throw new Error(`Only support sampleRate ${  
52 - recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);  
53 - }  
54 -  
55 - if (audioFormat != 1) {  
56 - throw new Error(`Only support PCM format. Given ${audioFormat}`);  
57 - }  
58 -  
59 - if (channels != 1) {  
60 - throw new Error(`Only a single channel. Given ${channel}`);  
61 - }  
62 -  
63 - if (bitDepth != 16) {  
64 - throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);  
65 - }  
66 -});  
67 -  
68 -fs.createReadStream(waveFilename, {'highWaterMark': 4096})  
69 - .pipe(reader)  
70 - .on('finish', function(err) {  
71 - // tail padding  
72 - const floatSamples =  
73 - new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);  
74 -  
75 - buf.push(floatSamples);  
76 - const flattened =  
77 - Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));  
78 -  
79 - stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);  
80 - recognizer.decode(stream);  
81 - const text = recognizer.getResult(stream).text;  
82 - console.log(text);  
83 -  
84 - stream.free();  
85 - recognizer.free();  
86 - });  
87 -  
88 -readable.on('readable', function() {  
89 - let chunk;  
90 - while ((chunk = readable.read()) != null) {  
91 - const int16Samples = new Int16Array(  
92 - chunk.buffer, chunk.byteOffset,  
93 - chunk.length / Int16Array.BYTES_PER_ELEMENT);  
94 -  
95 - const floatSamples = new Float32Array(int16Samples.length);  
96 - for (let i = 0; i < floatSamples.length; i++) {  
97 - floatSamples[i] = int16Samples[i] / 32768.0;  
98 - } 35 +recognizer.decode(stream);
  36 +const text = recognizer.getResult(stream).text;
  37 +console.log(text);
99 38
100 - buf.push(floatSamples);  
101 - }  
102 -}); 39 +stream.free();
  40 +recognizer.free();
1 // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 1 // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
2 2
3 -const fs = require('fs');  
4 -const {Readable} = require('stream');  
5 -const wav = require('wav');  
6 -  
7 const sherpa_onnx = require('sherpa-onnx'); 3 const sherpa_onnx = require('sherpa-onnx');
8 4
9 function createOfflineRecognizer() { 5 function createOfflineRecognizer() {
10 - let featConfig = {  
11 - sampleRate: 16000,  
12 - featureDim: 80,  
13 - };  
14 -  
15 let modelConfig = { 6 let modelConfig = {
16 paraformer: { 7 paraformer: {
17 model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx', 8 model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
18 }, 9 },
19 tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt', 10 tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
20 - numThreads: 1,  
21 - debug: 0,  
22 - provider: 'cpu',  
23 - modelType: 'paraformer',  
24 }; 11 };
25 12
26 let config = { 13 let config = {
27 - featConfig: featConfig,  
28 modelConfig: modelConfig, 14 modelConfig: modelConfig,
29 - decodingMethod: 'greedy_search',  
30 }; 15 };
31 16
32 return sherpa_onnx.createOfflineRecognizer(config); 17 return sherpa_onnx.createOfflineRecognizer(config);
33 } 18 }
34 19
35 -  
36 const recognizer = createOfflineRecognizer(); 20 const recognizer = createOfflineRecognizer();
37 const stream = recognizer.createStream(); 21 const stream = recognizer.createStream();
38 22
39 const waveFilename = './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav'; 23 const waveFilename = './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav';
  24 +const wave = sherpa_onnx.readWave(waveFilename);
  25 +stream.acceptWaveform(wave.sampleRate, wave.samples);
40 26
41 -const reader = new wav.Reader();  
42 -const readable = new Readable().wrap(reader);  
43 -const buf = [];  
44 -  
45 -reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {  
46 - if (sampleRate != recognizer.config.featConfig.sampleRate) {  
47 - throw new Error(`Only support sampleRate ${  
48 - recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);  
49 - }  
50 -  
51 - if (audioFormat != 1) {  
52 - throw new Error(`Only support PCM format. Given ${audioFormat}`);  
53 - }  
54 -  
55 - if (channels != 1) {  
56 - throw new Error(`Only a single channel. Given ${channel}`);  
57 - }  
58 -  
59 - if (bitDepth != 16) {  
60 - throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);  
61 - }  
62 -});  
63 -  
64 -fs.createReadStream(waveFilename, {'highWaterMark': 4096})  
65 - .pipe(reader)  
66 - .on('finish', function(err) {  
67 - // tail padding  
68 - const floatSamples =  
69 - new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);  
70 -  
71 - buf.push(floatSamples);  
72 - const flattened =  
73 - Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));  
74 -  
75 - stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);  
76 - recognizer.decode(stream);  
77 - const text = recognizer.getResult(stream).text;  
78 - console.log(text);  
79 -  
80 - stream.free();  
81 - recognizer.free();  
82 - });  
83 -  
84 -readable.on('readable', function() {  
85 - let chunk;  
86 - while ((chunk = readable.read()) != null) {  
87 - const int16Samples = new Int16Array(  
88 - chunk.buffer, chunk.byteOffset,  
89 - chunk.length / Int16Array.BYTES_PER_ELEMENT);  
90 -  
91 - const floatSamples = new Float32Array(int16Samples.length);  
92 - for (let i = 0; i < floatSamples.length; i++) {  
93 - floatSamples[i] = int16Samples[i] / 32768.0;  
94 - } 27 +recognizer.decode(stream);
  28 +const text = recognizer.getResult(stream).text;
  29 +console.log(text);
95 30
96 - buf.push(floatSamples);  
97 - }  
98 -}); 31 +stream.free();
  32 +recognizer.free();
1 // Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang) 1 // Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
2 2
3 -const fs = require('fs');  
4 -const {Readable} = require('stream');  
5 -const wav = require('wav');  
6 -  
7 const sherpa_onnx = require('sherpa-onnx'); 3 const sherpa_onnx = require('sherpa-onnx');
8 4
9 function createOfflineRecognizer() { 5 function createOfflineRecognizer() {
10 - let featConfig = {  
11 - sampleRate: 16000,  
12 - featureDim: 80,  
13 - };  
14 -  
15 let modelConfig = { 6 let modelConfig = {
16 senseVoice: { 7 senseVoice: {
17 model: 8 model:
@@ -20,82 +11,26 @@ function createOfflineRecognizer() { @@ -20,82 +11,26 @@ function createOfflineRecognizer() {
20 useInverseTextNormalization: 1, 11 useInverseTextNormalization: 1,
21 }, 12 },
22 tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt', 13 tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
23 - numThreads: 1,  
24 - debug: 0,  
25 - provider: 'cpu',  
26 }; 14 };
27 15
28 let config = { 16 let config = {
29 - featConfig: featConfig,  
30 modelConfig: modelConfig, 17 modelConfig: modelConfig,
31 - decodingMethod: 'greedy_search',  
32 }; 18 };
33 19
34 return sherpa_onnx.createOfflineRecognizer(config); 20 return sherpa_onnx.createOfflineRecognizer(config);
35 } 21 }
36 22
37 -  
38 const recognizer = createOfflineRecognizer(); 23 const recognizer = createOfflineRecognizer();
39 const stream = recognizer.createStream(); 24 const stream = recognizer.createStream();
40 25
41 const waveFilename = 26 const waveFilename =
42 './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav'; 27 './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';
  28 +const wave = sherpa_onnx.readWave(waveFilename);
  29 +stream.acceptWaveform(wave.sampleRate, wave.samples);
43 30
44 -const reader = new wav.Reader();  
45 -const readable = new Readable().wrap(reader);  
46 -const buf = [];  
47 -  
48 -reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {  
49 - if (sampleRate != recognizer.config.featConfig.sampleRate) {  
50 - throw new Error(`Only support sampleRate ${  
51 - recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);  
52 - }  
53 -  
54 - if (audioFormat != 1) {  
55 - throw new Error(`Only support PCM format. Given ${audioFormat}`);  
56 - }  
57 -  
58 - if (channels != 1) {  
59 - throw new Error(`Only a single channel. Given ${channel}`);  
60 - }  
61 -  
62 - if (bitDepth != 16) {  
63 - throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);  
64 - }  
65 -});  
66 -  
67 -fs.createReadStream(waveFilename, {'highWaterMark': 4096})  
68 - .pipe(reader)  
69 - .on('finish', function(err) {  
70 - // tail padding  
71 - const floatSamples =  
72 - new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);  
73 -  
74 - buf.push(floatSamples);  
75 - const flattened =  
76 - Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));  
77 -  
78 - stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);  
79 - recognizer.decode(stream);  
80 - const text = recognizer.getResult(stream).text;  
81 - console.log(text);  
82 -  
83 - stream.free();  
84 - recognizer.free();  
85 - });  
86 -  
87 -readable.on('readable', function() {  
88 - let chunk;  
89 - while ((chunk = readable.read()) != null) {  
90 - const int16Samples = new Int16Array(  
91 - chunk.buffer, chunk.byteOffset,  
92 - chunk.length / Int16Array.BYTES_PER_ELEMENT);  
93 -  
94 - const floatSamples = new Float32Array(int16Samples.length);  
95 - for (let i = 0; i < floatSamples.length; i++) {  
96 - floatSamples[i] = int16Samples[i] / 32768.0;  
97 - } 31 +recognizer.decode(stream);
  32 +const text = recognizer.getResult(stream).text;
  33 +console.log(text);
98 34
99 - buf.push(floatSamples);  
100 - }  
101 -}); 35 +stream.free();
  36 +recognizer.free();
1 // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 1 // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
2 // 2 //
3 -const fs = require('fs');  
4 -const {Readable} = require('stream');  
5 -const wav = require('wav');  
6 -  
7 const sherpa_onnx = require('sherpa-onnx'); 3 const sherpa_onnx = require('sherpa-onnx');
8 4
9 function createOfflineRecognizer() { 5 function createOfflineRecognizer() {
10 - let featConfig = {  
11 - sampleRate: 16000,  
12 - featureDim: 80,  
13 - };  
14 -  
15 let modelConfig = { 6 let modelConfig = {
16 transducer: { 7 transducer: {
17 encoder: 8 encoder:
@@ -22,19 +13,11 @@ function createOfflineRecognizer() { @@ -22,19 +13,11 @@ function createOfflineRecognizer() {
22 './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx', 13 './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
23 }, 14 },
24 tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt', 15 tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
25 - numThreads: 1,  
26 - debug: 0,  
27 - provider: 'cpu',  
28 modelType: 'transducer', 16 modelType: 'transducer',
29 }; 17 };
30 18
31 let config = { 19 let config = {
32 - featConfig: featConfig,  
33 modelConfig: modelConfig, 20 modelConfig: modelConfig,
34 - decodingMethod: 'greedy_search',  
35 - maxActivePaths: 4,  
36 - hotwordsFile: '',  
37 - hotwordsScore: 1.5,  
38 }; 21 };
39 22
40 return sherpa_onnx.createOfflineRecognizer(config); 23 return sherpa_onnx.createOfflineRecognizer(config);
@@ -43,62 +26,12 @@ const recognizer = createOfflineRecognizer(); @@ -43,62 +26,12 @@ const recognizer = createOfflineRecognizer();
43 const stream = recognizer.createStream(); 26 const stream = recognizer.createStream();
44 27
45 const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav'; 28 const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
  29 +const wave = sherpa_onnx.readWave(waveFilename);
  30 +stream.acceptWaveform(wave.sampleRate, wave.samples);
46 31
47 -const reader = new wav.Reader();  
48 -const readable = new Readable().wrap(reader);  
49 -const buf = [];  
50 -  
51 -reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {  
52 - if (sampleRate != recognizer.config.featConfig.sampleRate) {  
53 - throw new Error(`Only support sampleRate ${  
54 - recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);  
55 - }  
56 -  
57 - if (audioFormat != 1) {  
58 - throw new Error(`Only support PCM format. Given ${audioFormat}`);  
59 - }  
60 -  
61 - if (channels != 1) {  
62 - throw new Error(`Only a single channel. Given ${channel}`);  
63 - }  
64 -  
65 - if (bitDepth != 16) {  
66 - throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);  
67 - }  
68 -});  
69 -  
70 -fs.createReadStream(waveFilename, {'highWaterMark': 4096})  
71 - .pipe(reader)  
72 - .on('finish', function(err) {  
73 - // tail padding  
74 - const floatSamples =  
75 - new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);  
76 -  
77 - buf.push(floatSamples);  
78 - const flattened =  
79 - Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));  
80 -  
81 - stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);  
82 - recognizer.decode(stream);  
83 - const text = recognizer.getResult(stream).text;  
84 - console.log(text);  
85 -  
86 - stream.free();  
87 - recognizer.free();  
88 - });  
89 -  
90 -readable.on('readable', function() {  
91 - let chunk;  
92 - while ((chunk = readable.read()) != null) {  
93 - const int16Samples = new Int16Array(  
94 - chunk.buffer, chunk.byteOffset,  
95 - chunk.length / Int16Array.BYTES_PER_ELEMENT);  
96 -  
97 - const floatSamples = new Float32Array(int16Samples.length);  
98 - for (let i = 0; i < floatSamples.length; i++) {  
99 - floatSamples[i] = int16Samples[i] / 32768.0;  
100 - } 32 +recognizer.decode(stream);
  33 +const text = recognizer.getResult(stream).text;
  34 +console.log(text);
101 35
102 - buf.push(floatSamples);  
103 - }  
104 -}); 36 +stream.free();
  37 +recognizer.free();
1 // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 1 // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
2 // 2 //
3 -const fs = require('fs');  
4 -const {Readable} = require('stream');  
5 -const wav = require('wav');  
6 -  
7 const sherpa_onnx = require('sherpa-onnx'); 3 const sherpa_onnx = require('sherpa-onnx');
8 4
9 function createOfflineRecognizer() { 5 function createOfflineRecognizer() {
10 - let featConfig = {  
11 - sampleRate: 16000,  
12 - featureDim: 80,  
13 - };  
14 -  
15 let modelConfig = { 6 let modelConfig = {
16 whisper: { 7 whisper: {
17 encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx', 8 encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
@@ -21,83 +12,25 @@ function createOfflineRecognizer() { @@ -21,83 +12,25 @@ function createOfflineRecognizer() {
21 tailPaddings: -1, 12 tailPaddings: -1,
22 }, 13 },
23 tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt', 14 tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
24 - numThreads: 1,  
25 - debug: 0,  
26 - provider: 'cpu',  
27 - modelType: 'whisper',  
28 }; 15 };
29 16
30 let config = { 17 let config = {
31 - featConfig: featConfig,  
32 modelConfig: modelConfig, 18 modelConfig: modelConfig,
33 - decodingMethod: 'greedy_search',  
34 }; 19 };
35 20
36 return sherpa_onnx.createOfflineRecognizer(config); 21 return sherpa_onnx.createOfflineRecognizer(config);
37 } 22 }
38 23
39 -  
40 recognizer = createOfflineRecognizer(); 24 recognizer = createOfflineRecognizer();
41 stream = recognizer.createStream(); 25 stream = recognizer.createStream();
42 26
43 const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav'; 27 const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
  28 +const wave = sherpa_onnx.readWave(waveFilename);
  29 +stream.acceptWaveform(wave.sampleRate, wave.samples);
44 30
45 -const reader = new wav.Reader();  
46 -const readable = new Readable().wrap(reader);  
47 -const buf = [];  
48 -  
49 -reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {  
50 - if (sampleRate != recognizer.config.featConfig.sampleRate) {  
51 - throw new Error(`Only support sampleRate ${  
52 - recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);  
53 - }  
54 -  
55 - if (audioFormat != 1) {  
56 - throw new Error(`Only support PCM format. Given ${audioFormat}`);  
57 - }  
58 -  
59 - if (channels != 1) {  
60 - throw new Error(`Only a single channel. Given ${channel}`);  
61 - }  
62 -  
63 - if (bitDepth != 16) {  
64 - throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);  
65 - }  
66 -});  
67 -  
68 -fs.createReadStream(waveFilename, {'highWaterMark': 4096})  
69 - .pipe(reader)  
70 - .on('finish', function(err) {  
71 - // tail padding  
72 - const floatSamples =  
73 - new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);  
74 -  
75 - buf.push(floatSamples);  
76 - const flattened =  
77 - Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));  
78 -  
79 - stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);  
80 - recognizer.decode(stream);  
81 - const text = recognizer.getResult(stream).text;  
82 - console.log(text);  
83 -  
84 - stream.free();  
85 - recognizer.free();  
86 - });  
87 -  
88 -readable.on('readable', function() {  
89 - let chunk;  
90 - while ((chunk = readable.read()) != null) {  
91 - const int16Samples = new Int16Array(  
92 - chunk.buffer, chunk.byteOffset,  
93 - chunk.length / Int16Array.BYTES_PER_ELEMENT);  
94 -  
95 - const floatSamples = new Float32Array(int16Samples.length);  
96 -  
97 - for (let i = 0; i < floatSamples.length; i++) {  
98 - floatSamples[i] = int16Samples[i] / 32768.0;  
99 - } 31 +recognizer.decode(stream);
  32 +const text = recognizer.getResult(stream).text;
  33 +console.log(text);
100 34
101 - buf.push(floatSamples);  
102 - }  
103 -}); 35 +stream.free();
  36 +recognizer.free();
@@ -16,22 +16,10 @@ function createOnlineRecognizer() { @@ -16,22 +16,10 @@ function createOnlineRecognizer() {
16 let onlineModelConfig = { 16 let onlineModelConfig = {
17 paraformer: onlineParaformerModelConfig, 17 paraformer: onlineParaformerModelConfig,
18 tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt', 18 tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
19 - numThreads: 1,  
20 - provider: 'cpu',  
21 - debug: 1,  
22 - modelType: 'paraformer',  
23 - };  
24 -  
25 - let featureConfig = {  
26 - sampleRate: 16000,  
27 - featureDim: 80,  
28 }; 19 };
29 20
30 let recognizerConfig = { 21 let recognizerConfig = {
31 - featConfig: featureConfig,  
32 modelConfig: onlineModelConfig, 22 modelConfig: onlineModelConfig,
33 - decodingMethod: 'greedy_search',  
34 - maxActivePaths: 4,  
35 enableEndpoint: 1, 23 enableEndpoint: 1,
36 rule1MinTrailingSilence: 2.4, 24 rule1MinTrailingSilence: 2.4,
37 rule2MinTrailingSilence: 1.2, 25 rule2MinTrailingSilence: 1.2,
@@ -17,26 +17,10 @@ function createOnlineRecognizer() { @@ -17,26 +17,10 @@ function createOnlineRecognizer() {
17 let onlineModelConfig = { 17 let onlineModelConfig = {
18 paraformer: onlineParaformerModelConfig, 18 paraformer: onlineParaformerModelConfig,
19 tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt', 19 tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
20 - numThreads: 1,  
21 - provider: 'cpu',  
22 - debug: 1,  
23 - modelType: 'paraformer',  
24 - };  
25 -  
26 - let featureConfig = {  
27 - sampleRate: 16000,  
28 - featureDim: 80,  
29 }; 20 };
30 21
31 let recognizerConfig = { 22 let recognizerConfig = {
32 - featConfig: featureConfig,  
33 modelConfig: onlineModelConfig, 23 modelConfig: onlineModelConfig,
34 - decodingMethod: 'greedy_search',  
35 - maxActivePaths: 4,  
36 - enableEndpoint: 1,  
37 - rule1MinTrailingSilence: 2.4,  
38 - rule2MinTrailingSilence: 1.2,  
39 - rule3MinUtteranceLength: 20,  
40 }; 24 };
41 25
42 return sherpa_onnx.createOnlineRecognizer(recognizerConfig); 26 return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
@@ -20,26 +20,10 @@ function createOnlineRecognizer() { @@ -20,26 +20,10 @@ function createOnlineRecognizer() {
20 transducer: onlineTransducerModelConfig, 20 transducer: onlineTransducerModelConfig,
21 tokens: 21 tokens:
22 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', 22 './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
23 - numThreads: 1,  
24 - provider: 'cpu',  
25 - debug: 1,  
26 - modelType: 'zipformer',  
27 - };  
28 -  
29 - let featureConfig = {  
30 - sampleRate: 16000,  
31 - featureDim: 80,  
32 }; 23 };
33 24
34 let recognizerConfig = { 25 let recognizerConfig = {
35 - featConfig: featureConfig,  
36 modelConfig: onlineModelConfig, 26 modelConfig: onlineModelConfig,
37 - decodingMethod: 'greedy_search',  
38 - maxActivePaths: 4,  
39 - enableEndpoint: 1,  
40 - rule1MinTrailingSilence: 2.4,  
41 - rule2MinTrailingSilence: 1.2,  
42 - rule3MinUtteranceLength: 20,  
43 }; 27 };
44 28
45 return sherpa_onnx.createOnlineRecognizer(recognizerConfig); 29 return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
  1 +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const sherpa_onnx = require('sherpa-onnx');
  4 +
/**
 * Create the non-streaming recognizer used to transcribe each speech segment.
 *
 * It is configured with the int8 Whisper tiny.en encoder/decoder. Please
 * download the test files from
 * https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
 *
 * @returns {object} Whatever sherpa_onnx.createOfflineRecognizer() returns
 *     for this configuration.
 */
function createRecognizer() {
  const whisper = {
    encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
    decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
    tailPaddings: 2000,
  };

  const modelConfig = {
    whisper: whisper,
    tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
    debug: 0,
  };

  return sherpa_onnx.createOfflineRecognizer({modelConfig: modelConfig});
}
  22 +
/**
 * Create the voice activity detector that splits the input into segments.
 *
 * Please download silero_vad.onnx from
 * https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 *
 * @returns {object} Whatever sherpa_onnx.createVad() returns for this
 *     configuration.
 */
function createVad() {
  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    // The main loop below feeds the detector in chunks of this many samples.
    windowSize: 512,
  };

  const vadConfig = {
    sileroVad: sileroVad,
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
    bufferSizeInSeconds: 60,
  };

  return sherpa_onnx.createVad(vadConfig);
}
  42 +
const recognizer = createRecognizer();
const vad = createVad();

// Please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

if (wave.sampleRate !== recognizer.config.featConfig.sampleRate) {
  // This must be a template literal (backticks). With ordinary quotes the
  // ${...} placeholders are printed literally instead of being interpolated.
  throw new Error(
      `Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}`);
}

/**
 * Transcribe and print every speech segment currently queued in the VAD.
 *
 * Each segment is popped from the detector, decoded with a fresh recognizer
 * stream and, when the recognized text is non-empty, printed as
 * "<start> -- <end>: <text>" with times in seconds.
 */
function decodeQueuedSegments() {
  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    // segment.start is a sample index; convert it to seconds.
    const startTime = segment.start / wave.sampleRate;
    const endTime = startTime + segment.samples.length / wave.sampleRate;

    const stream = recognizer.createStream();
    try {
      stream.acceptWaveform(wave.sampleRate, segment.samples);

      recognizer.decode(stream);
      const r = recognizer.getResult(stream);
      if (r.text.length > 0) {
        const text = r.text.toLowerCase().trim();
        console.log(`${startTime.toFixed(2)} -- ${endTime.toFixed(2)}: ${text}`);
      }
    } finally {
      // Every stream is released, including those created while draining
      // after flush(); the original flush loop never freed its streams.
      stream.free();
    }
  }
}

console.log('Started');
const start = Date.now();

// Feed the wave to the VAD one window at a time and decode segments as soon
// as they become available. The last window may be shorter than windowSize.
const windowSize = vad.config.sileroVad.windowSize;
for (let i = 0; i < wave.samples.length; i += windowSize) {
  const thisWindow = wave.samples.subarray(i, i + windowSize);
  vad.acceptWaveform(thisWindow);
  decodeQueuedSegments();
}

// NOTE(review): flush() presumably makes the VAD emit whatever speech is
// still buffered at end of input; drain the queue once more afterwards.
vad.flush();
decodeQueuedSegments();

const stop = Date.now();
console.log('Done');

// Real-time factor: processing time divided by audio duration.
const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

vad.free();
recognizer.free();
1 node_modules 1 node_modules
2 jslint.mjs 2 jslint.mjs
  3 +sherpa-onnx-*.js
  4 +sherpa-onnx-*.wasm
@@ -4,6 +4,9 @@ @@ -4,6 +4,9 @@
4 const wasmModule = require('./sherpa-onnx-wasm-nodejs.js')(); 4 const wasmModule = require('./sherpa-onnx-wasm-nodejs.js')();
5 const sherpa_onnx_asr = require('./sherpa-onnx-asr.js'); 5 const sherpa_onnx_asr = require('./sherpa-onnx-asr.js');
6 const sherpa_onnx_tts = require('./sherpa-onnx-tts.js'); 6 const sherpa_onnx_tts = require('./sherpa-onnx-tts.js');
  7 +const sherpa_onnx_kws = require('./sherpa-onnx-kws.js');
  8 +const sherpa_onnx_wave = require('./sherpa-onnx-wave.js');
  9 +const sherpa_onnx_vad = require('./sherpa-onnx-vad.js');
7 10
8 function createOnlineRecognizer(config) { 11 function createOnlineRecognizer(config) {
9 return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config); 12 return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
@@ -17,10 +20,35 @@ function createOfflineTts(config) { @@ -17,10 +20,35 @@ function createOfflineTts(config) {
17 return sherpa_onnx_tts.createOfflineTts(wasmModule, config); 20 return sherpa_onnx_tts.createOfflineTts(wasmModule, config);
18 } 21 }
19 22
  23 +function createKws(config) {
  24 + return sherpa_onnx_kws.createKws(wasmModule, config);
  25 +}
  26 +
  27 +function createCircularBuffer(capacity) {
  28 + return new sherpa_onnx_vad.CircularBuffer(capacity, wasmModule);
  29 +}
  30 +
  31 +function createVad(config) {
  32 + return sherpa_onnx_vad.createVad(wasmModule, config);
  33 +}
  34 +
  35 +function readWave(filename) {
  36 + return sherpa_onnx_wave.readWave(filename, wasmModule);
  37 +}
  38 +
  39 +function writeWave(filename, data) {
  40 + sherpa_onnx_wave.writeWave(filename, data, wasmModule);
  41 +}
  42 +
20 // Note: online means streaming and offline means non-streaming here. 43 // Note: online means streaming and offline means non-streaming here.
21 // Both of them don't require internet connection. 44 // Both of them don't require internet connection.
22 module.exports = { 45 module.exports = {
23 createOnlineRecognizer, 46 createOnlineRecognizer,
24 createOfflineRecognizer, 47 createOfflineRecognizer,
25 createOfflineTts, 48 createOfflineTts,
  49 + createKws,
  50 + readWave,
  51 + writeWave,
  52 + createCircularBuffer,
  53 + createVad,
26 }; 54 };
@@ -546,7 +546,7 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { @@ -546,7 +546,7 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
546 Module.setValue(ptr + 12, buffer + offset, 'i8*'); 546 Module.setValue(ptr + 12, buffer + offset, 'i8*');
547 offset += taskLen; 547 offset += taskLen;
548 548
549 - Module.setValue(ptr + 16, config.tailPaddings || -1, 'i32'); 549 + Module.setValue(ptr + 16, config.tailPaddings || 2000, 'i32');
550 550
551 return { 551 return {
552 buffer: buffer, ptr: ptr, len: len, 552 buffer: buffer, ptr: ptr, len: len,
@@ -69,13 +69,14 @@ function initModelConfig(config, Module) { @@ -69,13 +69,14 @@ function initModelConfig(config, Module) {
69 69
70 const len = transducer.len + paraformer_len + ctc_len + 7 * 4; 70 const len = transducer.len + paraformer_len + ctc_len + 7 * 4;
71 const ptr = Module._malloc(len); 71 const ptr = Module._malloc(len);
  72 + Module.HEAPU8.fill(0, ptr, ptr + len);
72 73
73 let offset = 0; 74 let offset = 0;
74 Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset); 75 Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
75 76
76 const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; 77 const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
77 - const providerLen = Module.lengthBytesUTF8(config.provider) + 1;  
78 - const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1; 78 + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  79 + const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
79 const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1; 80 const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
80 const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1; 81 const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
81 const bufferLen = 82 const bufferLen =
@@ -86,10 +87,10 @@ function initModelConfig(config, Module) { @@ -86,10 +87,10 @@ function initModelConfig(config, Module) {
86 Module.stringToUTF8(config.tokens, buffer, tokensLen); 87 Module.stringToUTF8(config.tokens, buffer, tokensLen);
87 offset += tokensLen; 88 offset += tokensLen;
88 89
89 - Module.stringToUTF8(config.provider, buffer + offset, providerLen); 90 + Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
90 offset += providerLen; 91 offset += providerLen;
91 92
92 - Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen); 93 + Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen);
93 offset += modelTypeLen; 94 offset += modelTypeLen;
94 95
95 Module.stringToUTF8( 96 Module.stringToUTF8(
@@ -103,7 +104,7 @@ function initModelConfig(config, Module) { @@ -103,7 +104,7 @@ function initModelConfig(config, Module) {
103 Module.setValue(ptr + offset, buffer, 'i8*'); // tokens 104 Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
104 offset += 4; 105 offset += 4;
105 106
106 - Module.setValue(ptr + offset, config.numThreads, 'i32'); 107 + Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
107 offset += 4; 108 offset += 4;
108 109
109 Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider 110 Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
@@ -134,14 +135,21 @@ function initModelConfig(config, Module) { @@ -134,14 +135,21 @@ function initModelConfig(config, Module) {
134 135
135 function initFeatureExtractorConfig(config, Module) { 136 function initFeatureExtractorConfig(config, Module) {
136 let ptr = Module._malloc(4 * 2); 137 let ptr = Module._malloc(4 * 2);
137 - Module.setValue(ptr, config.samplingRate, 'i32');  
138 - Module.setValue(ptr + 4, config.featureDim, 'i32'); 138 + Module.setValue(ptr, config.samplingRate || 16000, 'i32');
  139 + Module.setValue(ptr + 4, config.featureDim || 80, 'i32');
139 return { 140 return {
140 ptr: ptr, len: 8, 141 ptr: ptr, len: 8,
141 } 142 }
142 } 143 }
143 144
144 function initKwsConfig(config, Module) { 145 function initKwsConfig(config, Module) {
  146 + if (!('featConfig' in config)) {
  147 + config.featConfig = {
  148 + sampleRate: 16000,
  149 + featureDim: 80,
  150 + };
  151 + }
  152 +
145 let featConfig = initFeatureExtractorConfig(config.featConfig, Module); 153 let featConfig = initFeatureExtractorConfig(config.featConfig, Module);
146 154
147 let modelConfig = initModelConfig(config.modelConfig, Module); 155 let modelConfig = initModelConfig(config.modelConfig, Module);
@@ -155,16 +163,16 @@ function initKwsConfig(config, Module) { @@ -155,16 +163,16 @@ function initKwsConfig(config, Module) {
155 Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset) 163 Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset)
156 offset += modelConfig.len; 164 offset += modelConfig.len;
157 165
158 - Module.setValue(ptr + offset, config.maxActivePaths, 'i32'); 166 + Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32');
159 offset += 4; 167 offset += 4;
160 168
161 - Module.setValue(ptr + offset, config.numTrailingBlanks, 'i32'); 169 + Module.setValue(ptr + offset, config.numTrailingBlanks || 1, 'i32');
162 offset += 4; 170 offset += 4;
163 171
164 - Module.setValue(ptr + offset, config.keywordsScore, 'float'); 172 + Module.setValue(ptr + offset, config.keywordsScore || 1.0, 'float');
165 offset += 4; 173 offset += 4;
166 174
167 - Module.setValue(ptr + offset, config.keywordsThreshold, 'float'); 175 + Module.setValue(ptr + offset, config.keywordsThreshold || 0.25, 'float');
168 offset += 4; 176 offset += 4;
169 177
170 let keywordsLen = Module.lengthBytesUTF8(config.keywords) + 1; 178 let keywordsLen = Module.lengthBytesUTF8(config.keywords) + 1;
@@ -49,6 +49,32 @@ set(exported_functions @@ -49,6 +49,32 @@ set(exported_functions
49 SherpaOnnxDestroyKeywordSpotter 49 SherpaOnnxDestroyKeywordSpotter
50 SherpaOnnxGetKeywordResult 50 SherpaOnnxGetKeywordResult
51 SherpaOnnxIsKeywordStreamReady 51 SherpaOnnxIsKeywordStreamReady
  52 + # VAD
  53 + SherpaOnnxCreateCircularBuffer
  54 + SherpaOnnxDestroyCircularBuffer
  55 + SherpaOnnxCircularBufferPush
  56 + SherpaOnnxCircularBufferGet
  57 + SherpaOnnxCircularBufferFree
  58 + SherpaOnnxCircularBufferPop
  59 + SherpaOnnxCircularBufferSize
  60 + SherpaOnnxCircularBufferHead
  61 + SherpaOnnxCircularBufferReset
  62 + SherpaOnnxCreateVoiceActivityDetector
  63 + SherpaOnnxDestroyVoiceActivityDetector
  64 + SherpaOnnxVoiceActivityDetectorAcceptWaveform
  65 + SherpaOnnxVoiceActivityDetectorEmpty
  66 + SherpaOnnxVoiceActivityDetectorDetected
  67 + SherpaOnnxVoiceActivityDetectorPop
  68 + SherpaOnnxVoiceActivityDetectorClear
  69 + SherpaOnnxVoiceActivityDetectorFront
  70 + SherpaOnnxDestroySpeechSegment
  71 + SherpaOnnxVoiceActivityDetectorReset
  72 + SherpaOnnxVoiceActivityDetectorFlush
  73 + #
  74 + SherpaOnnxFileExists
  75 + SherpaOnnxReadWave
  76 + SherpaOnnxFreeWave
  77 + SherpaOnnxWriteWave
52 ) 78 )
53 79
54 80
@@ -82,6 +108,8 @@ install( @@ -82,6 +108,8 @@ install(
82 ${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js 108 ${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js
83 ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js 109 ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js
84 ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js 110 ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
  111 + ${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js
  112 + ${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js
85 "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js" 113 "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
86 "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm" 114 "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
87 DESTINATION 115 DESTINATION
/**
 * Read a wave file through the WebAssembly module.
 *
 * @param {string} filename Path of the wave file to read.
 * @param {object} Module The Emscripten module; it must provide
 *     lengthBytesUTF8, stringToUTF8, _malloc, _free, HEAP32, HEAPF32,
 *     _SherpaOnnxReadWave and _SherpaOnnxFreeWave.
 * @returns {{samples: Float32Array, sampleRate: number}} A copy of the
 *     samples (independent of the WASM heap) and the sample rate.
 * @throws {Error} If _SherpaOnnxReadWave returns a null pointer.
 */
function readWave(filename, Module) {
  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
  const pFilename = Module._malloc(filenameLen);
  Module.stringToUTF8(filename, pFilename, filenameLen);

  const w = Module._SherpaOnnxReadWave(pFilename);
  Module._free(pFilename);

  // A null pointer means the file could not be read. Without this check the
  // code below would interpret whatever lives at heap address 0 as a wave.
  if (!w) {
    throw new Error(`Failed to read wave file: ${filename}`);
  }

  // The returned struct is read as three consecutive 32-bit fields:
  // pointer to the float samples, sample rate, number of samples.
  const base = w / 4;
  const firstSample = Module.HEAP32[base] / 4;  // byte address -> HEAPF32 index
  const sampleRate = Module.HEAP32[base + 1];
  const numSamples = Module.HEAP32[base + 2];

  // slice() copies, so the result stays valid after the wave is freed below.
  const samples = Module.HEAPF32.slice(firstSample, firstSample + numSamples);

  Module._SherpaOnnxFreeWave(w);

  return {samples: samples, sampleRate: sampleRate};
}
  29 +
/**
 * Write samples to a wave file through the WebAssembly module.
 *
 * @param {string} filename Path of the wave file to create.
 * @param {{samples: (Float32Array|number[]), sampleRate: number}} data
 *     The samples and their sample rate. A plain array of numbers is
 *     accepted as well as a Float32Array.
 * @param {object} Module The Emscripten module; it must provide
 *     lengthBytesUTF8, stringToUTF8, _malloc, _free, HEAPF32 and
 *     _SherpaOnnxWriteWave.
 * @returns {boolean} true when _SherpaOnnxWriteWave returns 1 and false
 *     otherwise. NOTE(review): 1 is taken to mean success as in the
 *     sherpa-onnx C API - confirm. Earlier versions returned undefined, so
 *     callers that ignore the result are unaffected.
 */
function writeWave(filename, data, Module) {
  // Samples are stored on the heap as 32-bit floats whatever container the
  // caller used; data.samples.BYTES_PER_ELEMENT is undefined for plain arrays.
  const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
  const numSamples = data.samples.length;

  const pSamples = Module._malloc(numSamples * bytesPerSample);
  Module.HEAPF32.set(data.samples, pSamples / bytesPerSample);

  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
  const pFilename = Module._malloc(filenameLen);
  Module.stringToUTF8(filename, pFilename, filenameLen);

  const status = Module._SherpaOnnxWriteWave(
      pSamples, numSamples, data.sampleRate, pFilename);

  Module._free(pFilename);
  Module._free(pSamples);

  return status === 1;
}
  50 +
// Export the helpers only when process.versions.node is a string, i.e. when
// this file appears to be running under Node.js.
if (typeof process === 'object') {
  if (typeof process.versions === 'object' &&
      typeof process.versions.node === 'string') {
    module.exports = {readWave, writeWave};
  }
}