Add WebAssembly for VAD (#1281)

Fangjun Kuang · GitHub
Commit 1ef8a7a20248103500f669c11dd35ff3df2adbe1 1ef8a7a2 1 parent fb09f8fa
.github/workflows/wasm-simd-hf-space-de-tts.yaml
.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
.github/workflows/wasm-simd-hf-space-en-tts.yaml
.github/workflows/wasm-simd-hf-space-silero-vad.yaml
.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
CMakeLists.txt
README.md
build-wasm-simd-asr.sh
build-wasm-simd-kws.sh
build-wasm-simd-vad.sh
scripts/node-addon-api/lib/vad.js
wasm/CMakeLists.txt
wasm/vad/CMakeLists.txt
wasm/vad/app-vad.js
wasm/vad/assets/README.md
wasm/vad/index.html
wasm/vad/sherpa-onnx-vad.js
wasm/vad/sherpa-onnx-wasm-main-vad.cc
--- a/.github/workflows/wasm-simd-hf-space-de-tts.yaml
查看文件 @1ef8a7a
+++ b/.github/workflows/wasm-simd-hf-space-de-tts.yaml
查看文件 @1ef8a7a
 name: wasm-simd-hf-space-de-tts
 
 on:
-   release:
-     types:
-       - published
+   push:
+     branches:
+       - wasm
+     tags:
+       - 'v[0-9]+.[0-9]+.[0-9]+*'
 
   workflow_dispatch:
 
@@ -71,6 +73,14 @@ jobs:
           name: sherpa-onnx-wasm-simd-de-tts
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
+       - name: Release
+         if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+         uses: svenstaro/upload-release-action@v2
+         with:
+           file_glob: true
+           overwrite: true
+           file: ./*.tar.bz2
+ 
       - name: Publish to ModelScope
         # if: false
         env:
--- a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
查看文件 @1ef8a7a
+++ b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
查看文件 @1ef8a7a
 name: wasm-simd-hf-space-en-asr-zipformer
 
 on:
-   release:
-     types:
-       - published
+   push:
+     branches:
+       - wasm
+     tags:
+       - 'v[0-9]+.[0-9]+.[0-9]+*'
 
   workflow_dispatch:
 
@@ -73,6 +75,14 @@ jobs:
           name: sherpa-onnx-wasm-simd-en-asr-zipformer
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
+       - name: Release
+         if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+         uses: svenstaro/upload-release-action@v2
+         with:
+           file_glob: true
+           overwrite: true
+           file: ./*.tar.bz2
+ 
       - name: Publish to ModelScope
         # if: false
         env:
--- a/.github/workflows/wasm-simd-hf-space-en-tts.yaml
查看文件 @1ef8a7a
+++ b/.github/workflows/wasm-simd-hf-space-en-tts.yaml
查看文件 @1ef8a7a
 name: wasm-simd-hf-space-en-tts
 
 on:
-   release:
-     types:
-       - published
+   push:
+     branches:
+       - wasm
+     tags:
+       - 'v[0-9]+.[0-9]+.[0-9]+*'
 
   workflow_dispatch:
 
@@ -69,6 +71,14 @@ jobs:
           name: sherpa-onnx-wasm-simd-en-tts
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
+       - name: Release
+         if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+         uses: svenstaro/upload-release-action@v2
+         with:
+           file_glob: true
+           overwrite: true
+           file: ./*.tar.bz2
+ 
       - name: Publish to ModelScope
         # if: false
         env:
--- a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml 0 → 100644
查看文件 @1ef8a7a
+++ b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml 0 → 100644
查看文件 @1ef8a7a
+ # Builds the silero-vad WebAssembly demo with ./build-wasm-simd-vad.sh and
+ # publishes the result (artifact upload, GitHub release, ModelScope,
+ # Huggingface).
+ name: wasm-simd-hf-space-silero-vad
+ 
+ # Triggers: pushes to the `wasm` branch, pushes of tags matching the filter
+ # below, and manual runs.
+ on:
+   push:
+     branches:
+       - wasm
+     tags:
+       - 'v[0-9]+.[0-9]+.[0-9]+*'
+ 
+   workflow_dispatch:
+ 
+ # One run per ref: a newer run cancels the one that is still in progress.
+ concurrency:
+   group: wasm-simd-hf-space-silero-vad-${{ github.ref }}
+   cancel-in-progress: true
+ 
+ jobs:
+   wasm-simd-hf-space-silero-vad:
+     runs-on: ${{ matrix.os }}
+     strategy:
+       fail-fast: false
+       matrix:
+         os: [ubuntu-latest]
+ 
+     steps:
+       - uses: actions/checkout@v4
+         with:
+           fetch-depth: 0
+       # The emsdk version is pinned; the toolchain is cached in `emsdk-cache`.
+       - name: Install emsdk
+         uses: mymindstorm/setup-emsdk@v14
+         with:
+           version: 3.1.51
+           actions-cache-folder: 'emsdk-cache'
+ 
+       - name: View emsdk version
+         shell: bash
+         run: |
+           emcc -v
+           echo "--------------------"
+           emcc --check
+ 
+       # wasm/vad/CMakeLists.txt refuses to configure unless
+       # wasm/vad/assets/silero_vad.onnx exists, so fetch it before building.
+       - name: Download model files
+         shell: bash
+         run: |
+           cd wasm/vad/assets
+           ls -lh
+           echo "----------"
+           curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+           ls -lh
+ 
+       - name: Build sherpa-onnx for WebAssembly
+         shell: bash
+         run: |
+           ./build-wasm-simd-vad.sh
+ 
+       # Reads the version from the SHERPA_ONNX_VERSION line of ./CMakeLists.txt
+       # (second space-separated field, then the text between double quotes),
+       # renames the installed wasm/vad directory to
+       # sherpa-onnx-wasm-simd-v<version>-vad and packs it as a .tar.bz2.
+       - name: collect files
+         shell: bash
+         run: |
+           SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
+ 
+           dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-vad
+           mv build-wasm-simd-vad/install/bin/wasm/vad $dst
+           ls -lh $dst
+           tar cjfv $dst.tar.bz2 ./$dst
+ 
+       - name: Upload wasm files
+         uses: actions/upload-artifact@v4
+         with:
+           name: sherpa-onnx-wasm-simd-vad
+           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
+ 
+       # Only for tag pushes in repositories owned by csukuangfj or k2-fsa:
+       # attach every ./*.tar.bz2 to the release, replacing existing assets.
+       - name: Release
+         if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+         uses: svenstaro/upload-release-action@v2
+         with:
+           file_glob: true
+           overwrite: true
+           file: ./*.tar.bz2
+ 
+       # Pushes the collected build output to the ModelScope studio repository.
+       # The whole command below is retried up to 20 times, with a 200 second
+       # timeout per attempt; each attempt starts from a fresh clone in ./ms.
+       # The clone is made with GIT_LFS_SKIP_SMUDGE=1, old *.js / *.data files
+       # are removed, the new build is copied in, and *.data / *.wasm are
+       # tracked with git lfs before committing and pushing.
+       # NOTE(review): unlike the Huggingface step, the push names no branch;
+       # presumably the default branch of the clone is the intended target.
+       - name: Publish to ModelScope
+         # if: false
+         env:
+           MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
+         uses: nick-fields/retry@v2
+         with:
+           max_attempts: 20
+           timeout_seconds: 200
+           shell: bash
+           command: |
+             SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
+ 
+             git config --global user.email "csukuangfj@gmail.com"
+             git config --global user.name "Fangjun Kuang"
+ 
+             rm -rf ms
+             export GIT_LFS_SKIP_SMUDGE=1
+             export GIT_CLONE_PROTECTION_ACTIVE=false
+ 
+             git clone https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx.git ms
+             cd ms
+             rm -fv *.js
+             rm -fv *.data
+             git fetch
+             git pull
+             git merge -m "merge remote" --ff origin main
+ 
+             cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-vad/* .
+ 
+             git status
+             git lfs track "*.data"
+             git lfs track "*.wasm"
+             ls -lh
+ 
+             git add .
+             git commit -m "update model"
+             git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx.git
+ 
+       # Pushes the collected build output to the Huggingface space repository.
+       # The whole command below is retried up to 20 times, with a 200 second
+       # timeout per attempt; each attempt starts from a fresh clone in
+       # ./huggingface. Old *.js / *.data files are removed, the new build is
+       # copied in, and *.data / *.wasm are tracked with git lfs before the
+       # commit is pushed to `main`.
+       - name: Publish to huggingface
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         uses: nick-fields/retry@v2
+         with:
+           max_attempts: 20
+           timeout_seconds: 200
+           shell: bash
+           command: |
+             SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
+ 
+             git config --global user.email "csukuangfj@gmail.com"
+             git config --global user.name "Fangjun Kuang"
+ 
+             rm -rf huggingface
+             export GIT_LFS_SKIP_SMUDGE=1
+             export GIT_CLONE_PROTECTION_ACTIVE=false
+ 
+             git clone https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx huggingface
+             cd huggingface
+             rm -fv *.js
+             rm -fv *.data
+             git fetch
+             git pull
+             git merge -m "merge remote" --ff origin main
+ 
+             cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-vad/* .
+ 
+             git status
+             git lfs track "*.data"
+             git lfs track "*.wasm"
+             ls -lh
+ 
+             git add .
+             git commit -m "update model"
+             git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx main
--- a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
查看文件 @1ef8a7a
+++ b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
查看文件 @1ef8a7a
 name: wasm-simd-hf-space-zh-cantonese-en-asr-paraformer
 
 on:
-   release:
-     types:
-       - published
+   push:
+     branches:
+       - wasm
+     tags:
+       - 'v[0-9]+.[0-9]+.[0-9]+*'
 
   workflow_dispatch:
 
@@ -80,6 +82,14 @@ jobs:
           name: sherpa-onnx-wasm-simd-zh-cantonese-en-asr-paraformer
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
+       - name: Release
+         if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+         uses: svenstaro/upload-release-action@v2
+         with:
+           file_glob: true
+           overwrite: true
+           file: ./*.tar.bz2
+ 
       - name: Publish to huggingface
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
查看文件 @1ef8a7a
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
查看文件 @1ef8a7a
 name: wasm-simd-hf-space-zh-en-asr-paraformer
 
 on:
-   release:
-     types:
-       - published
+   push:
+     branches:
+       - wasm
+     tags:
+       - 'v[0-9]+.[0-9]+.[0-9]+*'
 
   workflow_dispatch:
 
@@ -80,6 +82,14 @@ jobs:
           name: sherpa-onnx-wasm-simd-zh-en-asr-paraformer
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
+       - name: Release
+         if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+         uses: svenstaro/upload-release-action@v2
+         with:
+           file_glob: true
+           overwrite: true
+           file: ./*.tar.bz2
+ 
       - name: Publish to ModelScope
         # if: false
         env:
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
查看文件 @1ef8a7a
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
查看文件 @1ef8a7a
 name: wasm-simd-hf-space-zh-en-asr-zipformer
 
 on:
-   release:
-     types:
-       - published
+   push:
+     branches:
+       - wasm
+     tags:
+       - 'v[0-9]+.[0-9]+.[0-9]+*'
 
   workflow_dispatch:
 
@@ -71,6 +73,14 @@ jobs:
           name: sherpa-onnx-wasm-simd-zh-en-asr-zipformer
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
+       - name: Release
+         if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+         uses: svenstaro/upload-release-action@v2
+         with:
+           file_glob: true
+           overwrite: true
+           file: ./*.tar.bz2
+ 
       - name: Publish to ModelScope
         # if: false
         env:
--- a/CMakeLists.txt
查看文件 @1ef8a7a
+++ b/CMakeLists.txt
查看文件 @1ef8a7a
@@ -35,6 +35,7 @@ option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
+ option(SHERPA_ONNX_ENABLE_WASM_VAD "Whether to enable WASM for VAD" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF)
 option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
 option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON)
@@ -135,6 +136,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}")
+ message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD ${SHERPA_ONNX_ENABLE_WASM_VAD}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}")
 message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}")
 message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}")
@@ -212,6 +214,10 @@ if(SHERPA_ONNX_ENABLE_WASM_KWS)
   add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
 endif()
 
+ if(SHERPA_ONNX_ENABLE_WASM_VAD)
+   add_definitions(-DSHERPA_ONNX_ENABLE_WASM_VAD=1)
+ endif()
+ 
 if(NOT CMAKE_CXX_STANDARD)
   set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to be used.")
 endif()
--- a/README.md
查看文件 @1ef8a7a
+++ b/README.md
查看文件 @1ef8a7a
@@ -76,6 +76,32 @@ with the following APIs
   - Swift, Rust
   - Dart, Object Pascal
 
+ ### Links for Huggingface Spaces
+ 
+ You can visit the following Huggingface spaces to try `sherpa-onnx` without
+ installing anything. All you need is a browser.
+ 
+ | Description | URL |
+ |---|---|
+ | Speech recognition | [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition)|
+ | Speech recognition with [Whisper](https://github.com/openai/whisper)| [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper)|
+ | Speech synthesis | [Click me](https://huggingface.co/spaces/k2-fsa/text-to-speech)|
+ | Generate subtitles| [Click me](https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos)|
+ |Audio tagging| [Click me](https://huggingface.co/spaces/k2-fsa/audio-tagging)|
+ |Spoken language identification with [Whisper](https://github.com/openai/whisper)|[Click me](https://huggingface.co/spaces/k2-fsa/spoken-language-identification)|
+ 
+ We also have spaces built using WebAssembly. They are listed below:
+ 
+ | Description | URL| Chinese users|
+ |---|---|---|
+ |Voice activity detection with [silero-vad](https://github.com/snakers4/silero-vad)| [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx)|[地址](https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx)|
+ |Real-time speech recognition (Chinese + English) with Zipformer | [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|
+ |Real-time speech recognition (Chinese + English) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)|
+ |Real-time speech recognition (Chinese + English + Cantonese) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)|
+ |Real-time speech recognition (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en)|
+ |Speech synthesis (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en)|
+ |Speech synthesis (German)|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de)|
+ 
 ### Links for pre-built Android APKs
 
 | Description                    | URL                                                                                     | 中国用户                                                                             |
@@ -130,7 +156,7 @@ with the following APIs
 | Keyword spotting |[Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models)|
 | Audio tagging                  | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models)|
 | Speaker identification (Speaker ID)         | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models)|
- | Spoken language identification (Language ID) | See multi-lingual Whisper ASR models from  [Speech recognition](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models) |
+ | Spoken language identification (Language ID) | See multi-lingual [Whisper](https://github.com/openai/whisper) ASR models from  [Speech recognition](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models) |
 | Punctuation| [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models)|
 
 ### Useful links
--- a/build-wasm-simd-asr.sh
查看文件 @1ef8a7a
+++ b/build-wasm-simd-asr.sh
查看文件 @1ef8a7a
@@ -48,6 +48,7 @@ cmake \
   -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
   -DSHERPA_ONNX_ENABLE_JNI=OFF \
   -DSHERPA_ONNX_ENABLE_C_API=ON \
+   -DSHERPA_ONNX_ENABLE_TTS=OFF \
   -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
   -DSHERPA_ONNX_ENABLE_GPU=OFF \
   -DSHERPA_ONNX_ENABLE_WASM=ON \
--- a/build-wasm-simd-kws.sh
查看文件 @1ef8a7a
+++ b/build-wasm-simd-kws.sh
查看文件 @1ef8a7a
@@ -43,6 +43,7 @@ cmake \
   -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
   -DSHERPA_ONNX_ENABLE_JNI=OFF \
   -DSHERPA_ONNX_ENABLE_C_API=ON \
+   -DSHERPA_ONNX_ENABLE_TTS=OFF \
   -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
   -DSHERPA_ONNX_ENABLE_GPU=OFF \
   -DSHERPA_ONNX_ENABLE_WASM=ON \
--- a/build-wasm-simd-vad.sh 0 → 100755
查看文件 @1ef8a7a
+++ b/build-wasm-simd-vad.sh 0 → 100755
查看文件 @1ef8a7a
+ #!/usr/bin/env bash
+ # Copyright (c)  2024  Xiaomi Corporation
+ #
+ # This script is to build sherpa-onnx for WebAssembly (VAD)
+ #
+ # It locates the Emscripten toolchain (from $EMSCRIPTEN, or from the `emcc`
+ # found on PATH), configures the project in ./build-wasm-simd-vad with the
+ # Emscripten CMake toolchain file, then builds and installs into
+ # ./build-wasm-simd-vad/install.
+ 
+ set -ex
+ 
+ # If the caller did not provide EMSCRIPTEN, derive it from the emcc on PATH.
+ if [ -z "$EMSCRIPTEN" ]; then
+   if ! command -v emcc &> /dev/null; then
+     echo "Please install emscripten first"
+     echo ""
+     echo "You can use the following commands to install it:"
+     echo ""
+     echo "git clone https://github.com/emscripten-core/emsdk.git"
+     echo "cd emsdk"
+     echo "git pull"
+     echo "./emsdk install latest"
+     echo "./emsdk activate latest"
+     echo "source ./emsdk_env.sh"
+     exit 1
+   else
+     # Quote every expansion so an emsdk installed under a path containing
+     # spaces is not split into several words.
+     EMSCRIPTEN=$(dirname "$(realpath "$(command -v emcc)")")
+   fi
+ fi
+ 
+ export EMSCRIPTEN
+ echo "EMSCRIPTEN: $EMSCRIPTEN"
+ if [ ! -f "$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" ]; then
+   echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
+   echo "Please make sure you have installed emsdk correctly"
+   exit 1
+ fi
+ 
+ mkdir -p build-wasm-simd-vad
+ pushd build-wasm-simd-vad
+ 
+ # wasm/vad/CMakeLists.txt checks this variable and aborts when it is not set,
+ # so that the wasm VAD target is only configured through this script.
+ export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON
+ 
+ cmake \
+   -DCMAKE_INSTALL_PREFIX=./install \
+   -DCMAKE_BUILD_TYPE=Release \
+   -DCMAKE_TOOLCHAIN_FILE="$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" \
+   \
+   -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+   -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+   -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+   -DBUILD_SHARED_LIBS=OFF \
+   -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+   -DSHERPA_ONNX_ENABLE_JNI=OFF \
+   -DSHERPA_ONNX_ENABLE_TTS=OFF \
+   -DSHERPA_ONNX_ENABLE_C_API=ON \
+   -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
+   -DSHERPA_ONNX_ENABLE_GPU=OFF \
+   -DSHERPA_ONNX_ENABLE_WASM=ON \
+   -DSHERPA_ONNX_ENABLE_WASM_VAD=ON \
+   -DSHERPA_ONNX_ENABLE_BINARY=OFF \
+   -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
+   ..
+ make -j2
+ make install
+ 
+ ls -lh install/bin/wasm/vad
--- a/scripts/node-addon-api/lib/vad.js
查看文件 @1ef8a7a
+++ b/scripts/node-addon-api/lib/vad.js
查看文件 @1ef8a7a
@@ -71,7 +71,7 @@ config = {
   /*
 {
   samples: a 1-d float32 array,
-   start: a int32
+   start: an int32
 }
    */
   front(enableExternalBuffer = true) {
--- a/wasm/CMakeLists.txt
查看文件 @1ef8a7a
+++ b/wasm/CMakeLists.txt
查看文件 @1ef8a7a
@@ -10,6 +10,10 @@ if(SHERPA_ONNX_ENABLE_WASM_KWS)
   add_subdirectory(kws)
 endif()
 
+ if(SHERPA_ONNX_ENABLE_WASM_VAD)
+   add_subdirectory(vad)
+ endif()
+ 
 if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
   add_subdirectory(nodejs)
 endif()
--- a/wasm/vad/CMakeLists.txt 0 → 100644
查看文件 @1ef8a7a
+++ b/wasm/vad/CMakeLists.txt 0 → 100644
查看文件 @1ef8a7a
+ # This directory must be configured through ./build-wasm-simd-vad.sh, which
+ # exports SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON before invoking cmake.
+ #
+ # The expansion is quoted on purpose: when the environment variable is unset,
+ # an unquoted $ENV{...} expands to nothing and leaves a bare `if(NOT)`, which
+ # does not evaluate to true, so the error below would never be raised.
+ # Quoted, an unset variable is tested as the empty string, which is false.
+ if(NOT "$ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}")
+   message(FATAL_ERROR "Please use ./build-wasm-simd-vad.sh to build for wasm VAD")
+ endif()
+ 
+ # The model is packaged from the assets directory at build time, so it has to
+ # be downloaded first (see assets/README.md).
+ if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/silero_vad.onnx")
+   message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
+ endif()
+ 
+ # Functions that have to stay callable from JavaScript. The names are listed
+ # without the leading underscore; it is added below for -sEXPORTED_FUNCTIONS.
+ set(exported_functions
+   MyPrint
+   # VAD
+   SherpaOnnxCreateCircularBuffer
+   SherpaOnnxDestroyCircularBuffer
+   SherpaOnnxCircularBufferPush
+   SherpaOnnxCircularBufferGet
+   SherpaOnnxCircularBufferFree
+   SherpaOnnxCircularBufferPop
+   SherpaOnnxCircularBufferSize
+   SherpaOnnxCircularBufferHead
+   SherpaOnnxCircularBufferReset
+   SherpaOnnxCreateVoiceActivityDetector
+   SherpaOnnxDestroyVoiceActivityDetector
+   SherpaOnnxVoiceActivityDetectorAcceptWaveform
+   SherpaOnnxVoiceActivityDetectorEmpty
+   SherpaOnnxVoiceActivityDetectorDetected
+   SherpaOnnxVoiceActivityDetectorPop
+   SherpaOnnxVoiceActivityDetectorClear
+   SherpaOnnxVoiceActivityDetectorFront
+   SherpaOnnxDestroySpeechSegment
+   SherpaOnnxVoiceActivityDetectorReset
+   SherpaOnnxVoiceActivityDetectorFlush
+   #
+ )
+ 
+ # Prefix every name with "_" and join the result into one comma-separated
+ # string, e.g. "_MyPrint,_SherpaOnnxCreateCircularBuffer,...".
+ list(TRANSFORM exported_functions PREPEND "_"
+   OUTPUT_VARIABLE mangled_exported_functions)
+ list(JOIN mangled_exported_functions "," all_exported_functions)
+ 
+ include_directories(${CMAKE_SOURCE_DIR})
+ 
+ # Emscripten options, collected in one string. The leading/trailing spaces
+ # inside the literals separate the options once the pieces are concatenated.
+ set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=64MB -s ALLOW_MEMORY_GROWTH=1")
+ string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
+ # Export the C API listed in all_exported_functions plus _CopyHeap/_malloc/_free.
+ string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
+ # Package the assets directory (which must contain silero_vad.onnx) with the
+ # build; `@.` is the destination inside the packaged file system.
+ string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
+ string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
+ 
+ message(STATUS "MY_FLAGS: ${MY_FLAGS}")
+ 
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
+ # NOTE(review): `CMAKE_EXECUTBLE_LINKER_FLAGS` looks like a misspelling of
+ # `CMAKE_EXE_LINKER_FLAGS`. As written this line only sets a variable that is
+ # not read anywhere in this file; the flags presumably still reach the link
+ # step through CMAKE_C_FLAGS/CMAKE_CXX_FLAGS above. Confirm before renaming,
+ # because a corrected name would pass MY_FLAGS to the linker a second time.
+ set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")
+ 
+ # The install rules below name the generated .js/.wasm/.data files
+ # explicitly, so the executable suffix has to be ".js".
+ if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
+   message(FATAL_ERROR "The default suffix for building executables should be .js!")
+ endif()
+ # set(CMAKE_EXECUTABLE_SUFFIX ".html")
+ 
+ add_executable(sherpa-onnx-wasm-main-vad sherpa-onnx-wasm-main-vad.cc)
+ target_link_libraries(sherpa-onnx-wasm-main-vad sherpa-onnx-c-api)
+ install(TARGETS sherpa-onnx-wasm-main-vad DESTINATION bin/wasm/vad)
+ 
+ # Install the generated files together with the static web page sources so
+ # that bin/wasm/vad contains everything listed here in one directory.
+ install(
+   FILES
+     "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad>/sherpa-onnx-wasm-main-vad.js"
+     "index.html"
+     "sherpa-onnx-vad.js"
+     "app-vad.js"
+     "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad>/sherpa-onnx-wasm-main-vad.wasm"
+     "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad>/sherpa-onnx-wasm-main-vad.data"
+   DESTINATION
+     bin/wasm/vad
+ )
--- a/wasm/vad/app-vad.js 0 → 100644
查看文件 @1ef8a7a
+++ b/wasm/vad/app-vad.js 0 → 100644
查看文件 @1ef8a7a
+ // This file copies and modifies code
+ // from https://mdn.github.io/web-dictaphone/scripts/app.js
+ // and https://gist.github.com/meziantou/edb7217fddfbb70e899e
+ 
+ // Page elements, looked up by id.
+ const startBtn = document.getElementById('startBtn');
+ const stopBtn = document.getElementById('stopBtn');
+ const clearBtn = document.getElementById('clearBtn');
+ const hint = document.getElementById('hint');
+ const soundClips = document.getElementById('sound-clips');
+ 
+ let textArea = document.getElementById('results');
+ 
+ // Status line of the speech segment currently in progress ('' when none).
+ let lastResult = '';
+ // Finished status lines, in order; rendered by getDisplayResult().
+ let resultList = [];
+ 
+ // Drop all finished status lines and redraw the text area.
+ // Note that lastResult is left untouched.
+ clearBtn.onclick = function() {
+   resultList = [];
+   textArea.value = getDisplayResult();
+   textArea.scrollTop = textArea.scrollHeight;  // auto scroll
+ };
+ 
+ function getDisplayResult() {
+   let i = 0;
+   let ans = '';
+   for (let s in resultList) {
+     if (resultList[s] == '') {
+       continue;
+     }
+ 
+     if (resultList[s] == 'Speech detected') {
+       ans += '' + i + ': ' + resultList[s];
+       i += 1;
+     } else {
+       ans += ', ' + resultList[s] + '\n';
+     }
+   }
+ 
+   if (lastResult.length > 0) {
+     ans += '' + i + ': ' + lastResult + '\n';
+   }
+   return ans;
+ }
+ 
+ 
+ // `Module` is assigned without a declaration, so it becomes a global object;
+ // presumably the generated sherpa-onnx-wasm-main-vad.js picks it up from
+ // there (TODO confirm against the generated loader).
+ Module = {};
+ // Runs once the runtime is ready: enables the start button and creates the
+ // detector and the sample buffer used by the audio callback.
+ // createVad and CircularBuffer are not defined in this file; presumably they
+ // come from sherpa-onnx-vad.js (TODO confirm).
+ Module.onRuntimeInitialized = function() {
+   console.log('inited!');
+   hint.innerText = 'Model loaded! Please click start';
+ 
+   startBtn.disabled = false;
+ 
+   vad = createVad(Module);
+   console.log('vad is created!', vad);
+ 
+   // presumably a capacity of 30 seconds at 16000 samples per second; verify
+   // against the CircularBuffer constructor
+   buffer = new CircularBuffer(30 * 16000, Module);
+   console.log('CircularBuffer is created!', buffer);
+ };
+ 
+ let audioCtx;
+ let mediaStream;
+ 
+ // Sample rate requested for the AudioContext and written into WAV headers.
+ let expectedSampleRate = 16000;
+ let recordSampleRate;  // the sampleRate reported by the AudioContext
+ let recorder = null;   // the script processor node that receives audio
+ let leftchannel = [];  // TODO: Use a single channel
+ 
+ let recordingLength = 0;  // number of samples so far
+ 
+ // Assigned in Module.onRuntimeInitialized above.
+ let vad = null;
+ let buffer = null;
+ // true while 'Speech detected' has already been reported for the current
+ // segment
+ let printed = false;
+ 
+ if (navigator.mediaDevices.getUserMedia) {
+   console.log('getUserMedia supported.');
+ 
+   // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
+   const constraints = {audio: true};
+ 
+   let onSuccess = function(stream) {
+     if (!audioCtx) {
+       audioCtx = new AudioContext({sampleRate: expectedSampleRate});
+     }
+     console.log(audioCtx);
+     recordSampleRate = audioCtx.sampleRate;
+     console.log('sample rate ' + recordSampleRate);
+ 
+     // creates an audio node from the microphone incoming stream
+     mediaStream = audioCtx.createMediaStreamSource(stream);
+     console.log('media stream', mediaStream);
+ 
+     // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
+     // bufferSize: the onaudioprocess event is called when the buffer is full
+     var bufferSize = 4096;
+     var numberOfInputChannels = 1;
+     var numberOfOutputChannels = 2;
+     if (audioCtx.createScriptProcessor) {
+       recorder = audioCtx.createScriptProcessor(
+           bufferSize, numberOfInputChannels, numberOfOutputChannels);
+     } else {
+       recorder = audioCtx.createJavaScriptNode(
+           bufferSize, numberOfInputChannels, numberOfOutputChannels);
+     }
+     console.log('recorder', recorder);
+ 
+     recorder.onaudioprocess = function(e) {
+       let samples = new Float32Array(e.inputBuffer.getChannelData(0))
+       samples = downsampleBuffer(samples, expectedSampleRate);
+       buffer.push(samples);
+       while (buffer.size() > vad.config.sileroVad.windowSize) {
+         const s = buffer.get(buffer.head(), vad.config.sileroVad.windowSize);
+         vad.acceptWaveform(s);
+         buffer.pop(vad.config.sileroVad.windowSize);
+ 
+         if (vad.isDetected() && !printed) {
+           printed = true;
+           lastResult = 'Speech detected';
+         }
+ 
+         if (!vad.isDetected()) {
+           printed = false;
+           if (lastResult != '') {
+             resultList.push(lastResult);
+           }
+           lastResult = '';
+         }
+ 
+         while (!vad.isEmpty()) {
+           const segment = vad.front();
+           const duration = segment.samples.length / expectedSampleRate;
+           const durationStr = `Duration: ${duration.toFixed(3)} seconds`;
+           resultList.push(durationStr);
+           vad.pop();
+ 
+           // now save the segment to a wav file
+           let buf = new Int16Array(segment.samples.length);
+           for (var i = 0; i < segment.samples.length; ++i) {
+             let s = segment.samples[i];
+             if (s >= 1)
+               s = 1;
+             else if (s <= -1)
+               s = -1;
+ 
+             buf[i] = s * 32767;
+           }
+ 
+           let clipName = new Date().toISOString() + '--' + durationStr;
+ 
+           const clipContainer = document.createElement('article');
+           const clipLabel = document.createElement('p');
+           const audio = document.createElement('audio');
+           const deleteButton = document.createElement('button');
+ 
+           clipContainer.classList.add('clip');
+           audio.setAttribute('controls', '');
+           deleteButton.textContent = 'Delete';
+           deleteButton.className = 'delete';
+ 
+           clipLabel.textContent = clipName;
+ 
+           clipContainer.appendChild(audio);
+ 
+           clipContainer.appendChild(clipLabel);
+           clipContainer.appendChild(deleteButton);
+           soundClips.appendChild(clipContainer);
+ 
+           audio.controls = true;
+           const blob = toWav(buf);
+ 
+           leftchannel = [];
+           const audioURL = window.URL.createObjectURL(blob);
+           audio.src = audioURL;
+ 
+           deleteButton.onclick = function(e) {
+             let evtTgt = e.target;
+             evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
+           };
+ 
+           clipLabel.onclick = function() {
+             const existingName = clipLabel.textContent;
+             const newClipName = prompt('Enter a new name for your sound clip?');
+             if (newClipName === null) {
+               clipLabel.textContent = existingName;
+             } else {
+               clipLabel.textContent = newClipName;
+             }
+           };
+         }
+       }
+ 
+       textArea.value = getDisplayResult();
+       textArea.scrollTop = textArea.scrollHeight;  // auto scroll
+     };
+ 
+     startBtn.onclick = function() {
+       mediaStream.connect(recorder);
+       recorder.connect(audioCtx.destination);
+ 
+       console.log('recorder started');
+ 
+       stopBtn.disabled = false;
+       startBtn.disabled = true;
+     };
+ 
+     stopBtn.onclick = function() {
+       vad.reset();
+       buffer.reset();
+       console.log('recorder stopped');
+ 
+       // stopBtn recording
+       recorder.disconnect(audioCtx.destination);
+       mediaStream.disconnect(recorder);
+ 
+       startBtn.style.background = '';
+       startBtn.style.color = '';
+       // mediaRecorder.requestData();
+ 
+       stopBtn.disabled = true;
+       startBtn.disabled = false;
+     };
+   };
+ 
+   let onError = function(err) {
+     console.log('The following error occured: ' + err);
+   };
+ 
+   navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
+ } else {
+   console.log('getUserMedia not supported on your browser!');
+   alert('getUserMedia not supported on your browser!');
+ }
+ 
+ 
+ // this function is copied/modified from
+ // https://gist.github.com/meziantou/edb7217fddfbb70e899e
+ function flatten(listOfSamples) {
+   let n = 0;
+   for (let i = 0; i < listOfSamples.length; ++i) {
+     n += listOfSamples[i].length;
+   }
+   let ans = new Int16Array(n);
+ 
+   let offset = 0;
+   for (let i = 0; i < listOfSamples.length; ++i) {
+     ans.set(listOfSamples[i], offset);
+     offset += listOfSamples[i].length;
+   }
+   return ans;
+ }
+ 
+ // this function is copied/modified from
+ // https://gist.github.com/meziantou/edb7217fddfbb70e899e
+ function toWav(samples) {
+   let buf = new ArrayBuffer(44 + samples.length * 2);
+   var view = new DataView(buf);
+ 
+   // http://soundfile.sapp.org/doc/WaveFormat/
+   //                   F F I R
+   view.setUint32(0, 0x46464952, true);               // chunkID
+   view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
+   //                   E V A W
+   view.setUint32(8, 0x45564157, true);  // format
+                                         //
+   //                      t m f
+   view.setUint32(12, 0x20746d66, true);          // subchunk1ID
+   view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
+   view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
+   view.setUint16(22, 1, true);                   // numChannels: 1 channel
+   view.setUint32(24, expectedSampleRate, true);  // sampleRate
+   view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
+   view.setUint16(32, 2, true);                       // blockAlign
+   view.setUint16(34, 16, true);                      // bitsPerSample
+   view.setUint32(36, 0x61746164, true);              // Subchunk2ID
+   view.setUint32(40, samples.length * 2, true);      // subchunk2Size
+ 
+   let offset = 44;
+   for (let i = 0; i < samples.length; ++i) {
+     view.setInt16(offset, samples[i], true);
+     offset += 2;
+   }
+ 
+   return new Blob([view], {type: 'audio/wav'});
+ }
+ 
+ // this function is copied from
+ // https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
+ function downsampleBuffer(buffer, exportSampleRate) {
+   if (exportSampleRate === recordSampleRate) {
+     return buffer;
+   }
+   var sampleRateRatio = recordSampleRate / exportSampleRate;
+   var newLength = Math.round(buffer.length / sampleRateRatio);
+   var result = new Float32Array(newLength);
+   var offsetResult = 0;
+   var offsetBuffer = 0;
+   while (offsetResult < result.length) {
+     var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
+     var accum = 0, count = 0;
+     for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
+       accum += buffer[i];
+       count++;
+     }
+     result[offsetResult] = accum / count;
+     offsetResult++;
+     offsetBuffer = nextOffsetBuffer;
+   }
+   return result;
+ };
--- a/wasm/vad/assets/README.md 0 → 100644
查看文件 @1ef8a7a
+++ b/wasm/vad/assets/README.md 0 → 100644
查看文件 @1ef8a7a
+ # Introduction
+ 
+ Please download
+ https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+ and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`.
--- a/wasm/vad/index.html 0 → 100644
查看文件 @1ef8a7a
+++ b/wasm/vad/index.html 0 → 100644
查看文件 @1ef8a7a
+ <html lang="en">
+ 
+ <head>
+   <meta charset="utf-8">
+   <meta name="viewport" content="width=device-width" />
+   <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Voice Activity Detection</title>
+   <style>
+     h1,div {
+       text-align: center;
+     }
+     textarea {
+       width:100%;
+     }
+   </style>
+ </head>
+ 
+ <body>
+   <h1>
+     Next-gen Kaldi + WebAssembly<br/>
+     VAD Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
+     (with <a href="https://github.com/snakers4/silero-vad">silero-vad</a>)
+   </h1>
+ 
+   <div>
+     <span id="hint">Loading model ... ...</span>
+     <br/>
+     <br/>
+     <button id="startBtn" disabled>Start</button>
+     <button id="stopBtn" disabled>Stop</button>
+     <button id="clearBtn">Clear</button>
+     <br/>
+     <br/>
+     <textarea id="results" rows="10" readonly></textarea>
+   </div>
+ 
+   <section flex="1" overflow="auto" id="sound-clips">
+   </section>
+ 
+   <script src="sherpa-onnx-vad.js"></script>
+   <script src="app-vad.js"></script>
+   <script src="sherpa-onnx-wasm-main-vad.js"></script>
+ </body>
--- a/wasm/vad/sherpa-onnx-vad.js 0 → 100644
查看文件 @1ef8a7a
+++ b/wasm/vad/sherpa-onnx-vad.js 0 → 100644
查看文件 @1ef8a7a
+ function freeConfig(config, Module) {
+   if ('buffer' in config) {
+     Module._free(config.buffer);
+   }
+ 
+   if ('sileroVad' in config) {
+     freeConfig(config.sileroVad, Module)
+   }
+ 
+ 
+   Module._free(config.ptr);
+ }
+ 
+ // The user should free the returned pointers
+ function initSherpaOnnxSileroVadModelConfig(config, Module) {
+   const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;
+ 
+   const n = modelLen;
+ 
+   const buffer = Module._malloc(n);
+ 
+   const len = 5 * 4;
+   const ptr = Module._malloc(len);
+ 
+   Module.stringToUTF8(config.model || '', buffer, modelLen);
+ 
+   offset = 0;
+   Module.setValue(ptr, buffer, 'i8*');
+   offset += 4;
+ 
+   Module.setValue(ptr + offset, config.threshold || 0.5, 'float');
+   offset += 4;
+ 
+   Module.setValue(ptr + offset, config.minSilenceDuration || 0.5, 'float');
+   offset += 4;
+ 
+   Module.setValue(ptr + offset, config.minSpeechDuration || 0.25, 'float');
+   offset += 4;
+ 
+   Module.setValue(ptr + offset, config.windowSize || 512, 'i32');
+   offset += 4;
+ 
+   return {
+     buffer: buffer, ptr: ptr, len: len,
+   }
+ }
+ 
+ function initSherpaOnnxVadModelConfig(config, Module) {
+   if (!('sileroVad' in config)) {
+     config.sileroVad = {
+       model: '',
+       threshold: 0.50,
+       minSilenceDuration: 0.50,
+       minSpeechDuration: 0.25,
+       windowSize: 512,
+     };
+   }
+ 
+   const sileroVad =
+       initSherpaOnnxSileroVadModelConfig(config.sileroVad, Module);
+ 
+   const len = sileroVad.len + 4 * 4;
+   const ptr = Module._malloc(len);
+ 
+   const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
+   const buffer = Module._malloc(providerLen);
+   Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen);
+ 
+   let offset = 0;
+   Module._CopyHeap(sileroVad.ptr, sileroVad.len, ptr + offset);
+   offset += sileroVad.len;
+ 
+   Module.setValue(ptr + offset, config.sampleRate || 16000, 'i32');
+   offset += 4;
+ 
+   Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
+   offset += 4;
+ 
+   Module.setValue(ptr + offset, buffer, 'i8*');  // provider
+   offset += 4;
+ 
+   Module.setValue(ptr + offset, config.debug || 0, 'i32');
+   offset += 4;
+ 
+   return {
+     buffer: buffer, ptr: ptr, len: len, sileroVad: sileroVad,
+   }
+ }
+ 
+ function createVad(Module, myConfig) {
+   const sileroVad = {
+     model: './silero_vad.onnx',
+     threshold: 0.50,
+     minSilenceDuration: 0.50,
+     minSpeechDuration: 0.25,
+     windowSize: 512,
+   };
+ 
+   let config = {
+     sileroVad: sileroVad,
+     sampleRate: 16000,
+     numThreads: 1,
+     provider: 'cpu',
+     debug: 1,
+     bufferSizeInSeconds: 30,
+   };
+ 
+   if (myConfig) {
+     config = myConfig;
+   }
+ 
+   return new Vad(config, Module);
+ }
+ 
+ 
+ class CircularBuffer {
+   constructor(capacity, Module) {
+     this.handle = Module._SherpaOnnxCreateCircularBuffer(capacity);
+     this.Module = Module;
+   }
+ 
+   free() {
+     this.Module._SherpaOnnxDestroyCircularBuffer(this.handle);
+     this.handle = 0
+   }
+ 
+   /**
+    * @param samples {Float32Array}
+    */
+   push(samples) {
+     const pointer =
+         this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
+     this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
+     this.Module._SherpaOnnxCircularBufferPush(
+         this.handle, pointer, samples.length);
+     this.Module._free(pointer);
+   }
+ 
+   get(startIndex, n) {
+     const p =
+         this.Module._SherpaOnnxCircularBufferGet(this.handle, startIndex, n);
+ 
+     const samplesPtr = p / 4;
+     const samples = new Float32Array(n);
+     for (let i = 0; i < n; i++) {
+       samples[i] = this.Module.HEAPF32[samplesPtr + i];
+     }
+ 
+     this.Module._SherpaOnnxCircularBufferFree(p);
+ 
+     return samples;
+   }
+ 
+   pop(n) {
+     this.Module._SherpaOnnxCircularBufferPop(this.handle, n);
+   }
+ 
+   size() {
+     return this.Module._SherpaOnnxCircularBufferSize(this.handle);
+   }
+ 
+   head() {
+     return this.Module._SherpaOnnxCircularBufferHead(this.handle);
+   }
+ 
+   reset() {
+     this.Module._SherpaOnnxCircularBufferReset(this.handle);
+   }
+ }
+ 
+ class Vad {
+   constructor(configObj, Module) {
+     this.config = configObj;
+     const config = initSherpaOnnxVadModelConfig(configObj, Module);
+     Module._MyPrint(config.ptr);
+     const handle = Module._SherpaOnnxCreateVoiceActivityDetector(
+         config.ptr, configObj.bufferSizeInSeconds || 30);
+     freeConfig(config, Module);
+ 
+     this.handle = handle;
+     this.Module = Module;
+   }
+ 
+   free() {
+     this.Module._SherpaOnnxDestroyVoiceActivityDetector(this.handle);
+     this.handle = 0
+   }
+ 
+   // samples is a float32 array
+   acceptWaveform(samples) {
+     const pointer =
+         this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
+     this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
+     this.Module._SherpaOnnxVoiceActivityDetectorAcceptWaveform(
+         this.handle, pointer, samples.length);
+     this.Module._free(pointer);
+   }
+ 
+   isEmpty() {
+     return this.Module._SherpaOnnxVoiceActivityDetectorEmpty(this.handle) == 1;
+   }
+ 
+   isDetected() {
+     return this.Module._SherpaOnnxVoiceActivityDetectorDetected(this.handle) ==
+         1;
+   }
+ 
+   pop() {
+     this.Module._SherpaOnnxVoiceActivityDetectorPop(this.handle);
+   }
+ 
+   clear() {
+     this.Module._SherpaOnnxVoiceActivityDetectorClear(this.handle);
+   }
+ 
+   /*
+ {
+   samples: a 1-d float32 array,
+   start: an int32
+ }
+    */
+   front() {
+     const h = this.Module._SherpaOnnxVoiceActivityDetectorFront(this.handle);
+ 
+     const start = this.Module.HEAP32[h / 4];
+     const samplesPtr = this.Module.HEAP32[h / 4 + 1] / 4;
+     const numSamples = this.Module.HEAP32[h / 4 + 2];
+ 
+     const samples = new Float32Array(numSamples);
+     for (let i = 0; i < numSamples; i++) {
+       samples[i] = this.Module.HEAPF32[samplesPtr + i];
+     }
+ 
+     this.Module._SherpaOnnxDestroySpeechSegment(h);
+     return {samples: samples, start: start};
+   }
+ 
+   reset() {
+     this.Module._SherpaOnnxVoiceActivityDetectorReset(this.handle);
+   }
+ 
+   flush() {
+     this.Module._SherpaOnnxVoiceActivityDetectorFlush(this.handle);
+   }
+ };
+ 
+ if (typeof process == 'object' && typeof process.versions == 'object' &&
+     typeof process.versions.node == 'string') {
+   module.exports = {
+     createVad,
+     CircularBuffer,
+   };
+ }
--- a/wasm/vad/sherpa-onnx-wasm-main-vad.cc 0 → 100644
查看文件 @1ef8a7a
+++ b/wasm/vad/sherpa-onnx-wasm-main-vad.cc 0 → 100644
查看文件 @1ef8a7a
+ // wasm/sherpa-onnx-wasm-main-vad.cc
+ //
+ // Copyright (c)  2024  Xiaomi Corporation
+ #include <stdio.h>
+ 
+ #include <algorithm>
+ #include <memory>
+ 
+ #include "sherpa-onnx/c-api/c-api.h"
+ 
+ // see also
+ // https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html
+ 
+ extern "C" {
+ 
+ // sherpa-onnx-vad.js fills these structs by hand using fixed 4-byte slots
+ // (5 * 4 bytes, then that size plus 4 * 4 bytes), so the C struct sizes
+ // have to match those numbers exactly.
+ static_assert(sizeof(SherpaOnnxSileroVadModelConfig) == 5 * 4,
+               "SherpaOnnxSileroVadModelConfig must occupy five 4-byte slots");
+ 
+ static_assert(
+     sizeof(SherpaOnnxVadModelConfig) ==
+         sizeof(SherpaOnnxSileroVadModelConfig) + 4 * 4,
+     "SherpaOnnxVadModelConfig must be the silero config plus four 4-byte "
+     "slots");
+ // Print every field of `config` (the nested silero_vad fields first) to
+ // stdout, one field per line.
+ void MyPrint(SherpaOnnxVadModelConfig *config) {
+   const auto &silero = config->silero_vad;
+ 
+   printf("----------silero_vad config----------\n");
+   printf("model: %s\n", silero.model);
+   printf("threshold: %.3f\n", silero.threshold);
+   printf("min_silence_duration: %.3f\n", silero.min_silence_duration);
+   printf("min_speech_duration: %.3f\n", silero.min_speech_duration);
+   printf("window_size: %d\n", silero.window_size);
+ 
+   printf("----------config----------\n");
+ 
+   printf("sample_rate: %d\n", config->sample_rate);
+   printf("num_threads: %d\n", config->num_threads);
+ 
+   printf("provider: %s\n", config->provider);
+   printf("debug: %d\n", config->debug);
+ }
+ 
+ // Copy `num_bytes` bytes from `src` to `dst`.
+ void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
+   std::copy_n(src, num_bytes, dst);
+ }
+ }