Fangjun Kuang
Committed by GitHub

Add WebAssembly for VAD (#1281)

name: wasm-simd-hf-space-de-tts
on:
release:
types:
- published
push:
branches:
- wasm
tags:
- 'v[0-9]+.[0-9]+.[0-9]+*'
workflow_dispatch:
... ... @@ -71,6 +73,14 @@ jobs:
name: sherpa-onnx-wasm-simd-de-tts
path: ./sherpa-onnx-wasm-simd-*.tar.bz2
- name: Release
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: ./*.tar.bz2
- name: Publish to ModelScope
# if: false
env:
... ...
name: wasm-simd-hf-space-en-asr-zipformer
on:
release:
types:
- published
push:
branches:
- wasm
tags:
- 'v[0-9]+.[0-9]+.[0-9]+*'
workflow_dispatch:
... ... @@ -73,6 +75,14 @@ jobs:
name: sherpa-onnx-wasm-simd-en-asr-zipformer
path: ./sherpa-onnx-wasm-simd-*.tar.bz2
- name: Release
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: ./*.tar.bz2
- name: Publish to ModelScope
# if: false
env:
... ...
name: wasm-simd-hf-space-en-tts
on:
release:
types:
- published
push:
branches:
- wasm
tags:
- 'v[0-9]+.[0-9]+.[0-9]+*'
workflow_dispatch:
... ... @@ -69,6 +71,14 @@ jobs:
name: sherpa-onnx-wasm-simd-en-tts
path: ./sherpa-onnx-wasm-simd-*.tar.bz2
- name: Release
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: ./*.tar.bz2
- name: Publish to ModelScope
# if: false
env:
... ...
# Build the sherpa-onnx VAD (silero-vad) demo for WebAssembly (SIMD) and
# publish the resulting space to ModelScope and Huggingface.
name: wasm-simd-hf-space-silero-vad

on:
  push:
    branches:
      - wasm
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

# Cancel an in-flight run of this workflow when a newer one starts for the
# same ref.
concurrency:
  group: wasm-simd-hf-space-silero-vad-${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-silero-vad:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.51
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      # wasm/vad/CMakeLists.txt refuses to configure unless this model file
      # exists; it is bundled into the .data file via --preload-file.
      - name: Download model files
        shell: bash
        run: |
          cd wasm/vad/assets
          ls -lh
          echo "----------"
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
          ls -lh

      - name: Build sherpa-onnx for WebAssembly
        shell: bash
        run: |
          ./build-wasm-simd-vad.sh

      - name: collect files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
          dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-vad
          mv build-wasm-simd-vad/install/bin/wasm/vad $dst
          ls -lh $dst
          tar cjfv $dst.tar.bz2 ./$dst

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-vad
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2

      # Attach the tarball to the GitHub release when pushing a version tag.
      - name: Release
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2

      - name: Publish to ModelScope
        # if: false
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf ms
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx.git ms
            cd ms
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-vad/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx.git

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx huggingface
            cd huggingface
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-vad/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx main
... ...
name: wasm-simd-hf-space-zh-cantonese-en-asr-paraformer
on:
release:
types:
- published
push:
branches:
- wasm
tags:
- 'v[0-9]+.[0-9]+.[0-9]+*'
workflow_dispatch:
... ... @@ -80,6 +82,14 @@ jobs:
name: sherpa-onnx-wasm-simd-zh-cantonese-en-asr-paraformer
path: ./sherpa-onnx-wasm-simd-*.tar.bz2
- name: Release
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: ./*.tar.bz2
- name: Publish to huggingface
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
... ...
name: wasm-simd-hf-space-zh-en-asr-paraformer
on:
release:
types:
- published
push:
branches:
- wasm
tags:
- 'v[0-9]+.[0-9]+.[0-9]+*'
workflow_dispatch:
... ... @@ -80,6 +82,14 @@ jobs:
name: sherpa-onnx-wasm-simd-zh-en-asr-paraformer
path: ./sherpa-onnx-wasm-simd-*.tar.bz2
- name: Release
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: ./*.tar.bz2
- name: Publish to ModelScope
# if: false
env:
... ...
name: wasm-simd-hf-space-zh-en-asr-zipformer
on:
release:
types:
- published
push:
branches:
- wasm
tags:
- 'v[0-9]+.[0-9]+.[0-9]+*'
workflow_dispatch:
... ... @@ -71,6 +73,14 @@ jobs:
name: sherpa-onnx-wasm-simd-zh-en-asr-zipformer
path: ./sherpa-onnx-wasm-simd-*.tar.bz2
- name: Release
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: ./*.tar.bz2
- name: Publish to ModelScope
# if: false
env:
... ...
... ... @@ -35,6 +35,7 @@ option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
option(SHERPA_ONNX_ENABLE_WASM_VAD "Whether to enable WASM for VAD" OFF)
option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF)
option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON)
... ... @@ -135,6 +136,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD ${SHERPA_ONNX_ENABLE_WASM_VAD}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}")
message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}")
message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}")
... ... @@ -212,6 +214,10 @@ if(SHERPA_ONNX_ENABLE_WASM_KWS)
add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
endif()
if(SHERPA_ONNX_ENABLE_WASM_VAD)
add_definitions(-DSHERPA_ONNX_ENABLE_WASM_VAD=1)
endif()
if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to be used.")
endif()
... ...
... ... @@ -76,6 +76,32 @@ with the following APIs
- Swift, Rust
- Dart, Object Pascal
### Links for Huggingface Spaces
You can visit the following Huggingface spaces to try `sherpa-onnx` without
installing anything. All you need is a browser.
| Description | URL |
|---|---|
| Speech recognition | [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition)|
| Speech recognition with [Whisper](https://github.com/openai/whisper)| [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper)|
| Speech synthesis | [Click me](https://huggingface.co/spaces/k2-fsa/text-to-speech)|
| Generate subtitles| [Click me](https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos)|
|Audio tagging| [Click me](https://huggingface.co/spaces/k2-fsa/audio-tagging)|
|Spoken language identification with [Whisper](https://github.com/openai/whisper)|[Click me](https://huggingface.co/spaces/k2-fsa/spoken-language-identification)|
We also have spaces built using WebAssembly. They are listed below:
| Description | URL| Chinese users|
|---|---|---|
|Voice activity detection with [silero-vad](https://github.com/snakers4/silero-vad)| [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx)|[地址](https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx)|
|Real-time speech recognition (Chinese + English) with Zipformer | [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|
|Real-time speech recognition (Chinese + English) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)|
|Real-time speech recognition (Chinese + English + Cantonese) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)|
|Real-time speech recognition (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en)|
|Speech synthesis (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en)|
|Speech synthesis (German)|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de)|
### Links for pre-built Android APKs
| Description | URL | 中国用户 |
... ... @@ -130,7 +156,7 @@ with the following APIs
| Keyword spotting |[Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models)|
| Audio tagging | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models)|
| Speaker identification (Speaker ID) | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models)|
| Spoken language identification (Language ID) | See multi-lingual Whisper ASR models from [Speech recognition](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models) |
| Spoken language identification (Language ID) | See multi-lingual [Whisper](https://github.com/openai/whisper) ASR models from [Speech recognition](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models) |
| Punctuation| [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models)|
### Useful links
... ...
... ... @@ -48,6 +48,7 @@ cmake \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=OFF \
-DSHERPA_ONNX_ENABLE_C_API=ON \
-DSHERPA_ONNX_ENABLE_TTS=OFF \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DSHERPA_ONNX_ENABLE_GPU=OFF \
-DSHERPA_ONNX_ENABLE_WASM=ON \
... ...
... ... @@ -43,6 +43,7 @@ cmake \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=OFF \
-DSHERPA_ONNX_ENABLE_C_API=ON \
-DSHERPA_ONNX_ENABLE_TTS=OFF \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DSHERPA_ONNX_ENABLE_GPU=OFF \
-DSHERPA_ONNX_ENABLE_WASM=ON \
... ...
#!/usr/bin/env bash
# Copyright (c) 2024 Xiaomi Corporation
#
# This script is to build sherpa-onnx for WebAssembly (VAD)
#
# It configures sherpa-onnx with the emscripten toolchain, builds it in
# ./build-wasm-simd-vad, and installs the wasm/js artifacts into
# ./build-wasm-simd-vad/install/bin/wasm/vad

set -ex

# Locate emscripten: honor a preset $EMSCRIPTEN, otherwise derive it from the
# directory that contains emcc.
if [ x"$EMSCRIPTEN" == x"" ]; then
  if ! command -v emcc &> /dev/null; then
    echo "Please install emscripten first"
    echo ""
    echo "You can use the following commands to install it:"
    echo ""
    echo "git clone https://github.com/emscripten-core/emsdk.git"
    echo "cd emsdk"
    echo "git pull"
    echo "./emsdk install latest"
    echo "./emsdk activate latest"
    echo "source ./emsdk_env.sh"
    exit 1
  else
    EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
  fi
fi

export EMSCRIPTEN=$EMSCRIPTEN
echo "EMSCRIPTEN: $EMSCRIPTEN"

# Sanity check: the emscripten CMake toolchain file must exist.
if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
  echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
  echo "Please make sure you have installed emsdk correctly"
  exit 1
fi

mkdir -p build-wasm-simd-vad
pushd build-wasm-simd-vad

# Checked by wasm/vad/CMakeLists.txt to ensure this script is used for
# configuring the wasm VAD build.
export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON

cmake \
  -DCMAKE_INSTALL_PREFIX=./install \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \
  \
  -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  -DBUILD_SHARED_LIBS=OFF \
  -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  -DSHERPA_ONNX_ENABLE_JNI=OFF \
  -DSHERPA_ONNX_ENABLE_TTS=OFF \
  -DSHERPA_ONNX_ENABLE_C_API=ON \
  -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
  -DSHERPA_ONNX_ENABLE_GPU=OFF \
  -DSHERPA_ONNX_ENABLE_WASM=ON \
  -DSHERPA_ONNX_ENABLE_WASM_VAD=ON \
  -DSHERPA_ONNX_ENABLE_BINARY=OFF \
  -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
  ..

make -j2
make install

ls -lh install/bin/wasm/vad
... ...
... ... @@ -71,7 +71,7 @@ config = {
/*
{
samples: a 1-d float32 array,
start: a int32
start: an int32
}
*/
front(enableExternalBuffer = true) {
... ...
... ... @@ -10,6 +10,10 @@ if(SHERPA_ONNX_ENABLE_WASM_KWS)
add_subdirectory(kws)
endif()
if(SHERPA_ONNX_ENABLE_WASM_VAD)
add_subdirectory(vad)
endif()
if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
add_subdirectory(nodejs)
endif()
... ...
# Guard: this subproject must be configured via ./build-wasm-simd-vad.sh,
# which exports SHERPA_ONNX_IS_USING_BUILD_WASM_SH and selects the
# emscripten toolchain.
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-vad.sh to build for wasm VAD")
endif()

# The VAD model is bundled into the generated .data file via --preload-file
# below, so it must exist before configuring.
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/silero_vad.onnx")
  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
endif()

# C symbols from the sherpa-onnx C API that JavaScript needs to call.
set(exported_functions
  MyPrint
  # VAD
  SherpaOnnxCreateCircularBuffer
  SherpaOnnxDestroyCircularBuffer
  SherpaOnnxCircularBufferPush
  SherpaOnnxCircularBufferGet
  SherpaOnnxCircularBufferFree
  SherpaOnnxCircularBufferPop
  SherpaOnnxCircularBufferSize
  SherpaOnnxCircularBufferHead
  SherpaOnnxCircularBufferReset
  SherpaOnnxCreateVoiceActivityDetector
  SherpaOnnxDestroyVoiceActivityDetector
  SherpaOnnxVoiceActivityDetectorAcceptWaveform
  SherpaOnnxVoiceActivityDetectorEmpty
  SherpaOnnxVoiceActivityDetectorDetected
  SherpaOnnxVoiceActivityDetectorPop
  SherpaOnnxVoiceActivityDetectorClear
  SherpaOnnxVoiceActivityDetectorFront
  SherpaOnnxDestroySpeechSegment
  SherpaOnnxVoiceActivityDetectorReset
  SherpaOnnxVoiceActivityDetectorFlush
  #
)

# Emscripten prefixes exported C symbols with an underscore.
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=64MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
# Fix: the original set a misspelled variable (CMAKE_EXECUTBLE_LINKER_FLAGS),
# which CMake silently ignores; CMAKE_EXE_LINKER_FLAGS is the variable that
# actually reaches the link step, where emscripten consumes
# EXPORTED_FUNCTIONS / EXPORTED_RUNTIME_METHODS / --preload-file.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}")

# The emscripten toolchain sets the executable suffix to .js; anything else
# means the wrong toolchain is in use.
if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
  message(FATAL_ERROR "The default suffix for building executables should be .js!")
endif()
# set(CMAKE_EXECUTABLE_SUFFIX ".html")

add_executable(sherpa-onnx-wasm-main-vad sherpa-onnx-wasm-main-vad.cc)
target_link_libraries(sherpa-onnx-wasm-main-vad sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-main-vad DESTINATION bin/wasm/vad)

# Ship the generated js/wasm/data plus the static demo page.
install(
  FILES
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad>/sherpa-onnx-wasm-main-vad.js"
    "index.html"
    "sherpa-onnx-vad.js"
    "app-vad.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad>/sherpa-onnx-wasm-main-vad.wasm"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad>/sherpa-onnx-wasm-main-vad.data"
  DESTINATION
    bin/wasm/vad
)
... ...
// This file copies and modifies code
// from https://mdn.github.io/web-dictaphone/scripts/app.js
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e

// UI elements declared in index.html.
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const clearBtn = document.getElementById('clearBtn');
const hint = document.getElementById('hint');
const soundClips = document.getElementById('sound-clips');

let textArea = document.getElementById('results');

let lastResult = '';  // pending line, not yet committed to resultList
let resultList = [];  // committed display lines shown in the textarea

// Clear the accumulated results and refresh the textarea.
clearBtn.onclick = function() {
  resultList = [];
  textArea.value = getDisplayResult();
  textArea.scrollTop = textArea.scrollHeight;  // auto scroll
};
// Build the text for the results textarea: each 'Speech detected' event gets
// a running index, and its duration entry is appended on the same line.
// A still-pending lastResult is shown with the next index.
function getDisplayResult() {
  let index = 0;
  let text = '';

  for (const entry of resultList) {
    if (entry === '') {
      continue;
    }

    if (entry === 'Speech detected') {
      text += '' + index + ': ' + entry;
      index += 1;
    } else {
      text += ', ' + entry + '\n';
    }
  }

  if (lastResult.length > 0) {
    text += '' + index + ': ' + lastResult + '\n';
  }

  return text;
}
Module = {};

// Emscripten hook: invoked once the WASM runtime (and the preloaded model
// file) is ready; only then can the VAD and its sample buffer be created.
Module.onRuntimeInitialized = function() {
  console.log('inited!');
  hint.innerText = 'Model loaded! Please click start';
  startBtn.disabled = false;

  vad = createVad(Module);
  console.log('vad is created!', vad);

  // 30 * 16000: 30 seconds of audio at 16 kHz (expectedSampleRate).
  buffer = new CircularBuffer(30 * 16000, Module);
  console.log('CircularBuffer is created!', buffer);
};
let audioCtx;
let mediaStream;

let expectedSampleRate = 16000;  // sample rate fed to the VAD
let recordSampleRate;            // the sampleRate of the microphone

let recorder = null;   // the microphone
let leftchannel = [];  // TODO: Use a single channel

let recordingLength = 0;  // number of samples so far

let vad = null;     // created in Module.onRuntimeInitialized
let buffer = null;  // created in Module.onRuntimeInitialized
// true while 'Speech detected' has already been emitted for the current
// speech segment, to avoid emitting it repeatedly.
let printed = false;
// Microphone capture pipeline: getUserMedia -> ScriptProcessorNode ->
// downsample to 16 kHz -> circular buffer -> VAD, window by window.
if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext({sampleRate: expectedSampleRate});
    }
    console.log(audioCtx);
    // The browser may ignore the requested sample rate; remember the real one
    // so downsampleBuffer() can convert to expectedSampleRate.
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log('media stream', mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 4096;
    var numberOfInputChannels = 1;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      // Legacy fallback for browsers without createScriptProcessor.
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log('recorder', recorder);

    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);

      buffer.push(samples);
      // Feed the VAD one model window at a time.
      while (buffer.size() > vad.config.sileroVad.windowSize) {
        const s = buffer.get(buffer.head(), vad.config.sileroVad.windowSize);
        vad.acceptWaveform(s);
        buffer.pop(vad.config.sileroVad.windowSize);

        if (vad.isDetected() && !printed) {
          printed = true;
          lastResult = 'Speech detected';
        }

        if (!vad.isDetected()) {
          // Speech segment ended: commit the pending line.
          printed = false;
          if (lastResult != '') {
            resultList.push(lastResult);
          }
          lastResult = '';
        }

        // Drain completed speech segments from the VAD.
        while (!vad.isEmpty()) {
          const segment = vad.front();
          const duration = segment.samples.length / expectedSampleRate;
          const durationStr = `Duration: ${duration.toFixed(3)} seconds`;
          resultList.push(durationStr);
          vad.pop();

          // now save the segment to a wav file
          // Convert float samples in [-1, 1] to 16-bit PCM (with clipping).
          let buf = new Int16Array(segment.samples.length);
          for (var i = 0; i < segment.samples.length; ++i) {
            let s = segment.samples[i];
            if (s >= 1)
              s = 1;
            else if (s <= -1)
              s = -1;

            buf[i] = s * 32767;
          }

          let clipName = new Date().toISOString() + '--' + durationStr;

          // Build a playable clip entry with a rename-on-click label and a
          // delete button, appended to the sound-clips section.
          const clipContainer = document.createElement('article');
          const clipLabel = document.createElement('p');
          const audio = document.createElement('audio');
          const deleteButton = document.createElement('button');
          clipContainer.classList.add('clip');
          audio.setAttribute('controls', '');
          deleteButton.textContent = 'Delete';
          deleteButton.className = 'delete';
          clipLabel.textContent = clipName;

          clipContainer.appendChild(audio);
          clipContainer.appendChild(clipLabel);
          clipContainer.appendChild(deleteButton);
          soundClips.appendChild(clipContainer);

          audio.controls = true;
          const blob = toWav(buf);
          leftchannel = [];

          const audioURL = window.URL.createObjectURL(blob);
          audio.src = audioURL;

          deleteButton.onclick = function(e) {
            let evtTgt = e.target;
            evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
          };

          clipLabel.onclick = function() {
            const existingName = clipLabel.textContent;
            const newClipName = prompt('Enter a new name for your sound clip?');
            if (newClipName === null) {
              clipLabel.textContent = existingName;
            } else {
              clipLabel.textContent = newClipName;
            }
          };
        }
      }

      textArea.value = getDisplayResult();
      textArea.scrollTop = textArea.scrollHeight;  // auto scroll
    };

    startBtn.onclick = function() {
      mediaStream.connect(recorder);
      recorder.connect(audioCtx.destination);
      console.log('recorder started');

      stopBtn.disabled = false;
      startBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      vad.reset();
      buffer.reset();
      console.log('recorder stopped');

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);

      startBtn.style.background = '';
      startBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      startBtn.disabled = false;
    };
  };

  let onError = function(err) {
    console.log('The following error occured: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
//
// Concatenate a list of sample arrays into one Int16Array.
function flatten(listOfSamples) {
  let total = 0;
  for (const chunk of listOfSamples) {
    total += chunk.length;
  }

  const out = new Int16Array(total);

  let pos = 0;
  for (const chunk of listOfSamples) {
    out.set(chunk, pos);
    pos += chunk.length;
  }

  return out;
}
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
//
// Wrap 16-bit PCM samples (an Int16Array) in a WAV container and return it
// as a Blob. Output is mono, 16-bit, at expectedSampleRate Hz.
function toWav(samples) {
  // 44-byte RIFF/WAVE header followed by the raw PCM data.
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // http://soundfile.sapp.org/doc/WaveFormat/
  // ASCII tags are written as little-endian u32, hence the reversed bytes:
  // F F I R
  view.setUint32(0, 0x46464952, true);               // chunkID: "RIFF"
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  // E V A W
  view.setUint32(8, 0x45564157, true);  // format: "WAVE"
  //
  //  t m f
  view.setUint32(12, 0x20746d66, true);          // subchunk1ID: "fmt "
  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);  // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate (mono, 2 bytes/sample)
  view.setUint16(32, 2, true);                   // blockAlign
  view.setUint16(34, 16, true);                  // bitsPerSample
  view.setUint32(36, 0x61746164, true);          // Subchunk2ID: "data"
  view.setUint32(40, samples.length * 2, true);  // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}
// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
//
// Downsample a Float32Array recorded at recordSampleRate to exportSampleRate
// by averaging each run of source samples. Returns the input unchanged when
// the rates already match.
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }

  const ratio = recordSampleRate / exportSampleRate;
  const outLength = Math.round(buffer.length / ratio);
  const out = new Float32Array(outLength);

  let outPos = 0;
  let srcPos = 0;
  while (outPos < out.length) {
    // Average all source samples that map onto this output sample.
    const srcEnd = Math.round((outPos + 1) * ratio);
    let sum = 0;
    let count = 0;
    for (let i = srcPos; i < srcEnd && i < buffer.length; i++) {
      sum += buffer[i];
      count++;
    }
    out[outPos] = sum / count;
    outPos++;
    srcPos = srcEnd;
  }

  return out;
}
... ...
# Introduction
Please download
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`.
... ...
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width" />
    <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD</title>
<style>
h1,div {
text-align: center;
}
textarea {
width:100%;
}
</style>
</head>
<body>
<h1>
Next-gen Kaldi + WebAssembly<br/>
VAD Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
(with <a href="https://github.com/snakers4/silero-vad">silero-vad</a>)
</h1>
<div>
<span id="hint">Loading model ... ...</span>
<br/>
<br/>
<button id="startBtn" disabled>Start</button>
<button id="stopBtn" disabled>Stop</button>
<button id="clearBtn">Clear</button>
<br/>
<br/>
<textarea id="results" rows="10" readonly></textarea>
</div>
<section flex="1" overflow="auto" id="sound-clips">
</section>
<script src="sherpa-onnx-vad.js"></script>
<script src="app-vad.js"></script>
<script src="sherpa-onnx-wasm-main-vad.js"></script>
</body>
... ...
// Release every WASM-heap allocation owned by a config record created by the
// init* helpers below; recurses into the nested sileroVad record if present.
function freeConfig(cfg, Module) {
  if ('buffer' in cfg) {
    Module._free(cfg.buffer);
  }

  if ('sileroVad' in cfg) {
    freeConfig(cfg.sileroVad, Module);
  }

  Module._free(cfg.ptr);
}
// The user should free the returned pointers
//
// Serialize a JS silero-vad config into a SherpaOnnxSileroVadModelConfig
// struct on the WASM heap (5 fields x 4 bytes; kept in sync with the
// static_assert in sherpa-onnx-wasm-main-vad.cc). Release the returned
// {buffer, ptr} with freeConfig().
function initSherpaOnnxSileroVadModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;
  const n = modelLen;

  const buffer = Module._malloc(n);

  const len = 5 * 4;  // 5 fields, 4 bytes each
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, modelLen);

  // Fix: 'offset' was assigned without a declaration, leaking a global
  // variable (and throwing a ReferenceError in strict mode).
  let offset = 0;
  Module.setValue(ptr, buffer, 'i8*');  // model (char*)
  offset += 4;

  Module.setValue(ptr + offset, config.threshold || 0.5, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minSilenceDuration || 0.5, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minSpeechDuration || 0.25, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.windowSize || 512, 'i32');
  offset += 4;

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
// Serialize a JS VAD config into a SherpaOnnxVadModelConfig struct on the
// WASM heap. Layout: the nested SherpaOnnxSileroVadModelConfig first,
// followed by sample_rate (i32), num_threads (i32), provider (char*) and
// debug (i32) — kept in sync with the static_assert in
// sherpa-onnx-wasm-main-vad.cc. Release the result with freeConfig().
function initSherpaOnnxVadModelConfig(config, Module) {
  // Fill in a default silero-vad sub-config when the caller omitted it.
  if (!('sileroVad' in config)) {
    config.sileroVad = {
      model: '',
      threshold: 0.50,
      minSilenceDuration: 0.50,
      minSpeechDuration: 0.25,
      windowSize: 512,
    };
  }

  const sileroVad =
      initSherpaOnnxSileroVadModelConfig(config.sileroVad, Module);

  const len = sileroVad.len + 4 * 4;  // nested struct + 4 scalar fields
  const ptr = Module._malloc(len);

  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  const buffer = Module._malloc(providerLen);
  Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen);

  let offset = 0;
  // Copy the already-serialized nested struct into place (CopyHeap is the
  // exported C helper in sherpa-onnx-wasm-main-vad.cc).
  Module._CopyHeap(sileroVad.ptr, sileroVad.len, ptr + offset);
  offset += sileroVad.len;

  Module.setValue(ptr + offset, config.sampleRate || 16000, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer, 'i8*');  // provider
  offset += 4;

  Module.setValue(ptr + offset, config.debug || 0, 'i32');
  offset += 4;

  return {
    buffer: buffer, ptr: ptr, len: len, sileroVad: sileroVad,
  }
}
// Build a Vad from myConfig when given; otherwise use a default configuration
// that loads ./silero_vad.onnx (bundled into the WASM .data file).
function createVad(Module, myConfig) {
  const defaults = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.50,
      minSilenceDuration: 0.50,
      minSpeechDuration: 0.25,
      windowSize: 512,
    },
    sampleRate: 16000,
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
    bufferSizeInSeconds: 30,
  };

  return new Vad(myConfig || defaults, Module);
}
// Thin JS wrapper around the sherpa-onnx C-API circular buffer living on the
// WASM heap; used to accumulate microphone samples until a full VAD window
// is available.
class CircularBuffer {
  // capacity: number of float32 samples the buffer can hold.
  constructor(capacity, Module) {
    this.handle = Module._SherpaOnnxCreateCircularBuffer(capacity);
    this.Module = Module;
  }

  // Destroy the underlying C object; the instance must not be used afterwards.
  free() {
    this.Module._SherpaOnnxDestroyCircularBuffer(this.handle);
    this.handle = 0
  }

  /**
   * @param samples {Float32Array}
   */
  push(samples) {
    // Copy into a temporary WASM-heap allocation, hand it to the C side,
    // then free the temporary.
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
    this.Module._SherpaOnnxCircularBufferPush(
        this.handle, pointer, samples.length);
    this.Module._free(pointer);
  }

  // Return n samples starting at startIndex as a Float32Array copy.
  get(startIndex, n) {
    const p =
        this.Module._SherpaOnnxCircularBufferGet(this.handle, startIndex, n);
    const samplesPtr = p / 4;  // byte address -> HEAPF32 index
    const samples = new Float32Array(n);
    for (let i = 0; i < n; i++) {
      samples[i] = this.Module.HEAPF32[samplesPtr + i];
    }
    this.Module._SherpaOnnxCircularBufferFree(p);
    return samples;
  }

  // Discard n samples from the front of the buffer.
  pop(n) {
    this.Module._SherpaOnnxCircularBufferPop(this.handle, n);
  }

  // Number of samples currently stored.
  size() {
    return this.Module._SherpaOnnxCircularBufferSize(this.handle);
  }

  // Index of the first (oldest) sample; exact semantics are defined by the
  // C API — used together with get() above.
  head() {
    return this.Module._SherpaOnnxCircularBufferHead(this.handle);
  }

  // Remove all samples.
  reset() {
    this.Module._SherpaOnnxCircularBufferReset(this.handle);
  }
}
// JS wrapper around the sherpa-onnx C-API voice activity detector.
class Vad {
  // configObj: plain JS config (see createVad for its shape); it is kept on
  // this.config so callers can read e.g. config.sileroVad.windowSize.
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxVadModelConfig(configObj, Module);
    // Print the marshalled config from the C side for debugging.
    Module._MyPrint(config.ptr);
    const handle = Module._SherpaOnnxCreateVoiceActivityDetector(
        config.ptr, configObj.bufferSizeInSeconds || 30);
    // The serialized config is freed immediately after creation.
    freeConfig(config, Module);
    this.handle = handle;
    this.Module = Module;
  }

  // Destroy the underlying C object; the instance must not be used afterwards.
  free() {
    this.Module._SherpaOnnxDestroyVoiceActivityDetector(this.handle);
    this.handle = 0
  }

  // samples is a float32 array
  acceptWaveform(samples) {
    // Copy into a temporary WASM-heap allocation, hand it to the C side,
    // then free the temporary.
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
    this.Module._SherpaOnnxVoiceActivityDetectorAcceptWaveform(
        this.handle, pointer, samples.length);
    this.Module._free(pointer);
  }

  // true when no completed speech segment is queued.
  isEmpty() {
    return this.Module._SherpaOnnxVoiceActivityDetectorEmpty(this.handle) == 1;
  }

  // true while the detector currently sees speech.
  isDetected() {
    return this.Module._SherpaOnnxVoiceActivityDetectorDetected(this.handle) ==
        1;
  }

  // Remove the front speech segment from the queue.
  pop() {
    this.Module._SherpaOnnxVoiceActivityDetectorPop(this.handle);
  }

  clear() {
    this.Module._SherpaOnnxVoiceActivityDetectorClear(this.handle);
  }

  /*
  Return the front speech segment as
  {
    samples: a 1-d float32 array,
    start: an int32
  }
  The C struct is read as three 4-byte fields: start, samples pointer, count.
  */
  front() {
    const h = this.Module._SherpaOnnxVoiceActivityDetectorFront(this.handle);
    const start = this.Module.HEAP32[h / 4];
    const samplesPtr = this.Module.HEAP32[h / 4 + 1] / 4;
    const numSamples = this.Module.HEAP32[h / 4 + 2];
    const samples = new Float32Array(numSamples);
    for (let i = 0; i < numSamples; i++) {
      samples[i] = this.Module.HEAPF32[samplesPtr + i];
    }
    this.Module._SherpaOnnxDestroySpeechSegment(h);
    return {samples: samples, start: start};
  }

  reset() {
    this.Module._SherpaOnnxVoiceActivityDetectorReset(this.handle);
  }

  flush() {
    this.Module._SherpaOnnxVoiceActivityDetectorFlush(this.handle);
  }
};
// When running under Node, expose the public helpers via CommonJS exports;
// in the browser this file only defines globals.
if (typeof process == 'object' && typeof process.versions == 'object' &&
    typeof process.versions.node == 'string') {
  module.exports = {
    createVad,
    CircularBuffer,
  };
}
... ...
// wasm/sherpa-onnx-wasm-main-vad.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <stdio.h>
#include <algorithm>
#include <memory>
#include "sherpa-onnx/c-api/c-api.h"
// see also
// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html
extern "C" {
// These asserts pin the C struct sizes that the JavaScript side
// (sherpa-onnx-vad.js) mirrors with hard-coded 4-byte offsets; if a field is
// ever added to either struct, the JS serialization code must change too.
static_assert(sizeof(SherpaOnnxSileroVadModelConfig) == 5 * 4, "");
static_assert(sizeof(SherpaOnnxVadModelConfig) ==
                  sizeof(SherpaOnnxSileroVadModelConfig) + 4 * 4,
              "");
// Print a SherpaOnnxVadModelConfig to stdout. Exported to JavaScript so the
// config marshalled on the JS side can be inspected for debugging.
void MyPrint(SherpaOnnxVadModelConfig *config) {
  auto silero_vad = &config->silero_vad;

  fprintf(stdout, "----------silero_vad config----------\n");
  fprintf(stdout, "model: %s\n", silero_vad->model);
  fprintf(stdout, "threshold: %.3f\n", silero_vad->threshold);
  fprintf(stdout, "min_silence_duration: %.3f\n",
          silero_vad->min_silence_duration);
  fprintf(stdout, "min_speech_duration: %.3f\n",
          silero_vad->min_speech_duration);
  fprintf(stdout, "window_size: %d\n", silero_vad->window_size);

  fprintf(stdout, "----------config----------\n");
  fprintf(stdout, "sample_rate: %d\n", config->sample_rate);
  fprintf(stdout, "num_threads: %d\n", config->num_threads);
  fprintf(stdout, "provider: %s\n", config->provider);
  fprintf(stdout, "debug: %d\n", config->debug);
}
// Copy num_bytes bytes from src to dst. Exported to JavaScript so nested C
// structs can be assembled byte-wise on the WASM heap.
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  for (int32_t i = 0; i < num_bytes; ++i) {
    dst[i] = src[i];
  }
}
}
... ...