Fangjun Kuang
Committed by GitHub

WebAssembly example for VAD + Non-streaming ASR (#1284)

@@ -25,8 +25,12 @@ jobs: @@ -25,8 +25,12 @@ jobs:
25 - uses: actions/checkout@v4 25 - uses: actions/checkout@v4
26 with: 26 with:
27 fetch-depth: 0 27 fetch-depth: 0
  28 +
28 - name: Install emsdk 29 - name: Install emsdk
29 uses: mymindstorm/setup-emsdk@v14 30 uses: mymindstorm/setup-emsdk@v14
  31 + with:
  32 + version: 3.1.51
  33 + actions-cache-folder: 'emsdk-cache'
30 34
31 - name: View emsdk version 35 - name: View emsdk version
32 shell: bash 36 shell: bash
@@ -27,6 +27,9 @@ jobs: @@ -27,6 +27,9 @@ jobs:
27 fetch-depth: 0 27 fetch-depth: 0
28 - name: Install emsdk 28 - name: Install emsdk
29 uses: mymindstorm/setup-emsdk@v14 29 uses: mymindstorm/setup-emsdk@v14
  30 + with:
  31 + version: 3.1.51
  32 + actions-cache-folder: 'emsdk-cache'
30 33
31 - name: View emsdk version 34 - name: View emsdk version
32 shell: bash 35 shell: bash
@@ -25,8 +25,12 @@ jobs: @@ -25,8 +25,12 @@ jobs:
25 - uses: actions/checkout@v4 25 - uses: actions/checkout@v4
26 with: 26 with:
27 fetch-depth: 0 27 fetch-depth: 0
  28 +
28 - name: Install emsdk 29 - name: Install emsdk
29 uses: mymindstorm/setup-emsdk@v14 30 uses: mymindstorm/setup-emsdk@v14
  31 + with:
  32 + version: 3.1.51
  33 + actions-cache-folder: 'emsdk-cache'
30 34
31 - name: View emsdk version 35 - name: View emsdk version
32 shell: bash 36 shell: bash
@@ -25,6 +25,7 @@ jobs: @@ -25,6 +25,7 @@ jobs:
25 - uses: actions/checkout@v4 25 - uses: actions/checkout@v4
26 with: 26 with:
27 fetch-depth: 0 27 fetch-depth: 0
  28 +
28 - name: Install emsdk 29 - name: Install emsdk
29 uses: mymindstorm/setup-emsdk@v14 30 uses: mymindstorm/setup-emsdk@v14
30 with: 31 with:
  1 +name: wasm-simd-hf-space-vad-asr
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - wasm
  7 + tags:
  8 + - 'v[0-9]+.[0-9]+.[0-9]+*'
  9 +
  10 + workflow_dispatch:
  11 +
  12 +concurrency:
  13 + group: wasm-simd-hf-space-vad-asr${{ github.ref }}
  14 + cancel-in-progress: true
  15 +
  16 +jobs:
  17 + wasm-simd-hf-space-vad-asr:
  18 + name: ${{ matrix.index }}/${{ matrix.total }}
  19 + runs-on: ${{ matrix.os }}
  20 + strategy:
  21 + fail-fast: false
  22 + matrix:
  23 + os: [ubuntu-latest]
  24 + total: ["8"]
  25 + index: ["0", "1", "2", "3", "4", "5", "6", "7"]
  26 +
  27 + steps:
  28 + - uses: actions/checkout@v4
  29 + with:
  30 + fetch-depth: 0
  31 +
  32 + - name: Install Python dependencies
  33 + shell: bash
  34 + run: |
  35 + python3 -m pip install --upgrade pip jinja2
  36 +
  37 + - name: Install emsdk
  38 + uses: mymindstorm/setup-emsdk@v14
  39 + with:
  40 + version: 3.1.51
  41 + actions-cache-folder: 'emsdk-cache'
  42 +
  43 + - name: View emsdk version
  44 + shell: bash
  45 + run: |
  46 + emcc -v
  47 + echo "--------------------"
  48 + emcc --check
  49 +
  50 + - name: Generate build script
  51 + shell: bash
  52 + run: |
  53 + cd scripts/wasm
  54 +
  55 + total=${{ matrix.total }}
  56 + index=${{ matrix.index }}
  57 +
  58 + ./generate-vad-asr.py --total $total --index $index
  59 +
  60 + chmod +x run-vad-asr.sh
  61 + mv -v ./run-vad-asr.sh ../..
  62 +
  63 + - name: Show build scripts
  64 + shell: bash
  65 + run: |
  66 + cat ./run-vad-asr.sh
  67 +
  68 + - uses: actions/upload-artifact@v4
  69 + with:
  70 + name: run-vad-asr-${{ matrix.index }}
  71 + path: ./run-vad-asr.sh
  72 +
  73 + - name: Build sherpa-onnx for WebAssembly
  74 + shell: bash
  75 + env:
  76 + MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
  77 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  78 + run: |
  79 + ./run-vad-asr.sh
  80 +
  81 + - name: Release wasm files
  82 + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
  83 + uses: svenstaro/upload-release-action@v2
  84 + with:
  85 + file_glob: true
  86 + overwrite: true
  87 + file: ./*.tar.bz2
  88 +
  89 + - name: Upload wasm files
  90 + uses: actions/upload-artifact@v4
  91 + with:
  92 + name: sherpa-onnx-wasm-simd-vad-asr-${{ matrix.index }}
  93 + path: ./sherpa-onnx-wasm-simd-*.tar.bz2
@@ -25,8 +25,12 @@ jobs: @@ -25,8 +25,12 @@ jobs:
25 - uses: actions/checkout@v4 25 - uses: actions/checkout@v4
26 with: 26 with:
27 fetch-depth: 0 27 fetch-depth: 0
  28 +
28 - name: Install emsdk 29 - name: Install emsdk
29 uses: mymindstorm/setup-emsdk@v14 30 uses: mymindstorm/setup-emsdk@v14
  31 + with:
  32 + version: 3.1.51
  33 + actions-cache-folder: 'emsdk-cache'
30 34
31 - name: View emsdk version 35 - name: View emsdk version
32 shell: bash 36 shell: bash
@@ -25,8 +25,12 @@ jobs: @@ -25,8 +25,12 @@ jobs:
25 - uses: actions/checkout@v4 25 - uses: actions/checkout@v4
26 with: 26 with:
27 fetch-depth: 0 27 fetch-depth: 0
  28 +
28 - name: Install emsdk 29 - name: Install emsdk
29 uses: mymindstorm/setup-emsdk@v14 30 uses: mymindstorm/setup-emsdk@v14
  31 + with:
  32 + version: 3.1.51
  33 + actions-cache-folder: 'emsdk-cache'
30 34
31 - name: View emsdk version 35 - name: View emsdk version
32 shell: bash 36 shell: bash
@@ -25,8 +25,12 @@ jobs: @@ -25,8 +25,12 @@ jobs:
25 - uses: actions/checkout@v4 25 - uses: actions/checkout@v4
26 with: 26 with:
27 fetch-depth: 0 27 fetch-depth: 0
  28 +
28 - name: Install emsdk 29 - name: Install emsdk
29 uses: mymindstorm/setup-emsdk@v14 30 uses: mymindstorm/setup-emsdk@v14
  31 + with:
  32 + version: 3.1.51
  33 + actions-cache-folder: 'emsdk-cache'
30 34
31 - name: View emsdk version 35 - name: View emsdk version
32 shell: bash 36 shell: bash
@@ -36,6 +36,7 @@ option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF) @@ -36,6 +36,7 @@ option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
36 option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) 36 option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
37 option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF) 37 option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
38 option(SHERPA_ONNX_ENABLE_WASM_VAD "Whether to enable WASM for VAD" OFF) 38 option(SHERPA_ONNX_ENABLE_WASM_VAD "Whether to enable WASM for VAD" OFF)
  39 +option(SHERPA_ONNX_ENABLE_WASM_VAD_ASR "Whether to enable WASM for VAD+ASR" OFF)
39 option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF) 40 option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF)
40 option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) 41 option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
41 option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON) 42 option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON)
@@ -137,6 +138,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") @@ -137,6 +138,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
137 message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") 138 message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
138 message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}") 139 message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}")
139 message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD ${SHERPA_ONNX_ENABLE_WASM_VAD}") 140 message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD ${SHERPA_ONNX_ENABLE_WASM_VAD}")
  141 +message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD_ASR ${SHERPA_ONNX_ENABLE_WASM_VAD_ASR}")
140 message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}") 142 message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}")
141 message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}") 143 message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}")
142 message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}") 144 message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}")
@@ -211,11 +213,22 @@ if(SHERPA_ONNX_ENABLE_WASM) @@ -211,11 +213,22 @@ if(SHERPA_ONNX_ENABLE_WASM)
211 endif() 213 endif()
212 214
213 if(SHERPA_ONNX_ENABLE_WASM_KWS) 215 if(SHERPA_ONNX_ENABLE_WASM_KWS)
  216 + if(NOT SHERPA_ONNX_ENABLE_WASM)
  217 + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for KWS")
  218 + endif()
214 add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1) 219 add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
215 endif() 220 endif()
216 221
217 if(SHERPA_ONNX_ENABLE_WASM_VAD) 222 if(SHERPA_ONNX_ENABLE_WASM_VAD)
218 - add_definitions(-DSHERPA_ONNX_ENABLE_WASM_VAD=1) 223 + if(NOT SHERPA_ONNX_ENABLE_WASM)
  224 + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for VAD")
  225 + endif()
  226 +endif()
  227 +
  228 +if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
  229 + if(NOT SHERPA_ONNX_ENABLE_WASM)
  230 + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for VAD+ASR")
  231 + endif()
219 endif() 232 endif()
220 233
221 if(NOT CMAKE_CXX_STANDARD) 234 if(NOT CMAKE_CXX_STANDARD)
@@ -15,7 +15,7 @@ @@ -15,7 +15,7 @@
15 ### Supported platforms 15 ### Supported platforms
16 16
17 |Architecture| Android | iOS | Windows | macOS | linux | 17 |Architecture| Android | iOS | Windows | macOS | linux |
18 -|------------|------------------|---------------|------------|-------|-------| 18 +|------------|---------|---------|------------|-------|-------|
19 | x64 | ✔️ | | ✔️ | ✔️ | ✔️ | 19 | x64 | ✔️ | | ✔️ | ✔️ | ✔️ |
20 | x86 | ✔️ | | ✔️ | | | 20 | x86 | ✔️ | | ✔️ | | |
21 | arm64 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | 21 | arm64 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
@@ -37,7 +37,7 @@ @@ -37,7 +37,7 @@
37 |-------|----------|----------|------------| 37 |-------|----------|----------|------------|
38 | ✔️ | ✔️ | ✔️ | ✔️ | 38 | ✔️ | ✔️ | ✔️ | ✔️ |
39 39
40 -For Rust support, please see https://github.com/thewh1teagle/sherpa-rs 40 +For Rust support, please see [sherpa-rs][sherpa-rs]
41 41
42 It also supports WebAssembly. 42 It also supports WebAssembly.
43 43
@@ -51,7 +51,7 @@ This repository supports running the following functions **locally** @@ -51,7 +51,7 @@ This repository supports running the following functions **locally**
51 - Speaker verification 51 - Speaker verification
52 - Spoken language identification 52 - Spoken language identification
53 - Audio tagging 53 - Audio tagging
54 - - VAD (e.g., [silero-vad](https://github.com/snakers4/silero-vad)) 54 + - VAD (e.g., [silero-vad][silero-vad])
55 - Keyword spotting 55 - Keyword spotting
56 56
57 on the following platforms and operating systems: 57 on the following platforms and operating systems:
@@ -62,11 +62,12 @@ on the following platforms and operating systems: @@ -62,11 +62,12 @@ on the following platforms and operating systems:
62 - iOS 62 - iOS
63 - NodeJS 63 - NodeJS
64 - WebAssembly 64 - WebAssembly
65 - - [Raspberry Pi](https://www.raspberrypi.com/)  
66 - - [RV1126](https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf)  
67 - - [LicheePi4A](https://sipeed.com/licheepi4a)  
68 - - [VisionFive 2](https://www.starfivetech.com/en/site/boards)  
69 - - [旭日X3派](https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html) 65 + - [Raspberry Pi][Raspberry Pi]
  66 + - [RV1126][RV1126]
  67 + - [LicheePi4A][LicheePi4A]
  68 + - [VisionFive 2][VisionFive 2]
  69 + - [旭日X3派][旭日X3派]
  70 + - [爱芯派][爱芯派]
70 - etc 71 - etc
71 72
72 with the following APIs 73 with the following APIs
@@ -82,58 +83,67 @@ You can visit the following Huggingface spaces to try `sherpa-onnx` without @@ -82,58 +83,67 @@ You can visit the following Huggingface spaces to try `sherpa-onnx` without
82 installing anything. All you need is a browser. 83 installing anything. All you need is a browser.
83 84
84 | Description | URL | 85 | Description | URL |
85 -|---|---|  
86 -| Speech recognition | [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition)|  
87 -| Speech recognition with [Whisper](https://github.com/openai/whisper)| [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper)|  
88 -| Speech synthesis | [Click me](https://huggingface.co/spaces/k2-fsa/text-to-speech)|  
89 -| Generate subtitles| [Click me](https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos)|  
90 -|Audio tagging| [Click me](https://huggingface.co/spaces/k2-fsa/audio-tagging)|  
91 -|Spoken language identification with [Whisper](https://github.com/openai/whisper)|[Click me](https://huggingface.co/spaces/k2-fsa/spoken-language-identification)| 86 +|-------------------------------------------------------|------------------------------------|
  87 +| Speech recognition | [Click me][hf-space-asr] |
  88 +| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] |
  89 +| Speech synthesis | [Click me][hf-space-tts] |
  90 +| Generate subtitles | [Click me][hf-space-subtitle] |
  91 +| Audio tagging | [Click me][hf-space-audio-tagging] |
  92 +| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] |
92 93
93 We also have spaces built using WebAssembly. They are listed below: 94 We also have spaces built using WebAssembly. They are listed below:
94 95
95 -| Description | URL| Chinese users|  
96 -|---|---|---|  
97 -|Voice activity detection with [silero-vad](https://github.com/snakers4/silero-vad)| [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx)|[地址](https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx)|  
98 -|Real-time speech recognition (Chinese + English) with Zipformer | [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|  
99 -|Real-time speech recognition (Chinese + English) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)|  
100 -|Real-time speech recognition (Chinese + English + Cantonese) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)|  
101 -|Real-time speech recognition (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en)|  
102 -|Speech synthesis (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en)|  
103 -|Speech synthesis (German)|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de)| 96 +| Description | Huggingface space| ModelScope space|
  97 +|------------------------------------------------------------------------------------------|------------------|-----------------|
  98 +|Voice activity detection with [silero-vad][silero-vad] | [Click me][wasm-hf-vad]|[地址][wasm-ms-vad]|
  99 +|Real-time speech recognition (Chinese + English) with Zipformer | [Click me][wasm-hf-streaming-asr-zh-en-zipformer]|[地址][wasm-ms-streaming-asr-zh-en-zipformer]|
  100 +|Real-time speech recognition (Chinese + English) with Paraformer |[Click me][wasm-hf-streaming-asr-zh-en-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-paraformer]|
  101 +|Real-time speech recognition (Chinese + English + Cantonese) with [Paraformer-large][Paraformer-large]|[Click me][wasm-hf-streaming-asr-zh-en-yue-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-yue-paraformer]|
  102 +|Real-time speech recognition (English) |[Click me][wasm-hf-streaming-asr-en-zipformer] |[地址][wasm-ms-streaming-asr-en-zipformer]|
  103 +|VAD + speech recognition (Chinese + English + Korean + Japanese + Cantonese) with [SenseVoice][SenseVoice]|[Click me][wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]| [地址][wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]|
  104 +|VAD + speech recognition (English) with [Whisper][Whisper] tiny.en|[Click me][wasm-hf-vad-asr-en-whisper-tiny-en]| [地址][wasm-ms-vad-asr-en-whisper-tiny-en]|
  105 +|VAD + speech recognition (English) with Zipformer trained with [GigaSpeech][GigaSpeech] |[Click me][wasm-hf-vad-asr-en-zipformer-gigaspeech]| [地址][wasm-ms-vad-asr-en-zipformer-gigaspeech]|
  106 +|VAD + speech recognition (Chinese) with Zipformer trained with [WenetSpeech][WenetSpeech] |[Click me][wasm-hf-vad-asr-zh-zipformer-wenetspeech]| [地址][wasm-ms-vad-asr-zh-zipformer-wenetspeech]|
  107 +|VAD + speech recognition (Japanese) with Zipformer trained with [ReazonSpeech][ReazonSpeech]|[Click me][wasm-hf-vad-asr-ja-zipformer-reazonspeech]| [地址][wasm-ms-vad-asr-ja-zipformer-reazonspeech]|
  108 +|VAD + speech recognition (Thai) with Zipformer trained with [GigaSpeech2][GigaSpeech2] |[Click me][wasm-hf-vad-asr-th-zipformer-gigaspeech2]| [地址][wasm-ms-vad-asr-th-zipformer-gigaspeech2]|
  109 +|VAD + speech recognition (Chinese 多种方言) with a [TeleSpeech-ASR][TeleSpeech-ASR] CTC model|[Click me][wasm-hf-vad-asr-zh-telespeech]| [地址][wasm-ms-vad-asr-zh-telespeech]|
  110 +|VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-large |[Click me][wasm-hf-vad-asr-zh-en-paraformer-large]| [地址][wasm-ms-vad-asr-zh-en-paraformer-large]|
  111 +|VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-small |[Click me][wasm-hf-vad-asr-zh-en-paraformer-small]| [地址][wasm-ms-vad-asr-zh-en-paraformer-small]|
  112 +|Speech synthesis (English) |[Click me][wasm-hf-tts-piper-en]| [地址][wasm-ms-tts-piper-en]|
  113 +|Speech synthesis (German) |[Click me][wasm-hf-tts-piper-de]| [地址][wasm-ms-tts-piper-de]|
104 114
105 ### Links for pre-built Android APKs 115 ### Links for pre-built Android APKs
106 116
107 | Description | URL | 中国用户 | 117 | Description | URL | 中国用户 |
108 -|--------------------------------|-----------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------|  
109 -| Streaming speech recognition | [Address](https://k2-fsa.github.io/sherpa/onnx/android/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html) |  
110 -| Text-to-speech | [Address](https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine-cn.html) |  
111 -|Voice activity detection (VAD) | [Address](https://k2-fsa.github.io/sherpa/onnx/vad/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/vad/apk-cn.html)|  
112 -|VAD + non-streaming speech recognition| [Address](https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr-cn.html)|  
113 -|Two-pass speech recognition| [Address](https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass-cn.html)|  
114 -| Audio tagging | [Address](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-cn.html) |  
115 -| Audio tagging (WearOS) | [Address](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos-cn.html) |  
116 -| Speaker identification | [Address](https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk-cn.html) |  
117 -| Spoken language identification | [Address](https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk-cn.html) |  
118 -|Keyword spotting| [Address](https://k2-fsa.github.io/sherpa/onnx/kws/apk.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/kws/apk-cn.html)| 118 +|----------------------------------------|------------------------------|-----------------------------|
  119 +| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn]|
  120 +| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] |
  121 +| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] |
  122 +| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] |
  123 +| Two-pass speech recognition | [Address][apk-2pass] | [点此][apk-2pass-cn] |
  124 +| Audio tagging | [Address][apk-at] | [点此][apk-at-cn] |
  125 +| Audio tagging (WearOS) | [Address][apk-at-wearos] | [点此][apk-at-wearos-cn] |
  126 +| Speaker identification | [Address][apk-sid] | [点此][apk-sid-cn] |
  127 +| Spoken language identification | [Address][apk-slid] | [点此][apk-slid-cn] |
  128 +| Keyword spotting | [Address][apk-kws] | [点此][apk-kws-cn] |
119 129
120 ### Links for pre-built Flutter APPs 130 ### Links for pre-built Flutter APPs
121 131
122 #### Real-time speech recognition 132 #### Real-time speech recognition
123 133
124 | Description | URL | 中国用户 | 134 | Description | URL | 中国用户 |
125 -|--------------------------------|---------------------------------------------------------------------|---------------------------------------------------------------------|  
126 -| Streaming speech recognition | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app-cn.html)| 135 +|--------------------------------|-------------------------------------|-------------------------------------|
  136 +| Streaming speech recognition | [Address][apk-flutter-streaming-asr]| [点此][apk-flutter-streaming-asr-cn]|
127 137
128 #### Text-to-speech 138 #### Text-to-speech
129 139
130 | Description | URL | 中国用户 | 140 | Description | URL | 中国用户 |
131 -|--------------------------------|--------------------------------------------------------------|-----------------------------------------------------------------------------|  
132 -| Android (arm64-v8a, armeabi-v7a, x86_64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android-cn.html)|  
133 -| Linux (x64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux-cn.html) |  
134 -| macOS (x64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64-cn.html) |  
135 -| macOS (arm64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64-cn.html)|  
136 -| Windows (x64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win-cn.html) | 141 +|------------------------------------------|------------------------------------|------------------------------------|
  142 +| Android (arm64-v8a, armeabi-v7a, x86_64) | [Address][flutter-tts-android] | [点此][flutter-tts-android-cn] |
  143 +| Linux (x64) | [Address][flutter-tts-linux] | [点此][flutter-tts-linux-cn] |
  144 +| macOS (x64) | [Address][flutter-tts-macos-x64] | [点此][flutter-tts-macos-x64-cn] |
  145 +| macOS (arm64) | [Address][flutter-tts-macos-arm64] | [点此][flutter-tts-macos-arm64-cn] |
  146 +| Windows (x64) | [Address][flutter-tts-win-x64] | [点此][flutter-tts-win-x64-cn] |
137 147
138 > Note: You need to build from source for iOS. 148 > Note: You need to build from source for iOS.
139 149
@@ -142,22 +152,22 @@ We also have spaces built using WebAssembly. The are listed below: @@ -142,22 +152,22 @@ We also have spaces built using WebAssembly. The are listed below:
142 #### Generating subtitles 152 #### Generating subtitles
143 153
144 | Description | URL | 中国用户 | 154 | Description | URL | 中国用户 |
145 -|--------------------------------|---------------------------------------------------------------------|---------------------------------------------------------------------|  
146 -| Generate subtitles (生成字幕) | [Address](https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles-cn.html)| 155 +|--------------------------------|----------------------------|----------------------------|
  156 +| Generate subtitles (生成字幕) | [Address][lazarus-subtitle]| [点此][lazarus-subtitle-cn]|
147 157
148 158
149 ### Links for pre-trained models 159 ### Links for pre-trained models
150 160
151 | Description | URL | 161 | Description | URL |
152 -|--------------------------------|--------------------------------------------------------------------------------------------------------------------------------|  
153 -| Speech recognition (speech to text, ASR) | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models) |  
154 -| Text-to-speech (TTS) | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |  
155 -| VAD | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx)|  
156 -| Keyword spotting |[Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models)|  
157 -| Audio tagging | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models)|  
158 -| Speaker identification (Speaker ID) | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models)|  
159 -| Spoken language identification (Language ID) | See multi-lingual [Whisper](https://github.com/openai/whisper) ASR models from [Speech recognition](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models) |  
160 -| Punctuation| [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models)| 162 +|---------------------------------------------|---------------------------------------------------------------------------------------|
  163 +| Speech recognition (speech to text, ASR) | [Address][asr-models] |
  164 +| Text-to-speech (TTS) | [Address][tts-models] |
  165 +| VAD | [Address][vad-models] |
  166 +| Keyword spotting | [Address][kws-models] |
  167 +| Audio tagging | [Address][at-models] |
  168 +| Speaker identification (Speaker ID) | [Address][sid-models] |
  169 +| Spoken language identification (Language ID)| See multi-lingual [Whisper][Whisper] ASR models from [Speech recognition][asr-models]|
  170 +| Punctuation | [Address][punct-models] |
161 171
162 ### Useful links 172 ### Useful links
163 173
@@ -169,3 +179,100 @@ We also have spaces built using WebAssembly. The are listed below: @@ -169,3 +179,100 @@ We also have spaces built using WebAssembly. The are listed below:
169 Please see 179 Please see
170 https://k2-fsa.github.io/sherpa/social-groups.html 180 https://k2-fsa.github.io/sherpa/social-groups.html
171 for 新一代 Kaldi **微信交流群** and **QQ 交流群**. 181 for 新一代 Kaldi **微信交流群** and **QQ 交流群**.
  182 +
  183 +[sherpa-rs]: https://github.com/thewh1teagle/sherpa-rs
  184 +[silero-vad]: https://github.com/snakers4/silero-vad
  185 +[Raspberry Pi]: https://www.raspberrypi.com/
  186 +[RV1126]: https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf
  187 +[LicheePi4A]: https://sipeed.com/licheepi4a
  188 +[VisionFive 2]: https://www.starfivetech.com/en/site/boards
  189 +[旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html
  190 +[爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html
  191 +[hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
  192 +[Whisper]: https://github.com/openai/whisper
  193 +[hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper
  194 +[hf-space-tts]: https://huggingface.co/spaces/k2-fsa/text-to-speech
  195 +[hf-space-subtitle]: https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos
  196 +[hf-space-audio-tagging]: https://huggingface.co/spaces/k2-fsa/audio-tagging
  197 +[hf-space-slid-whisper]: https://huggingface.co/spaces/k2-fsa/spoken-language-identification
  198 +[wasm-hf-vad]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx
  199 +[wasm-ms-vad]: https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx
  200 +[wasm-hf-streaming-asr-zh-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en
  201 +[wasm-ms-streaming-asr-zh-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en
  202 +[wasm-hf-streaming-asr-zh-en-paraformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer
  203 +[wasm-ms-streaming-asr-zh-en-paraformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer
  204 +[Paraformer-large]: https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary
  205 +[wasm-hf-streaming-asr-zh-en-yue-paraformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer
  206 +[wasm-ms-streaming-asr-zh-en-yue-paraformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer
  207 +[wasm-hf-streaming-asr-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en
  208 +[wasm-ms-streaming-asr-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en
  209 +[SenseVoice]: https://github.com/FunAudioLLM/SenseVoice
  210 +[wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice
  211 +[wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice
  212 +[wasm-hf-vad-asr-en-whisper-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
  213 +[wasm-ms-vad-asr-en-whisper-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
  214 +[wasm-hf-vad-asr-en-zipformer-gigaspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
  215 +[wasm-ms-vad-asr-en-zipformer-gigaspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
  216 +[wasm-hf-vad-asr-zh-zipformer-wenetspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
  217 +[wasm-ms-vad-asr-zh-zipformer-wenetspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
  218 +[ReazonSpeech]: https://research.reazon.jp/_static/reazonspeech_nlp2023.pdf
  219 +[wasm-hf-vad-asr-ja-zipformer-reazonspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer
  220 +[wasm-ms-vad-asr-ja-zipformer-reazonspeech]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer
  221 +[GigaSpeech2]: https://github.com/SpeechColab/GigaSpeech2
  222 +[wasm-hf-vad-asr-th-zipformer-gigaspeech2]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer
  223 +[wasm-ms-vad-asr-th-zipformer-gigaspeech2]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer
  224 +[TeleSpeech-ASR]: https://github.com/Tele-AI/TeleSpeech-ASR
  225 +[wasm-hf-vad-asr-zh-telespeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech
  226 +[wasm-ms-vad-asr-zh-telespeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech
  227 +[wasm-hf-vad-asr-zh-en-paraformer-large]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer
  228 +[wasm-ms-vad-asr-zh-en-paraformer-large]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer
  229 +[wasm-hf-vad-asr-zh-en-paraformer-small]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small
  230 +[wasm-ms-vad-asr-zh-en-paraformer-small]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small
  231 +[wasm-hf-tts-piper-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en
  232 +[wasm-ms-tts-piper-en]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en
  233 +[wasm-hf-tts-piper-de]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de
  234 +[wasm-ms-tts-piper-de]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de
  235 +[apk-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk.html
  236 +[apk-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html
  237 +[apk-tts]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html
  238 +[apk-tts-cn]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine-cn.html
  239 +[apk-vad]: https://k2-fsa.github.io/sherpa/onnx/vad/apk.html
  240 +[apk-vad-cn]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-cn.html
  241 +[apk-vad-asr]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr.html
  242 +[apk-vad-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr-cn.html
  243 +[apk-2pass]: https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass.html
  244 +[apk-2pass-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass-cn.html
  245 +[apk-at]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk.html
  246 +[apk-at-cn]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-cn.html
  247 +[apk-at-wearos]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos.html
  248 +[apk-at-wearos-cn]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos-cn.html
  249 +[apk-sid]: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
  250 +[apk-sid-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk-cn.html
  251 +[apk-slid]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk.html
  252 +[apk-slid-cn]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk-cn.html
  253 +[apk-kws]: https://k2-fsa.github.io/sherpa/onnx/kws/apk.html
  254 +[apk-kws-cn]: https://k2-fsa.github.io/sherpa/onnx/kws/apk-cn.html
  255 +[apk-flutter-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app.html
  256 +[apk-flutter-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app-cn.html
  257 +[flutter-tts-android]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android.html
  258 +[flutter-tts-android-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android-cn.html
  259 +[flutter-tts-linux]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux.html
  260 +[flutter-tts-linux-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux-cn.html
  261 +[flutter-tts-macos-x64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64.html
[flutter-tts-macos-arm64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64-cn.html
  263 +[flutter-tts-macos-arm64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64.html
[flutter-tts-macos-x64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64-cn.html
  265 +[flutter-tts-win-x64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win.html
  266 +[flutter-tts-win-x64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win-cn.html
  267 +[lazarus-subtitle]: https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles.html
  268 +[lazarus-subtitle-cn]: https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles-cn.html
  269 +[asr-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  270 +[tts-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  271 +[vad-models]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  272 +[kws-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
  273 +[at-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
  274 +[sid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  275 +[slid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  276 +[punct-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
  277 +[GigaSpeech]: https://github.com/SpeechColab/GigaSpeech
  278 +[WenetSpeech]: https://github.com/wenet-e2e/WenetSpeech
#!/usr/bin/env bash
# Copyright (c) 2024 Xiaomi Corporation
#
# This script is to build sherpa-onnx for WebAssembly (VAD+ASR)
# Note: ASR here means non-streaming ASR
#
# It locates an emscripten installation (either from the EMSCRIPTEN
# environment variable or by resolving the emcc binary on PATH), configures
# CMake with the emscripten toolchain file, and installs the artifacts into
# build-wasm-simd-vad-asr/install/bin/wasm/vad-asr.

set -ex

if [ -z "$EMSCRIPTEN" ]; then
  if ! command -v emcc &> /dev/null; then
    echo "Please install emscripten first"
    echo ""
    echo "You can use the following commands to install it:"
    echo ""
    echo "git clone https://github.com/emscripten-core/emsdk.git"
    echo "cd emsdk"
    echo "git pull"
    echo "./emsdk install latest"
    echo "./emsdk activate latest"
    echo "source ./emsdk_env.sh"
    exit 1
  else
    # Derive the emscripten root directory from the location of emcc.
    EMSCRIPTEN=$(dirname "$(realpath "$(which emcc)")")
  fi
fi

export EMSCRIPTEN=$EMSCRIPTEN
echo "EMSCRIPTEN: $EMSCRIPTEN"
# Sanity check: the CMake toolchain file is required for cross-compiling
# to WebAssembly.
if [ ! -f "$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" ]; then
  echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
  echo "Please make sure you have installed emsdk correctly"
  exit 1
fi

mkdir -p build-wasm-simd-vad-asr
pushd build-wasm-simd-vad-asr

# Checked by wasm/vad-asr/CMakeLists.txt to ensure this script is used.
export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON

cmake \
  -DCMAKE_INSTALL_PREFIX=./install \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_TOOLCHAIN_FILE="$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" \
  \
  -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  -DBUILD_SHARED_LIBS=OFF \
  -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  -DSHERPA_ONNX_ENABLE_JNI=OFF \
  -DSHERPA_ONNX_ENABLE_TTS=OFF \
  -DSHERPA_ONNX_ENABLE_C_API=ON \
  -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
  -DSHERPA_ONNX_ENABLE_GPU=OFF \
  -DSHERPA_ONNX_ENABLE_WASM=ON \
  -DSHERPA_ONNX_ENABLE_WASM_VAD_ASR=ON \
  -DSHERPA_ONNX_ENABLE_BINARY=OFF \
  -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
  ..
make -j2
make install

echo "pwd: $PWD"

# Ship the JS wrappers for VAD and non-streaming ASR alongside the wasm app.
cp -fv ../wasm/vad/sherpa-onnx-vad.js ./install/bin/wasm/vad-asr/
cp -fv ../wasm/asr/sherpa-onnx-asr.js ./install/bin/wasm/vad-asr/

ls -lh install/bin/wasm/vad-asr
  1 +#!/usr/bin/env python3
  2 +
  3 +import argparse
  4 +from dataclasses import dataclass
  5 +from typing import List, Optional
  6 +
  7 +import jinja2
  8 +
  9 +
def get_args():
    """Parse command-line options for sharding work across CI runners.

    Returns:
        argparse.Namespace with two integer fields:
          total: number of runners participating (default 1)
          index: 0-based index of the current runner (default 0)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--total", type=int, default=1, help="Number of runners")
    parser.add_argument(
        "--index", type=int, default=0, help="Index of the current runner"
    )
    return parser.parse_args()
  25 +
  26 +
@dataclass
class Model:
    # Name of the pre-trained model directory; also the stem of the
    # ${model_name}.tar.bz2 archive downloaded from the release page.
    model_name: str
    hf: str  # huggingface space name
    ms: str  # modelscope space name
    # Short identifier embedded in the generated artifact file names.
    short_name: str
    # Shell commands (expanded into the generated run-vad-asr.sh) that
    # rearrange the extracted model files into the layout expected by the
    # wasm app, then patch index.html with the model description.
    cmd: str = ""
  34 +
  35 +
def get_models():
    """Return the list of non-streaming ASR models to build wasm apps for.

    Each entry's ``cmd`` is a shell snippet that renames the downloaded model
    files to the generic names (whisper-*.onnx, sense-voice.onnx,
    transducer-*.onnx, paraformer.onnx, telespeech.onnx, tokens.txt) that
    app-vad-asr.js probes for, and rewrites the page title in index.html.
    """
    models = [
        Model(
            model_name="sherpa-onnx-whisper-tiny.en",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny",
            short_name="vad-asr-en-whisper_tiny",
            cmd="""
        pushd $model_name
        mv -v tiny.en-encoder.int8.onnx ../whisper-encoder.onnx
        mv -v tiny.en-decoder.int8.onnx ../whisper-decoder.onnx
        mv -v tiny.en-tokens.txt ../tokens.txt
        popd
        rm -rf $model_name
        sed -i.bak 's/Zipformer/Whisper tiny.en supporting English 英文/g' ../index.html
        git diff
        """,
        ),
        Model(
            model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice",
            short_name="vad-asr-zh_en_ja_ko_cantonese-sense_voice_small",
            cmd="""
        pushd $model_name
        mv -v model.int8.onnx ../sense-voice.onnx
        mv -v tokens.txt ../
        popd
        rm -rf $model_name
        sed -i.bak 's/Zipformer/SenseVoice Small supporting English, Chinese, Japanese, Korean, Cantonese 中英日韩粤/g' ../index.html
        git diff
        """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-2023-09-14",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer",
            short_name="vad-asr-zh_en-paraformer_large",
            cmd="""
        pushd $model_name
        mv -v model.int8.onnx ../paraformer.onnx
        mv -v tokens.txt ../
        popd
        rm -rf $model_name
        sed -i.bak 's/Zipformer/Paraformer supporting Chinese, English 中英/g' ../index.html
        git diff
        """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-small-2024-03-09",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small",
            short_name="vad-asr-zh_en-paraformer_small",
            cmd="""
        pushd $model_name
        mv -v model.int8.onnx ../paraformer.onnx
        mv -v tokens.txt ../
        popd
        rm -rf $model_name
        sed -i.bak 's/Zipformer/Paraformer-small supporting Chinese, English 中英文/g' ../index.html
        git diff
        """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-gigaspeech-2023-12-12",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech",
            short_name="vad-asr-en-zipformer_gigaspeech",
            cmd="""
        pushd $model_name
        mv encoder-epoch-30-avg-1.int8.onnx ../transducer-encoder.onnx
        mv decoder-epoch-30-avg-1.onnx ../transducer-decoder.onnx
        mv joiner-epoch-30-avg-1.int8.onnx ../transducer-joiner.onnx
        mv tokens.txt ../
        popd
        rm -rf $model_name
        sed -i.bak 's/Zipformer/Zipformer supporting English 英语/g' ../index.html
        git diff
        """,
        ),
        Model(
            model_name="icefall-asr-zipformer-wenetspeech-20230615",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech",
            short_name="vad-asr-zh-zipformer_wenetspeech",
            cmd="""
        pushd $model_name
        mv -v data/lang_char/tokens.txt ../
        mv -v exp/encoder-epoch-12-avg-4.int8.onnx ../transducer-encoder.onnx
        mv -v exp/decoder-epoch-12-avg-4.onnx ../transducer-decoder.onnx
        mv -v exp/joiner-epoch-12-avg-4.int8.onnx ../transducer-joiner.onnx
        popd
        rm -rf $model_name
        sed -i.bak 's/Zipformer/Zipformer supporting Chinese 中文/g' ../index.html
        git diff
        """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer",
            short_name="vad-asr-ja-zipformer_reazonspeech",
            cmd="""
        pushd $model_name
        mv encoder-epoch-99-avg-1.int8.onnx ../transducer-encoder.onnx
        mv decoder-epoch-99-avg-1.onnx ../transducer-decoder.onnx
        mv joiner-epoch-99-avg-1.int8.onnx ../transducer-joiner.onnx
        mv tokens.txt ../
        popd
        rm -rf $model_name
        sed -i.bak 's/Zipformer/Zipformer supporting Japanese 日语/g' ../index.html
        git diff
        """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-thai-2024-06-20",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer",
            short_name="vad-asr-th-zipformer_gigaspeech2",
            cmd="""
        pushd $model_name
        mv encoder-epoch-12-avg-5.int8.onnx ../transducer-encoder.onnx
        mv decoder-epoch-12-avg-5.onnx ../transducer-decoder.onnx
        mv joiner-epoch-12-avg-5.int8.onnx ../transducer-joiner.onnx
        mv tokens.txt ../
        popd
        rm -rf $model_name
        sed -i.bak 's/Zipformer/Zipformer supporting Thai 泰语/g' ../index.html
        git diff
        """,
        ),
        Model(
            model_name="sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech",
            short_name="vad-asr-zh-telespeech",
            cmd="""
        pushd $model_name
        mv model.int8.onnx ../telespeech.onnx
        mv tokens.txt ../
        popd
        rm -rf $model_name
        sed -i.bak 's/Zipformer/TeleSpeech-ASR supporting Chinese 多种中文方言/g' ../index.html
        git diff
        """,
        ),
    ]
    return models
  184 +
  185 +
def main():
    """Shard the model list across CI runners and render the shell templates.

    Runner ``index`` (0-based) out of ``total`` receives a contiguous slice
    of the model list.  When the list does not divide evenly, the leftover
    models are distributed one per runner to the first ``remaining`` runners.
    For each output file name in ``filename_list``, the template
    ``<name>.in`` is rendered with jinja2 and written to ``<name>``.

    Raises:
        ValueError: if there are more runners than models.
    """
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    # Models left over after the even split; runners 0..remaining-1 each
    # take one extra model from the tail of the list.
    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./run-vad-asr.sh",
    ]
    # One jinja2 environment is enough for all templates (hoisted out of
    # the loop; it is loop-invariant).
    environment = jinja2.Environment()
    for filename in filename_list:
        # Fix: the template input must be derived from the output file name,
        # i.e. "<filename>.in" -> "<filename>".
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()
#!/usr/bin/env bash
#
# Build WebAssembly APPs for huggingface spaces and modelscope spaces
#
# NOTE(review): this file is a jinja2 template (rendered by the Python
# generator script); the {% for %} block below is expanded once per model
# assigned to this runner.  It presumably runs from the sherpa-onnx repo
# root with MS_TOKEN and HF_TOKEN set in the environment — confirm in CI.

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# Extract the version string from the SHERPA_ONNX_VERSION line of CMakeLists.txt.
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)


{% for model in model_list %}
model_name={{ model.model_name }}
short_name={{ model.short_name }}
hf_name={{ model.hf }}
ms_name={{ model.ms }}

# Download the VAD model and this model's archive into wasm/vad-asr/assets.
pushd wasm/vad-asr
git checkout .
rm -rf assets
mkdir assets
cd assets
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2
rm ${model_name}.tar.bz2

# Model-specific renaming commands (Model.cmd from the generator script).
{{ model.cmd }}

popd

ls -lh wasm/vad-asr/assets

# Remove previous build outputs so each model gets a clean install tree.
rm -rf build-wasm-simd-vad-asr/install
rm -rf build-wasm-simd-vad-asr/wasm

./build-wasm-simd-vad-asr.sh

# Package the installed app as sherpa-onnx-wasm-simd-<version>-<short_name>.
dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-${short_name}
mv build-wasm-simd-vad-asr/install/bin/wasm/vad-asr $dst
ls -lh $dst
tar cjfv $dst.tar.bz2 ./$dst
ls -lh *.tar.bz2

git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"

export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false

# Publish the built app to the modelscope space.
rm -rf ms
git clone https://www.modelscope.cn/studios/$ms_name.git ms

cd ms
cp -v ../$dst/* .

git status
# Large binary artifacts go through git-lfs.
git lfs track "*.data"
git lfs track "*.wasm"
ls -lh

git add .
git commit -m "update model"
git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/$ms_name.git
cd ..
rm -rf ms

# Publish the built app to the huggingface space.
rm -rf huggingface

git clone https://huggingface.co/spaces/$hf_name huggingface
cd huggingface
cp -v ../$dst/* .

git status
git lfs track "*.data"
git lfs track "*.wasm"
ls -lh

git add .
git commit -m "update model"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/$hf_name main
cd ..
rm -rf huggingface
rm -rf $dst

ls -lh *.tar.bz2

{% endfor %}
@@ -13,6 +13,7 @@ @@ -13,6 +13,7 @@
13 #include "sherpa-onnx/csrc/audio-tagging.h" 13 #include "sherpa-onnx/csrc/audio-tagging.h"
14 #include "sherpa-onnx/csrc/circular-buffer.h" 14 #include "sherpa-onnx/csrc/circular-buffer.h"
15 #include "sherpa-onnx/csrc/display.h" 15 #include "sherpa-onnx/csrc/display.h"
  16 +#include "sherpa-onnx/csrc/file-utils.h"
16 #include "sherpa-onnx/csrc/keyword-spotter.h" 17 #include "sherpa-onnx/csrc/keyword-spotter.h"
17 #include "sherpa-onnx/csrc/macros.h" 18 #include "sherpa-onnx/csrc/macros.h"
18 #include "sherpa-onnx/csrc/offline-punctuation.h" 19 #include "sherpa-onnx/csrc/offline-punctuation.h"
@@ -1638,3 +1639,7 @@ int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate( @@ -1638,3 +1639,7 @@ int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
1638 void SherpaOnnxLinearResamplerReset(SherpaOnnxLinearResampler *p) { 1639 void SherpaOnnxLinearResamplerReset(SherpaOnnxLinearResampler *p) {
1639 p->impl->Reset(); 1640 p->impl->Reset();
1640 } 1641 }

// C-API bridge to sherpa_onnx::FileExists().
// Returns 1 if `filename` exists and 0 otherwise (the bool result converts
// implicitly to int32_t).
int32_t SherpaOnnxFileExists(const char *filename) {
  return sherpa_onnx::FileExists(filename);
}
@@ -1361,6 +1361,9 @@ SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate( @@ -1361,6 +1361,9 @@ SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate(
1361 SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate( 1361 SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
1362 const SherpaOnnxLinearResampler *p); 1362 const SherpaOnnxLinearResampler *p);
1363 1363
  1364 +// Return 1 if the file exists; return 0 if the file does not exist.
  1365 +SHERPA_ONNX_API int32_t SherpaOnnxFileExists(const char *filename);
  1366 +
1364 #if defined(__GNUC__) 1367 #if defined(__GNUC__)
1365 #pragma GCC diagnostic pop 1368 #pragma GCC diagnostic pop
1366 #endif 1369 #endif
@@ -14,6 +14,10 @@ if(SHERPA_ONNX_ENABLE_WASM_VAD) @@ -14,6 +14,10 @@ if(SHERPA_ONNX_ENABLE_WASM_VAD)
14 add_subdirectory(vad) 14 add_subdirectory(vad)
15 endif() 15 endif()
16 16
# Build the combined VAD + non-streaming ASR wasm app when requested.
if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
  add_subdirectory(vad-asr)
endif()

17 if(SHERPA_ONNX_ENABLE_WASM_NODEJS) 21 if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
18 add_subdirectory(nodejs) 22 add_subdirectory(nodejs)
19 endif() 23 endif()
@@ -80,3 +80,10 @@ assets fangjun$ tree -L 1 @@ -80,3 +80,10 @@ assets fangjun$ tree -L 1
80 80
81 0 directories, 4 files 81 0 directories, 4 files
82 ``` 82 ```
  83 +
  84 +You can find example build scripts at:
  85 +
 - Streaming Zipformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
  87 + - Streaming Zipformer (English): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
  88 + - Streaming Paraformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
  89 + - Streaming Paraformer (English + Chinese + Cantonese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 <head> 3 <head>
4 <meta charset="utf-8"> 4 <meta charset="utf-8">
5 <meta name="viewport" content="width=device-width" /> 5 <meta name="viewport" content="width=device-width" />
6 - <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title> 6 + <title>Next-gen Kaldi WebAssembly with sherpa-onnx for ASR</title>
7 <style> 7 <style>
8 h1,div { 8 h1,div {
9 text-align: center; 9 text-align: center;
@@ -30,3 +30,8 @@ assets fangjun$ tree -L 1 @@ -30,3 +30,8 @@ assets fangjun$ tree -L 1
30 30
31 1 directory, 3 files 31 1 directory, 3 files
32 ``` 32 ```
  33 +
  34 +You can find example build scripts at:
  35 +
  36 + - English TTS: https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-tts.yaml
  37 + - German TTS: https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-de-tts.yaml
# This directory must be built via ./build-wasm-simd-vad-asr.sh, which sets
# SHERPA_ONNX_IS_USING_BUILD_WASM_SH and configures the emscripten toolchain.
# Fix: the previous message referred to the VAD-only script
# ./build-wasm-simd-vad.sh, which does not build this target.
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-vad-asr.sh to build for wasm VAD + ASR")
endif()

# The VAD and ASR models must be downloaded into assets/ first;
# see assets/README.md for instructions.
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/silero_vad.onnx" OR NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/tokens.txt")
  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
endif()

# C API symbols that must remain callable from JavaScript.
set(exported_functions
  # VAD
  SherpaOnnxCreateCircularBuffer
  SherpaOnnxDestroyCircularBuffer
  SherpaOnnxCircularBufferPush
  SherpaOnnxCircularBufferGet
  SherpaOnnxCircularBufferFree
  SherpaOnnxCircularBufferPop
  SherpaOnnxCircularBufferSize
  SherpaOnnxCircularBufferHead
  SherpaOnnxCircularBufferReset
  SherpaOnnxCreateVoiceActivityDetector
  SherpaOnnxDestroyVoiceActivityDetector
  SherpaOnnxVoiceActivityDetectorAcceptWaveform
  SherpaOnnxVoiceActivityDetectorEmpty
  SherpaOnnxVoiceActivityDetectorDetected
  SherpaOnnxVoiceActivityDetectorPop
  SherpaOnnxVoiceActivityDetectorClear
  SherpaOnnxVoiceActivityDetectorFront
  SherpaOnnxDestroySpeechSegment
  SherpaOnnxVoiceActivityDetectorReset
  SherpaOnnxVoiceActivityDetectorFlush
  # non-streaming ASR
  SherpaOnnxAcceptWaveformOffline
  SherpaOnnxCreateOfflineRecognizer
  SherpaOnnxCreateOfflineStream
  SherpaOnnxDecodeMultipleOfflineStreams
  SherpaOnnxDecodeOfflineStream
  SherpaOnnxDestroyOfflineRecognizer
  SherpaOnnxDestroyOfflineRecognizerResult
  SherpaOnnxDestroyOfflineStream
  SherpaOnnxDestroyOfflineStreamResultJson
  SherpaOnnxGetOfflineStreamResult
  SherpaOnnxGetOfflineStreamResultAsJson
  #
  SherpaOnnxFileExists
)
# emscripten prefixes exported C symbols with an underscore.
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})
# Emscripten settings: virtual FS for the preloaded models, generous initial
# memory and growable heap, enlarged stack, and the exported symbol list.
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
# Fix: was CMAKE_EXECUTBLE_LINKER_FLAGS (a typo CMake ignores). The correct
# variable is CMAKE_EXE_LINKER_FLAGS, so the emscripten link options
# (EXPORTED_FUNCTIONS, --preload-file, ...) are actually passed at link time.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}")

if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
  message(FATAL_ERROR "The default suffix for building executables should be .js!")
endif()
# set(CMAKE_EXECUTABLE_SUFFIX ".html")

add_executable(sherpa-onnx-wasm-main-vad-asr sherpa-onnx-wasm-main-vad-asr.cc)
target_link_libraries(sherpa-onnx-wasm-main-vad-asr sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-main-vad-asr DESTINATION bin/wasm/vad-asr)

install(
  FILES
  "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.js"
  "index.html"
  "app-vad-asr.js"
  "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.wasm"
  "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.data"
  DESTINATION
  bin/wasm/vad-asr
)
// This file copies and modifies code
// from https://mdn.github.io/web-dictaphone/scripts/app.js
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e

// UI elements looked up by id from index.html.
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const clearBtn = document.getElementById('clearBtn');
const hint = document.getElementById('hint');
const soundClips = document.getElementById('sound-clips');

let textArea = document.getElementById('results');

// lastResult holds the in-progress recognition text; resultList accumulates
// finished events ('Speech detected' markers and recognized sentences).
let lastResult = '';
let resultList = [];

// Clear all accumulated results and refresh the text area.
clearBtn.onclick = function() {
  resultList = [];
  textArea.value = getDisplayResult();
  textArea.scrollTop = textArea.scrollHeight;  // auto scroll
};
  21 +
// Render the accumulated VAD/ASR events as display text.
// A 'Speech detected' entry starts a new numbered line; any other non-empty
// entry (the recognized text) is appended to the current line.  A pending
// lastResult, if non-empty, is shown as the next numbered line.
function getDisplayResult() {
  let segmentIndex = 0;
  let text = '';
  for (const entry of resultList) {
    if (entry == '') {
      continue;
    }
    if (entry == 'Speech detected') {
      text += segmentIndex + ': ' + entry;
      segmentIndex += 1;
    } else {
      text += ', ' + entry + '\n';
    }
  }

  if (lastResult.length > 0) {
    text += segmentIndex + ': ' + lastResult + '\n';
  }
  return text;
}
  43 +


// Emscripten module object; populated by the sherpa-onnx wasm glue code.
Module = {};

let audioCtx;     // WebAudio context for microphone capture
let mediaStream;  // stream obtained from getUserMedia

// Sample rate the models expect; microphone audio is resampled to this.
let expectedSampleRate = 16000;
let recordSampleRate;  // the sampleRate of the microphone
let recorder = null;   // the microphone
let leftchannel = [];  // TODO: Use a single channel

let recordingLength = 0;  // number of samples so far

// Wasm-side handles, created once the module is initialized.
let vad = null;
let buffer = null;
let recognizer = null;
let printed = false;
  62 +
// Returns the result of the C API's SherpaOnnxFileExists (0/1 int) for a
// file inside the wasm filesystem. The filename is marshalled into the wasm
// heap as a NUL-terminated UTF-8 string and freed afterwards.
function fileExists(filename) {
  const numBytes = Module.lengthBytesUTF8(filename) + 1;
  const ptr = Module._malloc(numBytes);
  Module.stringToUTF8(filename, ptr, numBytes);

  const exists = Module._SherpaOnnxFileExists(ptr);
  Module._free(ptr);

  return exists;
}
  74 +
  75 +function createOfflineRecognizerSenseVoice() {}
  76 +
// Builds the offline (non-streaming) recognizer from whichever ASR model file
// was bundled into the wasm filesystem; the first match below wins. Sets the
// file-level `recognizer` global. Shows an alert and leaves `recognizer`
// null if no known model file is present.
function initOfflineRecognizer() {
  let config = {
    modelConfig: {
      debug: 1,
      tokens: './tokens.txt',
    },
  };

  // Consistency fix: fileExists() returns the C API's 0/1 int; treat it as a
  // boolean in every branch (the original compared `== 1` only for the first).
  if (fileExists('sense-voice.onnx')) {
    config.modelConfig.senseVoice = {
      model: './sense-voice.onnx',
      useInverseTextNormalization: 1,
    };
  } else if (fileExists('whisper-encoder.onnx')) {
    // assumes whisper-decoder.onnx is bundled alongside the encoder
    config.modelConfig.whisper = {
      encoder: './whisper-encoder.onnx',
      decoder: './whisper-decoder.onnx',
    };
  } else if (fileExists('transducer-encoder.onnx')) {
    config.modelConfig.transducer = {
      encoder: './transducer-encoder.onnx',
      decoder: './transducer-decoder.onnx',
      joiner: './transducer-joiner.onnx',
    };
    config.modelConfig.modelType = 'transducer';
  } else if (fileExists('nemo-transducer-encoder.onnx')) {
    config.modelConfig.transducer = {
      encoder: './nemo-transducer-encoder.onnx',
      decoder: './nemo-transducer-decoder.onnx',
      joiner: './nemo-transducer-joiner.onnx',
    };
    config.modelConfig.modelType = 'nemo_transducer';
  } else if (fileExists('paraformer.onnx')) {
    config.modelConfig.paraformer = {
      model: './paraformer.onnx',
    };
  } else if (fileExists('telespeech.onnx')) {
    config.modelConfig.telespeechCtc = './telespeech.onnx';
  } else {
    console.log('Please specify a model.');
    alert('Please specify a model.');
    // Bug fix: bail out instead of constructing a recognizer from a config
    // that names no model at all.
    return;
  }

  recognizer = new OfflineRecognizer(config, Module);
}
  121 +
// Called by emscripten once the wasm module (and its preloaded files) is
// ready; only then is it safe to call into the sherpa-onnx C API.
Module.onRuntimeInitialized = function() {
  console.log('inited!');
  hint.innerText = 'Model loaded! Please click start';

  startBtn.disabled = false;

  vad = createVad(Module);  // provided by sherpa-onnx-vad.js
  console.log('vad is created!', vad);

  // 30 * 16000: room for 30 seconds of audio at the 16 kHz model rate.
  buffer = new CircularBuffer(30 * 16000, Module);
  console.log('CircularBuffer is created!', buffer);

  initOfflineRecognizer();
};
  136 +
  137 +
  138 +
// Microphone capture + VAD + non-streaming ASR pipeline. On success, audio
// flows: microphone -> ScriptProcessorNode -> downsample to 16 kHz ->
// CircularBuffer -> VAD windows -> per-segment offline recognition.
if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      // Request the model rate directly; if the hardware cannot honor it,
      // audioCtx.sampleRate below reports the real rate and we resample.
      audioCtx = new AudioContext({sampleRate: expectedSampleRate});
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log('media stream', mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 4096;
    var numberOfInputChannels = 1;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      // legacy fallback name for very old WebKit builds
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log('recorder', recorder);

    // Runs once per 4096-frame chunk. Note the entire VAD + recognition +
    // clip-creation work below happens synchronously inside the audio
    // callback.
    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);
      buffer.push(samples);
      // Feed the VAD in fixed-size windows as long as enough samples queued.
      while (buffer.size() > vad.config.sileroVad.windowSize) {
        const s = buffer.get(buffer.head(), vad.config.sileroVad.windowSize);
        vad.acceptWaveform(s);
        buffer.pop(vad.config.sileroVad.windowSize);

        // Rising edge: announce the in-progress segment exactly once.
        if (vad.isDetected() && !printed) {
          printed = true;
          lastResult = 'Speech detected';
        }

        // Falling edge: commit the in-progress entry to the finished list.
        if (!vad.isDetected()) {
          printed = false;
          if (lastResult != '') {
            resultList.push(lastResult);
          }
          lastResult = '';
        }

        // Drain every speech segment the VAD has finalized.
        while (!vad.isEmpty()) {
          const segment = vad.front();
          const duration = segment.samples.length / expectedSampleRate;
          let durationStr = `Duration: ${duration.toFixed(3)} seconds`;
          vad.pop();

          // non-streaming asr
          const stream = recognizer.createStream();
          stream.acceptWaveform(expectedSampleRate, segment.samples);
          recognizer.decode(stream);
          let recognitionResult = recognizer.getResult(stream);
          console.log(recognitionResult);
          let text = recognitionResult.text;
          stream.free();
          console.log(text);

          if (text != '') {
            durationStr += `. Result: ${text}`;
          }

          resultList.push(durationStr);


          // now save the segment to a wav file
          // Convert float samples to 16-bit PCM, clamping to [-1, 1] first.
          let buf = new Int16Array(segment.samples.length);
          for (var i = 0; i < segment.samples.length; ++i) {
            let s = segment.samples[i];
            if (s >= 1)
              s = 1;
            else if (s <= -1)
              s = -1;

            buf[i] = s * 32767;
          }

          let clipName = new Date().toISOString() + '--' + durationStr;

          // Build a playable clip entry (audio element + label + delete
          // button) and append it to the sound-clips section.
          const clipContainer = document.createElement('article');
          const clipLabel = document.createElement('p');
          const audio = document.createElement('audio');
          const deleteButton = document.createElement('button');

          clipContainer.classList.add('clip');
          audio.setAttribute('controls', '');
          deleteButton.textContent = 'Delete';
          deleteButton.className = 'delete';

          clipLabel.textContent = clipName;

          clipContainer.appendChild(audio);

          clipContainer.appendChild(clipLabel);
          clipContainer.appendChild(deleteButton);
          soundClips.appendChild(clipContainer);

          audio.controls = true;
          const blob = toWav(buf);

          // NOTE(review): leftchannel is never filled in this file --
          // presumably vestigial from the dictaphone original; confirm.
          leftchannel = [];
          const audioURL = window.URL.createObjectURL(blob);
          audio.src = audioURL;

          deleteButton.onclick = function(e) {
            let evtTgt = e.target;
            evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
          };

          // Clicking the label lets the user rename the clip in place.
          clipLabel.onclick = function() {
            const existingName = clipLabel.textContent;
            const newClipName = prompt('Enter a new name for your sound clip?');
            if (newClipName === null) {
              clipLabel.textContent = existingName;
            } else {
              clipLabel.textContent = newClipName;
            }
          };
        }
      }

      textArea.value = getDisplayResult();
      textArea.scrollTop = textArea.scrollHeight;  // auto scroll
    };

    // Start: wire microphone -> recorder -> destination so the script
    // processor receives callbacks.
    startBtn.onclick = function() {
      mediaStream.connect(recorder);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');

      stopBtn.disabled = false;
      startBtn.disabled = true;
    };

    // Stop: reset VAD/buffer state and tear down the audio graph.
    stopBtn.onclick = function() {
      vad.reset();
      buffer.reset();
      console.log('recorder stopped');

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);

      startBtn.style.background = '';
      startBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      startBtn.disabled = false;
    };
  };

  // NOTE(review): "occured" in the message below is a typo ("occurred");
  // it is a runtime string, so it is left unchanged here.
  let onError = function(err) {
    console.log('The following error occured: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}
  313 +
  314 +
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
//
// Concatenates a list of Int16Array chunks into one contiguous Int16Array.
function flatten(listOfSamples) {
  const total = listOfSamples.reduce((acc, chunk) => acc + chunk.length, 0);
  const merged = new Int16Array(total);

  let writePos = 0;
  for (const chunk of listOfSamples) {
    merged.set(chunk, writePos);
    writePos += chunk.length;
  }
  return merged;
}
  331 +
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
//
// Wraps 16-bit PCM samples in a 44-byte canonical WAV header and returns an
// 'audio/wav' Blob (mono, 16 bits per sample).
//
// samples:    Int16Array of PCM samples.
// sampleRate: rate recorded in the header; defaults to the file-level
//             expectedSampleRate so existing one-argument callers are
//             unchanged (generalized from the previous hard-coded global).
function toWav(samples, sampleRate = expectedSampleRate) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // http://soundfile.sapp.org/doc/WaveFormat/
  // Magic strings are written as little-endian u32s, so the constants are
  // spelled in byte-reversed order: 0x46464952 -> "RIFF", etc.
  view.setUint32(0, 0x46464952, true);               // chunkID: "RIFF"
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  view.setUint32(8, 0x45564157, true);               // format: "WAVE"
  view.setUint32(12, 0x20746d66, true);              // subchunk1ID: "fmt "
  view.setUint32(16, 16, true);  // subchunk1Size, 16 for PCM
  // Fix: audioFormat is a 2-byte field at offset 20. The original wrote it
  // with setUint32, clobbering bytes 22-23, and only produced a valid header
  // because the numChannels write below happened to repair them.
  view.setUint16(20, 1, true);   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);   // numChannels: 1 channel
  view.setUint32(24, sampleRate, true);      // sampleRate
  view.setUint32(28, sampleRate * 2, true);  // byteRate = rate * channels * 2
  view.setUint16(32, 2, true);   // blockAlign = channels * bytesPerSample
  view.setUint16(34, 16, true);  // bitsPerSample
  view.setUint32(36, 0x61746164, true);          // subchunk2ID: "data"
  view.setUint32(40, samples.length * 2, true);  // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}
  365 +
// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
//
// Resamples a Float32Array by averaging the source samples that map onto
// each output frame. Returns the input unchanged when rates already match.
//
// buffer:           Float32Array of input samples.
// exportSampleRate: target rate.
// inputSampleRate:  source rate; defaults to the file-level recordSampleRate
//                   (the microphone rate) so existing two-argument callers
//                   are unchanged (generalized from the hard-coded global).
function downsampleBuffer(buffer, exportSampleRate, inputSampleRate = recordSampleRate) {
  if (exportSampleRate === inputSampleRate) {
    return buffer;
  }
  const sampleRateRatio = inputSampleRate / exportSampleRate;
  const newLength = Math.round(buffer.length / sampleRateRatio);
  const result = new Float32Array(newLength);
  let offsetResult = 0;
  let offsetBuffer = 0;
  while (offsetResult < result.length) {
    const nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    let accum = 0;
    let count = 0;
    for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    // Robustness fix: an empty averaging window (possible when the ratio is
    // < 1, i.e. upsampling) previously produced NaN; fall back to the
    // nearest source sample instead.
    result[offsetResult] =
        count > 0 ? accum / count
                  : buffer[Math.min(offsetBuffer, buffer.length - 1)];
    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
}
  1 +# Introduction
  2 +
  3 +## Download VAD models
  4 +
  5 +Please download
  6 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  7 +and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`.
  8 +
  9 +## Download non-streaming ASR models
  10 +
  11 +Please refer to
  12 +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  13 +to download a non-streaming ASR model, i.e., an offline ASR model.
  14 +
  15 +After downloading, you should rename the model files.
  16 +
  17 +Please refer to
  18 +https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html#download-a-speech-recognition-model
  19 +for how to rename.
  20 +
  21 +You can find example build scripts at the following address:
  22 +
  23 + https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD + ASR</title>
  <style>
    h1,div {
      text-align: center;
    }
    textarea {
      width:100%;
    }
  </style>
</head>

<body>
  <h1>
    Next-gen Kaldi + WebAssembly<br/>
    VAD+ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
    <!-- NOTE(review): the model name below looks like a per-build placeholder
         (this page supports several model families) - confirm whether the CI
         build script rewrites it. -->
    (with Zipformer)
  </h1>

  <div>
    <span id="hint">Loading model ... ...</span>
    <br/>
    <br/>
    <button id="startBtn" disabled>Start</button>
    <button id="stopBtn" disabled>Stop</button>
    <button id="clearBtn">Clear</button>
    <br/>
    <br/>
    <textarea id="results" rows="10" readonly></textarea>
  </div>

  <section flex="1" overflow="auto" id="sound-clips">
  </section>

  <!-- Load order matters: the wrapper APIs (asr/vad) and the app must be
       defined before the emscripten glue fires Module.onRuntimeInitialized. -->
  <script src="sherpa-onnx-asr.js"></script>
  <script src="sherpa-onnx-vad.js"></script>
  <script src="app-vad-asr.js"></script>
  <script src="sherpa-onnx-wasm-main-vad-asr.js"></script>
</body>

</html>
  1 +../asr/sherpa-onnx-asr.js
  1 +../vad/sherpa-onnx-vad.js
  1 +// wasm/sherpa-onnx-wasm-main-vad-asr.cc
  2 +//
  3 +// Copyright (c) 2024 Xiaomi Corporation
  4 +#include <stdio.h>
  5 +
  6 +#include <algorithm>
  7 +#include <memory>
  8 +
  9 +#include "sherpa-onnx/c-api/c-api.h"
  10 +
  11 +// see also
  12 +// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html
  13 +
extern "C" {

// Copies num_bytes bytes from src into dst. Exported with C linkage so the
// JavaScript side can call it to move data through the wasm heap.
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy_n(src, num_bytes, dst);
}

}  // extern "C"
@@ -3,3 +3,6 @@ @@ -3,3 +3,6 @@
3 Please download 3 Please download
4 https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx 4 https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
5 and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`. 5 and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`.
  6 +
  7 +You can find an example build script at
  8 +https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 <head> 3 <head>
4 <meta charset="utf-8"> 4 <meta charset="utf-8">
5 <meta name="viewport" content="width=device-width" /> 5 <meta name="viewport" content="width=device-width" />
6 - <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title> 6 + <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD</title>
7 <style> 7 <style>
8 h1,div { 8 h1,div {
9 text-align: center; 9 text-align: center;
@@ -172,7 +172,6 @@ class Vad { @@ -172,7 +172,6 @@ class Vad {
172 constructor(configObj, Module) { 172 constructor(configObj, Module) {
173 this.config = configObj; 173 this.config = configObj;
174 const config = initSherpaOnnxVadModelConfig(configObj, Module); 174 const config = initSherpaOnnxVadModelConfig(configObj, Module);
175 - Module._MyPrint(config.ptr);  
176 const handle = Module._SherpaOnnxCreateVoiceActivityDetector( 175 const handle = Module._SherpaOnnxCreateVoiceActivityDetector(
177 config.ptr, configObj.bufferSizeInSeconds || 30); 176 config.ptr, configObj.bufferSizeInSeconds || 30);
178 freeConfig(config, Module); 177 freeConfig(config, Module);