Fangjun Kuang
Committed by GitHub

Add WebAssembly (WASM) for speech enhancement GTCRN models (#2002)

@@ -144,8 +144,7 @@ jobs: @@ -144,8 +144,7 @@ jobs:
144 144
145 git clone https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en huggingface 145 git clone https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en huggingface
146 cd huggingface 146 cd huggingface
147 - rm -fv *.js  
148 - rm -fv *.data 147 + rm -rf ./*
149 git fetch 148 git fetch
150 git pull 149 git pull
151 git merge -m "merge remote" --ff origin main 150 git merge -m "merge remote" --ff origin main
  1 +name: wasm-simd-hf-space-speech-enhancement-gtcrn
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - wasm
  7 + - wasm-gtcrn
  8 + tags:
  9 + - 'v[0-9]+.[0-9]+.[0-9]+*'
  10 +
  11 + workflow_dispatch:
  12 +
  13 +concurrency:
  14 + group: wasm-simd-hf-space-speech-enhancement-gtcrn-${{ github.ref }}
  15 + cancel-in-progress: true
  16 +
  17 +jobs:
  18 + wasm-simd-hf-space-speech-enhancement-gtcrn:
  19 + name: wasm gtcrn
  20 + runs-on: ${{ matrix.os }}
  21 + strategy:
  22 + fail-fast: false
  23 + matrix:
  24 + os: [ubuntu-latest]
  25 +
  26 + steps:
  27 + - uses: actions/checkout@v4
  28 + with:
  29 + fetch-depth: 0
  30 +
  31 + - name: Install emsdk
  32 + uses: mymindstorm/setup-emsdk@v14
  33 + with:
  34 + version: 3.1.53
  35 + actions-cache-folder: 'emsdk-cache'
  36 +
  37 + - name: View emsdk version
  38 + shell: bash
  39 + run: |
  40 + emcc -v
  41 + echo "--------------------"
  42 + emcc --check
  43 +
  44 + - name: Download model
  45 + shell: bash
  46 + run: |
  47 + cd wasm/speech-enhancement/assets
  48 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
  49 + mv gtcrn_simple.onnx gtcrn.onnx
  50 +
  51 + - name: build
  52 + shell: bash
  53 + run: |
  54 + ./build-wasm-simd-speech-enhancement.sh
  55 +
  56 + - name: collect files
  57 + shell: bash
  58 + run: |
  59 + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
  60 +
  61 + d=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-speech-enhancement-gtcrn
  62 + mv build-wasm-simd-speech-enhancement/install/bin/wasm/speech-enhancement $d
  63 + ls -lh $d
  64 + tar cjfv $d.tar.bz2 $d
  65 +
  66 + echo "---"
  67 +
  68 + ls -lh *.tar.bz2
  69 +
  70 + - uses: actions/upload-artifact@v4
  71 + with:
  72 + name: wasm-speech-enhancement-gtcrn
  73 + path: ./*.tar.bz2
  74 +
  75 + - name: Release
  76 + # if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
  77 + uses: svenstaro/upload-release-action@v2
  78 + with:
  79 + file_glob: true
  80 + overwrite: true
  81 + file: ./*.tar.bz2
  82 + repo_name: k2-fsa/sherpa-onnx
  83 + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
  84 + tag: v1.10.46
  85 +
  86 + - name: Release
  87 + if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
  88 + uses: svenstaro/upload-release-action@v2
  89 + with:
  90 + file_glob: true
  91 + overwrite: true
  92 + file: ./*.tar.bz2
  93 +
  94 + - name: Publish to ModelScope
  95 + # if: false
  96 + env:
  97 + MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
  98 + uses: nick-fields/retry@v2
  99 + with:
  100 + max_attempts: 20
  101 + timeout_seconds: 200
  102 + shell: bash
  103 + command: |
  104 + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
  105 +
  106 + git config --global user.email "csukuangfj@gmail.com"
  107 + git config --global user.name "Fangjun Kuang"
  108 +
  109 + rm -rf ms
  110 + export GIT_LFS_SKIP_SMUDGE=1
  111 + export GIT_CLONE_PROTECTION_ACTIVE=false
  112 +
  113 + git clone http://www.modelscope.cn/studios/csukuangfj/wasm-speech-enhancement-gtcrn.git ms
  114 +
  115 + cd ms
  116 + rm -fv *.js
  117 + rm -fv *.data
  118 +
  119 + git fetch
  120 + git pull
  121 + git merge -m "merge remote" --ff origin main
  122 +
  123 + cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .
  124 +
  125 + git status
  126 + git lfs track "*.data"
  127 + git lfs track "*.wasm"
  128 + ls -lh
  129 +
  130 + git add .
  131 + git commit -m "update model"
  132 + git push http://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/wasm-speech-enhancement-gtcrn.git
  133 +
  134 + - name: Publish to huggingface
  135 + env:
  136 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  137 + uses: nick-fields/retry@v2
  138 + with:
  139 + max_attempts: 20
  140 + timeout_seconds: 200
  141 + shell: bash
  142 + command: |
  143 + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
  144 +
  145 + git config --global user.email "csukuangfj@gmail.com"
  146 + git config --global user.name "Fangjun Kuang"
  147 +
  148 + rm -rf huggingface
  149 + export GIT_LFS_SKIP_SMUDGE=1
  150 + export GIT_CLONE_PROTECTION_ACTIVE=false
  151 +
  152 + git clone https://huggingface.co/spaces/k2-fsa/wasm-speech-enhancement-gtcrn huggingface
  153 + cd huggingface
  154 + rm -fv *.js
  155 + rm -fv *.data
  156 + git fetch
  157 + git pull
  158 + git merge -m "merge remote" --ff origin main
  159 +
  160 + cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .
  161 +
  162 + git status
  163 + git lfs track "*.data"
  164 + git lfs track "*.wasm"
  165 + ls -lh
  166 +
  167 + git add .
  168 + git commit -m "update model"
  169 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/wasm-speech-enhancement-gtcrn main
@@ -38,6 +38,7 @@ option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF) @@ -38,6 +38,7 @@ option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
38 option(SHERPA_ONNX_ENABLE_WASM_VAD "Whether to enable WASM for VAD" OFF) 38 option(SHERPA_ONNX_ENABLE_WASM_VAD "Whether to enable WASM for VAD" OFF)
39 option(SHERPA_ONNX_ENABLE_WASM_VAD_ASR "Whether to enable WASM for VAD+ASR" OFF) 39 option(SHERPA_ONNX_ENABLE_WASM_VAD_ASR "Whether to enable WASM for VAD+ASR" OFF)
40 option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF) 40 option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF)
  41 +option(SHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT "Whether to enable WASM for speech enhancement" OFF)
41 option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) 42 option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
42 option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON) 43 option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON)
43 option(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION "Whether to build speaker diarization related code" ON) 44 option(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION "Whether to build speaker diarization related code" ON)
@@ -149,6 +150,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}") @@ -149,6 +150,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}")
149 message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD ${SHERPA_ONNX_ENABLE_WASM_VAD}") 150 message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD ${SHERPA_ONNX_ENABLE_WASM_VAD}")
150 message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD_ASR ${SHERPA_ONNX_ENABLE_WASM_VAD_ASR}") 151 message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD_ASR ${SHERPA_ONNX_ENABLE_WASM_VAD_ASR}")
151 message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}") 152 message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}")
  153 +message(STATUS "SHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT ${SHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT}")
152 message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}") 154 message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}")
153 message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}") 155 message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}")
154 message(STATUS "SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION ${SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION}") 156 message(STATUS "SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION ${SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION}")
@@ -261,6 +263,12 @@ if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR) @@ -261,6 +263,12 @@ if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
261 endif() 263 endif()
262 endif() 264 endif()
263 265
  266 +if(SHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT)
  267 + if(NOT SHERPA_ONNX_ENABLE_WASM)
  268 + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for speech enhancement")
  269 + endif()
  270 +endif()
  271 +
264 if(NOT CMAKE_CXX_STANDARD) 272 if(NOT CMAKE_CXX_STANDARD)
265 set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to be used.") 273 set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to be used.")
266 endif() 274 endif()
@@ -29,6 +29,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN" @@ -29,6 +29,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN"
29 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then 29 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
30 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" 30 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
31 echo "Please make sure you have installed emsdk correctly" 31 echo "Please make sure you have installed emsdk correctly"
  32 + echo "Hint: emsdk 3.1.53 is known to work. Other versions may not work"
32 exit 1 33 exit 1
33 fi 34 fi
34 35
@@ -24,6 +24,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN" @@ -24,6 +24,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN"
24 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then 24 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
25 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" 25 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
26 echo "Please make sure you have installed emsdk correctly" 26 echo "Please make sure you have installed emsdk correctly"
  27 + echo "Hint: emsdk 3.1.53 is known to work. Other versions may not work"
27 exit 1 28 exit 1
28 fi 29 fi
29 30
@@ -31,6 +31,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN" @@ -31,6 +31,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN"
31 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then 31 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
32 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" 32 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
33 echo "Please make sure you have installed emsdk correctly" 33 echo "Please make sure you have installed emsdk correctly"
  34 + echo "Hint: emsdk 3.1.53 is known to work. Other versions may not work"
34 exit 1 35 exit 1
35 fi 36 fi
36 37
@@ -29,6 +29,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN" @@ -29,6 +29,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN"
29 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then 29 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
30 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" 30 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
31 echo "Please make sure you have installed emsdk correctly" 31 echo "Please make sure you have installed emsdk correctly"
  32 + echo "Hint: emsdk 3.1.53 is known to work. Other versions may not work"
32 exit 1 33 exit 1
33 fi 34 fi
34 35
  1 +#!/usr/bin/env bash
  2 +# Copyright (c) 2025 Xiaomi Corporation
  3 +#
  4 +# This script is to build sherpa-onnx for WebAssembly (Speech Enhancement)
  5 +
  6 +set -ex
  7 +
  8 +if [ x"$EMSCRIPTEN" == x"" ]; then
  9 + if ! command -v emcc &> /dev/null; then
  10 + echo "Please install emscripten first"
  11 + echo ""
  12 + echo "You can use the following commands to install it:"
  13 + echo ""
  14 + echo "git clone https://github.com/emscripten-core/emsdk.git"
  15 + echo "cd emsdk"
  16 + echo "git pull"
  17 + echo "./emsdk install 3.1.53"
  18 + echo "./emsdk activate 3.1.53"
  19 + echo "source ./emsdk_env.sh"
  20 + exit 1
  21 + else
  22 + EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
  23 + emcc --version
  24 + fi
  25 +fi
  26 +
  27 +export EMSCRIPTEN=$EMSCRIPTEN
  28 +echo "EMSCRIPTEN: $EMSCRIPTEN"
  29 +if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
  30 + echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
  31 + echo "Please make sure you have installed emsdk correctly"
  32 + echo "Hint: emsdk 3.1.53 is known to work. Other versions may not work"
  33 + exit 1
  34 +fi
  35 +
  36 +mkdir -p build-wasm-simd-speech-enhancement
  37 +pushd build-wasm-simd-speech-enhancement
  38 +
  39 +export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON
  40 +
  41 +cmake \
  42 + -DCMAKE_INSTALL_PREFIX=./install \
  43 + -DCMAKE_BUILD_TYPE=Release \
  44 + -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \
  45 + \
  46 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  47 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  48 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  49 + -DBUILD_SHARED_LIBS=OFF \
  50 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  51 + -DSHERPA_ONNX_ENABLE_JNI=OFF \
  52 + -DSHERPA_ONNX_ENABLE_C_API=ON \
  53 + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
  54 + -DSHERPA_ONNX_ENABLE_GPU=OFF \
  55 + -DSHERPA_ONNX_ENABLE_WASM=ON \
  56 + -DSHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT=ON \
  57 + -DSHERPA_ONNX_ENABLE_BINARY=OFF \
  58 + -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
  59 + ..
  60 +make -j2
  61 +make install
  62 +
  63 +ls -lh install/bin/wasm/speech-enhancement
@@ -29,6 +29,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN" @@ -29,6 +29,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN"
29 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then 29 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
30 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" 30 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
31 echo "Please make sure you have installed emsdk correctly" 31 echo "Please make sure you have installed emsdk correctly"
  32 + echo "Hint: emsdk 3.1.53 is known to work. Other versions may not work"
32 exit 1 33 exit 1
33 fi 34 fi
34 35
@@ -30,6 +30,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN" @@ -30,6 +30,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN"
30 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then 30 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
31 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" 31 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
32 echo "Please make sure you have installed emsdk correctly" 32 echo "Please make sure you have installed emsdk correctly"
  33 + echo "Hint: emsdk 3.1.53 is known to work. Other versions may not work"
33 exit 1 34 exit 1
34 fi 35 fi
35 36
@@ -29,6 +29,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN" @@ -29,6 +29,7 @@ echo "EMSCRIPTEN: $EMSCRIPTEN"
29 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then 29 if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
30 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" 30 echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
31 echo "Please make sure you have installed emsdk correctly" 31 echo "Please make sure you have installed emsdk correctly"
  32 + echo "Hint: emsdk 3.1.53 is known to work. Other versions may not work"
32 exit 1 33 exit 1
33 fi 34 fi
34 35
@@ -18,6 +18,10 @@ if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR) @@ -18,6 +18,10 @@ if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
18 add_subdirectory(vad-asr) 18 add_subdirectory(vad-asr)
19 endif() 19 endif()
20 20
  21 +if(SHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT)
  22 + add_subdirectory(speech-enhancement)
  23 +endif()
  24 +
21 if(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION) 25 if(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION)
22 add_subdirectory(speaker-diarization) 26 add_subdirectory(speaker-diarization)
23 endif() 27 endif()
@@ -5,7 +5,6 @@ @@ -5,7 +5,6 @@
5 const startBtn = document.getElementById('startBtn'); 5 const startBtn = document.getElementById('startBtn');
6 const stopBtn = document.getElementById('stopBtn'); 6 const stopBtn = document.getElementById('stopBtn');
7 const clearBtn = document.getElementById('clearBtn'); 7 const clearBtn = document.getElementById('clearBtn');
8 -const hint = document.getElementById('hint');  
9 const soundClips = document.getElementById('sound-clips'); 8 const soundClips = document.getElementById('sound-clips');
10 9
11 let textArea = document.getElementById('results'); 10 let textArea = document.getElementById('results');
@@ -16,7 +15,7 @@ let resultList = []; @@ -16,7 +15,7 @@ let resultList = [];
16 clearBtn.onclick = function() { 15 clearBtn.onclick = function() {
17 resultList = []; 16 resultList = [];
18 textArea.value = getDisplayResult(); 17 textArea.value = getDisplayResult();
19 - textArea.scrollTop = textArea.scrollHeight; // auto scroll 18 + textArea.scrollTop = textArea.scrollHeight; // auto scroll
20 }; 19 };
21 20
22 function getDisplayResult() { 21 function getDisplayResult() {
@@ -37,11 +36,39 @@ function getDisplayResult() { @@ -37,11 +36,39 @@ function getDisplayResult() {
37 return ans; 36 return ans;
38 } 37 }
39 38
40 -  
41 Module = {}; 39 Module = {};
  40 +
  41 +// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
  42 +Module.locateFile = function(path, scriptDirectory = '') {
  43 + console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`);
  44 + return scriptDirectory + path;
  45 +};
  46 +
  47 +// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
  48 +Module.setStatus = function(status) {
  49 + console.log(`status ${status}`);
  50 + const statusElement = document.getElementById('status');
  51 + if (status == "Running...") {
  52 + status = 'Model downloaded. Initializing recognizer...'
  53 + }
  54 + statusElement.textContent = status;
  55 + if (status === '') {
  56 + statusElement.style.display = 'none';
  57 + // statusElement.parentNode.removeChild(statusElement);
  58 +
  59 + document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
  60 + tabContentElement.classList.remove('loading');
  61 + });
  62 + } else {
  63 + statusElement.style.display = 'block';
  64 + document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
  65 + tabContentElement.classList.add('loading');
  66 + });
  67 + }
  68 +};
  69 +
42 Module.onRuntimeInitialized = function() { 70 Module.onRuntimeInitialized = function() {
43 console.log('inited!'); 71 console.log('inited!');
44 - hint.innerText = 'Model loaded! Please click start';  
45 72
46 startBtn.disabled = false; 73 startBtn.disabled = false;
47 74
@@ -53,11 +80,11 @@ let audioCtx; @@ -53,11 +80,11 @@ let audioCtx;
53 let mediaStream; 80 let mediaStream;
54 81
55 let expectedSampleRate = 16000; 82 let expectedSampleRate = 16000;
56 -let recordSampleRate; // the sampleRate of the microphone  
57 -let recorder = null; // the microphone  
58 -let leftchannel = []; // TODO: Use a single channel 83 +let recordSampleRate; // the sampleRate of the microphone
  84 +let recorder = null; // the microphone
  85 +let leftchannel = []; // TODO: Use a single channel
59 86
60 -let recordingLength = 0; // number of samples so far 87 +let recordingLength = 0; // number of samples so far
61 88
62 let recognizer = null; 89 let recognizer = null;
63 let recognizer_stream = null; 90 let recognizer_stream = null;
@@ -66,11 +93,11 @@ if (navigator.mediaDevices.getUserMedia) { @@ -66,11 +93,11 @@ if (navigator.mediaDevices.getUserMedia) {
66 console.log('getUserMedia supported.'); 93 console.log('getUserMedia supported.');
67 94
68 // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia 95 // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
69 - const constraints = {audio: true}; 96 + const constraints = {audio : true};
70 97
71 let onSuccess = function(stream) { 98 let onSuccess = function(stream) {
72 if (!audioCtx) { 99 if (!audioCtx) {
73 - audioCtx = new AudioContext({sampleRate: 16000}); 100 + audioCtx = new AudioContext({sampleRate : 16000});
74 } 101 }
75 console.log(audioCtx); 102 console.log(audioCtx);
76 recordSampleRate = audioCtx.sampleRate; 103 recordSampleRate = audioCtx.sampleRate;
@@ -120,7 +147,6 @@ if (navigator.mediaDevices.getUserMedia) { @@ -120,7 +147,6 @@ if (navigator.mediaDevices.getUserMedia) {
120 result = recognizer.getResult(recognizer_stream).text; 147 result = recognizer.getResult(recognizer_stream).text;
121 } 148 }
122 149
123 -  
124 if (result.length > 0 && lastResult != result) { 150 if (result.length > 0 && lastResult != result) {
125 lastResult = result; 151 lastResult = result;
126 } 152 }
@@ -134,7 +160,7 @@ if (navigator.mediaDevices.getUserMedia) { @@ -134,7 +160,7 @@ if (navigator.mediaDevices.getUserMedia) {
134 } 160 }
135 161
136 textArea.value = getDisplayResult(); 162 textArea.value = getDisplayResult();
137 - textArea.scrollTop = textArea.scrollHeight; // auto scroll 163 + textArea.scrollTop = textArea.scrollHeight; // auto scroll
138 164
139 let buf = new Int16Array(samples.length); 165 let buf = new Int16Array(samples.length);
140 for (var i = 0; i < samples.length; ++i) { 166 for (var i = 0; i < samples.length; ++i) {
@@ -221,9 +247,8 @@ if (navigator.mediaDevices.getUserMedia) { @@ -221,9 +247,8 @@ if (navigator.mediaDevices.getUserMedia) {
221 }; 247 };
222 }; 248 };
223 249
224 - let onError = function(err) {  
225 - console.log('The following error occured: ' + err);  
226 - }; 250 + let onError = function(
  251 + err) { console.log('The following error occurred: ' + err); };
227 252
228 navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError); 253 navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
229 } else { 254 } else {
@@ -231,7 +256,6 @@ if (navigator.mediaDevices.getUserMedia) { @@ -231,7 +256,6 @@ if (navigator.mediaDevices.getUserMedia) {
231 alert('getUserMedia not supported on your browser!'); 256 alert('getUserMedia not supported on your browser!');
232 } 257 }
233 258
234 -  
235 // this function is copied/modified from 259 // this function is copied/modified from
236 // https://gist.github.com/meziantou/edb7217fddfbb70e899e 260 // https://gist.github.com/meziantou/edb7217fddfbb70e899e
237 function flatten(listOfSamples) { 261 function flatten(listOfSamples) {
@@ -257,22 +281,22 @@ function toWav(samples) { @@ -257,22 +281,22 @@ function toWav(samples) {
257 281
258 // http://soundfile.sapp.org/doc/WaveFormat/ 282 // http://soundfile.sapp.org/doc/WaveFormat/
259 // F F I R 283 // F F I R
260 - view.setUint32(0, 0x46464952, true); // chunkID  
261 - view.setUint32(4, 36 + samples.length * 2, true); // chunkSize 284 + view.setUint32(0, 0x46464952, true); // chunkID
  285 + view.setUint32(4, 36 + samples.length * 2, true); // chunkSize
262 // E V A W 286 // E V A W
263 - view.setUint32(8, 0x45564157, true); // format  
264 - // 287 + view.setUint32(8, 0x45564157, true); // format
  288 + //
265 // t m f 289 // t m f
266 - view.setUint32(12, 0x20746d66, true); // subchunk1ID  
267 - view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM  
268 - view.setUint32(20, 1, true); // audioFormat, 1 for PCM  
269 - view.setUint16(22, 1, true); // numChannels: 1 channel  
270 - view.setUint32(24, expectedSampleRate, true); // sampleRate  
271 - view.setUint32(28, expectedSampleRate * 2, true); // byteRate  
272 - view.setUint16(32, 2, true); // blockAlign  
273 - view.setUint16(34, 16, true); // bitsPerSample  
274 - view.setUint32(36, 0x61746164, true); // Subchunk2ID  
275 - view.setUint32(40, samples.length * 2, true); // subchunk2Size 290 + view.setUint32(12, 0x20746d66, true); // subchunk1ID
  291 + view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  292 + view.setUint32(20, 1, true); // audioFormat, 1 for PCM
  293 + view.setUint16(22, 1, true); // numChannels: 1 channel
  294 + view.setUint32(24, expectedSampleRate, true); // sampleRate
  295 + view.setUint32(28, expectedSampleRate * 2, true); // byteRate
  296 + view.setUint16(32, 2, true); // blockAlign
  297 + view.setUint16(34, 16, true); // bitsPerSample
  298 + view.setUint32(36, 0x61746164, true); // Subchunk2ID
  299 + view.setUint32(40, samples.length * 2, true); // subchunk2Size
276 300
277 let offset = 44; 301 let offset = 44;
278 for (let i = 0; i < samples.length; ++i) { 302 for (let i = 0; i < samples.length; ++i) {
@@ -280,7 +304,7 @@ function toWav(samples) { @@ -280,7 +304,7 @@ function toWav(samples) {
280 offset += 2; 304 offset += 2;
281 } 305 }
282 306
283 - return new Blob([view], {type: 'audio/wav'}); 307 + return new Blob([ view ], {type : 'audio/wav'});
284 } 308 }
285 309
286 // this function is copied from 310 // this function is copied from
@@ -11,30 +11,70 @@ @@ -11,30 +11,70 @@
11 textarea { 11 textarea {
12 width:100%; 12 width:100%;
13 } 13 }
  14 + .loading {
  15 + display: none !important;
  16 + }
14 </style> 17 </style>
15 </head> 18 </head>
16 19
17 -<body> 20 +<body style="font-family: 'Source Sans Pro', sans-serif; background-color: #f9fafb; color: #333; display: flex; flex-direction: column; align-items: center; height: 100vh; margin: 0;">
18 <h1> 21 <h1>
19 Next-gen Kaldi + WebAssembly<br/> 22 Next-gen Kaldi + WebAssembly<br/>
20 ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/> 23 ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
21 (with Zipformer) 24 (with Zipformer)
22 </h1> 25 </h1>
23 26
24 - <div>  
25 - <span id="hint">Loading model ... ...</span>  
26 - <br/>  
27 - <br/>  
28 - <button id="startBtn" disabled>Start</button>  
29 - <button id="stopBtn" disabled>Stop</button>  
30 - <button id="clearBtn">Clear</button>  
31 - <br/>  
32 - <br/>  
33 - <textarea id="results" rows="10" readonly></textarea> 27 + <div style="width: 100%; max-width: 900px; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); flex: 1;">
  28 + <div id="status">Loading...</div>
  29 +
  30 + <div id="singleAudioContent" class="tab-content loading">
  31 + <div style="display: flex; gap: 1.5rem;">
  32 + <div style="flex: 1; display: flex; flex-direction: row; align-items: center; gap: 1rem;">
  33 + <button id="startBtn" disabled>Start</button>
  34 + <button id="stopBtn" disabled>Stop</button>
  35 + <button id="clearBtn">Clear</button>
  36 + </div>
  37 + </div>
  38 +
  39 + <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
  40 + <div style="font-size: 1rem; font-weight: bold; padding: 0.5rem 1rem; background-color: #f8f9fa; border-radius: 8px; color: #6c757d;">Transcript</div>
  41 + <textarea id="results" rows="10" placeholder="Output will appear here..." readonly style="flex: 1; padding: 0.75rem; font-size: 1rem; border: 1px solid #ced4da; border-radius: 8px; resize: none; background-color: #f8f9fa;"></textarea>
  42 + </div>
  43 + </div>
  44 +
  45 + <section flex="1" overflow="auto" id="sound-clips">
  46 + </section>
  47 +
34 </div> 48 </div>
35 49
36 - <section flex="1" overflow="auto" id="sound-clips">  
37 - </section> 50 + <!-- Footer Section -->
  51 + <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
  52 + <h3>Description</h3>
  53 + <ul>
  54 + <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
  55 + <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
  56 + <ul>
  57 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
  58 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
  59 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
  60 + </ul>
  61 + </ul>
  62 + <h3>About This Demo</h3>
  63 + <ul>
  64 + <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
  65 + <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li>
  66 + </ul>
  67 + <h3>Latest Update</h3>
  68 + <ul>
  69 + <li>Update UI.</li>
  70 + <li>First working version.</li>
  71 + </ul>
  72 +
  73 + <h3>Acknowledgement</h3>
  74 + <ul>
  75 + <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
  76 + </ul>
  77 + </div>
38 78
39 <script src="sherpa-onnx-asr.js"></script> 79 <script src="sherpa-onnx-asr.js"></script>
40 <script src="app-asr.js"></script> 80 <script src="app-asr.js"></script>
@@ -84,6 +84,7 @@ set(exported_functions @@ -84,6 +84,7 @@ set(exported_functions
84 # 84 #
85 SherpaOnnxFileExists 85 SherpaOnnxFileExists
86 SherpaOnnxReadWave 86 SherpaOnnxReadWave
  87 + SherpaOnnxReadWaveFromBinaryData
87 SherpaOnnxFreeWave 88 SherpaOnnxFreeWave
88 SherpaOnnxWriteWave 89 SherpaOnnxWriteWave
89 ) 90 )
@@ -23,6 +23,36 @@ function readWave(filename, Module) { @@ -23,6 +23,36 @@ function readWave(filename, Module) {
23 23
24 Module._SherpaOnnxFreeWave(w); 24 Module._SherpaOnnxFreeWave(w);
25 25
  26 + return {samples: samples, sampleRate: sampleRate};
  27 +}
  28 +
  29 +function readWaveFromBinaryData(uint8Array) {
  30 + const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT;
  31 + const pointer = this.Module._malloc(numBytes);
  32 +
  33 + const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes);
  34 + dataOnHeap.set(uint8Array);
  35 +
  36 + const w = this.Module._SherpaOnnxReadWaveFromBinaryData(
  37 + dataOnHeap.byteOffset, numBytes);
  38 + if (w == 0) {
  39 + console.log('Failed to read wave from binary data');
  40 + return null;
  41 + }
  42 +
  43 + this.Module._free(pointer);
  44 +
  45 + const samplesPtr = Module.HEAP32[w / 4] / 4;
  46 + const sampleRate = Module.HEAP32[w / 4 + 1];
  47 + const numSamples = Module.HEAP32[w / 4 + 2];
  48 +
  49 + const samples = new Float32Array(numSamples);
  50 + for (let i = 0; i < numSamples; i++) {
  51 + samples[i] = Module.HEAPF32[samplesPtr + i];
  52 + }
  53 +
  54 + Module._SherpaOnnxFreeWave(w);
  55 +
26 56
27 return {samples: samples, sampleRate: sampleRate}; 57 return {samples: samples, sampleRate: sampleRate};
28 } 58 }
@@ -53,5 +83,6 @@ if (typeof process == 'object' && typeof process.versions == 'object' && @@ -53,5 +83,6 @@ if (typeof process == 'object' && typeof process.versions == 'object' &&
53 module.exports = { 83 module.exports = {
54 readWave, 84 readWave,
55 writeWave, 85 writeWave,
  86 + readWaveFromBinaryData,
56 }; 87 };
57 } 88 }
  1 +if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  2 + message(FATAL_ERROR "Please use ./build-wasm-simd-speech-enhancement.sh to build for wasm speech enhancement")
  3 +endif()
  4 +
  5 +if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/gtcrn.onnx")
  6 + message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
  7 +endif()
  8 +
  9 +set(exported_functions
  10 + MyPrint
  11 + SherpaOnnxCreateOfflineSpeechDenoiser
  12 + SherpaOnnxDestroyOfflineSpeechDenoiser
  13 + SherpaOnnxOfflineSpeechDenoiserGetSampleRate
  14 + SherpaOnnxOfflineSpeechDenoiserRun
  15 + SherpaOnnxDestroyDenoisedAudio
  16 + SherpaOnnxWriteWave
  17 + SherpaOnnxReadWave
  18 + SherpaOnnxReadWaveFromBinaryData
  19 + SherpaOnnxFreeWave
  20 +)
  21 +set(mangled_exported_functions)
  22 +foreach(x IN LISTS exported_functions)
  23 + list(APPEND mangled_exported_functions "_${x}")
  24 +endforeach()
  25 +list(JOIN mangled_exported_functions "," all_exported_functions)
  26 +
  27 +
  28 +include_directories(${CMAKE_SOURCE_DIR})
  29 +set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=128MB -s ALLOW_MEMORY_GROWTH=1")
  30 +string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
  31 +string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
  32 +string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
  33 +string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
  34 +
  35 +message(STATUS "MY_FLAGS: ${MY_FLAGS}")
  36 +
  37 +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
  38 +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
  39 +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}")
  40 +
  41 +if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
  42 + message(FATAL_ERROR "The default suffix for building executables should be .js!")
  43 +endif()
  44 +# set(CMAKE_EXECUTABLE_SUFFIX ".html")
  45 +
  46 +add_executable(sherpa-onnx-wasm-main-speech-enhancement sherpa-onnx-wasm-main-speech-enhancement.cc)
  47 +target_link_libraries(sherpa-onnx-wasm-main-speech-enhancement sherpa-onnx-c-api)
  48 +install(TARGETS sherpa-onnx-wasm-main-speech-enhancement DESTINATION bin/wasm/speech-enhancement)
  49 +
  50 +install(
  51 + FILES
  52 + "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-speech-enhancement>/sherpa-onnx-wasm-main-speech-enhancement.js"
  53 + "index.html"
  54 + "sherpa-onnx-speech-enhancement.js"
  55 + "../nodejs/sherpa-onnx-wave.js"
  56 + "app-speech-enhancement.js"
  57 + "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-speech-enhancement>/sherpa-onnx-wasm-main-speech-enhancement.wasm"
  58 + "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-speech-enhancement>/sherpa-onnx-wasm-main-speech-enhancement.data"
  59 + DESTINATION
  60 + bin/wasm/speech-enhancement
  61 +)
  1 +
  2 +const fileInput = document.getElementById('fileInput');
  3 +
  4 +let speech_denoiser = null;
  5 +const inAudioPlayback = document.getElementById('inAudioPlayback');
  6 +const outAudioPlayback = document.getElementById('outAudioPlayback');
  7 +
  8 +Module = {};
  9 +
  10 +// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
  11 +Module.locateFile = function(path, scriptDirectory = '') {
  12 + console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`);
  13 + return scriptDirectory + path;
  14 +};
  15 +
  16 +// https://emscripten.org/docs/api_reference/module.html#Module.setStatus
  17 +Module.setStatus = function(status) {
  18 + console.log(`status ${status}`);
  19 + const statusElement = document.getElementById('status');
  20 + statusElement.textContent = status;
  21 + if (status === '') {
  22 + statusElement.style.display = 'none';
  23 + document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
  24 + tabContentElement.classList.remove('loading');
  25 + });
  26 + } else {
  27 + statusElement.style.display = 'block';
  28 + document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
  29 + tabContentElement.classList.add('loading');
  30 + });
  31 + }
  32 +};
  33 +
  34 +Module.onRuntimeInitialized = function() {
  35 + console.log('Model files downloaded!');
  36 +
  37 + console.log('Initializing speech denoiser ......');
  38 + speech_denoiser = createOfflineSpeechDenoiser(Module)
  39 +};
  40 +
  41 +async function process(wave) {
  42 + let denoised = speech_denoiser.run(wave.samples, wave.sampleRate);
  43 + console.log(denoised);
  44 +
  45 + let int16Samples = new Int16Array(denoised.samples.length);
  46 + for (var i = 0; i < denoised.samples.length; ++i) {
  47 + let s = denoised.samples[i];
  48 + if (s >= 1)
  49 + s = 1;
  50 + else if (s <= -1)
  51 + s = -1;
  52 +
  53 + int16Samples[i] = s * 32767;
  54 + }
  55 +
  56 + let blob = toWav(int16Samples, denoised.sampleRate);
  57 + const objectUrl = URL.createObjectURL(blob);
  58 + console.log(objectUrl);
  59 +
  60 + outAudioPlayback.src = objectUrl;
  61 + outAudioPlayback.controls = true;
  62 + outAudioPlayback.style.display = 'block';
  63 +}
  64 +
  65 +fileInput.addEventListener('change', function(event) {
  66 + if (!event.target.files || !event.target.files[0]) {
  67 + console.log('No file selected.');
  68 + return;
  69 + }
  70 +
  71 + const file = event.target.files[0];
  72 + console.log('Selected file:', file.name, file.type, file.size, 'bytes');
  73 + const reader = new FileReader();
  74 + reader.onload = function(ev) {
  75 + console.log('FileReader onload called.');
  76 + const arrayBuffer = ev.target.result;
  77 + console.log('ArrayBuffer length:', arrayBuffer.byteLength);
  78 +
  79 + const uint8Array = new Uint8Array(arrayBuffer);
  80 + const wave = readWaveFromBinaryData(uint8Array);
  81 + if (wave == null) {
  82 + alert(
  83 + `${file.name} is not a valid .wav file. Please select a *.wav file`);
  84 + return;
  85 + }
  86 +
  87 +
  88 + var url = URL.createObjectURL(file);
  89 + console.log(`url: ${url}`);
  90 + inAudioPlayback.src = url;
  91 + inAudioPlayback.style.display = 'block';
  92 +
  93 + process(wave).then(
  94 + () => console.log('process done'));
  95 + };
  96 + reader.onerror = function(err) {
  97 + console.error('FileReader error:', err);
  98 + };
  99 + console.log('Starting FileReader.readAsArrayBuffer...');
  100 + reader.readAsArrayBuffer(file);
  101 +});
  102 +
  103 +// this function is copied/modified from
  104 +// https://gist.github.com/meziantou/edb7217fddfbb70e899e
  105 +function toWav(samples, sampleRate) {
  106 + let buf = new ArrayBuffer(44 + samples.length * 2);
  107 + var view = new DataView(buf);
  108 +
  109 + // http://soundfile.sapp.org/doc/WaveFormat/
  110 + // F F I R
  111 + view.setUint32(0, 0x46464952, true); // chunkID
  112 + view.setUint32(4, 36 + samples.length * 2, true); // chunkSize
  113 + // E V A W
  114 + view.setUint32(8, 0x45564157, true); // format
  115 + //
  116 + // t m f
  117 + view.setUint32(12, 0x20746d66, true); // subchunk1ID
  118 + view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  119 + view.setUint32(20, 1, true); // audioFormat, 1 for PCM
  120 + view.setUint16(22, 1, true); // numChannels: 1 channel
  121 + view.setUint32(24, sampleRate, true); // sampleRate
  122 + view.setUint32(28, sampleRate * 2, true); // byteRate
  123 + view.setUint16(32, 2, true); // blockAlign
  124 + view.setUint16(34, 16, true); // bitsPerSample
  125 + view.setUint32(36, 0x61746164, true); // Subchunk2ID
  126 + view.setUint32(40, samples.length * 2, true); // subchunk2Size
  127 +
  128 + let offset = 44;
  129 + for (let i = 0; i < samples.length; ++i) {
  130 + view.setInt16(offset, samples[i], true);
  131 + offset += 2;
  132 + }
  133 +
  134 + return new Blob([view], {type: 'audio/wav'});
  135 +}
  1 +# Introduction
  2 +
  3 +## Huggingface space
  4 +
  5 +You can visit https://huggingface.co/spaces/k2-fsa/wasm-speech-enhancement-gtcrn
  6 +to try it in your browser without building or installing anything.
  7 +
  8 +You can also visit
  9 +https://modelscope.cn/studios/csukuangfj/wasm-speech-enhancement-gtcrn
  10 +
  11 +## Usage
  12 +
  13 +Please refer to
  14 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  15 +to download a model.
  16 +
  17 +The following is an example:
  18 +
  19 +```bash
  20 +cd sherpa-onnx/wasm/speech-enhancement/assets
  21 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
  22 +
  23 +mv gtcrn_simple.onnx gtcrn.onnx
  24 +```
  25 +
  26 +You should have the following files in `assets` before you can run
  27 +`build-wasm-simd-speech-enhancement.sh`
  28 +
  29 +```
  30 +(py38) fangjuns-MacBook-Pro:assets fangjun$ tree .
  31 +.
  32 +├── README.md
  33 +└── gtcrn.onnx
  34 +
  35 +0 directories, 2 files
  36 +(py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh
  37 +total 1056
  38 +-rw-r--r-- 1 fangjun staff 466B Mar 12 16:13 README.md
  39 +-rw-r--r-- 1 fangjun staff 523K Mar 12 16:14 gtcrn.onnx
  40 +```
  1 +<html lang="en">
  2 +
  3 +<!--
  4 +The UI code is modified from
  5 +https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm
  6 +-->
  7 +
  8 +<head>
  9 + <meta charset="utf-8">
  10 + <meta name="viewport" content="width=device-width" />
  11 + <title>Next-gen Kaldi WebAssembly with sherpa-onnx for speech enhancement</title>
  12 + <style>
  13 + h1,div {
  14 + text-align: center;
  15 + }
  16 + textarea {
  17 + width:100%;
  18 + }
  19 + .loading {
  20 + display: none !important;
  21 + }
  22 + </style>
  23 +</head>
  24 +
  25 +<body>
  26 + <h1>
  27 + Next-gen Kaldi + WebAssembly<br/>
  28 + Speech Enhancement with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
  29 + using <a href="https://github.com/Xiaobin-Rong/gtcrn">GTCRN</a>
  30 + </h1>
  31 +
  32 + <div id="status">Loading...</div>
  33 +
  34 + <div id="singleAudioContent" class="tab-content loading">
  35 + <div style="display: flex; gap: 1.5rem;">
  36 + <!-- Input Section -->
  37 + <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
  38 + <div style="font-size: 1rem; font-weight: bold; padding: 0.5rem 1rem; background-color: #f8f9fa; border-radius: 8px; display: flex; align-items: center; gap: 0.5rem; color: #6c757d;">
  39 + <span style="line-height: 1;">🎵</span> Input
  40 + </div>
  41 +
  42 + <!-- Drag and Drop / File Upload -->
  43 + <div id="dropzone" style="border: 2px dashed #ced4da; border-radius: 8px; padding: 2rem; text-align: center; color: #6c757d; cursor: pointer; background-color: #f8f9fa; transition: background-color 0.3s, border-color 0.3s; position: relative;">
  44 + <input type="file" id="fileInput" accept=".wav" style="position: absolute; top: 0; left: 0; opacity: 0; width: 100%; height: 100%; cursor: pointer;" />
  45 + <p style="margin: 0;">Drop Audio Here (*.wav)<br>- or -<br>Click to Upload</p>
  46 + </div>
  47 + <audio id="inAudioPlayback" controls style="display: none; margin-top: 1rem; width: 100%;"></audio>
  48 + </div>
  49 + </div>
  50 +
  51 + <div style="display: flex; gap: 1.5rem;">
  52 + <!-- Output Section -->
  53 + <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
  54 + <div style="font-size: 1rem; font-weight: bold; padding: 0.5rem 1rem; background-color: #f8f9fa; border-radius: 8px; display: flex; align-items: center; gap: 0.5rem; color: #6c757d;">
  55 + <span style="line-height: 1;">🎵</span> Output
  56 + </div>
  57 + <audio id="outAudioPlayback" controls style="display: none; margin-top: 1rem; width: 100%;"></audio>
  58 + </div>
  59 + </div>
  60 +
  61 + <!-- Footer Section -->
  62 + <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
  63 + <h3>Description</h3>
  64 + <ul>
  65 + <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
  66 + <li>The model is from <a href="https://github.com/Xiaobin-Rong/gtcrn">GTCRN</a></li>
  67 + <li>Please upload .wav files</li>
  68 + <ul>
  69 + <li>You can download noisy test wave files from <a href="https://htmlpreview.github.io/?https://github.com/Xiaobin-Rong/gtcrn_demo/blob/main/index.html">https://htmlpreview.github.io/?https://github.com/Xiaobin-Rong/gtcrn_demo/blob/main/index.html</a></li>
  70 + </ul>
  71 + <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
  72 + <ul>
  73 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
  74 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
  75 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
  76 + </ul>
  77 + </ul>
  78 + <h3>About This Demo</h3>
  79 + <ul>
  80 + <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
  81 + <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for other applications.</li>
  82 + </ul>
  83 + <h3>Latest Update</h3>
  84 + <ul>
  85 + <li>First working version.</li>
  86 + </ul>
  87 +
  88 + <h3>Acknowledgement</h3>
  89 + <ul>
  90 + <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
  91 + </ul>
  92 + </div>
  93 +
  94 + <script src="app-speech-enhancement.js"></script>
  95 + <script src="sherpa-onnx-wave.js"></script>
  96 + <script src="sherpa-onnx-speech-enhancement.js"></script>
  97 + <script src="sherpa-onnx-wasm-main-speech-enhancement.js"></script>
  98 +</body>
  1 +function freeConfig(config, Module) {
  2 + if ('buffer' in config) {
  3 + Module._free(config.buffer);
  4 + }
  5 +
  6 + if ('config' in config) {
  7 + freeConfig(config.config, Module)
  8 + }
  9 +
  10 + if ('gtcrn' in config) {
  11 + freeConfig(config.gtcrn, Module)
  12 + }
  13 +
  14 + Module._free(config.ptr);
  15 +}
  16 +
  17 +function initSherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(config, Module) {
  18 + if (!('model' in config)) {
  19 + config.model = '';
  20 + }
  21 +
  22 + const modelLen = Module.lengthBytesUTF8(config.model) + 1;
  23 +
  24 + const n = modelLen;
  25 +
  26 + const buffer = Module._malloc(n);
  27 +
  28 + const len = 1 * 4;
  29 + const ptr = Module._malloc(len);
  30 +
  31 + let offset = 0;
  32 + Module.stringToUTF8(config.model, buffer + offset, modelLen);
  33 + offset += modelLen;
  34 +
  35 + offset = 0;
  36 + Module.setValue(ptr, buffer + offset, 'i8*');
  37 + offset += modelLen;
  38 +
  39 + return {
  40 + buffer: buffer, ptr: ptr, len: len,
  41 + }
  42 +}
  43 +
  44 +function initSherpaOnnxOfflineSpeechDenoiserModelConfig(config, Module) {
  45 + if (!('gtcrn' in config)) {
  46 + config.gtcrn = {model: ''};
  47 + }
  48 +
  49 + const gtcrn =
  50 + initSherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(config.gtcrn, Module);
  51 +
  52 + const len = gtcrn.len + 3 * 4;
  53 + const ptr = Module._malloc(len);
  54 +
  55 + let offset = 0;
  56 + Module._CopyHeap(gtcrn.ptr, gtcrn.len, ptr + offset);
  57 + offset += gtcrn.len;
  58 +
  59 + Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  60 + offset += 4;
  61 +
  62 + Module.setValue(ptr + offset, config.debug || 0, 'i32');
  63 + offset += 4;
  64 +
  65 + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  66 + const buffer = Module._malloc(providerLen);
  67 + Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen);
  68 + Module.setValue(ptr + offset, buffer, 'i8*');
  69 + offset += 4;
  70 +
  71 + return {buffer: buffer, ptr: ptr, len: len, gtcrn: gtcrn};
  72 +}
  73 +
  74 +function initSherpaOnnxOfflineSpeechDenoiserConfig(config, Module) {
  75 + if (!('model' in config)) {
  76 + config.model = {
  77 + gtcrn: {model: ''},
  78 + provider: 'cpu',
  79 + debug: 1,
  80 + numThreads: 1,
  81 + };
  82 + }
  83 +
  84 + const modelConfig =
  85 + initSherpaOnnxOfflineSpeechDenoiserModelConfig(config.model, Module);
  86 + const len = modelConfig.len;
  87 + const ptr = Module._malloc(len);
  88 +
  89 + let offset = 0;
  90 + Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset);
  91 + offset += modelConfig.len;
  92 +
  93 + return {
  94 + ptr: ptr, len: len, config: modelConfig,
  95 + }
  96 +}
  97 +
  98 +class OfflineSpeechDenoiser {
  99 + constructor(configObj, Module) {
  100 + console.log(configObj)
  101 + const config = initSherpaOnnxOfflineSpeechDenoiserConfig(configObj, Module)
  102 + // Module._MyPrint(config.ptr);
  103 + const handle = Module._SherpaOnnxCreateOfflineSpeechDenoiser(config.ptr);
  104 +
  105 + freeConfig(config, Module);
  106 +
  107 + this.handle = handle;
  108 + this.sampleRate =
  109 + Module._SherpaOnnxOfflineSpeechDenoiserGetSampleRate(this.handle);
  110 + this.Module = Module
  111 + }
  112 +
  113 + free() {
  114 + this.Module._SherpaOnnxDestroyOfflineSpeechDenoiser(this.handle);
  115 + this.handle = 0
  116 + }
  117 +
  118 + /**
  119 + * @param samples {Float32Array} Containing samples in the range [-1, 1]
  120 + * @param sampleRate {Number}
  121 + */
  122 + run(samples, sampleRate) {
  123 + const pointer =
  124 + this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
  125 + this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
  126 + const h = this.Module._SherpaOnnxOfflineSpeechDenoiserRun(
  127 + this.handle, pointer, samples.length, sampleRate);
  128 + this.Module._free(pointer);
  129 +
  130 + const numSamples = this.Module.HEAP32[h / 4 + 1];
  131 + const denoisedSampleRate = this.Module.HEAP32[h / 4 + 2];
  132 +
  133 + const samplesPtr = this.Module.HEAP32[h / 4] / 4;
  134 + const denoisedSamples = new Float32Array(numSamples);
  135 + for (let i = 0; i < numSamples; i++) {
  136 + denoisedSamples[i] = this.Module.HEAPF32[samplesPtr + i];
  137 + }
  138 +
  139 + this.Module._SherpaOnnxDestroyDenoisedAudio(h);
  140 + return {samples: denoisedSamples, sampleRate: denoisedSampleRate};
  141 + }
  142 +
  143 + save(filename, audio) {
  144 + const samples = audio.samples;
  145 + const sampleRate = audio.sampleRate;
  146 + const ptr = this.Module._malloc(samples.length * 4);
  147 + for (let i = 0; i < samples.length; i++) {
  148 + this.Module.HEAPF32[ptr / 4 + i] = samples[i];
  149 + }
  150 +
  151 + const filenameLen = this.Module.lengthBytesUTF8(filename) + 1;
  152 + const buffer = this.Module._malloc(filenameLen);
  153 + this.Module.stringToUTF8(filename, buffer, filenameLen);
  154 + this.Module._SherpaOnnxWriteWave(ptr, samples.length, sampleRate, buffer);
  155 + this.Module._free(buffer);
  156 + this.Module._free(ptr);
  157 + }
  158 +}
  159 +
  160 +function createOfflineSpeechDenoiser(Module, myConfig) {
  161 + let config = {
  162 + model: {
  163 + gtcrn: {model: './gtcrn.onnx'},
  164 + debug: 0,
  165 + },
  166 + };
  167 +
  168 + if (myConfig) {
  169 + config = myConfig;
  170 + }
  171 +
  172 + return new OfflineSpeechDenoiser(config, Module);
  173 +}
  174 +
  175 +if (typeof process == 'object' && typeof process.versions == 'object' &&
  176 + typeof process.versions.node == 'string') {
  177 + module.exports = {
  178 + createOfflineSpeechDenoiser,
  179 + };
  180 +}
   1 +// wasm/speech-enhancement/sherpa-onnx-wasm-main-speech-enhancement.cc
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +#include <stdio.h>
  5 +
  6 +#include <algorithm>
  7 +#include <memory>
  8 +
  9 +#include "sherpa-onnx/c-api/c-api.h"
  10 +
  11 +// see also
  12 +// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html
  13 +
  14 +extern "C" {
  15 +
  16 +static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig) == 1 * 4,
  17 + "");
  18 +static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserModelConfig) ==
  19 + sizeof(SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig) +
  20 + 3 * 4,
  21 + "");
  22 +static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserConfig) ==
  23 + sizeof(SherpaOnnxOfflineSpeechDenoiserModelConfig),
  24 + "");
  25 +
  26 +void MyPrint(SherpaOnnxOfflineSpeechDenoiserConfig *config) {
  27 + auto model = &config->model;
  28 + auto gtcrn = &model->gtcrn;
  29 + fprintf(stdout, "----------offline speech denoiser model config----------\n");
  30 + fprintf(stdout, "gtcrn: %s\n", gtcrn->model);
  31 + fprintf(stdout, "num threads: %d\n", model->num_threads);
  32 + fprintf(stdout, "debug: %d\n", model->debug);
  33 + fprintf(stdout, "provider: %s\n", model->provider);
  34 +}
  35 +
  36 +void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  37 + std::copy(src, src + num_bytes, dst);
  38 +}
  39 +}
  1 +../nodejs/sherpa-onnx-wave.js
1 const generateBtn = document.getElementById('generateBtn'); 1 const generateBtn = document.getElementById('generateBtn');
2 -const hint = document.getElementById('hint');  
3 const speakerIdLabel = document.getElementById('speakerIdLabel'); 2 const speakerIdLabel = document.getElementById('speakerIdLabel');
4 const speakerIdInput = document.getElementById('speakerId'); 3 const speakerIdInput = document.getElementById('speakerId');
5 const speedInput = document.getElementById('speed'); 4 const speedInput = document.getElementById('speed');
@@ -11,13 +10,41 @@ speedValue.innerHTML = speedInput.value; @@ -11,13 +10,41 @@ speedValue.innerHTML = speedInput.value;
11 10
12 let index = 0; 11 let index = 0;
13 12
14 -  
15 let tts = null; 13 let tts = null;
16 14
17 let audioCtx = null; 15 let audioCtx = null;
18 16
19 -  
20 Module = {}; 17 Module = {};
  18 +
  19 +// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
  20 +Module.locateFile = function(path, scriptDirectory = '') {
  21 + console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`);
  22 + return scriptDirectory + path;
  23 +};
  24 +
  25 +// https://emscripten.org/docs/api_reference/module.html#Module.setStatus
  26 +Module.setStatus = function(status) {
  27 + console.log(`status ${status}`);
  28 + const statusElement = document.getElementById('status');
  29 + if (status == "Running...") {
  30 + status = 'Model downloaded. Initializing text to speech model...'
  31 + }
  32 + statusElement.textContent = status;
  33 + if (status === '') {
  34 + statusElement.style.display = 'none';
  35 + // statusElement.parentNode.removeChild(statusElement);
  36 +
  37 + document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
  38 + tabContentElement.classList.remove('loading');
  39 + });
  40 + } else {
  41 + statusElement.style.display = 'block';
  42 + document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
  43 + tabContentElement.classList.add('loading');
  44 + });
  45 + }
  46 +};
  47 +
21 Module.onRuntimeInitialized = function() { 48 Module.onRuntimeInitialized = function() {
22 console.log('Model files downloaded!'); 49 console.log('Model files downloaded!');
23 50
@@ -27,17 +54,10 @@ Module.onRuntimeInitialized = function() { @@ -27,17 +54,10 @@ Module.onRuntimeInitialized = function() {
27 speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`; 54 speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`;
28 } 55 }
29 56
30 - hint.innerText =  
31 - 'Initialized! Please enter text and click the Generate button.';  
32 -  
33 -  
34 -  
35 generateBtn.disabled = false; 57 generateBtn.disabled = false;
36 }; 58 };
37 59
38 -speedInput.oninput = function() {  
39 - speedValue.innerHTML = this.value;  
40 -}; 60 +speedInput.oninput = function() { speedValue.innerHTML = this.value; };
41 61
42 generateBtn.onclick = function() { 62 generateBtn.onclick = function() {
43 let speakerId = speakerIdInput.value; 63 let speakerId = speakerIdInput.value;
@@ -69,12 +89,12 @@ generateBtn.onclick = function() { @@ -69,12 +89,12 @@ generateBtn.onclick = function() {
69 console.log('text', text); 89 console.log('text', text);
70 90
71 let audio = 91 let audio =
72 - tts.generate({text: text, sid: speakerId, speed: speedInput.value}); 92 + tts.generate({text : text, sid : speakerId, speed : speedInput.value});
73 93
74 console.log(audio.samples.length, audio.sampleRate); 94 console.log(audio.samples.length, audio.sampleRate);
75 95
76 if (!audioCtx) { 96 if (!audioCtx) {
77 - audioCtx = new AudioContext({sampleRate: tts.sampleRate}); 97 + audioCtx = new AudioContext({sampleRate : tts.sampleRate});
78 } 98 }
79 99
80 const buffer = audioCtx.createBuffer(1, audio.samples.length, tts.sampleRate); 100 const buffer = audioCtx.createBuffer(1, audio.samples.length, tts.sampleRate);
@@ -155,22 +175,22 @@ function toWav(floatSamples, sampleRate) { @@ -155,22 +175,22 @@ function toWav(floatSamples, sampleRate) {
155 175
156 // http://soundfile.sapp.org/doc/WaveFormat/ 176 // http://soundfile.sapp.org/doc/WaveFormat/
157 // F F I R 177 // F F I R
158 - view.setUint32(0, 0x46464952, true); // chunkID  
159 - view.setUint32(4, 36 + samples.length * 2, true); // chunkSize 178 + view.setUint32(0, 0x46464952, true); // chunkID
  179 + view.setUint32(4, 36 + samples.length * 2, true); // chunkSize
160 // E V A W 180 // E V A W
161 - view.setUint32(8, 0x45564157, true); // format  
162 - // 181 + view.setUint32(8, 0x45564157, true); // format
  182 + //
163 // t m f 183 // t m f
164 - view.setUint32(12, 0x20746d66, true); // subchunk1ID  
165 - view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM  
166 - view.setUint32(20, 1, true); // audioFormat, 1 for PCM  
167 - view.setUint16(22, 1, true); // numChannels: 1 channel  
168 - view.setUint32(24, sampleRate, true); // sampleRate  
169 - view.setUint32(28, sampleRate * 2, true); // byteRate  
170 - view.setUint16(32, 2, true); // blockAlign  
171 - view.setUint16(34, 16, true); // bitsPerSample  
172 - view.setUint32(36, 0x61746164, true); // Subchunk2ID  
173 - view.setUint32(40, samples.length * 2, true); // subchunk2Size 184 + view.setUint32(12, 0x20746d66, true); // subchunk1ID
  185 + view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  186 + view.setUint32(20, 1, true); // audioFormat, 1 for PCM
  187 + view.setUint16(22, 1, true); // numChannels: 1 channel
  188 + view.setUint32(24, sampleRate, true); // sampleRate
  189 + view.setUint32(28, sampleRate * 2, true); // byteRate
  190 + view.setUint16(32, 2, true); // blockAlign
  191 + view.setUint16(34, 16, true); // bitsPerSample
  192 + view.setUint32(36, 0x61746164, true); // Subchunk2ID
  193 + view.setUint32(40, samples.length * 2, true); // subchunk2Size
174 194
175 let offset = 44; 195 let offset = 44;
176 for (let i = 0; i < samples.length; ++i) { 196 for (let i = 0; i < samples.length; ++i) {
@@ -178,5 +198,5 @@ function toWav(floatSamples, sampleRate) { @@ -178,5 +198,5 @@ function toWav(floatSamples, sampleRate) {
178 offset += 2; 198 offset += 2;
179 } 199 }
180 200
181 - return new Blob([view], {type: 'audio/wav'}); 201 + return new Blob([ view ], {type : 'audio/wav'});
182 } 202 }
@@ -5,7 +5,7 @@ https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models @@ -5,7 +5,7 @@ https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
5 to download a model. 5 to download a model.
6 6
7 The following is an example: 7 The following is an example:
8 -``` 8 +```bash
9 cd sherpa-onnx/wasm/tts/assets 9 cd sherpa-onnx/wasm/tts/assets
10 10
11 wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2 11 wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
@@ -11,34 +11,70 @@ @@ -11,34 +11,70 @@
11 textarea { 11 textarea {
12 width:100%; 12 width:100%;
13 } 13 }
  14 + .loading {
  15 + display: none !important;
  16 + }
14 </style> 17 </style>
15 </head> 18 </head>
16 19
17 -<body> 20 +<body style="font-family: 'Source Sans Pro', sans-serif; background-color: #f9fafb; color: #333; display: flex; flex-direction: column; align-items: center; height: 100vh; margin: 0;">
18 <h1> 21 <h1>
19 Next-gen Kaldi + WebAssembly<br/> 22 Next-gen Kaldi + WebAssembly<br/>
20 Text-to-speech Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a> 23 Text-to-speech Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a>
21 </h1> 24 </h1>
22 - <div>  
23 - <span id="hint">Loading model ... ...</span>  
24 - <br/>  
25 - <br/>  
26 - <label for="speakerId" id="speakerIdLabel">Speaker ID: </label>  
27 - <input type="text" id="speakerId" name="speakerId" value="0" />  
28 - <br/>  
29 - <br/>  
30 - <label for="speed" id="speedLabel">Speed: </label>  
31 - <input type="range" id="speed" name="speed" min="0.4" max="3.5" step="0.1" value="1.0" />  
32 - <span id="speedValue"></span>  
33 - <br/>  
34 - <br/>  
35 - <textarea id="text" rows="10" placeholder="Please enter your text here and click the Generate button"></textarea>  
36 - <br/>  
37 - <br/>  
38 - <button id="generateBtn" disabled>Generate</button> 25 +
  26 + <div style="width: 100%; max-width: 900px; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); flex: 1;">
  27 + <div id="status">Loading...</div>
  28 +
  29 + <div id="singleAudioContent" class="tab-content loading">
  30 + <label for="speakerId" id="speakerIdLabel">Speaker ID: </label>
  31 + <input type="text" id="speakerId" name="speakerId" value="0" />
  32 + <br/>
  33 + <br/>
  34 + <label for="speed" id="speedLabel">Speed: </label>
  35 + <input type="range" id="speed" name="speed" min="0.4" max="3.5" step="0.1" value="1.0" />
  36 + <span id="speedValue"></span>
  37 + <br/>
  38 + <br/>
  39 + <textarea id="text" rows="10" placeholder="Please enter your text here and click the Generate button"></textarea>
  40 + <br/>
  41 + <br/>
  42 + <button id="generateBtn" disabled>Generate</button>
  43 + </div>
  44 +
  45 + <section flex="1" overflow="auto" id="sound-clips">
  46 + </section>
39 </div> 47 </div>
40 - <section flex="1" overflow="auto" id="sound-clips">  
41 - </section> 48 +
  49 + <!-- Footer Section -->
  50 + <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
  51 + <h3>Description</h3>
  52 + <ul>
  53 + <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
  54 + <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
  55 + <ul>
  56 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
  57 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
  58 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
  59 + </ul>
  60 + </ul>
  61 + <h3>About This Demo</h3>
  62 + <ul>
  63 + <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
  64 + <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li>
  65 + </ul>
  66 + <h3>Latest Update</h3>
  67 + <ul>
  68 + <li>Update UI.</li>
  69 + <li>First working version.</li>
  70 + </ul>
  71 +
  72 + <h3>Acknowledgement</h3>
  73 + <ul>
  74 + <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
  75 + </ul>
  76 + </div>
  77 +
42 78
43 <script src="app-tts.js"></script> 79 <script src="app-tts.js"></script>
44 <script src="sherpa-onnx-tts.js"></script> 80 <script src="sherpa-onnx-tts.js"></script>
@@ -263,7 +263,7 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { @@ -263,7 +263,7 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
263 263
264 const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; 264 const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
265 const buffer = Module._malloc(providerLen); 265 const buffer = Module._malloc(providerLen);
266 - Module.stringToUTF8(config.provider, buffer, providerLen); 266 + Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen);
267 Module.setValue(ptr + offset, buffer, 'i8*'); 267 Module.setValue(ptr + offset, buffer, 'i8*');
268 offset += 4; 268 offset += 4;
269 269
@@ -5,7 +5,6 @@ @@ -5,7 +5,6 @@
5 const startBtn = document.getElementById('startBtn'); 5 const startBtn = document.getElementById('startBtn');
6 const stopBtn = document.getElementById('stopBtn'); 6 const stopBtn = document.getElementById('stopBtn');
7 const clearBtn = document.getElementById('clearBtn'); 7 const clearBtn = document.getElementById('clearBtn');
8 -const hint = document.getElementById('hint');  
9 const soundClips = document.getElementById('sound-clips'); 8 const soundClips = document.getElementById('sound-clips');
10 9
11 let textArea = document.getElementById('results'); 10 let textArea = document.getElementById('results');
@@ -16,7 +15,7 @@ let resultList = []; @@ -16,7 +15,7 @@ let resultList = [];
16 clearBtn.onclick = function() { 15 clearBtn.onclick = function() {
17 resultList = []; 16 resultList = [];
18 textArea.value = getDisplayResult(); 17 textArea.value = getDisplayResult();
19 - textArea.scrollTop = textArea.scrollHeight; // auto scroll 18 + textArea.scrollTop = textArea.scrollHeight; // auto scroll
20 }; 19 };
21 20
22 function getDisplayResult() { 21 function getDisplayResult() {
@@ -41,19 +40,17 @@ function getDisplayResult() { @@ -41,19 +40,17 @@ function getDisplayResult() {
41 return ans; 40 return ans;
42 } 41 }
43 42
44 -  
45 -  
46 Module = {}; 43 Module = {};
47 44
48 let audioCtx; 45 let audioCtx;
49 let mediaStream; 46 let mediaStream;
50 47
51 let expectedSampleRate = 16000; 48 let expectedSampleRate = 16000;
52 -let recordSampleRate; // the sampleRate of the microphone  
53 -let recorder = null; // the microphone  
54 -let leftchannel = []; // TODO: Use a single channel 49 +let recordSampleRate; // the sampleRate of the microphone
  50 +let recorder = null; // the microphone
  51 +let leftchannel = []; // TODO: Use a single channel
55 52
56 -let recordingLength = 0; // number of samples so far 53 +let recordingLength = 0; // number of samples so far
57 54
58 let vad = null; 55 let vad = null;
59 let buffer = null; 56 let buffer = null;
@@ -76,47 +73,47 @@ function createOfflineRecognizerSenseVoice() {} @@ -76,47 +73,47 @@ function createOfflineRecognizerSenseVoice() {}
76 73
77 function initOfflineRecognizer() { 74 function initOfflineRecognizer() {
78 let config = { 75 let config = {
79 - modelConfig: {  
80 - debug: 1,  
81 - tokens: './tokens.txt', 76 + modelConfig : {
  77 + debug : 1,
  78 + tokens : './tokens.txt',
82 }, 79 },
83 }; 80 };
84 if (fileExists('sense-voice.onnx') == 1) { 81 if (fileExists('sense-voice.onnx') == 1) {
85 config.modelConfig.senseVoice = { 82 config.modelConfig.senseVoice = {
86 - model: './sense-voice.onnx',  
87 - useInverseTextNormalization: 1, 83 + model : './sense-voice.onnx',
  84 + useInverseTextNormalization : 1,
88 }; 85 };
89 } else if (fileExists('whisper-encoder.onnx')) { 86 } else if (fileExists('whisper-encoder.onnx')) {
90 config.modelConfig.whisper = { 87 config.modelConfig.whisper = {
91 - encoder: './whisper-encoder.onnx',  
92 - decoder: './whisper-decoder.onnx', 88 + encoder : './whisper-encoder.onnx',
  89 + decoder : './whisper-decoder.onnx',
93 }; 90 };
94 } else if (fileExists('transducer-encoder.onnx')) { 91 } else if (fileExists('transducer-encoder.onnx')) {
95 config.modelConfig.transducer = { 92 config.modelConfig.transducer = {
96 - encoder: './transducer-encoder.onnx',  
97 - decoder: './transducer-decoder.onnx',  
98 - joiner: './transducer-joiner.onnx', 93 + encoder : './transducer-encoder.onnx',
  94 + decoder : './transducer-decoder.onnx',
  95 + joiner : './transducer-joiner.onnx',
99 }; 96 };
100 config.modelConfig.modelType = 'transducer'; 97 config.modelConfig.modelType = 'transducer';
101 } else if (fileExists('nemo-transducer-encoder.onnx')) { 98 } else if (fileExists('nemo-transducer-encoder.onnx')) {
102 config.modelConfig.transducer = { 99 config.modelConfig.transducer = {
103 - encoder: './nemo-transducer-encoder.onnx',  
104 - decoder: './nemo-transducer-decoder.onnx',  
105 - joiner: './nemo-transducer-joiner.onnx', 100 + encoder : './nemo-transducer-encoder.onnx',
  101 + decoder : './nemo-transducer-decoder.onnx',
  102 + joiner : './nemo-transducer-joiner.onnx',
106 }; 103 };
107 config.modelConfig.modelType = 'nemo_transducer'; 104 config.modelConfig.modelType = 'nemo_transducer';
108 } else if (fileExists('paraformer.onnx')) { 105 } else if (fileExists('paraformer.onnx')) {
109 config.modelConfig.paraformer = { 106 config.modelConfig.paraformer = {
110 - model: './paraformer.onnx', 107 + model : './paraformer.onnx',
111 }; 108 };
112 } else if (fileExists('telespeech.onnx')) { 109 } else if (fileExists('telespeech.onnx')) {
113 config.modelConfig.telespeechCtc = './telespeech.onnx'; 110 config.modelConfig.telespeechCtc = './telespeech.onnx';
114 } else if (fileExists('moonshine-preprocessor.onnx')) { 111 } else if (fileExists('moonshine-preprocessor.onnx')) {
115 config.modelConfig.moonshine = { 112 config.modelConfig.moonshine = {
116 - preprocessor: './moonshine-preprocessor.onnx',  
117 - encoder: './moonshine-encoder.onnx',  
118 - uncachedDecoder: './moonshine-uncached-decoder.onnx',  
119 - cachedDecoder: './moonshine-cached-decoder.onnx' 113 + preprocessor : './moonshine-preprocessor.onnx',
  114 + encoder : './moonshine-encoder.onnx',
  115 + uncachedDecoder : './moonshine-uncached-decoder.onnx',
  116 + cachedDecoder : './moonshine-cached-decoder.onnx'
120 }; 117 };
121 } else { 118 } else {
122 console.log('Please specify a model.'); 119 console.log('Please specify a model.');
@@ -126,9 +123,37 @@ function initOfflineRecognizer() { @@ -126,9 +123,37 @@ function initOfflineRecognizer() {
126 recognizer = new OfflineRecognizer(config, Module); 123 recognizer = new OfflineRecognizer(config, Module);
127 } 124 }
128 125
  126 +// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
  127 +Module.locateFile = function(path, scriptDirectory = '') {
  128 + console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`);
  129 + return scriptDirectory + path;
  130 +};
  131 +
  132 +// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
  133 +Module.setStatus = function(status) {
  134 + console.log(`status ${status}`);
  135 + const statusElement = document.getElementById('status');
  136 + if (status == "Running...") {
  137 + status = 'Model downloaded. Initializing recongizer...'
  138 + }
  139 + statusElement.textContent = status;
  140 + if (status === '') {
  141 + statusElement.style.display = 'none';
  142 + // statusElement.parentNode.removeChild(statusElement);
  143 +
  144 + document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
  145 + tabContentElement.classList.remove('loading');
  146 + });
  147 + } else {
  148 + statusElement.style.display = 'block';
  149 + document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
  150 + tabContentElement.classList.add('loading');
  151 + });
  152 + }
  153 +};
  154 +
129 Module.onRuntimeInitialized = function() { 155 Module.onRuntimeInitialized = function() {
130 console.log('inited!'); 156 console.log('inited!');
131 - hint.innerText = 'Model loaded! Please click start';  
132 157
133 startBtn.disabled = false; 158 startBtn.disabled = false;
134 159
@@ -141,17 +166,15 @@ Module.onRuntimeInitialized = function() { @@ -141,17 +166,15 @@ Module.onRuntimeInitialized = function() {
141 initOfflineRecognizer(); 166 initOfflineRecognizer();
142 }; 167 };
143 168
144 -  
145 -  
146 if (navigator.mediaDevices.getUserMedia) { 169 if (navigator.mediaDevices.getUserMedia) {
147 console.log('getUserMedia supported.'); 170 console.log('getUserMedia supported.');
148 171
149 // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia 172 // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
150 - const constraints = {audio: true}; 173 + const constraints = {audio : true};
151 174
152 let onSuccess = function(stream) { 175 let onSuccess = function(stream) {
153 if (!audioCtx) { 176 if (!audioCtx) {
154 - audioCtx = new AudioContext({sampleRate: expectedSampleRate}); 177 + audioCtx = new AudioContext({sampleRate : expectedSampleRate});
155 } 178 }
156 console.log(audioCtx); 179 console.log(audioCtx);
157 recordSampleRate = audioCtx.sampleRate; 180 recordSampleRate = audioCtx.sampleRate;
@@ -219,7 +242,6 @@ if (navigator.mediaDevices.getUserMedia) { @@ -219,7 +242,6 @@ if (navigator.mediaDevices.getUserMedia) {
219 242
220 resultList.push(durationStr); 243 resultList.push(durationStr);
221 244
222 -  
223 // now save the segment to a wav file 245 // now save the segment to a wav file
224 let buf = new Int16Array(segment.samples.length); 246 let buf = new Int16Array(segment.samples.length);
225 for (var i = 0; i < segment.samples.length; ++i) { 247 for (var i = 0; i < segment.samples.length; ++i) {
@@ -277,7 +299,7 @@ if (navigator.mediaDevices.getUserMedia) { @@ -277,7 +299,7 @@ if (navigator.mediaDevices.getUserMedia) {
277 } 299 }
278 300
279 textArea.value = getDisplayResult(); 301 textArea.value = getDisplayResult();
280 - textArea.scrollTop = textArea.scrollHeight; // auto scroll 302 + textArea.scrollTop = textArea.scrollHeight; // auto scroll
281 }; 303 };
282 304
283 startBtn.onclick = function() { 305 startBtn.onclick = function() {
@@ -308,9 +330,8 @@ if (navigator.mediaDevices.getUserMedia) { @@ -308,9 +330,8 @@ if (navigator.mediaDevices.getUserMedia) {
308 }; 330 };
309 }; 331 };
310 332
311 - let onError = function(err) {  
312 - console.log('The following error occured: ' + err);  
313 - }; 333 + let onError = function(
  334 + err) { console.log('The following error occured: ' + err); };
314 335
315 navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError); 336 navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
316 } else { 337 } else {
@@ -318,7 +339,6 @@ if (navigator.mediaDevices.getUserMedia) { @@ -318,7 +339,6 @@ if (navigator.mediaDevices.getUserMedia) {
318 alert('getUserMedia not supported on your browser!'); 339 alert('getUserMedia not supported on your browser!');
319 } 340 }
320 341
321 -  
322 // this function is copied/modified from 342 // this function is copied/modified from
323 // https://gist.github.com/meziantou/edb7217fddfbb70e899e 343 // https://gist.github.com/meziantou/edb7217fddfbb70e899e
324 function flatten(listOfSamples) { 344 function flatten(listOfSamples) {
@@ -344,22 +364,22 @@ function toWav(samples) { @@ -344,22 +364,22 @@ function toWav(samples) {
344 364
345 // http://soundfile.sapp.org/doc/WaveFormat/ 365 // http://soundfile.sapp.org/doc/WaveFormat/
346 // F F I R 366 // F F I R
347 - view.setUint32(0, 0x46464952, true); // chunkID  
348 - view.setUint32(4, 36 + samples.length * 2, true); // chunkSize 367 + view.setUint32(0, 0x46464952, true); // chunkID
  368 + view.setUint32(4, 36 + samples.length * 2, true); // chunkSize
349 // E V A W 369 // E V A W
350 - view.setUint32(8, 0x45564157, true); // format  
351 - // 370 + view.setUint32(8, 0x45564157, true); // format
  371 + //
352 // t m f 372 // t m f
353 - view.setUint32(12, 0x20746d66, true); // subchunk1ID  
354 - view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM  
355 - view.setUint32(20, 1, true); // audioFormat, 1 for PCM  
356 - view.setUint16(22, 1, true); // numChannels: 1 channel  
357 - view.setUint32(24, expectedSampleRate, true); // sampleRate  
358 - view.setUint32(28, expectedSampleRate * 2, true); // byteRate  
359 - view.setUint16(32, 2, true); // blockAlign  
360 - view.setUint16(34, 16, true); // bitsPerSample  
361 - view.setUint32(36, 0x61746164, true); // Subchunk2ID  
362 - view.setUint32(40, samples.length * 2, true); // subchunk2Size 373 + view.setUint32(12, 0x20746d66, true); // subchunk1ID
  374 + view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  375 + view.setUint32(20, 1, true); // audioFormat, 1 for PCM
  376 + view.setUint16(22, 1, true); // numChannels: 1 channel
  377 + view.setUint32(24, expectedSampleRate, true); // sampleRate
  378 + view.setUint32(28, expectedSampleRate * 2, true); // byteRate
  379 + view.setUint16(32, 2, true); // blockAlign
  380 + view.setUint16(34, 16, true); // bitsPerSample
  381 + view.setUint32(36, 0x61746164, true); // Subchunk2ID
  382 + view.setUint32(40, samples.length * 2, true); // subchunk2Size
363 383
364 let offset = 44; 384 let offset = 44;
365 for (let i = 0; i < samples.length; ++i) { 385 for (let i = 0; i < samples.length; ++i) {
@@ -367,7 +387,7 @@ function toWav(samples) { @@ -367,7 +387,7 @@ function toWav(samples) {
367 offset += 2; 387 offset += 2;
368 } 388 }
369 389
370 - return new Blob([view], {type: 'audio/wav'}); 390 + return new Blob([ view ], {type : 'audio/wav'});
371 } 391 }
372 392
373 // this function is copied from 393 // this function is copied from
@@ -11,30 +11,68 @@ @@ -11,30 +11,68 @@
11 textarea { 11 textarea {
12 width:100%; 12 width:100%;
13 } 13 }
  14 + .loading {
  15 + display: none !important;
  16 + }
14 </style> 17 </style>
15 </head> 18 </head>
16 19
17 -<body> 20 +<body style="font-family: 'Source Sans Pro', sans-serif; background-color: #f9fafb; color: #333; display: flex; flex-direction: column; align-items: center; height: 100vh; margin: 0;">
18 <h1> 21 <h1>
19 Next-gen Kaldi + WebAssembly<br/> 22 Next-gen Kaldi + WebAssembly<br/>
20 VAD+ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/> 23 VAD+ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
21 (with Zipformer) 24 (with Zipformer)
22 </h1> 25 </h1>
23 26
24 - <div>  
25 - <span id="hint">Loading model ... ...</span>  
26 - <br/>  
27 - <br/>  
28 - <button id="startBtn" disabled>Start</button>  
29 - <button id="stopBtn" disabled>Stop</button>  
30 - <button id="clearBtn">Clear</button>  
31 - <br/>  
32 - <br/>  
33 - <textarea id="results" rows="10" readonly></textarea> 27 + <div style="width: 100%; max-width: 900px; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); flex: 1;">
  28 + <div id="status">Loading...</div>
  29 +
  30 + <div id="singleAudioContent" class="tab-content loading">
  31 + <div style="display: flex; gap: 1.5rem;">
  32 + <div style="flex: 1; display: flex; flex-direction: row; align-items: center; gap: 1rem;">
  33 + <button id="startBtn" disabled>Start</button>
  34 + <button id="stopBtn" disabled>Stop</button>
  35 + <button id="clearBtn">Clear</button>
  36 + </div>
  37 + </div>
  38 +
  39 + <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
  40 + <div style="font-size: 1rem; font-weight: bold; padding: 0.5rem 1rem; background-color: #f8f9fa; border-radius: 8px; color: #6c757d;">Transcript</div>
  41 + <textarea id="results" rows="10" placeholder="Output will appear here..." readonly style="flex: 1; padding: 0.75rem; font-size: 1rem; border: 1px solid #ced4da; border-radius: 8px; resize: none; background-color: #f8f9fa;"></textarea>
  42 + </div>
  43 +
  44 + <section flex="1" overflow="auto" id="sound-clips">
  45 + </section>
34 </div> 46 </div>
35 47
36 - <section flex="1" overflow="auto" id="sound-clips">  
37 - </section> 48 + <!-- Footer Section -->
  49 + <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
  50 + <h3>Description</h3>
  51 + <ul>
  52 + <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
  53 + <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
  54 + <ul>
  55 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
  56 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
  57 + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
  58 + </ul>
  59 + </ul>
  60 + <h3>About This Demo</h3>
  61 + <ul>
  62 + <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
  63 + <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li>
  64 + </ul>
  65 + <h3>Latest Update</h3>
  66 + <ul>
  67 + <li>Update UI.</li>
  68 + <li>First working version.</li>
  69 + </ul>
  70 +
  71 + <h3>Acknowledgement</h3>
  72 + <ul>
  73 + <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
  74 + </ul>
  75 + </div>
38 76
39 <script src="sherpa-onnx-asr.js"></script> 77 <script src="sherpa-onnx-asr.js"></script>
40 <script src="sherpa-onnx-vad.js"></script> 78 <script src="sherpa-onnx-vad.js"></script>