Committed by
GitHub
Add JavaScript (WebAssembly) API for ten-vad (#2382)
Add support for the ten-vad model alongside silero-vad in the WebAssembly VAD API, update the UI and documentation, and extend examples and CI workflows to handle the new model. - Extend C++ bindings and printing logic to include ten-vad configuration. - Implement JavaScript init/free routines and runtime detection for ten-vad. - Update UI layout, README assets, example scripts, and CI workflow to support ten-vad.
正在显示
10 个修改的文件
包含
427 行增加
和
25 行删除
| 1 | +name: wasm-simd-hf-space-ten-vad | ||
| 2 | + | ||
| 3 | +on: | ||
| 4 | + push: | ||
| 5 | + branches: | ||
| 6 | + - wasm | ||
| 7 | + - wasm-ten-vad | ||
| 8 | + tags: | ||
| 9 | + - 'v[0-9]+.[0-9]+.[0-9]+*' | ||
| 10 | + | ||
| 11 | + workflow_dispatch: | ||
| 12 | + | ||
| 13 | +concurrency: | ||
| 14 | + group: wasm-simd-hf-space-ten-vad-${{ github.ref }} | ||
| 15 | + cancel-in-progress: true | ||
| 16 | + | ||
| 17 | +jobs: | ||
| 18 | + wasm-simd-hf-space-ten-vad: | ||
| 19 | + runs-on: ${{ matrix.os }} | ||
| 20 | + strategy: | ||
| 21 | + fail-fast: false | ||
| 22 | + matrix: | ||
| 23 | + os: [ubuntu-latest] | ||
| 24 | + | ||
| 25 | + steps: | ||
| 26 | + - uses: actions/checkout@v4 | ||
| 27 | + with: | ||
| 28 | + fetch-depth: 0 | ||
| 29 | + | ||
| 30 | + - name: Update version | ||
| 31 | + shell: bash | ||
| 32 | + run: | | ||
| 33 | + ./new-release.sh | ||
| 34 | + git diff . | ||
| 35 | + | ||
| 36 | + - name: Install emsdk | ||
| 37 | + uses: mymindstorm/setup-emsdk@v14 | ||
| 38 | + with: | ||
| 39 | + version: 3.1.53 | ||
| 40 | + actions-cache-folder: 'emsdk-cache' | ||
| 41 | + | ||
| 42 | + - name: View emsdk version | ||
| 43 | + shell: bash | ||
| 44 | + run: | | ||
| 45 | + emcc -v | ||
| 46 | + echo "--------------------" | ||
| 47 | + emcc --check | ||
| 48 | + | ||
| 49 | + - name: Download model files | ||
| 50 | + shell: bash | ||
| 51 | + run: | | ||
| 52 | + cd wasm/vad/assets | ||
| 53 | + ls -lh | ||
| 54 | + echo "----------" | ||
| 55 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx | ||
| 56 | + ls -lh | ||
| 57 | + cd .. | ||
| 58 | + sed -i.bak "s|.*(with <a .*| (with <a href="https://github.com/TEN-framework/ten-vad">ten-vad</a>)|" ./index.html | ||
| 59 | + git diff . | ||
| 60 | + | ||
| 61 | + - name: Build sherpa-onnx for WebAssembly | ||
| 62 | + shell: bash | ||
| 63 | + run: | | ||
| 64 | + ./build-wasm-simd-vad.sh | ||
| 65 | + | ||
| 66 | + - name: collect files | ||
| 67 | + shell: bash | ||
| 68 | + run: | | ||
| 69 | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | ||
| 70 | + | ||
| 71 | + dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-ten-vad | ||
| 72 | + mv build-wasm-simd-vad/install/bin/wasm/vad $dst | ||
| 73 | + ls -lh $dst | ||
| 74 | + tar cjfv $dst.tar.bz2 ./$dst | ||
| 75 | + | ||
| 76 | + - name: Upload wasm files | ||
| 77 | + uses: actions/upload-artifact@v4 | ||
| 78 | + with: | ||
| 79 | + name: sherpa-onnx-wasm-simd-ten-vad | ||
| 80 | + path: ./sherpa-onnx-wasm-simd-*.tar.bz2 | ||
| 81 | + | ||
| 82 | + - name: Release | ||
| 83 | + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') | ||
| 84 | + uses: svenstaro/upload-release-action@v2 | ||
| 85 | + with: | ||
| 86 | + file_glob: true | ||
| 87 | + overwrite: true | ||
| 88 | + file: ./*.tar.bz2 | ||
| 89 | + | ||
| 90 | + - name: Publish to ModelScope | ||
| 91 | + # if: false | ||
| 92 | + env: | ||
| 93 | + MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }} | ||
| 94 | + uses: nick-fields/retry@v2 | ||
| 95 | + with: | ||
| 96 | + max_attempts: 20 | ||
| 97 | + timeout_seconds: 200 | ||
| 98 | + shell: bash | ||
| 99 | + command: | | ||
| 100 | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | ||
| 101 | + | ||
| 102 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 103 | + git config --global user.name "Fangjun Kuang" | ||
| 104 | + | ||
| 105 | + rm -rf ms | ||
| 106 | + export GIT_LFS_SKIP_SMUDGE=1 | ||
| 107 | + export GIT_CLONE_PROTECTION_ACTIVE=false | ||
| 108 | + | ||
| 109 | + git clone https://www.modelscope.cn/studios/csukuangfj/web-assembly-ten-vad-sherpa-onnx.git ms | ||
| 110 | + cd ms | ||
| 111 | + rm -fv *.js | ||
| 112 | + rm -fv *.data | ||
| 113 | + git fetch | ||
| 114 | + git pull | ||
| 115 | + git merge -m "merge remote" --ff origin main | ||
| 116 | + | ||
| 117 | + cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-ten-vad/* . | ||
| 118 | + | ||
| 119 | + git status | ||
| 120 | + git lfs track "*.data" | ||
| 121 | + git lfs track "*.wasm" | ||
| 122 | + ls -lh | ||
| 123 | + | ||
| 124 | + git add . | ||
| 125 | + git commit -m "update model" | ||
| 126 | + git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/web-assembly-ten-vad-sherpa-onnx.git | ||
| 127 | + | ||
| 128 | + - name: Publish to huggingface | ||
| 129 | + env: | ||
| 130 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
| 131 | + uses: nick-fields/retry@v2 | ||
| 132 | + with: | ||
| 133 | + max_attempts: 20 | ||
| 134 | + timeout_seconds: 200 | ||
| 135 | + shell: bash | ||
| 136 | + command: | | ||
| 137 | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | ||
| 138 | + | ||
| 139 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 140 | + git config --global user.name "Fangjun Kuang" | ||
| 141 | + | ||
| 142 | + rm -rf huggingface | ||
| 143 | + export GIT_LFS_SKIP_SMUDGE=1 | ||
| 144 | + export GIT_CLONE_PROTECTION_ACTIVE=false | ||
| 145 | + | ||
| 146 | + git clone https://huggingface.co/spaces/k2-fsa/web-assembly-ten-vad-sherpa-onnx huggingface | ||
| 147 | + cd huggingface | ||
| 148 | + rm -fv *.js | ||
| 149 | + rm -fv *.data | ||
| 150 | + git fetch | ||
| 151 | + git pull | ||
| 152 | + git merge -m "merge remote" --ff origin main | ||
| 153 | + | ||
| 154 | + cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-ten-vad/* . | ||
| 155 | + | ||
| 156 | + git status | ||
| 157 | + git lfs track "*.data" | ||
| 158 | + git lfs track "*.wasm" | ||
| 159 | + ls -lh | ||
| 160 | + | ||
| 161 | + git add . | ||
| 162 | + git commit -m "update model" | ||
| 163 | + git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-ten-vad-sherpa-onnx main |
| @@ -26,6 +26,15 @@ function createRecognizer() { | @@ -26,6 +26,15 @@ function createRecognizer() { | ||
| 26 | function createVad() { | 26 | function createVad() { |
| 27 | // please download silero_vad.onnx from | 27 | // please download silero_vad.onnx from |
| 28 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | 28 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 29 | + // | ||
| 30 | + // please download ten-vad.onnx from | ||
| 31 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx | ||
| 32 | + // | ||
| 33 | + // You only need one VAD model: configure either sileroVad or tenVad, not both | ||
| 34 | + // | ||
| 35 | + // To use ten-vad.onnx, please set sileroVad.model to '' | ||
| 36 | + // and set tenVad.model to 'ten-vad.onnx' | ||
| 37 | + // | ||
| 29 | const config = { | 38 | const config = { |
| 30 | sileroVad: { | 39 | sileroVad: { |
| 31 | model: './silero_vad.onnx', | 40 | model: './silero_vad.onnx', |
| @@ -35,12 +44,22 @@ function createVad() { | @@ -35,12 +44,22 @@ function createVad() { | ||
| 35 | maxSpeechDuration: 5, | 44 | maxSpeechDuration: 5, |
| 36 | windowSize: 512, | 45 | windowSize: 512, |
| 37 | }, | 46 | }, |
| 47 | + tenVad: { | ||
| 48 | + // model: './ten-vad.onnx', | ||
| 49 | + model: '', | ||
| 50 | + threshold: 0.5, | ||
| 51 | + minSpeechDuration: 0.25, | ||
| 52 | + minSilenceDuration: 0.5, | ||
| 53 | + maxSpeechDuration: 5, | ||
| 54 | + windowSize: 256, | ||
| 55 | + }, | ||
| 38 | sampleRate: 16000, | 56 | sampleRate: 16000, |
| 39 | debug: true, | 57 | debug: true, |
| 40 | numThreads: 1, | 58 | numThreads: 1, |
| 41 | bufferSizeInSeconds: 60, | 59 | bufferSizeInSeconds: 60, |
| 42 | }; | 60 | }; |
| 43 | 61 | ||
| 62 | + | ||
| 44 | return sherpa_onnx.createVad(config); | 63 | return sherpa_onnx.createVad(config); |
| 45 | } | 64 | } |
| 46 | 65 | ||
| @@ -60,7 +79,11 @@ if (wave.sampleRate != recognizer.config.featConfig.sampleRate) { | @@ -60,7 +79,11 @@ if (wave.sampleRate != recognizer.config.featConfig.sampleRate) { | ||
| 60 | console.log('Started') | 79 | console.log('Started') |
| 61 | let start = Date.now(); | 80 | let start = Date.now(); |
| 62 | 81 | ||
| 63 | -const windowSize = vad.config.sileroVad.windowSize; | 82 | +let windowSize = vad.config.sileroVad.windowSize; |
| 83 | +if (vad.config.tenVad.model != '') { | ||
| 84 | + windowSize = vad.config.tenVad.windowSize; | ||
| 85 | +} | ||
| 86 | + | ||
| 64 | for (let i = 0; i < wave.samples.length; i += windowSize) { | 87 | for (let i = 0; i < wave.samples.length; i += windowSize) { |
| 65 | const thisWindow = wave.samples.subarray(i, i + windowSize); | 88 | const thisWindow = wave.samples.subarray(i, i + windowSize); |
| 66 | vad.acceptWaveform(thisWindow); | 89 | vad.acceptWaveform(thisWindow); |
| @@ -59,7 +59,7 @@ rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en | @@ -59,7 +59,7 @@ rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en | ||
| 59 | 59 | ||
| 60 | cd ../ | 60 | cd ../ |
| 61 | 61 | ||
| 62 | -sed -i.bak s/"type = 0"/"type = 1"/g ./sherpa-onnx.js | 62 | +sed -i.bak s/"type = 0"/"type = 1"/g ./sherpa-onnx-asr.js |
| 63 | sed -i.bak s/Zipformer/Paraformer/g ./index.html | 63 | sed -i.bak s/Zipformer/Paraformer/g ./index.html |
| 64 | 64 | ||
| 65 | cd ../.. | 65 | cd ../.. |
| @@ -69,8 +69,6 @@ function fileExists(filename) { | @@ -69,8 +69,6 @@ function fileExists(filename) { | ||
| 69 | return exists; | 69 | return exists; |
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | -function createOfflineRecognizerSenseVoice() {} | ||
| 73 | - | ||
| 74 | function initOfflineRecognizer() { | 72 | function initOfflineRecognizer() { |
| 75 | let config = { | 73 | let config = { |
| 76 | modelConfig: { | 74 | modelConfig: { |
| @@ -2,7 +2,7 @@ if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}) | @@ -2,7 +2,7 @@ if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}) | ||
| 2 | message(FATAL_ERROR "Please use ./build-wasm-simd-vad.sh to build for wasm VAD") | 2 | message(FATAL_ERROR "Please use ./build-wasm-simd-vad.sh to build for wasm VAD") |
| 3 | endif() | 3 | endif() |
| 4 | 4 | ||
| 5 | -if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/silero_vad.onnx") | 5 | +if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/silero_vad.onnx" AND NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/ten-vad.onnx" ) |
| 6 | message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue") | 6 | message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue") |
| 7 | endif() | 7 | endif() |
| 8 | 8 | ||
| @@ -30,6 +30,7 @@ set(exported_functions | @@ -30,6 +30,7 @@ set(exported_functions | ||
| 30 | SherpaOnnxVoiceActivityDetectorReset | 30 | SherpaOnnxVoiceActivityDetectorReset |
| 31 | SherpaOnnxVoiceActivityDetectorFlush | 31 | SherpaOnnxVoiceActivityDetectorFlush |
| 32 | # | 32 | # |
| 33 | + SherpaOnnxFileExists | ||
| 33 | ) | 34 | ) |
| 34 | set(mangled_exported_functions) | 35 | set(mangled_exported_functions) |
| 35 | foreach(x IN LISTS exported_functions) | 36 | foreach(x IN LISTS exported_functions) |
| @@ -5,7 +5,6 @@ | @@ -5,7 +5,6 @@ | ||
| 5 | const startBtn = document.getElementById('startBtn'); | 5 | const startBtn = document.getElementById('startBtn'); |
| 6 | const stopBtn = document.getElementById('stopBtn'); | 6 | const stopBtn = document.getElementById('stopBtn'); |
| 7 | const clearBtn = document.getElementById('clearBtn'); | 7 | const clearBtn = document.getElementById('clearBtn'); |
| 8 | -const hint = document.getElementById('hint'); | ||
| 9 | const soundClips = document.getElementById('sound-clips'); | 8 | const soundClips = document.getElementById('sound-clips'); |
| 10 | 9 | ||
| 11 | let textArea = document.getElementById('results'); | 10 | let textArea = document.getElementById('results'); |
| @@ -43,19 +42,98 @@ function getDisplayResult() { | @@ -43,19 +42,98 @@ function getDisplayResult() { | ||
| 43 | 42 | ||
| 44 | 43 | ||
| 45 | Module = {}; | 44 | Module = {}; |
| 45 | + | ||
| 46 | +// https://emscripten.org/docs/api_reference/module.html#Module.locateFile | ||
| 47 | +Module.locateFile = function(path, scriptDirectory = '') { | ||
| 48 | + console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`); | ||
| 49 | + return scriptDirectory + path; | ||
| 50 | +}; | ||
| 51 | + | ||
| 52 | +// Status hook called by the Emscripten-generated loader (see https://emscripten.org/docs/api_reference/module.html) | ||
| 53 | +Module.setStatus = function(status) { | ||
| 54 | + console.log(`status ${status}`); | ||
| 55 | + const statusElement = document.getElementById('status'); | ||
| 56 | + if (status == 'Running...') { | ||
| 57 | + status = 'Model downloaded. Initializing vad...' | ||
| 58 | + } | ||
| 59 | + statusElement.textContent = status; | ||
| 60 | + if (status === '') { | ||
| 61 | + statusElement.style.display = 'none'; | ||
| 62 | + // statusElement.parentNode.removeChild(statusElement); | ||
| 63 | + | ||
| 64 | + document.querySelectorAll('.tab-content').forEach((tabContentElement) => { | ||
| 65 | + tabContentElement.classList.remove('loading'); | ||
| 66 | + }); | ||
| 67 | + } else { | ||
| 68 | + statusElement.style.display = 'block'; | ||
| 69 | + document.querySelectorAll('.tab-content').forEach((tabContentElement) => { | ||
| 70 | + tabContentElement.classList.add('loading'); | ||
| 71 | + }); | ||
| 72 | + } | ||
| 73 | +}; | ||
| 74 | + | ||
| 46 | Module.onRuntimeInitialized = function() { | 75 | Module.onRuntimeInitialized = function() { |
| 47 | console.log('inited!'); | 76 | console.log('inited!'); |
| 48 | - hint.innerText = 'Model loaded! Please click start'; | ||
| 49 | 77 | ||
| 50 | startBtn.disabled = false; | 78 | startBtn.disabled = false; |
| 51 | 79 | ||
| 52 | - vad = createVad(Module); | 80 | + initVad(); |
| 53 | console.log('vad is created!', vad); | 81 | console.log('vad is created!', vad); |
| 54 | 82 | ||
| 55 | buffer = new CircularBuffer(30 * 16000, Module); | 83 | buffer = new CircularBuffer(30 * 16000, Module); |
| 56 | console.log('CircularBuffer is created!', buffer); | 84 | console.log('CircularBuffer is created!', buffer); |
| 57 | }; | 85 | }; |
| 58 | 86 | ||
| 87 | +function fileExists(filename) { | ||
| 88 | + const filenameLen = Module.lengthBytesUTF8(filename) + 1; | ||
| 89 | + const buffer = Module._malloc(filenameLen); | ||
| 90 | + Module.stringToUTF8(filename, buffer, filenameLen); | ||
| 91 | + | ||
| 92 | + let exists = Module._SherpaOnnxFileExists(buffer); | ||
| 93 | + | ||
| 94 | + Module._free(buffer); | ||
| 95 | + | ||
| 96 | + return exists; | ||
| 97 | +} | ||
| 98 | + | ||
| 99 | +function initVad() { | ||
| 100 | + const sileroVad = { | ||
| 101 | + model: '', | ||
| 102 | + threshold: 0.50, | ||
| 103 | + minSilenceDuration: 0.50, | ||
| 104 | + minSpeechDuration: 0.25, | ||
| 105 | + maxSpeechDuration: 20, | ||
| 106 | + windowSize: 512, | ||
| 107 | + }; | ||
| 108 | + | ||
| 109 | + const tenVad = { | ||
| 110 | + model: '', | ||
| 111 | + threshold: 0.50, | ||
| 112 | + minSilenceDuration: 0.50, | ||
| 113 | + minSpeechDuration: 0.25, | ||
| 114 | + maxSpeechDuration: 20, | ||
| 115 | + windowSize: 256, | ||
| 116 | + }; | ||
| 117 | + | ||
| 118 | + let config = { | ||
| 119 | + sileroVad: sileroVad, | ||
| 120 | + tenVad: tenVad, | ||
| 121 | + sampleRate: 16000, | ||
| 122 | + numThreads: 1, | ||
| 123 | + provider: 'cpu', | ||
| 124 | + debug: 1, | ||
| 125 | + bufferSizeInSeconds: 30, | ||
| 126 | + }; | ||
| 127 | + | ||
| 128 | + if (fileExists('silero_vad.onnx') == 1) { | ||
| 129 | + config.sileroVad.model = 'silero_vad.onnx' | ||
| 130 | + } else if (fileExists('ten-vad.onnx') == 1) { | ||
| 131 | + config.tenVad.model = 'ten-vad.onnx' | ||
| 132 | + } | ||
| 133 | + | ||
| 134 | + vad = createVad(Module, config); | ||
| 135 | +} | ||
| 136 | + | ||
| 59 | let audioCtx; | 137 | let audioCtx; |
| 60 | let mediaStream; | 138 | let mediaStream; |
| 61 | 139 |
| 1 | # Introduction | 1 | # Introduction |
| 2 | 2 | ||
| 3 | +## Use silero-vad | ||
| 4 | + | ||
| 3 | Please download | 5 | Please download |
| 4 | https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | 6 | https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 5 | and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`. | 7 | and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`. |
| 6 | 8 | ||
| 7 | You can find an example build script at | 9 | You can find an example build script at |
| 8 | https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-silero-vad.yaml | 10 | https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-silero-vad.yaml |
| 11 | + | ||
| 12 | +``` | ||
| 13 | +cd /path/to/sherpa-onnx/wasm/vad/assets | ||
| 14 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 15 | +``` | ||
| 16 | + | ||
| 17 | +## Use ten-vad | ||
| 18 | + | ||
| 19 | +Please download | ||
| 20 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx | ||
| 21 | +and put `ten-vad.onnx` into the current directory, i.e., `wasm/vad/assets`. | ||
| 22 | + | ||
| 23 | +You can find an example build script at | ||
| 24 | +https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-ten-vad.yaml | ||
| 25 | + | ||
| 26 | +``` | ||
| 27 | +cd /path/to/sherpa-onnx/wasm/vad/assets | ||
| 28 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx | ||
| 29 | +cd .. | ||
| 30 | +sed -i.bak 's|.*(with <a .*| (with <a href="https://github.com/TEN-framework/ten-vad">ten-vad</a>)|' ./index.html | ||
| 31 | + | ||
| 32 | +``` |
| @@ -11,30 +11,67 @@ | @@ -11,30 +11,67 @@ | ||
| 11 | textarea { | 11 | textarea { |
| 12 | width:100%; | 12 | width:100%; |
| 13 | } | 13 | } |
| 14 | + .loading { | ||
| 15 | + display: none !important; | ||
| 16 | + } | ||
| 14 | </style> | 17 | </style> |
| 15 | </head> | 18 | </head> |
| 16 | 19 | ||
| 17 | -<body> | 20 | +<body style="font-family: 'Source Sans Pro', sans-serif; background-color: #f9fafb; color: #333; display: flex; flex-direction: column; align-items: center; height: 100vh; margin: 0;"> |
| 18 | <h1> | 21 | <h1> |
| 19 | Next-gen Kaldi + WebAssembly<br/> | 22 | Next-gen Kaldi + WebAssembly<br/> |
| 20 | - VAD Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/> | 23 | + VAD Demo using <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/> |
| 21 | (with <a href="https://github.com/snakers4/silero-vad">silero-vad</a>) | 24 | (with <a href="https://github.com/snakers4/silero-vad">silero-vad</a>) |
| 22 | </h1> | 25 | </h1> |
| 23 | 26 | ||
| 24 | - <div> | ||
| 25 | - <span id="hint">Loading model ... ...</span> | ||
| 26 | - <br/> | ||
| 27 | - <br/> | ||
| 28 | - <button id="startBtn" disabled>Start</button> | ||
| 29 | - <button id="stopBtn" disabled>Stop</button> | ||
| 30 | - <button id="clearBtn">Clear</button> | ||
| 31 | - <br/> | ||
| 32 | - <br/> | ||
| 33 | - <textarea id="results" rows="10" readonly></textarea> | 27 | + <div style="width: 100%; max-width: 900px; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); flex: 1;"> |
| 28 | + <div id="status">Loading...</div> | ||
| 29 | + | ||
| 30 | + <div id="singleAudioContent" class="tab-content loading"> | ||
| 31 | + <div style="display: flex; gap: 1.5rem;"> | ||
| 32 | + <div style="flex: 1; display: flex; flex-direction: row; align-items: center; gap: 1rem;"> | ||
| 33 | + <button id="startBtn" disabled>Start</button> | ||
| 34 | + <button id="stopBtn" disabled>Stop</button> | ||
| 35 | + <button id="clearBtn">Clear</button> | ||
| 36 | + </div> | ||
| 37 | + </div> | ||
| 38 | + | ||
| 39 | + <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;"> | ||
| 40 | + <textarea id="results" rows="10" placeholder="Please click start and speak. Output will appear here..." readonly style="flex: 1; padding: 0.75rem; font-size: 1rem; border: 1px solid #ced4da; border-radius: 8px; resize: none; background-color: #f8f9fa;"></textarea> | ||
| 41 | + </div> | ||
| 42 | + | ||
| 43 | + <section flex="1" overflow="auto" id="sound-clips"> | ||
| 44 | + </section> | ||
| 34 | </div> | 45 | </div> |
| 35 | 46 | ||
| 36 | - <section flex="1" overflow="auto" id="sound-clips"> | ||
| 37 | - </section> | 47 | + <!-- Footer Section --> |
| 48 | + <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;"> | ||
| 49 | + <h3>Description</h3> | ||
| 50 | + <ul> | ||
| 51 | + <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li> | ||
| 52 | + <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li> | ||
| 53 | + <ul> | ||
| 54 | + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li> | ||
| 55 | + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li> | ||
| 56 | + <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li> | ||
| 57 | + </ul> | ||
| 58 | + </ul> | ||
| 59 | + <h3>About This Demo</h3> | ||
| 60 | + <ul> | ||
| 61 | + <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li> | ||
| 62 | + <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li> | ||
| 63 | + </ul> | ||
| 64 | + <h3>Latest Update</h3> | ||
| 65 | + <ul> | ||
| 66 | + <li>Update UI.</li> | ||
| 67 | + <li>First working version.</li> | ||
| 68 | + </ul> | ||
| 69 | + | ||
| 70 | + <h3>Acknowledgement</h3> | ||
| 71 | + <ul> | ||
| 72 | + <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li> | ||
| 73 | + </ul> | ||
| 74 | + </div> | ||
| 38 | 75 | ||
| 39 | <script src="sherpa-onnx-vad.js"></script> | 76 | <script src="sherpa-onnx-vad.js"></script> |
| 40 | <script src="app-vad.js"></script> | 77 | <script src="app-vad.js"></script> |
| @@ -7,6 +7,10 @@ function freeConfig(config, Module) { | @@ -7,6 +7,10 @@ function freeConfig(config, Module) { | ||
| 7 | freeConfig(config.sileroVad, Module) | 7 | freeConfig(config.sileroVad, Module) |
| 8 | } | 8 | } |
| 9 | 9 | ||
| 10 | + if ('tenVad' in config) { | ||
| 11 | + freeConfig(config.tenVad, Module) | ||
| 12 | + } | ||
| 13 | + | ||
| 10 | 14 | ||
| 11 | Module._free(config.ptr); | 15 | Module._free(config.ptr); |
| 12 | } | 16 | } |
| @@ -48,6 +52,42 @@ function initSherpaOnnxSileroVadModelConfig(config, Module) { | @@ -48,6 +52,42 @@ function initSherpaOnnxSileroVadModelConfig(config, Module) { | ||
| 48 | } | 52 | } |
| 49 | } | 53 | } |
| 50 | 54 | ||
| 55 | +function initSherpaOnnxTenVadModelConfig(config, Module) { | ||
| 56 | + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; | ||
| 57 | + | ||
| 58 | + const n = modelLen; | ||
| 59 | + | ||
| 60 | + const buffer = Module._malloc(n); | ||
| 61 | + | ||
| 62 | + const len = 6 * 4; | ||
| 63 | + const ptr = Module._malloc(len); | ||
| 64 | + | ||
| 65 | + Module.stringToUTF8(config.model || '', buffer, modelLen); | ||
| 66 | + | ||
| 67 | + offset = 0; | ||
| 68 | + Module.setValue(ptr, buffer, 'i8*'); | ||
| 69 | + offset += 4; | ||
| 70 | + | ||
| 71 | + Module.setValue(ptr + offset, config.threshold || 0.5, 'float'); | ||
| 72 | + offset += 4; | ||
| 73 | + | ||
| 74 | + Module.setValue(ptr + offset, config.minSilenceDuration || 0.5, 'float'); | ||
| 75 | + offset += 4; | ||
| 76 | + | ||
| 77 | + Module.setValue(ptr + offset, config.minSpeechDuration || 0.25, 'float'); | ||
| 78 | + offset += 4; | ||
| 79 | + | ||
| 80 | + Module.setValue(ptr + offset, config.windowSize || 256, 'i32'); | ||
| 81 | + offset += 4; | ||
| 82 | + | ||
| 83 | + Module.setValue(ptr + offset, config.maxSpeechDuration || 20, 'float'); | ||
| 84 | + offset += 4; | ||
| 85 | + | ||
| 86 | + return { | ||
| 87 | + buffer: buffer, ptr: ptr, len: len, | ||
| 88 | + } | ||
| 89 | +} | ||
| 90 | + | ||
| 51 | function initSherpaOnnxVadModelConfig(config, Module) { | 91 | function initSherpaOnnxVadModelConfig(config, Module) { |
| 52 | if (!('sileroVad' in config)) { | 92 | if (!('sileroVad' in config)) { |
| 53 | config.sileroVad = { | 93 | config.sileroVad = { |
| @@ -60,10 +100,23 @@ function initSherpaOnnxVadModelConfig(config, Module) { | @@ -60,10 +100,23 @@ function initSherpaOnnxVadModelConfig(config, Module) { | ||
| 60 | }; | 100 | }; |
| 61 | } | 101 | } |
| 62 | 102 | ||
| 103 | + if (!('tenVad' in config)) { | ||
| 104 | + config.tenVad = { | ||
| 105 | + model: '', | ||
| 106 | + threshold: 0.50, | ||
| 107 | + minSilenceDuration: 0.50, | ||
| 108 | + minSpeechDuration: 0.25, | ||
| 109 | + windowSize: 256, | ||
| 110 | + maxSpeechDuration: 20, | ||
| 111 | + }; | ||
| 112 | + } | ||
| 113 | + | ||
| 63 | const sileroVad = | 114 | const sileroVad = |
| 64 | initSherpaOnnxSileroVadModelConfig(config.sileroVad, Module); | 115 | initSherpaOnnxSileroVadModelConfig(config.sileroVad, Module); |
| 65 | 116 | ||
| 66 | - const len = sileroVad.len + 4 * 4; | 117 | + const tenVad = initSherpaOnnxTenVadModelConfig(config.tenVad, Module); |
| 118 | + | ||
| 119 | + const len = sileroVad.len + 4 * 4 + tenVad.len; | ||
| 67 | const ptr = Module._malloc(len); | 120 | const ptr = Module._malloc(len); |
| 68 | 121 | ||
| 69 | const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; | 122 | const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; |
| @@ -86,8 +139,11 @@ function initSherpaOnnxVadModelConfig(config, Module) { | @@ -86,8 +139,11 @@ function initSherpaOnnxVadModelConfig(config, Module) { | ||
| 86 | Module.setValue(ptr + offset, config.debug || 0, 'i32'); | 139 | Module.setValue(ptr + offset, config.debug || 0, 'i32'); |
| 87 | offset += 4; | 140 | offset += 4; |
| 88 | 141 | ||
| 142 | + Module._CopyHeap(tenVad.ptr, tenVad.len, ptr + offset); | ||
| 143 | + offset += tenVad.len; | ||
| 144 | + | ||
| 89 | return { | 145 | return { |
| 90 | - buffer: buffer, ptr: ptr, len: len, sileroVad: sileroVad, | 146 | + buffer: buffer, ptr: ptr, len: len, sileroVad: sileroVad, tenVad: tenVad |
| 91 | } | 147 | } |
| 92 | } | 148 | } |
| 93 | 149 | ||
| @@ -101,8 +157,18 @@ function createVad(Module, myConfig) { | @@ -101,8 +157,18 @@ function createVad(Module, myConfig) { | ||
| 101 | windowSize: 512, | 157 | windowSize: 512, |
| 102 | }; | 158 | }; |
| 103 | 159 | ||
| 160 | + const tenVad = { | ||
| 161 | + model: '', | ||
| 162 | + threshold: 0.50, | ||
| 163 | + minSilenceDuration: 0.50, | ||
| 164 | + minSpeechDuration: 0.25, | ||
| 165 | + maxSpeechDuration: 20, | ||
| 166 | + windowSize: 256, | ||
| 167 | + }; | ||
| 168 | + | ||
| 104 | let config = { | 169 | let config = { |
| 105 | sileroVad: sileroVad, | 170 | sileroVad: sileroVad, |
| 171 | + tenVad: tenVad, | ||
| 106 | sampleRate: 16000, | 172 | sampleRate: 16000, |
| 107 | numThreads: 1, | 173 | numThreads: 1, |
| 108 | provider: 'cpu', | 174 | provider: 'cpu', |
| @@ -14,12 +14,15 @@ | @@ -14,12 +14,15 @@ | ||
| 14 | extern "C" { | 14 | extern "C" { |
| 15 | 15 | ||
| 16 | static_assert(sizeof(SherpaOnnxSileroVadModelConfig) == 6 * 4, ""); | 16 | static_assert(sizeof(SherpaOnnxSileroVadModelConfig) == 6 * 4, ""); |
| 17 | +static_assert(sizeof(SherpaOnnxTenVadModelConfig) == 6 * 4, ""); | ||
| 17 | 18 | ||
| 18 | static_assert(sizeof(SherpaOnnxVadModelConfig) == | 19 | static_assert(sizeof(SherpaOnnxVadModelConfig) == |
| 19 | - sizeof(SherpaOnnxSileroVadModelConfig) + 4 * 4, | 20 | + sizeof(SherpaOnnxSileroVadModelConfig) + 4 * 4 + |
| 21 | + sizeof(SherpaOnnxTenVadModelConfig), | ||
| 20 | ""); | 22 | ""); |
| 21 | void MyPrint(SherpaOnnxVadModelConfig *config) { | 23 | void MyPrint(SherpaOnnxVadModelConfig *config) { |
| 22 | auto silero_vad = &config->silero_vad; | 24 | auto silero_vad = &config->silero_vad; |
| 25 | + auto ten_vad = &config->ten_vad; | ||
| 23 | 26 | ||
| 24 | fprintf(stdout, "----------silero_vad config----------\n"); | 27 | fprintf(stdout, "----------silero_vad config----------\n"); |
| 25 | fprintf(stdout, "model: %s\n", silero_vad->model); | 28 | fprintf(stdout, "model: %s\n", silero_vad->model); |
| @@ -32,6 +35,15 @@ void MyPrint(SherpaOnnxVadModelConfig *config) { | @@ -32,6 +35,15 @@ void MyPrint(SherpaOnnxVadModelConfig *config) { | ||
| 32 | fprintf(stdout, "max_speech_duration: %.3f\n", | 35 | fprintf(stdout, "max_speech_duration: %.3f\n", |
| 33 | silero_vad->max_speech_duration); | 36 | silero_vad->max_speech_duration); |
| 34 | 37 | ||
| 38 | + fprintf(stdout, "----------ten_vad config----------\n"); | ||
| 39 | + fprintf(stdout, "model: %s\n", ten_vad->model); | ||
| 40 | + fprintf(stdout, "threshold: %.3f\n", ten_vad->threshold); | ||
| 41 | + fprintf(stdout, "min_silence_duration: %.3f\n", | ||
| 42 | + ten_vad->min_silence_duration); | ||
| 43 | + fprintf(stdout, "min_speech_duration: %.3f\n", ten_vad->min_speech_duration); | ||
| 44 | + fprintf(stdout, "window_size: %d\n", ten_vad->window_size); | ||
| 45 | + fprintf(stdout, "max_speech_duration: %.3f\n", ten_vad->max_speech_duration); | ||
| 46 | + | ||
| 35 | fprintf(stdout, "----------config----------\n"); | 47 | fprintf(stdout, "----------config----------\n"); |
| 36 | 48 | ||
| 37 | fprintf(stdout, "sample_rate: %d\n", config->sample_rate); | 49 | fprintf(stdout, "sample_rate: %d\n", config->sample_rate); |
-
请 注册 或 登录 后发表评论