Committed by
GitHub
Add JavaScript API (wasm) for speech enhancement GTCRN models (#2007)
正在显示 11 个修改的文件，包含 96 行增加和 18 行删除
| 1 | { | 1 | { |
| 2 | "name": "sherpa-onnx-PLATFORM2-ARCH", | 2 | "name": "sherpa-onnx-PLATFORM2-ARCH", |
| 3 | "version": "SHERPA_ONNX_VERSION", | 3 | "version": "SHERPA_ONNX_VERSION", |
| 4 | - "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection", | 4 | + "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection", |
| 5 | "main": "index.js", | 5 | "main": "index.js", |
| 6 | "scripts": { | 6 | "scripts": { |
| 7 | "test": "echo \"Error: no test specified\" && exit 1" | 7 | "test": "echo \"Error: no test specified\" && exit 1" |
| @@ -46,7 +46,9 @@ | @@ -46,7 +46,9 @@ | ||
| 46 | "vad", | 46 | "vad", |
| 47 | "node-addon-api", | 47 | "node-addon-api", |
| 48 | "speaker id", | 48 | "speaker id", |
| 49 | - "language id" | 49 | + "language id", |
| 50 | + "speech enhancement", | ||
| 51 | + "denoising" | ||
| 50 | ], | 52 | ], |
| 51 | "author": "The next-gen Kaldi team", | 53 | "author": "The next-gen Kaldi team", |
| 52 | "license": "Apache-2.0", | 54 | "license": "Apache-2.0", |
| 1 | { | 1 | { |
| 2 | "name": "sherpa-onnx-node", | 2 | "name": "sherpa-onnx-node", |
| 3 | "version": "SHERPA_ONNX_VERSION", | 3 | "version": "SHERPA_ONNX_VERSION", |
| 4 | - "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection", | 4 | + "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection", |
| 5 | "main": "sherpa-onnx.js", | 5 | "main": "sherpa-onnx.js", |
| 6 | "scripts": { | 6 | "scripts": { |
| 7 | "test": "echo \"Error: no test specified\" && exit 1" | 7 | "test": "echo \"Error: no test specified\" && exit 1" |
| @@ -46,7 +46,9 @@ | @@ -46,7 +46,9 @@ | ||
| 46 | "vad", | 46 | "vad", |
| 47 | "node-addon-api", | 47 | "node-addon-api", |
| 48 | "speaker id", | 48 | "speaker id", |
| 49 | - "language id" | 49 | + "language id", |
| 50 | + "speech enhancement", | ||
| 51 | + "denoising" | ||
| 50 | ], | 52 | ], |
| 51 | "author": "The next-gen Kaldi team", | 53 | "author": "The next-gen Kaldi team", |
| 52 | "license": "Apache-2.0", | 54 | "license": "Apache-2.0", |
| @@ -9,6 +9,16 @@ git status | @@ -9,6 +9,16 @@ git status | ||
| 9 | ls -lh | 9 | ls -lh |
| 10 | ls -lh node_modules | 10 | ls -lh node_modules |
| 11 | 11 | ||
| 12 | +# speech enhancement | ||
| 13 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx | ||
| 14 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav | ||
| 15 | +node ./test-offline-speech-enhancement-gtcrn.js | ||
| 16 | +ls -lh *.wav | ||
| 17 | +rm gtcrn_simple.onnx | ||
| 18 | +rm inp_16k.wav | ||
| 19 | +rm enhanced-16k.wav | ||
| 20 | + | ||
| 21 | + | ||
| 12 | # offline tts | 22 | # offline tts |
| 13 | # | 23 | # |
| 14 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 | 24 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 |
| @@ -4,7 +4,6 @@ on: | @@ -4,7 +4,6 @@ on: | ||
| 4 | push: | 4 | push: |
| 5 | branches: | 5 | branches: |
| 6 | - wasm | 6 | - wasm |
| 7 | - - wasm-gtcrn | ||
| 8 | tags: | 7 | tags: |
| 9 | - 'v[0-9]+.[0-9]+.[0-9]+*' | 8 | - 'v[0-9]+.[0-9]+.[0-9]+*' |
| 10 | 9 | ||
| @@ -79,9 +78,9 @@ jobs: | @@ -79,9 +78,9 @@ jobs: | ||
| 79 | file_glob: true | 78 | file_glob: true |
| 80 | overwrite: true | 79 | overwrite: true |
| 81 | file: ./*.tar.bz2 | 80 | file: ./*.tar.bz2 |
| 82 | - repo_name: k2-fsa/sherpa-onnx | ||
| 83 | - repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} | ||
| 84 | - tag: v1.10.46 | 81 | + # repo_name: k2-fsa/sherpa-onnx |
| 82 | + # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} | ||
| 83 | + # tag: v1.10.46 | ||
| 85 | 84 | ||
| 86 | - name: Release | 85 | - name: Release |
| 87 | if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') | 86 | if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') |
| @@ -24,6 +24,16 @@ In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa | @@ -24,6 +24,16 @@ In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa | ||
| 24 | for text-to-speech and speech-to-text. | 24 | for text-to-speech and speech-to-text. |
| 25 | 25 | ||
| 26 | 26 | ||
| 27 | +# Speech enhancement | ||
| 28 | + | ||
| 29 | +In the following, we demonstrate how to run speech enhancement. | ||
| 30 | + | ||
| 31 | +```bash | ||
| 32 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx | ||
| 33 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav | ||
| 34 | +node ./test-offline-speech-enhancement-gtcrn.js | ||
| 35 | +``` | ||
| 36 | + | ||
| 27 | # Speaker diarization | 37 | # Speaker diarization |
| 28 | 38 | ||
| 29 | In the following, we demonstrate how to run speaker diarization. | 39 | In the following, we demonstrate how to run speaker diarization. |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +// Please download ./gtcrn_simple.onnx and ./inp_16k.wav used in this file | ||
| 4 | +// from | ||
| 5 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models | ||
| 6 | +// | ||
| 7 | +// This script shows how to use speech enhancement API from sherpa-onnx. | ||
| 8 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 9 | + | ||
| 10 | +function createOfflineSpeechDenoiser() { | ||
| 11 | + let config = { | ||
| 12 | + model: { | ||
| 13 | + gtcrn: {model: './gtcrn_simple.onnx'}, | ||
| 14 | + debug: 1, | ||
| 15 | + }, | ||
| 16 | + }; | ||
| 17 | + | ||
| 18 | + return sherpa_onnx.createOfflineSpeechDenoiser(config); | ||
| 19 | +} | ||
| 20 | + | ||
| 21 | +speech_denoiser = createOfflineSpeechDenoiser(); | ||
| 22 | + | ||
| 23 | +const waveFilename = './inp_16k.wav'; | ||
| 24 | +const wave = sherpa_onnx.readWave(waveFilename); | ||
| 25 | + | ||
| 26 | +const denoised = speech_denoiser.run(wave.samples, wave.sampleRate); | ||
| 27 | +sherpa_onnx.writeWave('./enhanced-16k.wav', denoised); | ||
| 28 | +console.log('Saved to ./enhanced-16k.wav'); | ||
| 29 | + | ||
| 30 | +speech_denoiser.free(); |
| @@ -9,6 +9,10 @@ const sherpa_onnx_wave = require('./sherpa-onnx-wave.js'); | @@ -9,6 +9,10 @@ const sherpa_onnx_wave = require('./sherpa-onnx-wave.js'); | ||
| 9 | const sherpa_onnx_vad = require('./sherpa-onnx-vad.js'); | 9 | const sherpa_onnx_vad = require('./sherpa-onnx-vad.js'); |
| 10 | const sherpa_onnx_speaker_diarization = | 10 | const sherpa_onnx_speaker_diarization = |
| 11 | require('./sherpa-onnx-speaker-diarization.js'); | 11 | require('./sherpa-onnx-speaker-diarization.js'); |
| 12 | +const sherpa_onnx_speech_enhancement = | ||
| 13 | + require('./sherpa-onnx-speech-enhancement.js'); | ||
| 14 | + | ||
| 15 | + | ||
| 12 | 16 | ||
| 13 | function createOnlineRecognizer(config) { | 17 | function createOnlineRecognizer(config) { |
| 14 | return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config); | 18 | return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config); |
| @@ -47,6 +51,15 @@ function writeWave(filename, data) { | @@ -47,6 +51,15 @@ function writeWave(filename, data) { | ||
| 47 | sherpa_onnx_wave.writeWave(filename, data, wasmModule); | 51 | sherpa_onnx_wave.writeWave(filename, data, wasmModule); |
| 48 | } | 52 | } |
| 49 | 53 | ||
| 54 | +function readWaveFromBinaryData(uint8Array) { | ||
| 55 | + return sherpa_onnx_wave.readWaveFromBinaryData(uint8Array, wasmModule); | ||
| 56 | +} | ||
| 57 | + | ||
| 58 | +function createOfflineSpeechDenoiser(config) { | ||
| 59 | + return sherpa_onnx_speech_enhancement.createOfflineSpeechDenoiser( | ||
| 60 | + wasmModule, config); | ||
| 61 | +} | ||
| 62 | + | ||
| 50 | // Note: online means streaming and offline means non-streaming here. | 63 | // Note: online means streaming and offline means non-streaming here. |
| 51 | // Both of them don't require internet connection. | 64 | // Both of them don't require internet connection. |
| 52 | module.exports = { | 65 | module.exports = { |
| @@ -55,8 +68,10 @@ module.exports = { | @@ -55,8 +68,10 @@ module.exports = { | ||
| 55 | createOfflineTts, | 68 | createOfflineTts, |
| 56 | createKws, | 69 | createKws, |
| 57 | readWave, | 70 | readWave, |
| 71 | + readWaveFromBinaryData, | ||
| 58 | writeWave, | 72 | writeWave, |
| 59 | createCircularBuffer, | 73 | createCircularBuffer, |
| 60 | createVad, | 74 | createVad, |
| 61 | createOfflineSpeakerDiarization, | 75 | createOfflineSpeakerDiarization, |
| 76 | + createOfflineSpeechDenoiser, | ||
| 62 | }; | 77 | }; |
| 1 | { | 1 | { |
| 2 | "name": "sherpa-onnx", | 2 | "name": "sherpa-onnx", |
| 3 | "version": "SHERPA_ONNX_VERSION", | 3 | "version": "SHERPA_ONNX_VERSION", |
| 4 | - "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection", | 4 | + "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection", |
| 5 | "main": "index.js", | 5 | "main": "index.js", |
| 6 | "scripts": { | 6 | "scripts": { |
| 7 | "test": "echo \"Error: no test specified\" && exit 1" | 7 | "test": "echo \"Error: no test specified\" && exit 1" |
| @@ -34,7 +34,9 @@ | @@ -34,7 +34,9 @@ | ||
| 34 | "speech", | 34 | "speech", |
| 35 | "recognition", | 35 | "recognition", |
| 36 | "WebAssembly", | 36 | "WebAssembly", |
| 37 | - "wasm" | 37 | + "wasm", |
| 38 | + "speech enhancement", | ||
| 39 | + "denoising" | ||
| 38 | ], | 40 | ], |
| 39 | "author": "The next-gen Kaldi team", | 41 | "author": "The next-gen Kaldi team", |
| 40 | "license": "Apache-2.0", | 42 | "license": "Apache-2.0", |
| @@ -49,6 +49,7 @@ set(exported_functions | @@ -49,6 +49,7 @@ set(exported_functions | ||
| 49 | SherpaOnnxDestroyKeywordSpotter | 49 | SherpaOnnxDestroyKeywordSpotter |
| 50 | SherpaOnnxGetKeywordResult | 50 | SherpaOnnxGetKeywordResult |
| 51 | SherpaOnnxIsKeywordStreamReady | 51 | SherpaOnnxIsKeywordStreamReady |
| 52 | + SherpaOnnxResetKeywordStream | ||
| 52 | # VAD | 53 | # VAD |
| 53 | SherpaOnnxCreateCircularBuffer | 54 | SherpaOnnxCreateCircularBuffer |
| 54 | SherpaOnnxDestroyCircularBuffer | 55 | SherpaOnnxDestroyCircularBuffer |
| @@ -87,6 +88,12 @@ set(exported_functions | @@ -87,6 +88,12 @@ set(exported_functions | ||
| 87 | SherpaOnnxReadWaveFromBinaryData | 88 | SherpaOnnxReadWaveFromBinaryData |
| 88 | SherpaOnnxFreeWave | 89 | SherpaOnnxFreeWave |
| 89 | SherpaOnnxWriteWave | 90 | SherpaOnnxWriteWave |
| 91 | + # speech enhancement | ||
| 92 | + SherpaOnnxCreateOfflineSpeechDenoiser | ||
| 93 | + SherpaOnnxDestroyDenoisedAudio | ||
| 94 | + SherpaOnnxDestroyOfflineSpeechDenoiser | ||
| 95 | + SherpaOnnxOfflineSpeechDenoiserGetSampleRate | ||
| 96 | + SherpaOnnxOfflineSpeechDenoiserRun | ||
| 90 | ) | 97 | ) |
| 91 | 98 | ||
| 92 | 99 | ||
| @@ -122,6 +129,7 @@ install( | @@ -122,6 +129,7 @@ install( | ||
| 122 | ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js | 129 | ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js |
| 123 | ${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js | 130 | ${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js |
| 124 | ${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js | 131 | ${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js |
| 132 | + ${CMAKE_SOURCE_DIR}/wasm/speech-enhancement/sherpa-onnx-speech-enhancement.js | ||
| 125 | ${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js | 133 | ${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js |
| 126 | "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js" | 134 | "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js" |
| 127 | "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm" | 135 | "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm" |
| @@ -26,21 +26,21 @@ function readWave(filename, Module) { | @@ -26,21 +26,21 @@ function readWave(filename, Module) { | ||
| 26 | return {samples: samples, sampleRate: sampleRate}; | 26 | return {samples: samples, sampleRate: sampleRate}; |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | -function readWaveFromBinaryData(uint8Array) { | 29 | +function readWaveFromBinaryData(uint8Array, Module) { |
| 30 | const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT; | 30 | const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT; |
| 31 | - const pointer = this.Module._malloc(numBytes); | 31 | + const pointer = Module._malloc(numBytes); |
| 32 | 32 | ||
| 33 | const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes); | 33 | const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes); |
| 34 | dataOnHeap.set(uint8Array); | 34 | dataOnHeap.set(uint8Array); |
| 35 | 35 | ||
| 36 | - const w = this.Module._SherpaOnnxReadWaveFromBinaryData( | ||
| 37 | - dataOnHeap.byteOffset, numBytes); | 36 | + const w = |
| 37 | + Module._SherpaOnnxReadWaveFromBinaryData(dataOnHeap.byteOffset, numBytes); | ||
| 38 | if (w == 0) { | 38 | if (w == 0) { |
| 39 | console.log('Failed to read wave from binary data'); | 39 | console.log('Failed to read wave from binary data'); |
| 40 | return null; | 40 | return null; |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | - this.Module._free(pointer); | 43 | + Module._free(pointer); |
| 44 | 44 | ||
| 45 | const samplesPtr = Module.HEAP32[w / 4] / 4; | 45 | const samplesPtr = Module.HEAP32[w / 4] / 4; |
| 46 | const sampleRate = Module.HEAP32[w / 4 + 1]; | 46 | const sampleRate = Module.HEAP32[w / 4 + 1]; |
| @@ -9,14 +9,14 @@ endif() | @@ -9,14 +9,14 @@ endif() | ||
| 9 | set(exported_functions | 9 | set(exported_functions |
| 10 | MyPrint | 10 | MyPrint |
| 11 | SherpaOnnxCreateOfflineSpeechDenoiser | 11 | SherpaOnnxCreateOfflineSpeechDenoiser |
| 12 | + SherpaOnnxDestroyDenoisedAudio | ||
| 12 | SherpaOnnxDestroyOfflineSpeechDenoiser | 13 | SherpaOnnxDestroyOfflineSpeechDenoiser |
| 14 | + SherpaOnnxFreeWave | ||
| 13 | SherpaOnnxOfflineSpeechDenoiserGetSampleRate | 15 | SherpaOnnxOfflineSpeechDenoiserGetSampleRate |
| 14 | SherpaOnnxOfflineSpeechDenoiserRun | 16 | SherpaOnnxOfflineSpeechDenoiserRun |
| 15 | - SherpaOnnxDestroyDenoisedAudio | ||
| 16 | - SherpaOnnxWriteWave | ||
| 17 | SherpaOnnxReadWave | 17 | SherpaOnnxReadWave |
| 18 | SherpaOnnxReadWaveFromBinaryData | 18 | SherpaOnnxReadWaveFromBinaryData |
| 19 | - SherpaOnnxFreeWave | 19 | + SherpaOnnxWriteWave |
| 20 | ) | 20 | ) |
| 21 | set(mangled_exported_functions) | 21 | set(mangled_exported_functions) |
| 22 | foreach(x IN LISTS exported_functions) | 22 | foreach(x IN LISTS exported_functions) |
-
请 注册 或 登录 后发表评论