Fangjun Kuang
Committed by GitHub

Add JavaScript API (wasm) for speech enhancement GTCRN models (#2007)

1 { 1 {
2 "name": "sherpa-onnx-PLATFORM2-ARCH", 2 "name": "sherpa-onnx-PLATFORM2-ARCH",
3 "version": "SHERPA_ONNX_VERSION", 3 "version": "SHERPA_ONNX_VERSION",
4 - "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection", 4 + "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
5 "main": "index.js", 5 "main": "index.js",
6 "scripts": { 6 "scripts": {
7 "test": "echo \"Error: no test specified\" && exit 1" 7 "test": "echo \"Error: no test specified\" && exit 1"
@@ -46,7 +46,9 @@ @@ -46,7 +46,9 @@
46 "vad", 46 "vad",
47 "node-addon-api", 47 "node-addon-api",
48 "speaker id", 48 "speaker id",
49 - "language id" 49 + "language id",
  50 + "speech enhancement",
  51 + "denoising"
50 ], 52 ],
51 "author": "The next-gen Kaldi team", 53 "author": "The next-gen Kaldi team",
52 "license": "Apache-2.0", 54 "license": "Apache-2.0",
1 { 1 {
2 "name": "sherpa-onnx-node", 2 "name": "sherpa-onnx-node",
3 "version": "SHERPA_ONNX_VERSION", 3 "version": "SHERPA_ONNX_VERSION",
4 - "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection", 4 + "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
5 "main": "sherpa-onnx.js", 5 "main": "sherpa-onnx.js",
6 "scripts": { 6 "scripts": {
7 "test": "echo \"Error: no test specified\" && exit 1" 7 "test": "echo \"Error: no test specified\" && exit 1"
@@ -46,7 +46,9 @@ @@ -46,7 +46,9 @@
46 "vad", 46 "vad",
47 "node-addon-api", 47 "node-addon-api",
48 "speaker id", 48 "speaker id",
49 - "language id" 49 + "language id",
  50 + "speech enhancement",
  51 + "denoising"
50 ], 52 ],
51 "author": "The next-gen Kaldi team", 53 "author": "The next-gen Kaldi team",
52 "license": "Apache-2.0", 54 "license": "Apache-2.0",
@@ -9,6 +9,16 @@ git status @@ -9,6 +9,16 @@ git status
9 ls -lh 9 ls -lh
10 ls -lh node_modules 10 ls -lh node_modules
11 11
  12 +# speech enhancement
  13 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
  14 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
  15 +node ./test-offline-speech-enhancement-gtcrn.js
  16 +ls -lh *.wav
  17 +rm gtcrn_simple.onnx
  18 +rm inp_16k.wav
  19 +rm enhanced-16k.wav
  20 +
  21 +
12 # offline tts 22 # offline tts
13 # 23 #
14 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 24 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
@@ -4,7 +4,6 @@ on: @@ -4,7 +4,6 @@ on:
4 push: 4 push:
5 branches: 5 branches:
6 - wasm 6 - wasm
7 - - wasm-gtcrn  
8 tags: 7 tags:
9 - 'v[0-9]+.[0-9]+.[0-9]+*' 8 - 'v[0-9]+.[0-9]+.[0-9]+*'
10 9
@@ -79,9 +78,9 @@ jobs: @@ -79,9 +78,9 @@ jobs:
79 file_glob: true 78 file_glob: true
80 overwrite: true 79 overwrite: true
81 file: ./*.tar.bz2 80 file: ./*.tar.bz2
82 - repo_name: k2-fsa/sherpa-onnx  
83 - repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}  
84 - tag: v1.10.46 81 + # repo_name: k2-fsa/sherpa-onnx
  82 + # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
  83 + # tag: v1.10.46
85 84
86 - name: Release 85 - name: Release
87 if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') 86 if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
@@ -24,6 +24,16 @@ In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa @@ -24,6 +24,16 @@ In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa
24 for text-to-speech and speech-to-text. 24 for text-to-speech and speech-to-text.
25 25
26 26
  27 +# Speech enhancement
  28 +
  29 +In the following, we demonstrate how to run speech enhancement.
  30 +
  31 +```bash
  32 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
  33 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
  34 +node ./test-offline-speech-enhancement-gtcrn.js
  35 +```
  36 +
27 # Speaker diarization 37 # Speaker diarization
28 38
29 In the following, we demonstrate how to run speaker diarization. 39 In the following, we demonstrate how to run speaker diarization.
  1 +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +// Please download ./gtcrn_simple.onnx and ./inp_16k.wav used in this file
  4 +// from
  5 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  6 +//
  7 +// This script shows how to use speech enhancement API from sherpa-onnx.
  8 +const sherpa_onnx = require('sherpa-onnx');
  9 +
  10 +function createOfflineSpeechDenoiser() {
  11 + let config = {
  12 + model: {
  13 + gtcrn: {model: './gtcrn_simple.onnx'},
  14 + debug: 1,
  15 + },
  16 + };
  17 +
  18 + return sherpa_onnx.createOfflineSpeechDenoiser(config);
  19 +}
  20 +
  21 +speech_denoiser = createOfflineSpeechDenoiser();
  22 +
  23 +const waveFilename = './inp_16k.wav';
  24 +const wave = sherpa_onnx.readWave(waveFilename);
  25 +
  26 +const denoised = speech_denoiser.run(wave.samples, wave.sampleRate);
  27 +sherpa_onnx.writeWave('./enhanced-16k.wav', denoised);
  28 +console.log('Saved to ./enhanced-16k.wav');
  29 +
  30 +speech_denoiser.free();
@@ -9,6 +9,10 @@ const sherpa_onnx_wave = require('./sherpa-onnx-wave.js'); @@ -9,6 +9,10 @@ const sherpa_onnx_wave = require('./sherpa-onnx-wave.js');
9 const sherpa_onnx_vad = require('./sherpa-onnx-vad.js'); 9 const sherpa_onnx_vad = require('./sherpa-onnx-vad.js');
10 const sherpa_onnx_speaker_diarization = 10 const sherpa_onnx_speaker_diarization =
11 require('./sherpa-onnx-speaker-diarization.js'); 11 require('./sherpa-onnx-speaker-diarization.js');
  12 +const sherpa_onnx_speech_enhancement =
  13 + require('./sherpa-onnx-speech-enhancement.js');
  14 +
  15 +
12 16
13 function createOnlineRecognizer(config) { 17 function createOnlineRecognizer(config) {
14 return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config); 18 return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
@@ -47,6 +51,15 @@ function writeWave(filename, data) { @@ -47,6 +51,15 @@ function writeWave(filename, data) {
47 sherpa_onnx_wave.writeWave(filename, data, wasmModule); 51 sherpa_onnx_wave.writeWave(filename, data, wasmModule);
48 } 52 }
49 53
  54 +function readWaveFromBinaryData(uint8Array) {
  55 + return sherpa_onnx_wave.readWaveFromBinaryData(uint8Array, wasmModule);
  56 +}
  57 +
  58 +function createOfflineSpeechDenoiser(config) {
  59 + return sherpa_onnx_speech_enhancement.createOfflineSpeechDenoiser(
  60 + wasmModule, config);
  61 +}
  62 +
50 // Note: online means streaming and offline means non-streaming here. 63 // Note: online means streaming and offline means non-streaming here.
51 // Neither of them requires an internet connection. 64 // Neither of them requires an internet connection.
52 module.exports = { 65 module.exports = {
@@ -55,8 +68,10 @@ module.exports = { @@ -55,8 +68,10 @@ module.exports = {
55 createOfflineTts, 68 createOfflineTts,
56 createKws, 69 createKws,
57 readWave, 70 readWave,
  71 + readWaveFromBinaryData,
58 writeWave, 72 writeWave,
59 createCircularBuffer, 73 createCircularBuffer,
60 createVad, 74 createVad,
61 createOfflineSpeakerDiarization, 75 createOfflineSpeakerDiarization,
  76 + createOfflineSpeechDenoiser,
62 }; 77 };
1 { 1 {
2 "name": "sherpa-onnx", 2 "name": "sherpa-onnx",
3 "version": "SHERPA_ONNX_VERSION", 3 "version": "SHERPA_ONNX_VERSION",
4 - "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection", 4 + "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
5 "main": "index.js", 5 "main": "index.js",
6 "scripts": { 6 "scripts": {
7 "test": "echo \"Error: no test specified\" && exit 1" 7 "test": "echo \"Error: no test specified\" && exit 1"
@@ -34,7 +34,9 @@ @@ -34,7 +34,9 @@
34 "speech", 34 "speech",
35 "recognition", 35 "recognition",
36 "WebAssembly", 36 "WebAssembly",
37 - "wasm" 37 + "wasm",
  38 + "speech enhancement",
  39 + "denoising"
38 ], 40 ],
39 "author": "The next-gen Kaldi team", 41 "author": "The next-gen Kaldi team",
40 "license": "Apache-2.0", 42 "license": "Apache-2.0",
@@ -49,6 +49,7 @@ set(exported_functions @@ -49,6 +49,7 @@ set(exported_functions
49 SherpaOnnxDestroyKeywordSpotter 49 SherpaOnnxDestroyKeywordSpotter
50 SherpaOnnxGetKeywordResult 50 SherpaOnnxGetKeywordResult
51 SherpaOnnxIsKeywordStreamReady 51 SherpaOnnxIsKeywordStreamReady
  52 + SherpaOnnxResetKeywordStream
52 # VAD 53 # VAD
53 SherpaOnnxCreateCircularBuffer 54 SherpaOnnxCreateCircularBuffer
54 SherpaOnnxDestroyCircularBuffer 55 SherpaOnnxDestroyCircularBuffer
@@ -87,6 +88,12 @@ set(exported_functions @@ -87,6 +88,12 @@ set(exported_functions
87 SherpaOnnxReadWaveFromBinaryData 88 SherpaOnnxReadWaveFromBinaryData
88 SherpaOnnxFreeWave 89 SherpaOnnxFreeWave
89 SherpaOnnxWriteWave 90 SherpaOnnxWriteWave
  91 + # speech enhancement
  92 + SherpaOnnxCreateOfflineSpeechDenoiser
  93 + SherpaOnnxDestroyDenoisedAudio
  94 + SherpaOnnxDestroyOfflineSpeechDenoiser
  95 + SherpaOnnxOfflineSpeechDenoiserGetSampleRate
  96 + SherpaOnnxOfflineSpeechDenoiserRun
90 ) 97 )
91 98
92 99
@@ -122,6 +129,7 @@ install( @@ -122,6 +129,7 @@ install(
122 ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js 129 ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
123 ${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js 130 ${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js
124 ${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js 131 ${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js
  132 + ${CMAKE_SOURCE_DIR}/wasm/speech-enhancement/sherpa-onnx-speech-enhancement.js
125 ${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js 133 ${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js
126 "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js" 134 "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
127 "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm" 135 "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
@@ -26,21 +26,21 @@ function readWave(filename, Module) { @@ -26,21 +26,21 @@ function readWave(filename, Module) {
26 return {samples: samples, sampleRate: sampleRate}; 26 return {samples: samples, sampleRate: sampleRate};
27 } 27 }
28 28
29 -function readWaveFromBinaryData(uint8Array) { 29 +function readWaveFromBinaryData(uint8Array, Module) {
30 const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT; 30 const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT;
31 - const pointer = this.Module._malloc(numBytes); 31 + const pointer = Module._malloc(numBytes);
32 32
33 const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes); 33 const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes);
34 dataOnHeap.set(uint8Array); 34 dataOnHeap.set(uint8Array);
35 35
36 - const w = this.Module._SherpaOnnxReadWaveFromBinaryData(  
37 - dataOnHeap.byteOffset, numBytes); 36 + const w =
  37 + Module._SherpaOnnxReadWaveFromBinaryData(dataOnHeap.byteOffset, numBytes);
38 if (w == 0) { 38 if (w == 0) {
39 console.log('Failed to read wave from binary data'); 39 console.log('Failed to read wave from binary data');
40 return null; 40 return null;
41 } 41 }
42 42
43 - this.Module._free(pointer); 43 + Module._free(pointer);
44 44
45 const samplesPtr = Module.HEAP32[w / 4] / 4; 45 const samplesPtr = Module.HEAP32[w / 4] / 4;
46 const sampleRate = Module.HEAP32[w / 4 + 1]; 46 const sampleRate = Module.HEAP32[w / 4 + 1];
@@ -9,14 +9,14 @@ endif() @@ -9,14 +9,14 @@ endif()
9 set(exported_functions 9 set(exported_functions
10 MyPrint 10 MyPrint
11 SherpaOnnxCreateOfflineSpeechDenoiser 11 SherpaOnnxCreateOfflineSpeechDenoiser
  12 + SherpaOnnxDestroyDenoisedAudio
12 SherpaOnnxDestroyOfflineSpeechDenoiser 13 SherpaOnnxDestroyOfflineSpeechDenoiser
  14 + SherpaOnnxFreeWave
13 SherpaOnnxOfflineSpeechDenoiserGetSampleRate 15 SherpaOnnxOfflineSpeechDenoiserGetSampleRate
14 SherpaOnnxOfflineSpeechDenoiserRun 16 SherpaOnnxOfflineSpeechDenoiserRun
15 - SherpaOnnxDestroyDenoisedAudio  
16 - SherpaOnnxWriteWave  
17 SherpaOnnxReadWave 17 SherpaOnnxReadWave
18 SherpaOnnxReadWaveFromBinaryData 18 SherpaOnnxReadWaveFromBinaryData
19 - SherpaOnnxFreeWave 19 + SherpaOnnxWriteWave
20 ) 20 )
21 set(mangled_exported_functions) 21 set(mangled_exported_functions)
22 foreach(x IN LISTS exported_functions) 22 foreach(x IN LISTS exported_functions)