Fangjun Kuang
Committed by GitHub

Add JavaScript API (wasm) for speech enhancement GTCRN models (#2007)

{
"name": "sherpa-onnx-PLATFORM2-ARCH",
"version": "SHERPA_ONNX_VERSION",
"description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
"description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
... ... @@ -46,7 +46,9 @@
"vad",
"node-addon-api",
"speaker id",
"language id"
"language id",
"speech enhancement",
"denoising"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
... ...
{
"name": "sherpa-onnx-node",
"version": "SHERPA_ONNX_VERSION",
"description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
"description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
"main": "sherpa-onnx.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
... ... @@ -46,7 +46,9 @@
"vad",
"node-addon-api",
"speaker id",
"language id"
"language id",
"speech enhancement",
"denoising"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
... ...
... ... @@ -9,6 +9,16 @@ git status
ls -lh
ls -lh node_modules
# speech enhancement
#
# Fetch the GTCRN model and the 16 kHz input wave from the
# speech-enhancement-models release page.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
# Run the example; it is expected to write ./enhanced-16k.wav.
node ./test-offline-speech-enhancement-gtcrn.js
# List the wave files so the generated output shows up in the log.
ls -lh *.wav
# Clean up the downloaded files and the generated output.
rm gtcrn_simple.onnx
rm inp_16k.wav
rm enhanced-16k.wav
# offline tts
#
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
... ...
... ... @@ -4,7 +4,6 @@ on:
push:
branches:
- wasm
- wasm-gtcrn
tags:
- 'v[0-9]+.[0-9]+.[0-9]+*'
... ... @@ -79,9 +78,9 @@ jobs:
file_glob: true
overwrite: true
file: ./*.tar.bz2
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: v1.10.46
# repo_name: k2-fsa/sherpa-onnx
# repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
# tag: v1.10.46
- name: Release
if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
... ...
... ... @@ -24,6 +24,16 @@ In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa
for text-to-speech and speech-to-text.
# Speech enhancement
In the following, we demonstrate how to run speech enhancement.
```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
node ./test-offline-speech-enhancement-gtcrn.js
```
# Speaker diarization
In the following, we demonstrate how to run speaker diarization.
... ...
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
//
// Please download ./gtcrn_simple.onnx and ./inp_16k.wav used in this file
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// This script shows how to use the speech enhancement API from sherpa-onnx.
const sherpa_onnx = require('sherpa-onnx');
// Create an offline speech denoiser that uses the GTCRN model stored in
// ./gtcrn_simple.onnx.
//
// Returns whatever sherpa_onnx.createOfflineSpeechDenoiser() returns for the
// configuration assembled below.
function createOfflineSpeechDenoiser() {
  // Path of the GTCRN model, relative to the current working directory.
  const gtcrn = {model: './gtcrn_simple.onnx'};

  // debug is set to 1 here; presumably this turns on verbose logging --
  // TODO confirm against the sherpa-onnx documentation.
  const model = {gtcrn, debug: 1};

  return sherpa_onnx.createOfflineSpeechDenoiser({model});
}
// Denoise ./inp_16k.wav and save the result to ./enhanced-16k.wav.
//
// The handle is declared with `const`: a bare assignment would create an
// implicit global and throws a ReferenceError in strict mode / ES modules.
const speech_denoiser = createOfflineSpeechDenoiser();

try {
  const waveFilename = './inp_16k.wav';
  const wave = sherpa_onnx.readWave(waveFilename);

  // run() receives the samples and the sample rate read from the input file.
  const denoised = speech_denoiser.run(wave.samples, wave.sampleRate);
  sherpa_onnx.writeWave('./enhanced-16k.wav', denoised);

  console.log('Saved to ./enhanced-16k.wav');
} finally {
  // Always release the denoiser, even if reading, denoising or writing throws.
  speech_denoiser.free();
}
... ...
... ... @@ -9,6 +9,10 @@ const sherpa_onnx_wave = require('./sherpa-onnx-wave.js');
const sherpa_onnx_vad = require('./sherpa-onnx-vad.js');
const sherpa_onnx_speaker_diarization =
require('./sherpa-onnx-speaker-diarization.js');
const sherpa_onnx_speech_enhancement =
require('./sherpa-onnx-speech-enhancement.js');
function createOnlineRecognizer(config) {
return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
... ... @@ -47,6 +51,15 @@ function writeWave(filename, data) {
sherpa_onnx_wave.writeWave(filename, data, wasmModule);
}
// Forward the given Uint8Array to sherpa_onnx_wave.readWaveFromBinaryData()
// together with the module-level wasmModule and return its result unchanged.
function readWaveFromBinaryData(uint8Array) {
  const wave = sherpa_onnx_wave.readWaveFromBinaryData(uint8Array, wasmModule);
  return wave;
}
// Create an offline speech denoiser from `config` by delegating to the
// speech-enhancement helper, passing the module-level wasmModule first.
function createOfflineSpeechDenoiser(config) {
  const denoiser = sherpa_onnx_speech_enhancement.createOfflineSpeechDenoiser(
      wasmModule, config);
  return denoiser;
}
// Note: online means streaming and offline means non-streaming here.
// Neither of them requires an internet connection.
module.exports = {
... ... @@ -55,8 +68,10 @@ module.exports = {
createOfflineTts,
createKws,
readWave,
readWaveFromBinaryData,
writeWave,
createCircularBuffer,
createVad,
createOfflineSpeakerDiarization,
createOfflineSpeechDenoiser,
};
... ...
{
"name": "sherpa-onnx",
"version": "SHERPA_ONNX_VERSION",
"description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
"description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
... ... @@ -34,7 +34,9 @@
"speech",
"recognition",
"WebAssembly",
"wasm"
"wasm",
"speech enhancement",
"denoising"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
... ...
... ... @@ -49,6 +49,7 @@ set(exported_functions
SherpaOnnxDestroyKeywordSpotter
SherpaOnnxGetKeywordResult
SherpaOnnxIsKeywordStreamReady
SherpaOnnxResetKeywordStream
# VAD
SherpaOnnxCreateCircularBuffer
SherpaOnnxDestroyCircularBuffer
... ... @@ -87,6 +88,12 @@ set(exported_functions
SherpaOnnxReadWaveFromBinaryData
SherpaOnnxFreeWave
SherpaOnnxWriteWave
# speech enhancement
SherpaOnnxCreateOfflineSpeechDenoiser
SherpaOnnxDestroyDenoisedAudio
SherpaOnnxDestroyOfflineSpeechDenoiser
SherpaOnnxOfflineSpeechDenoiserGetSampleRate
SherpaOnnxOfflineSpeechDenoiserRun
)
... ... @@ -122,6 +129,7 @@ install(
${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js
${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js
${CMAKE_SOURCE_DIR}/wasm/speech-enhancement/sherpa-onnx-speech-enhancement.js
${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
... ...
... ... @@ -26,21 +26,21 @@ function readWave(filename, Module) {
return {samples: samples, sampleRate: sampleRate};
}
function readWaveFromBinaryData(uint8Array) {
function readWaveFromBinaryData(uint8Array, Module) {
const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT;
const pointer = this.Module._malloc(numBytes);
const pointer = Module._malloc(numBytes);
const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes);
dataOnHeap.set(uint8Array);
const w = this.Module._SherpaOnnxReadWaveFromBinaryData(
dataOnHeap.byteOffset, numBytes);
const w =
Module._SherpaOnnxReadWaveFromBinaryData(dataOnHeap.byteOffset, numBytes);
if (w == 0) {
console.log('Failed to read wave from binary data');
return null;
}
this.Module._free(pointer);
Module._free(pointer);
const samplesPtr = Module.HEAP32[w / 4] / 4;
const sampleRate = Module.HEAP32[w / 4 + 1];
... ...
... ... @@ -9,14 +9,14 @@ endif()
set(exported_functions
MyPrint
SherpaOnnxCreateOfflineSpeechDenoiser
SherpaOnnxDestroyDenoisedAudio
SherpaOnnxDestroyOfflineSpeechDenoiser
SherpaOnnxFreeWave
SherpaOnnxOfflineSpeechDenoiserGetSampleRate
SherpaOnnxOfflineSpeechDenoiserRun
SherpaOnnxDestroyDenoisedAudio
SherpaOnnxWriteWave
SherpaOnnxReadWave
SherpaOnnxReadWaveFromBinaryData
SherpaOnnxFreeWave
SherpaOnnxWriteWave
)
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
... ...