Fangjun Kuang
Committed by GitHub

Add JavaScript API (WebAssembly) for FireRedAsr model. (#1874)

... ... @@ -14,6 +14,7 @@ find dart-api-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.43/1\.10\
# Rewrite the pinned version string 1.10.43 -> 1.10.44 in each package manifest.
# The -name patterns are quoted so the shell passes them to find verbatim
# instead of expanding them against files in the current directory.
find flutter-examples -name "*.yaml" -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
find flutter -name "*.podspec" -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
find nodejs-addon-examples -name "package.json" -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
find nodejs-examples -name "package.json" -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
find harmony-os -name "README.md" -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
find harmony-os -name "oh-package.json5" -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
... ...
... ... @@ -216,6 +216,21 @@ tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test-offline-whisper.js
```
## ./test-offline-fire-red-asr.js
[./test-offline-fire-red-asr.js](./test-offline-fire-red-asr.js) demonstrates
how to decode a file with a FireRedAsr AED model.
You can use the following command to run it:
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
node ./test-offline-fire-red-asr.js
```
## ./test-offline-moonshine.js
[./test-offline-moonshine.js](./test-offline-moonshine.js) demonstrates
... ...
{
"dependencies": {
"naudiodon2": "^2.4.0",
"sherpa-onnx": "*",
"sherpa-onnx": "^1.10.44",
"wav": "^1.0.2"
}
}
... ...
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
//
const sherpa_onnx = require('sherpa-onnx');
/**
 * Builds an offline recognizer configured for the FireRedAsr int8 model
 * files under ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/.
 *
 * @returns whatever sherpa_onnx.createOfflineRecognizer() returns for the
 *     assembled configuration.
 */
function createOfflineRecognizer() {
  // Encoder/decoder pair of the FireRedAsr model.
  const fireRedAsr = {
    encoder:
        './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx',
    decoder:
        './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx',
  };

  return sherpa_onnx.createOfflineRecognizer({
    modelConfig: {
      fireRedAsr: fireRedAsr,
      tokens: './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt',
      debug: 1,
    },
  });
}
// Create the recognizer and one decoding stream. Both are declared with
// const so they do not leak as implicit globals (which would also throw
// under strict mode / ES modules).
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

// Test wave file shipped inside the model directory.
const waveFilename =
    './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);

// Feed the whole file to the stream, decode it, and print the transcript.
stream.acceptWaveform(wave.sampleRate, wave.samples);
recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

// Explicitly release the stream and the recognizer once we are done.
stream.free();
recognizer.free();
... ...
... ... @@ -35,6 +35,10 @@ function freeConfig(config, Module) {
freeConfig(config.whisper, Module)
}
if ('fireRedAsr' in config) {
freeConfig(config.fireRedAsr, Module)
}
if ('moonshine' in config) {
freeConfig(config.moonshine, Module)
}
... ... @@ -651,6 +655,35 @@ function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) {
}
}
/**
 * Lays out a FireRedAsr model config in the Module heap.
 *
 * Two allocations are made with Module._malloc:
 *   - `buffer`: the NUL-terminated UTF-8 encoder path immediately followed
 *     by the NUL-terminated UTF-8 decoder path;
 *   - `ptr`: two 4-byte slots holding the addresses of those two strings
 *     (encoder first, decoder second).
 *
 * Missing or falsy `config.encoder` / `config.decoder` are treated as ''.
 *
 * @param config object with optional string fields `encoder` and `decoder`.
 * @param Module object providing lengthBytesUTF8, _malloc, stringToUTF8 and
 *     setValue.
 * @returns {{buffer: number, ptr: number, len: number}} the two allocations
 *     and the byte size of the `ptr` block (8). Nothing is freed here.
 */
function initSherpaOnnxOfflineFireRedAsrModelConfig(config, Module) {
  const encoderPath = config.encoder || '';
  const decoderPath = config.decoder || '';

  // +1 leaves room for the terminating NUL of each string.
  const encoderBytes = Module.lengthBytesUTF8(encoderPath) + 1;
  const decoderBytes = Module.lengthBytesUTF8(decoderPath) + 1;

  const buffer = Module._malloc(encoderBytes + decoderBytes);

  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  // The decoder string starts right after the encoder string.
  const encoderAddr = buffer;
  const decoderAddr = buffer + encoderBytes;

  Module.stringToUTF8(encoderPath, encoderAddr, encoderBytes);
  Module.stringToUTF8(decoderPath, decoderAddr, decoderBytes);

  Module.setValue(ptr, encoderAddr, 'i8*');
  Module.setValue(ptr + 4, decoderAddr, 'i8*');

  return {buffer, ptr, len};
}
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
const n = Module.lengthBytesUTF8(config.model || '') + 1;
const buffer = Module._malloc(n);
... ... @@ -755,6 +788,13 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
};
}
if (!('fireRedAsr' in config)) {
config.fireRedAsr = {
encoder: '',
decoder: '',
};
}
if (!('tdnn' in config)) {
config.tdnn = {
model: '',
... ... @@ -789,8 +829,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
const moonshine =
initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module);
const fireRedAsr =
initSherpaOnnxOfflineFireRedAsrModelConfig(config.fireRedAsr, Module);
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len;
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len;
const ptr = Module._malloc(len);
... ... @@ -884,11 +927,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
offset += senseVoice.len;
Module._CopyHeap(moonshine.ptr, moonshine.len, ptr + offset);
offset += moonshine.len;
Module._CopyHeap(fireRedAsr.ptr, fireRedAsr.len, ptr + offset);
offset += fireRedAsr.len;
return {
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
senseVoice: senseVoice, moonshine: moonshine,
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr
}
}
... ...
... ... @@ -15,6 +15,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, "");
... ... @@ -27,7 +28,9 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
sizeof(SherpaOnnxOfflineWhisperModelConfig) +
sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 +
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
sizeof(SherpaOnnxOfflineMoonshineModelConfig),
sizeof(SherpaOnnxOfflineMoonshineModelConfig) +
sizeof(SherpaOnnxOfflineFireRedAsrModelConfig),
"");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
... ... @@ -69,6 +72,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
auto tdnn = &model_config->tdnn;
auto sense_voice = &model_config->sense_voice;
auto moonshine = &model_config->moonshine;
auto fire_red_asr = &model_config->fire_red_asr;
fprintf(stdout, "----------offline transducer model config----------\n");
fprintf(stdout, "encoder: %s\n", transducer->encoder);
... ... @@ -102,6 +106,10 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
fprintf(stdout, "uncached_decoder: %s\n", moonshine->uncached_decoder);
fprintf(stdout, "cached_decoder: %s\n", moonshine->cached_decoder);
fprintf(stdout, "----------offline FireRedAsr model config----------\n");
fprintf(stdout, "encoder: %s\n", fire_red_asr->encoder);
fprintf(stdout, "decoder: %s\n", fire_red_asr->decoder);
fprintf(stdout, "tokens: %s\n", model_config->tokens);
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
fprintf(stdout, "provider: %s\n", model_config->provider);
... ...