Fangjun Kuang
Committed by GitHub

Add JavaScript API (WASM) for homophone replacer (#2157)

... ... @@ -144,7 +144,18 @@ tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
node ./test-offline-sense-voice.js
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
node ./test-offline-sense-voice-with-hr.js
rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
rm -rf dict replace.fst test-hr.wav lexicon.txt
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
ls -lh
... ...
... ... @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04]
os: [ubuntu-22.04]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
... ...
... ... @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
os: [ubuntu-22.04]
python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
manylinux: [manylinux2014] #, manylinux_2_28]
... ...
... ... @@ -35,11 +35,11 @@ jobs:
matrix:
# See https://github.com/actions/runner-images
include:
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.7"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.8"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.9"
- os: ubuntu-22.04
python-version: "3.10"
... ... @@ -48,7 +48,7 @@ jobs:
- os: ubuntu-22.04
python-version: "3.12"
- os: macos-12
- os: macos-13
python-version: "3.8"
- os: macos-13
... ... @@ -137,8 +137,8 @@ jobs:
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.2/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.10/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.3/x64/bin:$PATH
which sherpa-onnx
sherpa-onnx --help
... ...
... ... @@ -40,7 +40,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-latest, macos-14, ubuntu-20.04, ubuntu-22.04, windows-latest]
os: [macos-latest, macos-14, ubuntu-latest, ubuntu-22.04, windows-latest]
node-version: ["16", "17", "18", "19", "21", "22"]
steps:
... ...
... ... @@ -30,11 +30,11 @@ jobs:
matrix:
# See https://github.com/actions/runner-images
include:
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.7"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.8"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.9"
- os: ubuntu-22.04
python-version: "3.10"
... ... @@ -45,7 +45,7 @@ jobs:
- os: ubuntu-22.04
python-version: "3.13"
- os: macos-12
- os: macos-13
python-version: "3.8"
- os: macos-13
... ... @@ -110,8 +110,8 @@ jobs:
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.2/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.10/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.3/x64/bin:$PATH
sherpa-onnx --help
sherpa-onnx-keyword-spotter --help
... ...
... ... @@ -33,7 +33,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, ubuntu-22.04, windows-latest, macos-latest, macos-14]
os: [ubuntu-latest, ubuntu-22.04, windows-latest, macos-latest, macos-14]
python-version: ["3.10"]
model_type: ["transducer", "paraformer", "nemo_ctc", "whisper", "tdnn"]
... ...
... ... @@ -33,7 +33,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, ubuntu-22.04, windows-latest, macos-latest, macos-14]
os: [ubuntu-latest, ubuntu-22.04, windows-latest, macos-latest, macos-14]
python-version: ["3.10"]
model_type: ["transducer", "paraformer", "zipformer2-ctc"]
... ...
... ... @@ -182,10 +182,32 @@ tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
node ./test-offline-paraformer.js
```
## ./test-offline-sense-voice-with-hr.js
[./test-offline-sense-voice-with-hr.js](./test-offline-sense-voice-with-hr.js) demonstrates
how to decode a file with a non-streaming SenseVoice model and the homophone replacer.
You can use the following command to run it:
```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
node ./test-offline-sense-voice-with-hr.js
```
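Compared with the plain SenseVoice example below, the only change is the extra `hr` field in the recognizer config. A minimal sketch of that part, drawn from the full script in [./test-offline-sense-voice-with-hr.js](./test-offline-sense-voice-with-hr.js):
```js
const sherpa_onnx = require('sherpa-onnx');

const config = {
  modelConfig: {
    senseVoice: {
      model: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
      useInverseTextNormalization: 1,
    },
    tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
  },
  hr: {
    dictDir: './dict',        // extracted from dict.tar.bz2
    lexicon: './lexicon.txt',
    ruleFsts: './replace.fst',
  },
};
const recognizer = sherpa_onnx.createOfflineRecognizer(config);
```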
## ./test-offline-sense-voice.js
[./test-offline-sense-voice.js](./test-offline-sense-voice.js) demonstrates
how to decode a file with a non-streaming Paraformer model.
how to decode a file with a non-streaming SenseVoice model.
You can use the following command to run it:
... ...
// Copyright (c) 2024-2025 Xiaomi Corporation (authors: Fangjun Kuang)
const sherpa_onnx = require('sherpa-onnx');
function createOfflineRecognizer() {
let modelConfig = {
senseVoice: {
model:
'./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
language: '',
useInverseTextNormalization: 1,
},
tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
};
let config = {
modelConfig: modelConfig,
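    // homophone replacer resources; the files are downloaded in the README commands above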
hr: {
dictDir: './dict',
lexicon: './lexicon.txt',
ruleFsts: './replace.fst',
},
};
return sherpa_onnx.createOfflineRecognizer(config);
}
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();
const waveFilename = './test-hr.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);
recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);
stream.free();
recognizer.free();
... ...
... ... @@ -63,6 +63,10 @@ function freeConfig(config, Module) {
freeConfig(config.ctcFstDecoder, Module)
}
if ('hr' in config) {
freeConfig(config.hr, Module)
}
Module._free(config.ptr);
}
... ... @@ -281,6 +285,34 @@ function initSherpaOnnxFeatureConfig(config, Module) {
return {ptr: ptr, len: len};
}
function initSherpaOnnxHomophoneReplacerConfig(config, Module) {
const len = 3 * 4;
const ptr = Module._malloc(len);
const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1;
const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
const bufferLen = dictDirLen + lexiconLen + ruleFstsLen;
const buffer = Module._malloc(bufferLen);
let offset = 0
Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen);
offset += dictDirLen;
Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
offset += lexiconLen;
Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
offset += ruleFstsLen;
Module.setValue(ptr, buffer, 'i8*');
Module.setValue(ptr + 4, buffer + dictDirLen, 'i8*');
Module.setValue(ptr + 8, buffer + dictDirLen + lexiconLen, 'i8*');
return {ptr: ptr, len: len, buffer: buffer};
}
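// A sketch of how the helper above is consumed by the recognizer config
// initializers below. Names are the ones defined in this file; the offsets
// assume 4-byte wasm32 pointers, and recognizerConfigPtr is illustrative only:
//
//   const hr = initSherpaOnnxHomophoneReplacerConfig(
//       {dictDir: './dict', lexicon: './lexicon.txt', ruleFsts: './replace.fst'},
//       Module);
//   // hr.ptr -> 3 consecutive char* fields (hr.len = 12 bytes), in the order
//   //   dict_dir, lexicon, rule_fsts, mirroring SherpaOnnxHomophoneReplacerConfig.
//   // hr.buffer -> the three NUL-terminated strings packed back to back.
//   Module._CopyHeap(hr.ptr, hr.len, recognizerConfigPtr + offset);
//   // The hr handle is stored on the returned config object so that
//   // freeConfig() can release it via the 'hr' branch added above.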
function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
const len = 2 * 4;
const ptr = Module._malloc(len);
... ... @@ -317,12 +349,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
config.hotwordsBufSize = 0;
}
if (!('hr' in config)) {
config.hr = {
dictDir: '',
lexicon: '',
ruleFsts: '',
};
}
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
config.ctcFstDecoderConfig, Module)
const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4;
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4 + hr.len;
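// Layout note (a sketch, assuming 4-byte wasm32 pointers): hr.len is
// 3 * 4 = 12 bytes, one char* each for dict_dir, lexicon and rule_fsts,
// which keeps this total in sync with the static_assert on
// SherpaOnnxOnlineRecognizerConfig in the C glue, where the same tail is
// written as ... + 5 * 4 + sizeof(SherpaOnnxHomophoneReplacerConfig).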
const ptr = Module._malloc(len);
let offset = 0;
... ... @@ -411,9 +452,12 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
Module.setValue(ptr + offset, config.hotwordsBufSize || 0, 'i32');
offset += 4;
Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
offset += hr.len;
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
ctcFstDecoder: ctcFstDecoder
ctcFstDecoder: ctcFstDecoder, hr: hr,
}
}
... ... @@ -989,11 +1033,20 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
};
}
if (!('hr' in config)) {
config.hr = {
dictDir: '',
lexicon: '',
ruleFsts: '',
};
}
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);
const len = feat.len + model.len + lm.len + 7 * 4;
const len = feat.len + model.len + lm.len + 7 * 4 + hr.len;
const ptr = Module._malloc(len);
let offset = 0;
... ... @@ -1056,8 +1109,12 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
Module.setValue(ptr + offset, config.blankPenalty || 0, 'float');
offset += 4;
Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
offset += hr.len;
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm,
hr: hr,
}
}
... ...
... ... @@ -26,7 +26,8 @@ static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
sizeof(SherpaOnnxFeatureConfig) +
sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4,
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4 +
sizeof(SherpaOnnxHomophoneReplacerConfig),
"");
void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
... ... @@ -82,6 +83,11 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);
fprintf(stdout, "max_active: %d\n",
config->ctc_fst_decoder_config.max_active);
fprintf(stdout, "----------hr config----------\n");
fprintf(stdout, "dict_dir: %s\n", config->hr.dict_dir);
fprintf(stdout, "lexicon: %s\n", config->hr.lexicon);
fprintf(stdout, "rule_fsts: %s\n", config->hr.rule_fsts);
}
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
... ...
... ... @@ -38,7 +38,8 @@ static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
sizeof(SherpaOnnxFeatureConfig) +
sizeof(SherpaOnnxOfflineLMConfig) +
sizeof(SherpaOnnxOfflineModelConfig) + 7 * 4,
sizeof(SherpaOnnxOfflineModelConfig) + 7 * 4 +
sizeof(SherpaOnnxHomophoneReplacerConfig),
"");
void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
... ... @@ -137,6 +138,10 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
fprintf(stdout, "blank_penalty: %f\n", config->blank_penalty);
fprintf(stdout, "----------hr config----------\n");
fprintf(stdout, "dict_dir: %s\n", config->hr.dict_dir);
fprintf(stdout, "lexicon: %s\n", config->hr.lexicon);
fprintf(stdout, "rule_fsts: %s\n", config->hr.rule_fsts);
}
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
... ...