Fangjun Kuang
Committed by GitHub

Add JavaScript API (WASM) for homophone replacer (#2157)

... ... @@ -144,7 +144,18 @@ tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
node ./test-offline-sense-voice.js
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
node ./test-offline-sense-voice-with-hr.js
rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
rm -rf dict replace.fst test-hr.wav lexicon.txt
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
ls -lh
... ...
... ... @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04]
os: [ubuntu-22.04]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
... ...
... ... @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
os: [ubuntu-22.04]
python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
manylinux: [manylinux2014] #, manylinux_2_28]
... ...
... ... @@ -35,11 +35,11 @@ jobs:
matrix:
# See https://github.com/actions/runner-images
include:
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.7"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.8"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.9"
- os: ubuntu-22.04
python-version: "3.10"
... ... @@ -48,7 +48,7 @@ jobs:
- os: ubuntu-22.04
python-version: "3.12"
- os: macos-12
- os: macos-13
python-version: "3.8"
- os: macos-13
... ... @@ -137,8 +137,8 @@ jobs:
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.2/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.10/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.3/x64/bin:$PATH
which sherpa-onnx
sherpa-onnx --help
... ...
... ... @@ -40,7 +40,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-latest, macos-14, ubuntu-20.04, ubuntu-22.04, windows-latest]
os: [macos-latest, macos-14, ubuntu-latest, ubuntu-22.04, windows-latest]
node-version: ["16", "17", "18", "19", "21", "22"]
steps:
... ...
... ... @@ -30,11 +30,11 @@ jobs:
matrix:
# See https://github.com/actions/runner-images
include:
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.7"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.8"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.9"
- os: ubuntu-22.04
python-version: "3.10"
... ... @@ -45,7 +45,7 @@ jobs:
- os: ubuntu-22.04
python-version: "3.13"
- os: macos-12
- os: macos-13
python-version: "3.8"
- os: macos-13
... ... @@ -110,8 +110,8 @@ jobs:
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.2/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.10/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.3/x64/bin:$PATH
sherpa-onnx --help
sherpa-onnx-keyword-spotter --help
... ...
... ... @@ -33,7 +33,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, ubuntu-22.04, windows-latest, macos-latest, macos-14]
os: [ubuntu-latest, ubuntu-22.04, windows-latest, macos-latest, macos-14]
python-version: ["3.10"]
model_type: ["transducer", "paraformer", "nemo_ctc", "whisper", "tdnn"]
... ...
... ... @@ -33,7 +33,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, ubuntu-22.04, windows-latest, macos-latest, macos-14]
os: [ubuntu-latest, ubuntu-22.04, windows-latest, macos-latest, macos-14]
python-version: ["3.10"]
model_type: ["transducer", "paraformer", "zipformer2-ctc"]
... ...
... ... @@ -182,10 +182,32 @@ tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
node ./test-offline-paraformer.js
```
## ./test-offline-sense-voice-with-hr.js
[./test-offline-sense-voice-with-hr.js](./test-offline-sense-voice-with-hr.js) demonstrates
how to decode a file with a non-streaming SenseVoice model and the homophone replacer.
You can use the following command to run it:
```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
node ./test-offline-sense-voice-with-hr.js
```
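Compared with the plain SenseVoice example below, the only change is the extra `hr` field in the recognizer config. A minimal sketch of that part, drawn from the full script in [./test-offline-sense-voice-with-hr.js](./test-offline-sense-voice-with-hr.js):
```js
const sherpa_onnx = require('sherpa-onnx');

const config = {
  modelConfig: {
    senseVoice: {
      model: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
      useInverseTextNormalization: 1,
    },
    tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
  },
  hr: {
    dictDir: './dict',        // extracted from dict.tar.bz2
    lexicon: './lexicon.txt',
    ruleFsts: './replace.fst',
  },
};
const recognizer = sherpa_onnx.createOfflineRecognizer(config);
```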
## ./test-offline-sense-voice.js
[./test-offline-sense-voice.js](./test-offline-sense-voice.js) demonstrates
how to decode a file with a non-streaming Paraformer model.
how to decode a file with a non-streaming SenseVoice model.
You can use the following command to run it:
... ...
// Copyright (c) 2024-2025 Xiaomi Corporation (authors: Fangjun Kuang)
const sherpa_onnx = require('sherpa-onnx');
function createOfflineRecognizer() {
let modelConfig = {
senseVoice: {
model:
'./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
language: '',
useInverseTextNormalization: 1,
},
tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
};
let config = {
modelConfig: modelConfig,
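    // homophone replacer resources; the files are downloaded in the README commands above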
hr: {
dictDir: './dict',
lexicon: './lexicon.txt',
ruleFsts: './replace.fst',
},
};
return sherpa_onnx.createOfflineRecognizer(config);
}
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();
const waveFilename = './test-hr.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);
recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);
stream.free();
recognizer.free();
... ...
... ... @@ -63,6 +63,10 @@ function freeConfig(config, Module) {
freeConfig(config.ctcFstDecoder, Module)
}
if ('hr' in config) {
freeConfig(config.hr, Module)
}
Module._free(config.ptr);
}
... ... @@ -281,6 +285,34 @@ function initSherpaOnnxFeatureConfig(config, Module) {
return {ptr: ptr, len: len};
}
function initSherpaOnnxHomophoneReplacerConfig(config, Module) {
const len = 3 * 4;
const ptr = Module._malloc(len);
const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1;
const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
const bufferLen = dictDirLen + lexiconLen + ruleFstsLen;
const buffer = Module._malloc(bufferLen);
let offset = 0
Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen);
offset += dictDirLen;
Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
offset += lexiconLen;
Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
offset += ruleFstsLen;
Module.setValue(ptr, buffer, 'i8*');
Module.setValue(ptr + 4, buffer + dictDirLen, 'i8*');
Module.setValue(ptr + 8, buffer + dictDirLen + lexiconLen, 'i8*');
return {ptr: ptr, len: len, buffer: buffer};
}
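// A sketch of how the helper above is consumed by the recognizer config
// initializers below. Names are the ones defined in this file; the offsets
// assume 4-byte wasm32 pointers, and recognizerConfigPtr is illustrative only:
//
//   const hr = initSherpaOnnxHomophoneReplacerConfig(
//       {dictDir: './dict', lexicon: './lexicon.txt', ruleFsts: './replace.fst'},
//       Module);
//   // hr.ptr -> 3 consecutive char* fields (hr.len = 12 bytes), in the order
//   //   dict_dir, lexicon, rule_fsts, mirroring SherpaOnnxHomophoneReplacerConfig.
//   // hr.buffer -> the three NUL-terminated strings packed back to back.
//   Module._CopyHeap(hr.ptr, hr.len, recognizerConfigPtr + offset);
//   // The hr handle is stored on the returned config object so that
//   // freeConfig() can release it via the 'hr' branch added above.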
function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
const len = 2 * 4;
const ptr = Module._malloc(len);
... ... @@ -317,12 +349,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
config.hotwordsBufSize = 0;
}
if (!('hr' in config)) {
config.hr = {
dictDir: '',
lexicon: '',
ruleFsts: '',
};
}
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
config.ctcFstDecoderConfig, Module)
const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4;
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4 + hr.len;
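// Layout note (a sketch, assuming 4-byte wasm32 pointers): hr.len is
// 3 * 4 = 12 bytes, one char* each for dict_dir, lexicon and rule_fsts,
// which keeps this total in sync with the static_assert on
// SherpaOnnxOnlineRecognizerConfig in the C glue, where the same tail is
// written as ... + 5 * 4 + sizeof(SherpaOnnxHomophoneReplacerConfig).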
const ptr = Module._malloc(len);
let offset = 0;
... ... @@ -411,9 +452,12 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
Module.setValue(ptr + offset, config.hotwordsBufSize || 0, 'i32');
offset += 4;
Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
offset += hr.len;
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
ctcFstDecoder: ctcFstDecoder
ctcFstDecoder: ctcFstDecoder, hr: hr,
}
}
... ... @@ -989,11 +1033,20 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
};
}
if (!('hr' in config)) {
config.hr = {
dictDir: '',
lexicon: '',
ruleFsts: '',
};
}
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);
const len = feat.len + model.len + lm.len + 7 * 4;
const len = feat.len + model.len + lm.len + 7 * 4 + hr.len;
const ptr = Module._malloc(len);
let offset = 0;
... ... @@ -1056,8 +1109,12 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
Module.setValue(ptr + offset, config.blankPenalty || 0, 'float');
offset += 4;
Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
offset += hr.len;
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm,
hr: hr,
}
}
... ...
... ... @@ -26,7 +26,8 @@ static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
sizeof(SherpaOnnxFeatureConfig) +
sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4,
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4 +
sizeof(SherpaOnnxHomophoneReplacerConfig),
"");
void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
... ... @@ -82,6 +83,11 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);
fprintf(stdout, "max_active: %d\n",
config->ctc_fst_decoder_config.max_active);
fprintf(stdout, "----------hr config----------\n");
fprintf(stdout, "dict_dir: %s\n", config->hr.dict_dir);
fprintf(stdout, "lexicon: %s\n", config->hr.lexicon);
fprintf(stdout, "rule_fsts: %s\n", config->hr.rule_fsts);
}
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
... ...
... ... @@ -38,7 +38,8 @@ static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
sizeof(SherpaOnnxFeatureConfig) +
sizeof(SherpaOnnxOfflineLMConfig) +
sizeof(SherpaOnnxOfflineModelConfig) + 7 * 4,
sizeof(SherpaOnnxOfflineModelConfig) + 7 * 4 +
sizeof(SherpaOnnxHomophoneReplacerConfig),
"");
void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
... ... @@ -137,6 +138,10 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
fprintf(stdout, "blank_penalty: %f\n", config->blank_penalty);
fprintf(stdout, "----------hr config----------\n");
fprintf(stdout, "dict_dir: %s\n", config->hr.dict_dir);
fprintf(stdout, "lexicon: %s\n", config->hr.lexicon);
fprintf(stdout, "rule_fsts: %s\n", config->hr.rule_fsts);
}
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
... ...