Committed by
GitHub
Add JavaScript API (WebAssembly) for FireRedAsr model. (#1874)
正在显示
6 个修改的文件
包含
112 行增加
和
4 行删除
| @@ -14,6 +14,7 @@ find dart-api-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.43/1\.10\ | @@ -14,6 +14,7 @@ find dart-api-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.43/1\.10\ | ||
| 14 | find flutter-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; | 14 | find flutter-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; |
| 15 | find flutter -name *.podspec -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; | 15 | find flutter -name *.podspec -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; |
| 16 | find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; | 16 | find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; |
| 17 | +find nodejs-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; | ||
| 17 | 18 | ||
| 18 | find harmony-os -name "README.md" -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; | 19 | find harmony-os -name "README.md" -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; |
| 19 | find harmony-os -name oh-package.json5 -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; | 20 | find harmony-os -name oh-package.json5 -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \; |
| @@ -216,6 +216,21 @@ tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 | @@ -216,6 +216,21 @@ tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 216 | node ./test-offline-whisper.js | 216 | node ./test-offline-whisper.js |
| 217 | ``` | 217 | ``` |
| 218 | 218 | ||
| 219 | +## ./test-offline-fire-red-asr.js | ||
| 220 | + | ||
| 221 | +[./test-offline-fire-red-asr.js](./test-offline-fire-red-asr.js) demonstrates | ||
| 222 | +how to decode a file with a FireRedAsr AED model. | ||
| 223 | + | ||
| 224 | +You can use the following command to run it: | ||
| 225 | + | ||
| 226 | +```bash | ||
| 227 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 228 | +tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 229 | +rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 230 | + | ||
| 231 | +node ./test-offline-fire-red-asr.js | ||
| 232 | +``` | ||
| 233 | + | ||
| 219 | ## ./test-offline-moonshine.js | 234 | ## ./test-offline-moonshine.js |
| 220 | 235 | ||
| 221 | [./test-offline-moonshine.js](./test-offline-moonshine.js) demonstrates | 236 | [./test-offline-moonshine.js](./test-offline-moonshine.js) demonstrates |
nodejs-examples/test-offline-fire-red-asr.js
0 → 100644
| 1 | +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | + | ||
/**
 * Create an offline recognizer configured for the FireRedAsr model.
 *
 * The encoder, decoder and tokens paths point into the extracted
 * ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 directory, so the
 * script has to be run from the directory that contains it.
 *
 * @returns whatever sherpa_onnx.createOfflineRecognizer() returns for the
 *     configuration built here.
 */
function createOfflineRecognizer() {
  // The whole configuration is a single nested literal; only the
  // fireRedAsr section and the shared tokens/debug fields are set.
  return sherpa_onnx.createOfflineRecognizer({
    modelConfig: {
      fireRedAsr: {
        encoder:
            './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx',
        decoder:
            './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx',
      },
      tokens: './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt',
      debug: 1,
    },
  });
}
| 23 | + | ||
// Declare the handles explicitly: assigning to undeclared names creates
// implicit globals and throws a ReferenceError in strict mode / ES modules.
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

// Feed the bundled test recording to the stream in one call.
const waveFilename =
    './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

// Decode the stream and print the recognized text.
recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

// Both objects expose an explicit free(); call it once the result is read.
stream.free();
recognizer.free();
| @@ -35,6 +35,10 @@ function freeConfig(config, Module) { | @@ -35,6 +35,10 @@ function freeConfig(config, Module) { | ||
| 35 | freeConfig(config.whisper, Module) | 35 | freeConfig(config.whisper, Module) |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | + if ('fireRedAsr' in config) { | ||
| 39 | + freeConfig(config.fireRedAsr, Module) | ||
| 40 | + } | ||
| 41 | + | ||
| 38 | if ('moonshine' in config) { | 42 | if ('moonshine' in config) { |
| 39 | freeConfig(config.moonshine, Module) | 43 | freeConfig(config.moonshine, Module) |
| 40 | } | 44 | } |
| @@ -651,6 +655,35 @@ function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) { | @@ -651,6 +655,35 @@ function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) { | ||
| 651 | } | 655 | } |
| 652 | } | 656 | } |
| 653 | 657 | ||
/**
 * Serialize a FireRedAsr model config for the WebAssembly side.
 *
 * Two allocations are made with Module._malloc:
 *   - `buffer`: the encoder path followed by the decoder path, each written
 *     with Module.stringToUTF8 and sized as its UTF-8 byte length plus one.
 *   - `ptr`: two 4-byte slots holding the addresses of those two strings,
 *     encoder first, then decoder.
 *
 * Nothing is freed here; the returned addresses stay allocated.
 *
 * @param {Object} config  May contain `encoder` and `decoder` path strings.
 *     A missing/empty field, or a null/undefined config, is treated as ''.
 * @param {Object} Module  Object providing lengthBytesUTF8, _malloc,
 *     stringToUTF8 and setValue.
 * @returns {{buffer: number, ptr: number, len: number}} Address of the
 *     string storage, address of the pointer pair, and the pointer pair's
 *     size in bytes (8).
 */
function initSherpaOnnxOfflineFireRedAsrModelConfig(config, Module) {
  // `'fireRedAsr' in config` in the caller is also true for an explicit
  // `fireRedAsr: undefined`, so guard the property reads here.
  const encoder = (config && config.encoder) || '';
  const decoder = (config && config.decoder) || '';

  // +1 leaves room for the byte that follows each string's UTF-8 data.
  const encoderLen = Module.lengthBytesUTF8(encoder) + 1;
  const decoderLen = Module.lengthBytesUTF8(decoder) + 1;

  const buffer = Module._malloc(encoderLen + decoderLen);

  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  // The decoder string starts right after the encoder string.
  const encoderAddr = buffer;
  const decoderAddr = buffer + encoderLen;

  Module.stringToUTF8(encoder, encoderAddr, encoderLen);
  Module.stringToUTF8(decoder, decoderAddr, decoderLen);

  Module.setValue(ptr, encoderAddr, 'i8*');
  Module.setValue(ptr + 4, decoderAddr, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  };
}
| 686 | + | ||
| 654 | function initSherpaOnnxOfflineTdnnModelConfig(config, Module) { | 687 | function initSherpaOnnxOfflineTdnnModelConfig(config, Module) { |
| 655 | const n = Module.lengthBytesUTF8(config.model || '') + 1; | 688 | const n = Module.lengthBytesUTF8(config.model || '') + 1; |
| 656 | const buffer = Module._malloc(n); | 689 | const buffer = Module._malloc(n); |
| @@ -755,6 +788,13 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -755,6 +788,13 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 755 | }; | 788 | }; |
| 756 | } | 789 | } |
| 757 | 790 | ||
| 791 | + if (!('fireRedAsr' in config)) { | ||
| 792 | + config.fireRedAsr = { | ||
| 793 | + encoder: '', | ||
| 794 | + decoder: '', | ||
| 795 | + }; | ||
| 796 | + } | ||
| 797 | + | ||
| 758 | if (!('tdnn' in config)) { | 798 | if (!('tdnn' in config)) { |
| 759 | config.tdnn = { | 799 | config.tdnn = { |
| 760 | model: '', | 800 | model: '', |
| @@ -789,8 +829,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -789,8 +829,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 789 | const moonshine = | 829 | const moonshine = |
| 790 | initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module); | 830 | initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module); |
| 791 | 831 | ||
| 832 | + const fireRedAsr = | ||
| 833 | + initSherpaOnnxOfflineFireRedAsrModelConfig(config.fireRedAsr, Module); | ||
| 834 | + | ||
| 792 | const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + | 835 | const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + |
| 793 | - tdnn.len + 8 * 4 + senseVoice.len + moonshine.len; | 836 | + tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len; |
| 794 | 837 | ||
| 795 | const ptr = Module._malloc(len); | 838 | const ptr = Module._malloc(len); |
| 796 | 839 | ||
| @@ -884,11 +927,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -884,11 +927,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 884 | offset += senseVoice.len; | 927 | offset += senseVoice.len; |
| 885 | 928 | ||
| 886 | Module._CopyHeap(moonshine.ptr, moonshine.len, ptr + offset); | 929 | Module._CopyHeap(moonshine.ptr, moonshine.len, ptr + offset); |
| 930 | + offset += moonshine.len; | ||
| 931 | + | ||
| 932 | + Module._CopyHeap(fireRedAsr.ptr, fireRedAsr.len, ptr + offset); | ||
| 933 | + offset += fireRedAsr.len; | ||
| 887 | 934 | ||
| 888 | return { | 935 | return { |
| 889 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, | 936 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, |
| 890 | paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn, | 937 | paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn, |
| 891 | - senseVoice: senseVoice, moonshine: moonshine, | 938 | + senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr |
| 892 | } | 939 | } |
| 893 | } | 940 | } |
| 894 | 941 |
| @@ -15,6 +15,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, ""); | @@ -15,6 +15,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, ""); | ||
| 15 | 15 | ||
| 16 | static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, ""); | 16 | static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, ""); |
| 17 | static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, ""); | 17 | static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, ""); |
| 18 | +static_assert(sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) == 2 * 4, ""); | ||
| 18 | static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, ""); | 19 | static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, ""); |
| 19 | static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, ""); | 20 | static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, ""); |
| 20 | static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, ""); | 21 | static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, ""); |
| @@ -27,7 +28,9 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | @@ -27,7 +28,9 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | ||
| 27 | sizeof(SherpaOnnxOfflineWhisperModelConfig) + | 28 | sizeof(SherpaOnnxOfflineWhisperModelConfig) + |
| 28 | sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 + | 29 | sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 + |
| 29 | sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) + | 30 | sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) + |
| 30 | - sizeof(SherpaOnnxOfflineMoonshineModelConfig), | 31 | + sizeof(SherpaOnnxOfflineMoonshineModelConfig) + |
| 32 | + sizeof(SherpaOnnxOfflineFireRedAsrModelConfig), | ||
| 33 | + | ||
| 31 | ""); | 34 | ""); |
| 32 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); | 35 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); |
| 33 | static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) == | 36 | static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) == |
| @@ -69,6 +72,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | @@ -69,6 +72,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | ||
| 69 | auto tdnn = &model_config->tdnn; | 72 | auto tdnn = &model_config->tdnn; |
| 70 | auto sense_voice = &model_config->sense_voice; | 73 | auto sense_voice = &model_config->sense_voice; |
| 71 | auto moonshine = &model_config->moonshine; | 74 | auto moonshine = &model_config->moonshine; |
| 75 | + auto fire_red_asr = &model_config->fire_red_asr; | ||
| 72 | 76 | ||
| 73 | fprintf(stdout, "----------offline transducer model config----------\n"); | 77 | fprintf(stdout, "----------offline transducer model config----------\n"); |
| 74 | fprintf(stdout, "encoder: %s\n", transducer->encoder); | 78 | fprintf(stdout, "encoder: %s\n", transducer->encoder); |
| @@ -102,6 +106,10 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | @@ -102,6 +106,10 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | ||
| 102 | fprintf(stdout, "uncached_decoder: %s\n", moonshine->uncached_decoder); | 106 | fprintf(stdout, "uncached_decoder: %s\n", moonshine->uncached_decoder); |
| 103 | fprintf(stdout, "cached_decoder: %s\n", moonshine->cached_decoder); | 107 | fprintf(stdout, "cached_decoder: %s\n", moonshine->cached_decoder); |
| 104 | 108 | ||
| 109 | + fprintf(stdout, "----------offline FireRedAsr model config----------\n"); | ||
| 110 | + fprintf(stdout, "encoder: %s\n", fire_red_asr->encoder); | ||
| 111 | + fprintf(stdout, "decoder: %s\n", fire_red_asr->decoder); | ||
| 112 | + | ||
| 105 | fprintf(stdout, "tokens: %s\n", model_config->tokens); | 113 | fprintf(stdout, "tokens: %s\n", model_config->tokens); |
| 106 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); | 114 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); |
| 107 | fprintf(stdout, "provider: %s\n", model_config->provider); | 115 | fprintf(stdout, "provider: %s\n", model_config->provider); |
-
请 注册 或 登录 后发表评论