Committed by
GitHub
Add C/CXX/JavaScript API for NeMo Canary models (#2357)
This PR introduces support for NeMo Canary models across C, C++, and JavaScript APIs by adding new Canary configuration structures, updating bindings, extending examples, and enhancing CI workflows. - Add OfflineCanaryModelConfig to all language bindings (C, C++, JS, ETS). - Implement SetConfig methods and NAPI wrappers for updating recognizer config at runtime. - Update examples and CI scripts to demonstrate and test NeMo Canary model usage.
正在显示
28 个修改的文件
包含
736 行增加
和
66 行删除
| @@ -10,6 +10,16 @@ arch=$(node -p "require('os').arch()") | @@ -10,6 +10,16 @@ arch=$(node -p "require('os').arch()") | ||
| 10 | platform=$(node -p "require('os').platform()") | 10 | platform=$(node -p "require('os').platform()") |
| 11 | node_version=$(node -p "process.versions.node.split('.')[0]") | 11 | node_version=$(node -p "process.versions.node.split('.')[0]") |
| 12 | 12 | ||
| 13 | +echo "----------non-streaming ASR NeMo Canary----------" | ||
| 14 | + | ||
| 15 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 16 | +tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 17 | +rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 18 | + | ||
| 19 | +node ./test_asr_non_streaming_nemo_canary.js | ||
| 20 | + | ||
| 21 | +rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8 | ||
| 22 | + | ||
| 13 | echo "----------non-streaming ASR Zipformer CTC----------" | 23 | echo "----------non-streaming ASR Zipformer CTC----------" |
| 14 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2 | 24 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2 |
| 15 | 25 |
| @@ -9,6 +9,14 @@ git status | @@ -9,6 +9,14 @@ git status | ||
| 9 | ls -lh | 9 | ls -lh |
| 10 | ls -lh node_modules | 10 | ls -lh node_modules |
| 11 | 11 | ||
| 12 | +# asr with offline nemo canary | ||
| 13 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 14 | +tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 15 | +rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 16 | + | ||
| 17 | +node ./test-offline-nemo-canary.js | ||
| 18 | +rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8 | ||
| 19 | + | ||
| 12 | # asr with offline zipformer ctc | 20 | # asr with offline zipformer ctc |
| 13 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2 | 21 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2 |
| 14 | 22 |
| @@ -77,16 +77,6 @@ time $EXE \ | @@ -77,16 +77,6 @@ time $EXE \ | ||
| 77 | $repo/test_wavs/DEV_T0000000001.wav \ | 77 | $repo/test_wavs/DEV_T0000000001.wav \ |
| 78 | $repo/test_wavs/DEV_T0000000002.wav | 78 | $repo/test_wavs/DEV_T0000000002.wav |
| 79 | 79 | ||
| 80 | -log "test int8" | ||
| 81 | - | ||
| 82 | -time $EXE \ | ||
| 83 | - --debug=1 \ | ||
| 84 | - --zipformer2-ctc-model=$repo/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \ | ||
| 85 | - --tokens=$repo/tokens.txt \ | ||
| 86 | - $repo/test_wavs/DEV_T0000000000.wav \ | ||
| 87 | - $repo/test_wavs/DEV_T0000000001.wav \ | ||
| 88 | - $repo/test_wavs/DEV_T0000000002.wav | ||
| 89 | - | ||
| 90 | rm -rf $repo | 80 | rm -rf $repo |
| 91 | 81 | ||
| 92 | log "------------------------------------------------------------" | 82 | log "------------------------------------------------------------" |
| @@ -127,6 +127,36 @@ jobs: | @@ -127,6 +127,36 @@ jobs: | ||
| 127 | rm -rf dict lexicon.txt test-hr.wav replace.fst | 127 | rm -rf dict lexicon.txt test-hr.wav replace.fst |
| 128 | rm -v $name | 128 | rm -v $name |
| 129 | 129 | ||
| 130 | + - name: Test NeMo Canary | ||
| 131 | + shell: bash | ||
| 132 | + run: | | ||
| 133 | + name=nemo-canary-c-api | ||
| 134 | + gcc -o $name ./c-api-examples/$name.c \ | ||
| 135 | + -I ./build/install/include \ | ||
| 136 | + -L ./build/install/lib/ \ | ||
| 137 | + -l sherpa-onnx-c-api \ | ||
| 138 | + -l onnxruntime | ||
| 139 | + | ||
| 140 | + ls -lh $name | ||
| 141 | + | ||
| 142 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 143 | + ldd ./$name | ||
| 144 | + echo "----" | ||
| 145 | + readelf -d ./$name | ||
| 146 | + fi | ||
| 147 | + | ||
| 148 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 149 | + tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 150 | + rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 151 | + | ||
| 152 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 153 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 154 | + | ||
| 155 | + ./$name | ||
| 156 | + | ||
| 157 | + rm $name | ||
| 158 | + rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8 | ||
| 159 | + | ||
| 130 | - name: Test Dolphin CTC | 160 | - name: Test Dolphin CTC |
| 131 | shell: bash | 161 | shell: bash |
| 132 | run: | | 162 | run: | |
| @@ -87,6 +87,40 @@ jobs: | @@ -87,6 +87,40 @@ jobs: | ||
| 87 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib | 87 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib |
| 88 | fi | 88 | fi |
| 89 | 89 | ||
| 90 | + - name: Test NeMo Canary | ||
| 91 | + shell: bash | ||
| 92 | + run: | | ||
| 93 | + name=nemo-canary-cxx-api | ||
| 94 | + g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \ | ||
| 95 | + -I ./build/install/include \ | ||
| 96 | + -L ./build/install/lib/ \ | ||
| 97 | + -l sherpa-onnx-cxx-api \ | ||
| 98 | + -l sherpa-onnx-c-api \ | ||
| 99 | + -l onnxruntime | ||
| 100 | + | ||
| 101 | + ls -lh $name | ||
| 102 | + | ||
| 103 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 104 | + ldd ./$name | ||
| 105 | + echo "----" | ||
| 106 | + readelf -d ./$name | ||
| 107 | + fi | ||
| 108 | + | ||
| 109 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 110 | + tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 111 | + rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 112 | + | ||
| 113 | + ls -lh sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8 | ||
| 114 | + echo "---" | ||
| 115 | + | ||
| 116 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 117 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 118 | + | ||
| 119 | + ./$name | ||
| 120 | + | ||
| 121 | + rm -rf sherpa-onnx-nemo-canary-* | ||
| 122 | + rm -v ./$name | ||
| 123 | + | ||
| 90 | - name: Test streaming zipformer with Homophone replacer | 124 | - name: Test streaming zipformer with Homophone replacer |
| 91 | shell: bash | 125 | shell: bash |
| 92 | run: | | 126 | run: | |
| @@ -53,6 +53,9 @@ target_link_libraries(whisper-c-api sherpa-onnx-c-api) | @@ -53,6 +53,9 @@ target_link_libraries(whisper-c-api sherpa-onnx-c-api) | ||
| 53 | add_executable(fire-red-asr-c-api fire-red-asr-c-api.c) | 53 | add_executable(fire-red-asr-c-api fire-red-asr-c-api.c) |
| 54 | target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api) | 54 | target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api) |
| 55 | 55 | ||
| 56 | +add_executable(nemo-canary-c-api nemo-canary-c-api.c) | ||
| 57 | +target_link_libraries(nemo-canary-c-api sherpa-onnx-c-api) | ||
| 58 | + | ||
| 56 | add_executable(sense-voice-c-api sense-voice-c-api.c) | 59 | add_executable(sense-voice-c-api sense-voice-c-api.c) |
| 57 | target_link_libraries(sense-voice-c-api sherpa-onnx-c-api) | 60 | target_link_libraries(sense-voice-c-api sherpa-onnx-c-api) |
| 58 | 61 |
c-api-examples/nemo-canary-c-api.c
0 → 100644
| 1 | +// c-api-examples/nemo-canary-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// We assume you have pre-downloaded the Nemo Canary model | ||
| 6 | +// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 7 | +// An example is given below: | ||
| 8 | +// | ||
| 9 | +// clang-format off | ||
| 10 | +// | ||
| 11 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 12 | +// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 13 | +// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 14 | +// | ||
| 15 | +// clang-format on | ||
| 16 | +// | ||
| 17 | +// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html | ||
| 18 | +// for details | ||
| 19 | + | ||
| 20 | +#include <stdio.h> | ||
| 21 | +#include <stdlib.h> | ||
| 22 | +#include <string.h> | ||
| 23 | + | ||
| 24 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 25 | + | ||
| 26 | +int32_t main() { | ||
| 27 | + const char *wav_filename = | ||
| 28 | + "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav"; | ||
| 29 | + const char *encoder_filename = | ||
| 30 | + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx"; | ||
| 31 | + const char *decoder_filename = | ||
| 32 | + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx"; | ||
| 33 | + const char *tokens_filename = | ||
| 34 | + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt"; | ||
| 35 | + const char *provider = "cpu"; | ||
| 36 | + | ||
| 37 | + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | ||
| 38 | + if (wave == NULL) { | ||
| 39 | + fprintf(stderr, "Failed to read %s\n", wav_filename); | ||
| 40 | + return -1; | ||
| 41 | + } | ||
| 42 | + | ||
| 43 | + // Offline model config | ||
| 44 | + SherpaOnnxOfflineModelConfig offline_model_config; | ||
| 45 | + memset(&offline_model_config, 0, sizeof(offline_model_config)); | ||
| 46 | + | ||
| 47 | + // set debug to 1 to view more logs | ||
| 48 | + offline_model_config.debug = 0; | ||
| 49 | + | ||
| 50 | + offline_model_config.num_threads = 1; | ||
| 51 | + offline_model_config.provider = provider; | ||
| 52 | + offline_model_config.tokens = tokens_filename; | ||
| 53 | + offline_model_config.canary.encoder = encoder_filename; | ||
| 54 | + offline_model_config.canary.decoder = decoder_filename; | ||
| 55 | + | ||
| 56 | + // so it output punctuations and cases | ||
| 57 | + offline_model_config.canary.use_pnc = 1; | ||
| 58 | + | ||
| 59 | + offline_model_config.canary.src_lang = "de"; | ||
| 60 | + | ||
| 61 | + // since there is a German audio, you can set tgt_lang to en or de | ||
| 62 | + offline_model_config.canary.tgt_lang = "en"; | ||
| 63 | + | ||
| 64 | + // Recognizer config | ||
| 65 | + SherpaOnnxOfflineRecognizerConfig recognizer_config; | ||
| 66 | + memset(&recognizer_config, 0, sizeof(recognizer_config)); | ||
| 67 | + recognizer_config.decoding_method = "greedy_search"; | ||
| 68 | + recognizer_config.model_config = offline_model_config; | ||
| 69 | + | ||
| 70 | + const SherpaOnnxOfflineRecognizer *recognizer = | ||
| 71 | + SherpaOnnxCreateOfflineRecognizer(&recognizer_config); | ||
| 72 | + | ||
| 73 | + if (recognizer == NULL) { | ||
| 74 | + fprintf(stderr, "Please check your config!\n"); | ||
| 75 | + | ||
| 76 | + SherpaOnnxFreeWave(wave); | ||
| 77 | + | ||
| 78 | + return -1; | ||
| 79 | + } | ||
| 80 | + | ||
| 81 | + const SherpaOnnxOfflineStream *stream = | ||
| 82 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 83 | + | ||
| 84 | + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | ||
| 85 | + wave->num_samples); | ||
| 86 | + SherpaOnnxDecodeOfflineStream(recognizer, stream); | ||
| 87 | + const SherpaOnnxOfflineRecognizerResult *result = | ||
| 88 | + SherpaOnnxGetOfflineStreamResult(stream); | ||
| 89 | + | ||
| 90 | + fprintf(stderr, "Decoded text (English): %s\n", result->text); | ||
| 91 | + | ||
| 92 | + SherpaOnnxDestroyOfflineRecognizerResult(result); | ||
| 93 | + SherpaOnnxDestroyOfflineStream(stream); | ||
| 94 | + | ||
| 95 | + // now output German text | ||
| 96 | + recognizer_config.model_config.canary.tgt_lang = "de"; | ||
| 97 | + SherpaOnnxOfflineRecognizerSetConfig(recognizer, &recognizer_config); | ||
| 98 | + | ||
| 99 | + stream = SherpaOnnxCreateOfflineStream(recognizer); | ||
| 100 | + | ||
| 101 | + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | ||
| 102 | + wave->num_samples); | ||
| 103 | + SherpaOnnxDecodeOfflineStream(recognizer, stream); | ||
| 104 | + result = SherpaOnnxGetOfflineStreamResult(stream); | ||
| 105 | + | ||
| 106 | + fprintf(stderr, "Decoded text (German): %s\n", result->text); | ||
| 107 | + | ||
| 108 | + SherpaOnnxDestroyOfflineRecognizerResult(result); | ||
| 109 | + SherpaOnnxDestroyOfflineStream(stream); | ||
| 110 | + | ||
| 111 | + SherpaOnnxDestroyOfflineRecognizer(recognizer); | ||
| 112 | + SherpaOnnxFreeWave(wave); | ||
| 113 | + | ||
| 114 | + return 0; | ||
| 115 | +} |
| @@ -54,7 +54,7 @@ int32_t main() { | @@ -54,7 +54,7 @@ int32_t main() { | ||
| 54 | "DEV_T0000000000.wav"; | 54 | "DEV_T0000000000.wav"; |
| 55 | const char *model_filename = | 55 | const char *model_filename = |
| 56 | "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/" | 56 | "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/" |
| 57 | - "ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx"; | 57 | + "ctc-epoch-20-avg-1-chunk-16-left-128.onnx"; |
| 58 | const char *tokens_filename = | 58 | const char *tokens_filename = |
| 59 | "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt"; | 59 | "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt"; |
| 60 | const char *provider = "cpu"; | 60 | const char *provider = "cpu"; |
| @@ -27,6 +27,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) | @@ -27,6 +27,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) | ||
| 27 | add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) | 27 | add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) |
| 28 | target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) | 28 | target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) |
| 29 | 29 | ||
| 30 | +add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc) | ||
| 31 | +target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api) | ||
| 32 | + | ||
| 30 | if(SHERPA_ONNX_ENABLE_PORTAUDIO) | 33 | if(SHERPA_ONNX_ENABLE_PORTAUDIO) |
| 31 | add_executable(sense-voice-simulate-streaming-microphone-cxx-api | 34 | add_executable(sense-voice-simulate-streaming-microphone-cxx-api |
| 32 | ./sense-voice-simulate-streaming-microphone-cxx-api.cc | 35 | ./sense-voice-simulate-streaming-microphone-cxx-api.cc |
cxx-api-examples/nemo-canary-cxx-api.cc
0 → 100644
| 1 | +// cxx-api-examples/nemo-canary-cxx-api.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// | ||
| 6 | +// This file demonstrates how to use NeMo Canary models with | ||
| 7 | +// sherpa-onnx's C++ API. | ||
| 8 | +// | ||
| 9 | +// clang-format off | ||
| 10 | +// | ||
| 11 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 12 | +// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 13 | +// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 14 | +// | ||
| 15 | +// clang-format on | ||
| 16 | +// | ||
| 17 | +// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html | ||
| 18 | +// for details | ||
| 19 | + | ||
| 20 | +#include <chrono> // NOLINT | ||
| 21 | +#include <iostream> | ||
| 22 | +#include <string> | ||
| 23 | + | ||
| 24 | +#include "sherpa-onnx/c-api/cxx-api.h" | ||
| 25 | + | ||
| 26 | +int32_t main() { | ||
| 27 | + using namespace sherpa_onnx::cxx; // NOLINT | ||
| 28 | + OfflineRecognizerConfig config; | ||
| 29 | + | ||
| 30 | + config.model_config.canary.encoder = | ||
| 31 | + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx"; | ||
| 32 | + config.model_config.canary.decoder = | ||
| 33 | + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx"; | ||
| 34 | + | ||
| 35 | + // our input audio is German, so we set src_lang to "de" | ||
| 36 | + config.model_config.canary.src_lang = "de"; | ||
| 37 | + | ||
| 38 | + // we can set tgt_lang either to de or en in this specific case | ||
| 39 | + config.model_config.canary.tgt_lang = "en"; | ||
| 40 | + config.model_config.tokens = | ||
| 41 | + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt"; | ||
| 42 | + | ||
| 43 | + config.model_config.num_threads = 1; | ||
| 44 | + | ||
| 45 | + std::cout << "Loading model\n"; | ||
| 46 | + OfflineRecognizer recognizer = OfflineRecognizer::Create(config); | ||
| 47 | + if (!recognizer.Get()) { | ||
| 48 | + std::cerr << "Please check your config\n"; | ||
| 49 | + return -1; | ||
| 50 | + } | ||
| 51 | + std::cout << "Loading model done\n"; | ||
| 52 | + | ||
| 53 | + std::string wave_filename = | ||
| 54 | + "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav"; | ||
| 55 | + | ||
| 56 | + Wave wave = ReadWave(wave_filename); | ||
| 57 | + if (wave.samples.empty()) { | ||
| 58 | + std::cerr << "Failed to read: '" << wave_filename << "'\n"; | ||
| 59 | + return -1; | ||
| 60 | + } | ||
| 61 | + | ||
| 62 | + std::cout << "Start recognition\n"; | ||
| 63 | + const auto begin = std::chrono::steady_clock::now(); | ||
| 64 | + | ||
| 65 | + OfflineStream stream = recognizer.CreateStream(); | ||
| 66 | + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), | ||
| 67 | + wave.samples.size()); | ||
| 68 | + | ||
| 69 | + recognizer.Decode(&stream); | ||
| 70 | + | ||
| 71 | + OfflineRecognizerResult result = recognizer.GetResult(&stream); | ||
| 72 | + | ||
| 73 | + const auto end = std::chrono::steady_clock::now(); | ||
| 74 | + const float elapsed_seconds = | ||
| 75 | + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin) | ||
| 76 | + .count() / | ||
| 77 | + 1000.; | ||
| 78 | + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate); | ||
| 79 | + float rtf = elapsed_seconds / duration; | ||
| 80 | + | ||
| 81 | + std::cout << "text (English): " << result.text << "\n"; | ||
| 82 | + printf("Number of threads: %d\n", config.model_config.num_threads); | ||
| 83 | + printf("Duration: %.3fs\n", duration); | ||
| 84 | + printf("Elapsed seconds: %.3fs\n", elapsed_seconds); | ||
| 85 | + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds, | ||
| 86 | + duration, rtf); | ||
| 87 | + | ||
| 88 | + // now output text in German | ||
| 89 | + config.model_config.canary.tgt_lang = "de"; | ||
| 90 | + recognizer.SetConfig(config); | ||
| 91 | + stream = recognizer.CreateStream(); | ||
| 92 | + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), | ||
| 93 | + wave.samples.size()); | ||
| 94 | + | ||
| 95 | + recognizer.Decode(&stream); | ||
| 96 | + | ||
| 97 | + result = recognizer.GetResult(&stream); | ||
| 98 | + std::cout << "text (German): " << result.text << "\n"; | ||
| 99 | + | ||
| 100 | + return 0; | ||
| 101 | +} |
| @@ -7,6 +7,7 @@ export { Samples, | @@ -7,6 +7,7 @@ export { Samples, | ||
| 7 | OfflineStream, | 7 | OfflineStream, |
| 8 | FeatureConfig, | 8 | FeatureConfig, |
| 9 | HomophoneReplacerConfig, | 9 | HomophoneReplacerConfig, |
| 10 | + OfflineCanaryModelConfig, | ||
| 10 | OfflineDolphinModelConfig, | 11 | OfflineDolphinModelConfig, |
| 11 | OfflineTransducerModelConfig, | 12 | OfflineTransducerModelConfig, |
| 12 | OfflineParaformerModelConfig, | 13 | OfflineParaformerModelConfig, |
| @@ -93,6 +93,27 @@ static SherpaOnnxOfflineNemoEncDecCtcModelConfig GetOfflineNeMoCtcModelConfig( | @@ -93,6 +93,27 @@ static SherpaOnnxOfflineNemoEncDecCtcModelConfig GetOfflineNeMoCtcModelConfig( | ||
| 93 | return c; | 93 | return c; |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | +static SherpaOnnxOfflineCanaryModelConfig GetOfflineCanaryModelConfig( | ||
| 97 | + Napi::Object obj) { | ||
| 98 | + SherpaOnnxOfflineCanaryModelConfig c; | ||
| 99 | + memset(&c, 0, sizeof(c)); | ||
| 100 | + c.use_pnc = 1; // Align default with JS default | ||
| 101 | + | ||
| 102 | + if (!obj.Has("canary") || !obj.Get("canary").IsObject()) { | ||
| 103 | + return c; | ||
| 104 | + } | ||
| 105 | + | ||
| 106 | + Napi::Object o = obj.Get("canary").As<Napi::Object>(); | ||
| 107 | + | ||
| 108 | + SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder); | ||
| 109 | + SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder); | ||
| 110 | + SHERPA_ONNX_ASSIGN_ATTR_STR(src_lang, srcLang); | ||
| 111 | + SHERPA_ONNX_ASSIGN_ATTR_STR(tgt_lang, tgtLang); | ||
| 112 | + SHERPA_ONNX_ASSIGN_ATTR_INT32(use_pnc, usePnc); | ||
| 113 | + | ||
| 114 | + return c; | ||
| 115 | +} | ||
| 116 | + | ||
| 96 | static SherpaOnnxOfflineWhisperModelConfig GetOfflineWhisperModelConfig( | 117 | static SherpaOnnxOfflineWhisperModelConfig GetOfflineWhisperModelConfig( |
| 97 | Napi::Object obj) { | 118 | Napi::Object obj) { |
| 98 | SherpaOnnxOfflineWhisperModelConfig c; | 119 | SherpaOnnxOfflineWhisperModelConfig c; |
| @@ -203,6 +224,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) { | @@ -203,6 +224,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) { | ||
| 203 | c.fire_red_asr = GetOfflineFireRedAsrModelConfig(o); | 224 | c.fire_red_asr = GetOfflineFireRedAsrModelConfig(o); |
| 204 | c.dolphin = GetOfflineDolphinModelConfig(o); | 225 | c.dolphin = GetOfflineDolphinModelConfig(o); |
| 205 | c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o); | 226 | c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o); |
| 227 | + c.canary = GetOfflineCanaryModelConfig(o); | ||
| 206 | 228 | ||
| 207 | SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); | 229 | SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); |
| 208 | SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); | 230 | SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); |
| @@ -241,39 +263,7 @@ static SherpaOnnxOfflineLMConfig GetOfflineLMConfig(Napi::Object obj) { | @@ -241,39 +263,7 @@ static SherpaOnnxOfflineLMConfig GetOfflineLMConfig(Napi::Object obj) { | ||
| 241 | return c; | 263 | return c; |
| 242 | } | 264 | } |
| 243 | 265 | ||
| 244 | -static Napi::External<SherpaOnnxOfflineRecognizer> | ||
| 245 | -CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | ||
| 246 | - Napi::Env env = info.Env(); | ||
| 247 | -#if __OHOS__ | ||
| 248 | - // the last argument is the NativeResourceManager | ||
| 249 | - if (info.Length() != 2) { | ||
| 250 | - std::ostringstream os; | ||
| 251 | - os << "Expect only 2 arguments. Given: " << info.Length(); | ||
| 252 | - | ||
| 253 | - Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 254 | - | ||
| 255 | - return {}; | ||
| 256 | - } | ||
| 257 | -#else | ||
| 258 | - if (info.Length() != 1) { | ||
| 259 | - std::ostringstream os; | ||
| 260 | - os << "Expect only 1 argument. Given: " << info.Length(); | ||
| 261 | - | ||
| 262 | - Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 263 | - | ||
| 264 | - return {}; | ||
| 265 | - } | ||
| 266 | -#endif | ||
| 267 | - | ||
| 268 | - if (!info[0].IsObject()) { | ||
| 269 | - Napi::TypeError::New(env, "Expect an object as the argument") | ||
| 270 | - .ThrowAsJavaScriptException(); | ||
| 271 | - | ||
| 272 | - return {}; | ||
| 273 | - } | ||
| 274 | - | ||
| 275 | - Napi::Object o = info[0].As<Napi::Object>(); | ||
| 276 | - | 266 | +static SherpaOnnxOfflineRecognizerConfig ParseConfig(Napi::Object o) { |
| 277 | SherpaOnnxOfflineRecognizerConfig c; | 267 | SherpaOnnxOfflineRecognizerConfig c; |
| 278 | memset(&c, 0, sizeof(c)); | 268 | memset(&c, 0, sizeof(c)); |
| 279 | c.feat_config = GetFeatureConfig(o); | 269 | c.feat_config = GetFeatureConfig(o); |
| @@ -289,19 +279,10 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | @@ -289,19 +279,10 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | ||
| 289 | SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars); | 279 | SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars); |
| 290 | SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty); | 280 | SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty); |
| 291 | 281 | ||
| 292 | -#if __OHOS__ | ||
| 293 | - std::unique_ptr<NativeResourceManager, | ||
| 294 | - decltype(&OH_ResourceManager_ReleaseNativeResourceManager)> | ||
| 295 | - mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]), | ||
| 296 | - &OH_ResourceManager_ReleaseNativeResourceManager); | ||
| 297 | - | ||
| 298 | - const SherpaOnnxOfflineRecognizer *recognizer = | ||
| 299 | - SherpaOnnxCreateOfflineRecognizerOHOS(&c, mgr.get()); | ||
| 300 | -#else | ||
| 301 | - const SherpaOnnxOfflineRecognizer *recognizer = | ||
| 302 | - SherpaOnnxCreateOfflineRecognizer(&c); | ||
| 303 | -#endif | 282 | + return c; |
| 283 | +} | ||
| 304 | 284 | ||
| 285 | +static void FreeConfig(const SherpaOnnxOfflineRecognizerConfig &c) { | ||
| 305 | SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder); | 286 | SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder); |
| 306 | SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder); | 287 | SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder); |
| 307 | SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner); | 288 | SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner); |
| @@ -331,6 +312,11 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | @@ -331,6 +312,11 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | ||
| 331 | SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model); | 312 | SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model); |
| 332 | SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer_ctc.model); | 313 | SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer_ctc.model); |
| 333 | 314 | ||
| 315 | + SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.encoder); | ||
| 316 | + SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.decoder); | ||
| 317 | + SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.src_lang); | ||
| 318 | + SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.tgt_lang); | ||
| 319 | + | ||
| 334 | SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); | 320 | SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); |
| 335 | SHERPA_ONNX_DELETE_C_STR(c.model_config.provider); | 321 | SHERPA_ONNX_DELETE_C_STR(c.model_config.provider); |
| 336 | SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type); | 322 | SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type); |
| @@ -347,6 +333,57 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | @@ -347,6 +333,57 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | ||
| 347 | SHERPA_ONNX_DELETE_C_STR(c.hr.dict_dir); | 333 | SHERPA_ONNX_DELETE_C_STR(c.hr.dict_dir); |
| 348 | SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon); | 334 | SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon); |
| 349 | SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts); | 335 | SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts); |
| 336 | +} | ||
| 337 | + | ||
| 338 | +static Napi::External<SherpaOnnxOfflineRecognizer> | ||
| 339 | +CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | ||
| 340 | + Napi::Env env = info.Env(); | ||
| 341 | +#if __OHOS__ | ||
| 342 | + // the last argument is the NativeResourceManager | ||
| 343 | + if (info.Length() != 2) { | ||
| 344 | + std::ostringstream os; | ||
| 345 | + os << "Expect only 2 arguments. Given: " << info.Length(); | ||
| 346 | + | ||
| 347 | + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 348 | + | ||
| 349 | + return {}; | ||
| 350 | + } | ||
| 351 | +#else | ||
| 352 | + if (info.Length() != 1) { | ||
| 353 | + std::ostringstream os; | ||
| 354 | + os << "Expect only 1 argument. Given: " << info.Length(); | ||
| 355 | + | ||
| 356 | + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 357 | + | ||
| 358 | + return {}; | ||
| 359 | + } | ||
| 360 | +#endif | ||
| 361 | + | ||
| 362 | + if (!info[0].IsObject()) { | ||
| 363 | + Napi::TypeError::New(env, "Expect an object as the argument") | ||
| 364 | + .ThrowAsJavaScriptException(); | ||
| 365 | + | ||
| 366 | + return {}; | ||
| 367 | + } | ||
| 368 | + | ||
| 369 | + Napi::Object o = info[0].As<Napi::Object>(); | ||
| 370 | + | ||
| 371 | + SherpaOnnxOfflineRecognizerConfig c = ParseConfig(o); | ||
| 372 | + | ||
| 373 | +#if __OHOS__ | ||
| 374 | + std::unique_ptr<NativeResourceManager, | ||
| 375 | + decltype(&OH_ResourceManager_ReleaseNativeResourceManager)> | ||
| 376 | + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]), | ||
| 377 | + &OH_ResourceManager_ReleaseNativeResourceManager); | ||
| 378 | + | ||
| 379 | + const SherpaOnnxOfflineRecognizer *recognizer = | ||
| 380 | + SherpaOnnxCreateOfflineRecognizerOHOS(&c, mgr.get()); | ||
| 381 | +#else | ||
| 382 | + const SherpaOnnxOfflineRecognizer *recognizer = | ||
| 383 | + SherpaOnnxCreateOfflineRecognizer(&c); | ||
| 384 | +#endif | ||
| 385 | + | ||
| 386 | + FreeConfig(c); | ||
| 350 | 387 | ||
| 351 | if (!recognizer) { | 388 | if (!recognizer) { |
| 352 | Napi::TypeError::New(env, "Please check your config!") | 389 | Napi::TypeError::New(env, "Please check your config!") |
| @@ -470,6 +507,43 @@ static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) { | @@ -470,6 +507,43 @@ static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) { | ||
| 470 | #endif | 507 | #endif |
| 471 | } | 508 | } |
| 472 | 509 | ||
| 510 | +static void OfflineRecognizerSetConfigWrapper(const Napi::CallbackInfo &info) { | ||
| 511 | + Napi::Env env = info.Env(); | ||
| 512 | + if (info.Length() != 2) { | ||
| 513 | + std::ostringstream os; | ||
| 514 | + os << "Expect only 2 arguments. Given: " << info.Length(); | ||
| 515 | + | ||
| 516 | + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 517 | + | ||
| 518 | + return; | ||
| 519 | + } | ||
| 520 | + | ||
| 521 | + if (!info[0].IsExternal()) { | ||
| 522 | + Napi::TypeError::New(env, | ||
| 523 | + "Argument 0 should be an offline recognizer pointer.") | ||
| 524 | + .ThrowAsJavaScriptException(); | ||
| 525 | + | ||
| 526 | + return; | ||
| 527 | + } | ||
| 528 | + | ||
| 529 | + if (!info[1].IsObject()) { | ||
| 530 | + Napi::TypeError::New(env, "Expect an object as the second argument") | ||
| 531 | + .ThrowAsJavaScriptException(); | ||
| 532 | + | ||
| 533 | + return; | ||
| 534 | + } | ||
| 535 | + | ||
| 536 | + Napi::Object o = info[1].As<Napi::Object>(); | ||
| 537 | + SherpaOnnxOfflineRecognizerConfig c = ParseConfig(o); | ||
| 538 | + | ||
| 539 | + const SherpaOnnxOfflineRecognizer *recognizer = | ||
| 540 | + info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data(); | ||
| 541 | + | ||
| 542 | + SherpaOnnxOfflineRecognizerSetConfig(recognizer, &c); | ||
| 543 | + | ||
| 544 | + FreeConfig(c); | ||
| 545 | +} | ||
| 546 | + | ||
| 473 | static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) { | 547 | static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) { |
| 474 | Napi::Env env = info.Env(); | 548 | Napi::Env env = info.Env(); |
| 475 | if (info.Length() != 2) { | 549 | if (info.Length() != 2) { |
| @@ -548,6 +622,9 @@ void InitNonStreamingAsr(Napi::Env env, Napi::Object exports) { | @@ -548,6 +622,9 @@ void InitNonStreamingAsr(Napi::Env env, Napi::Object exports) { | ||
| 548 | exports.Set(Napi::String::New(env, "decodeOfflineStream"), | 622 | exports.Set(Napi::String::New(env, "decodeOfflineStream"), |
| 549 | Napi::Function::New(env, DecodeOfflineStreamWrapper)); | 623 | Napi::Function::New(env, DecodeOfflineStreamWrapper)); |
| 550 | 624 | ||
| 625 | + exports.Set(Napi::String::New(env, "offlineRecognizerSetConfig"), | ||
| 626 | + Napi::Function::New(env, OfflineRecognizerSetConfigWrapper)); | ||
| 627 | + | ||
| 551 | exports.Set(Napi::String::New(env, "getOfflineStreamResultAsJson"), | 628 | exports.Set(Napi::String::New(env, "getOfflineStreamResultAsJson"), |
| 552 | Napi::Function::New(env, GetOfflineStreamResultAsJsonWrapper)); | 629 | Napi::Function::New(env, GetOfflineStreamResultAsJsonWrapper)); |
| 553 | } | 630 | } |
| @@ -22,6 +22,7 @@ export const voiceActivityDetectorFlush: (handle: object) => void; | @@ -22,6 +22,7 @@ export const voiceActivityDetectorFlush: (handle: object) => void; | ||
| 22 | 22 | ||
| 23 | export const createOfflineRecognizer: (config: object, mgr?: object) => object; | 23 | export const createOfflineRecognizer: (config: object, mgr?: object) => object; |
| 24 | export const createOfflineStream: (handle: object) => object; | 24 | export const createOfflineStream: (handle: object) => object; |
| 25 | +export const offlineRecognizerSetConfig: (handle: object, config: object) => void; | ||
| 25 | export const acceptWaveformOffline: (handle: object, audio: object) => void; | 26 | export const acceptWaveformOffline: (handle: object, audio: object) => void; |
| 26 | export const decodeOfflineStream: (handle: object, streamHandle: object) => void; | 27 | export const decodeOfflineStream: (handle: object, streamHandle: object) => void; |
| 27 | export const getOfflineStreamResultAsJson: (streamHandle: object) => string; | 28 | export const getOfflineStreamResultAsJson: (streamHandle: object) => string; |
| @@ -4,6 +4,7 @@ import { | @@ -4,6 +4,7 @@ import { | ||
| 4 | createOfflineStream, | 4 | createOfflineStream, |
| 5 | decodeOfflineStream, | 5 | decodeOfflineStream, |
| 6 | getOfflineStreamResultAsJson, | 6 | getOfflineStreamResultAsJson, |
| 7 | + offlineRecognizerSetConfig, | ||
| 7 | } from 'libsherpa_onnx.so'; | 8 | } from 'libsherpa_onnx.so'; |
| 8 | 9 | ||
| 9 | export interface Samples { | 10 | export interface Samples { |
| @@ -67,6 +68,14 @@ export class OfflineWhisperModelConfig { | @@ -67,6 +68,14 @@ export class OfflineWhisperModelConfig { | ||
| 67 | public tailPaddings: number = -1; | 68 | public tailPaddings: number = -1; |
| 68 | } | 69 | } |
| 69 | 70 | ||
| 71 | +export class OfflineCanaryModelConfig { | ||
| 72 | + public encoder: string = ''; | ||
| 73 | + public decoder: string = ''; | ||
| 74 | + public srcLang: string = ''; | ||
| 75 | + public tgtLang: string = ''; | ||
| 76 | + public usePnc: number = 1; | ||
| 77 | +} | ||
| 78 | + | ||
| 70 | export class OfflineTdnnModelConfig { | 79 | export class OfflineTdnnModelConfig { |
| 71 | public model: string = ''; | 80 | public model: string = ''; |
| 72 | } | 81 | } |
| @@ -102,6 +111,7 @@ export class OfflineModelConfig { | @@ -102,6 +111,7 @@ export class OfflineModelConfig { | ||
| 102 | public moonshine: OfflineMoonshineModelConfig = new OfflineMoonshineModelConfig(); | 111 | public moonshine: OfflineMoonshineModelConfig = new OfflineMoonshineModelConfig(); |
| 103 | public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig(); | 112 | public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig(); |
| 104 | public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig(); | 113 | public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig(); |
| 114 | + public canary: OfflineCanaryModelConfig = new OfflineCanaryModelConfig(); | ||
| 105 | } | 115 | } |
| 106 | 116 | ||
| 107 | export class OfflineLMConfig { | 117 | export class OfflineLMConfig { |
| @@ -151,6 +161,10 @@ export class OfflineRecognizer { | @@ -151,6 +161,10 @@ export class OfflineRecognizer { | ||
| 151 | this.config = config | 161 | this.config = config |
| 152 | } | 162 | } |
| 153 | 163 | ||
| 164 | + setConfig(config: OfflineRecognizerConfig) { | ||
| 165 | + offlineRecognizerSetConfig(this.handle, config); | ||
| 166 | + } | ||
| 167 | + | ||
| 154 | createStream(): OfflineStream { | 168 | createStream(): OfflineStream { |
| 155 | const handle: object = createOfflineStream(this.handle); | 169 | const handle: object = createOfflineStream(this.handle); |
| 156 | return new OfflineStream(handle); | 170 | return new OfflineStream(handle); |
| @@ -123,6 +123,7 @@ The following tables list the examples in this folder. | @@ -123,6 +123,7 @@ The following tables list the examples in this folder. | ||
| 123 | |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)| | 123 | |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)| |
| 124 | |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)| | 124 | |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)| |
| 125 | |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search| | 125 | |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search| |
| 126 | +|[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model| | ||
| 126 | |[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search| | 127 | |[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search| |
| 127 | |[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search| | 128 | |[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search| |
| 128 | |[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model with greedy search| | 129 | |[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model with greedy search|
| @@ -389,6 +390,16 @@ npm install naudiodon2 | @@ -389,6 +390,16 @@ npm install naudiodon2 | ||
| 389 | node ./test_vad_asr_non_streaming_zipformer_ctc_microphone.js | 390 | node ./test_vad_asr_non_streaming_zipformer_ctc_microphone.js |
| 390 | ``` | 391 | ``` |
| 391 | 392 | ||
| 393 | +### Non-streaming speech recognition with NeMo Canary models | ||
| 394 | + | ||
| 395 | +```bash | ||
| 396 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 397 | +tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 398 | +rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 399 | + | ||
| 400 | +node ./test_asr_non_streaming_nemo_canary.js | ||
| 401 | +``` | ||
| 402 | + | ||
| 392 | ### Non-streaming speech recognition with NeMo CTC models | 403 | ### Non-streaming speech recognition with NeMo CTC models |
| 393 | 404 | ||
| 394 | ```bash | 405 | ```bash |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +const sherpa_onnx = require('sherpa-onnx-node'); | ||
| 3 | + | ||
| 4 | +// Please download test files from | ||
| 5 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 6 | +const config = { | ||
| 7 | + 'featConfig': { | ||
| 8 | + 'sampleRate': 16000, | ||
| 9 | + 'featureDim': 80, | ||
| 10 | + }, | ||
| 11 | + 'modelConfig': { | ||
| 12 | + 'canary': { | ||
| 13 | + 'encoder': | ||
| 14 | + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx', | ||
| 15 | + 'decoder': | ||
| 16 | + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx', | ||
| 17 | + 'srcLang': 'en', | ||
| 18 | + 'tgtLang': 'en', | ||
| 19 | + 'usePnc': 1, | ||
| 20 | + }, | ||
| 21 | + 'tokens': | ||
| 22 | + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt', | ||
| 23 | + 'numThreads': 2, | ||
| 24 | + 'provider': 'cpu', | ||
| 25 | + 'debug': 0, | ||
| 26 | + } | ||
| 27 | +}; | ||
| 28 | + | ||
| 29 | +const waveFilename = | ||
| 30 | + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav'; | ||
| 31 | + | ||
| 32 | +const recognizer = new sherpa_onnx.OfflineRecognizer(config); | ||
| 33 | +console.log('Started') | ||
| 34 | +let start = Date.now(); | ||
| 35 | +let stream = recognizer.createStream(); | ||
| 36 | +const wave = sherpa_onnx.readWave(waveFilename); | ||
| 37 | +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples}); | ||
| 38 | + | ||
| 39 | +recognizer.decode(stream); | ||
| 40 | +let result = recognizer.getResult(stream) | ||
| 41 | +let stop = Date.now(); | ||
| 42 | +console.log('Done') | ||
| 43 | + | ||
| 44 | +const elapsed_seconds = (stop - start) / 1000; | ||
| 45 | +const duration = wave.samples.length / wave.sampleRate; | ||
| 46 | +const real_time_factor = elapsed_seconds / duration; | ||
| 47 | +console.log('Wave duration', duration.toFixed(3), 'seconds') | ||
| 48 | +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds') | ||
| 49 | +console.log( | ||
| 50 | + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`, | ||
| 51 | + real_time_factor.toFixed(3)) | ||
| 52 | +console.log(waveFilename) | ||
| 53 | +console.log('result (English)\n', result) | ||
| 54 | + | ||
| 55 | +stream = recognizer.createStream(); | ||
| 56 | +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples}); | ||
| 57 | +recognizer.config.modelConfig.canary.tgtLang = 'de'; | ||
| 58 | +recognizer.setConfig(recognizer.config); | ||
| 59 | + | ||
| 60 | +recognizer.decode(stream); | ||
| 61 | +result = recognizer.getResult(stream) | ||
| 62 | +console.log('result (German)\n', result) |
| @@ -63,7 +63,7 @@ for text-to-speech. | @@ -63,7 +63,7 @@ for text-to-speech. | ||
| 63 | You can use the following command to run it: | 63 | You can use the following command to run it: |
| 64 | 64 | ||
| 65 | ```bash | 65 | ```bash |
| 66 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 | 66 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 |
| 67 | tar xf kokoro-en-v0_19.tar.bz2 | 67 | tar xf kokoro-en-v0_19.tar.bz2 |
| 68 | rm kokoro-en-v0_19.tar.bz2 | 68 | rm kokoro-en-v0_19.tar.bz2 |
| 69 | 69 | ||
| @@ -154,6 +154,22 @@ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | @@ -154,6 +154,22 @@ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 154 | node ./test-offline-dolphin-ctc.js | 154 | node ./test-offline-dolphin-ctc.js |
| 155 | ``` | 155 | ``` |
| 156 | 156 | ||
| 157 | +## ./test-offline-nemo-canary.js | ||
| 158 | + | ||
| 159 | +[./test-offline-nemo-canary.js](./test-offline-nemo-canary.js) demonstrates | ||
| 160 | +how to decode a file with a NeMo Canary model. In the code we use | ||
| 161 | +[sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french). | ||
| 162 | + | ||
| 163 | +You can use the following command to run it: | ||
| 164 | + | ||
| 165 | +```bash | ||
| 166 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 167 | +tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 168 | +rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 169 | + | ||
| 170 | +node ./test-offline-nemo-canary.js | ||
| 171 | +``` | ||
| 172 | + | ||
| 157 | ## ./test-offline-zipformer-ctc.js | 173 | ## ./test-offline-zipformer-ctc.js |
| 158 | 174 | ||
| 159 | [./test-offline-zipformer-ctc.js](./test-offline-zipformer-ctc.js) demonstrates | 175 | [./test-offline-zipformer-ctc.js](./test-offline-zipformer-ctc.js) demonstrates |
nodejs-examples/test-offline-nemo-canary.js
0 → 100644
| 1 | +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +const fs = require('fs'); | ||
| 4 | +const {Readable} = require('stream'); | ||
| 5 | +const wav = require('wav'); | ||
| 6 | + | ||
| 7 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 8 | + | ||
| 9 | +function createOfflineRecognizer() { | ||
| 10 | + let config = { | ||
| 11 | + modelConfig: { | ||
| 12 | + canary: { | ||
| 13 | + encoder: | ||
| 14 | + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx', | ||
| 15 | + decoder: | ||
| 16 | + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx', | ||
| 17 | + srcLang: 'en', | ||
| 18 | + tgtLang: 'en', | ||
| 19 | + usePnc: 1, | ||
| 20 | + }, | ||
| 21 | + debug: 0, | ||
| 22 | + tokens: | ||
| 23 | + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt', | ||
| 24 | + } | ||
| 25 | + }; | ||
| 26 | + | ||
| 27 | + return sherpa_onnx.createOfflineRecognizer(config); | ||
| 28 | +} | ||
| 29 | + | ||
| 30 | +const recognizer = createOfflineRecognizer(); | ||
| 31 | +let stream = recognizer.createStream(); | ||
| 32 | + | ||
| 33 | +const waveFilename = | ||
| 34 | + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav'; | ||
| 35 | +const wave = sherpa_onnx.readWave(waveFilename); | ||
| 36 | +stream.acceptWaveform(wave.sampleRate, wave.samples); | ||
| 37 | + | ||
| 38 | +recognizer.decode(stream); | ||
| 39 | +let text = recognizer.getResult(stream).text; | ||
| 40 | +console.log(`text in English: ${text}`); | ||
| 41 | + | ||
| 42 | +stream.free(); | ||
| 43 | + | ||
| 44 | +// now output German text | ||
| 45 | +recognizer.config.modelConfig.canary.tgtLang = 'de'; | ||
| 46 | +recognizer.setConfig(recognizer.config); | ||
| 47 | + | ||
| 48 | +stream = recognizer.createStream(); | ||
| 49 | +stream.acceptWaveform(wave.sampleRate, wave.samples); | ||
| 50 | +recognizer.decode(stream); | ||
| 51 | +text = recognizer.getResult(stream).text; | ||
| 52 | + | ||
| 53 | +console.log(`text in German: ${text}`); | ||
| 54 | + | ||
| 55 | +stream.free(); | ||
| 56 | +recognizer.free(); |
| @@ -24,6 +24,10 @@ class OfflineRecognizer { | @@ -24,6 +24,10 @@ class OfflineRecognizer { | ||
| 24 | return new OfflineStream(handle); | 24 | return new OfflineStream(handle); |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | + setConfig(config) { | ||
| 28 | + addon.offlineRecognizerSetConfig(this.handle, config); | ||
| 29 | + } | ||
| 30 | + | ||
| 27 | decode(stream) { | 31 | decode(stream) { |
| 28 | addon.decodeOfflineStream(this.handle, stream.handle); | 32 | addon.decodeOfflineStream(this.handle, stream.handle); |
| 29 | } | 33 | } |
| @@ -487,6 +487,21 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( | @@ -487,6 +487,21 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( | ||
| 487 | recognizer_config.model_config.zipformer_ctc.model = | 487 | recognizer_config.model_config.zipformer_ctc.model = |
| 488 | SHERPA_ONNX_OR(config->model_config.zipformer_ctc.model, ""); | 488 | SHERPA_ONNX_OR(config->model_config.zipformer_ctc.model, ""); |
| 489 | 489 | ||
| 490 | + recognizer_config.model_config.canary.encoder = | ||
| 491 | + SHERPA_ONNX_OR(config->model_config.canary.encoder, ""); | ||
| 492 | + | ||
| 493 | + recognizer_config.model_config.canary.decoder = | ||
| 494 | + SHERPA_ONNX_OR(config->model_config.canary.decoder, ""); | ||
| 495 | + | ||
| 496 | + recognizer_config.model_config.canary.src_lang = | ||
| 497 | + SHERPA_ONNX_OR(config->model_config.canary.src_lang, ""); | ||
| 498 | + | ||
| 499 | + recognizer_config.model_config.canary.tgt_lang = | ||
| 500 | + SHERPA_ONNX_OR(config->model_config.canary.tgt_lang, ""); | ||
| 501 | + | ||
| 502 | + recognizer_config.model_config.canary.use_pnc = | ||
| 503 | + config->model_config.canary.use_pnc; | ||
| 504 | + | ||
| 490 | recognizer_config.lm_config.model = | 505 | recognizer_config.lm_config.model = |
| 491 | SHERPA_ONNX_OR(config->lm_config.model, ""); | 506 | SHERPA_ONNX_OR(config->lm_config.model, ""); |
| 492 | recognizer_config.lm_config.scale = | 507 | recognizer_config.lm_config.scale = |
| @@ -420,6 +420,14 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig { | @@ -420,6 +420,14 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig { | ||
| 420 | int32_t tail_paddings; | 420 | int32_t tail_paddings; |
| 421 | } SherpaOnnxOfflineWhisperModelConfig; | 421 | } SherpaOnnxOfflineWhisperModelConfig; |
| 422 | 422 | ||
| 423 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineCanaryModelConfig { | ||
| 424 | + const char *encoder; | ||
| 425 | + const char *decoder; | ||
| 426 | + const char *src_lang; | ||
| 427 | + const char *tgt_lang; | ||
| 428 | + int32_t use_pnc; | ||
| 429 | +} SherpaOnnxOfflineCanaryModelConfig; | ||
| 430 | + | ||
| 423 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineFireRedAsrModelConfig { | 431 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineFireRedAsrModelConfig { |
| 424 | const char *encoder; | 432 | const char *encoder; |
| 425 | const char *decoder; | 433 | const char *decoder; |
| @@ -479,6 +487,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | @@ -479,6 +487,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | ||
| 479 | SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr; | 487 | SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr; |
| 480 | SherpaOnnxOfflineDolphinModelConfig dolphin; | 488 | SherpaOnnxOfflineDolphinModelConfig dolphin; |
| 481 | SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc; | 489 | SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc; |
| 490 | + SherpaOnnxOfflineCanaryModelConfig canary; | ||
| 482 | } SherpaOnnxOfflineModelConfig; | 491 | } SherpaOnnxOfflineModelConfig; |
| 483 | 492 | ||
| 484 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { | 493 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { |
| @@ -193,7 +193,7 @@ void OfflineStream::AcceptWaveform(int32_t sample_rate, const float *samples, | @@ -193,7 +193,7 @@ void OfflineStream::AcceptWaveform(int32_t sample_rate, const float *samples, | ||
| 193 | SherpaOnnxAcceptWaveformOffline(p_, sample_rate, samples, n); | 193 | SherpaOnnxAcceptWaveformOffline(p_, sample_rate, samples, n); |
| 194 | } | 194 | } |
| 195 | 195 | ||
| 196 | -OfflineRecognizer OfflineRecognizer::Create( | 196 | +static SherpaOnnxOfflineRecognizerConfig Convert( |
| 197 | const OfflineRecognizerConfig &config) { | 197 | const OfflineRecognizerConfig &config) { |
| 198 | struct SherpaOnnxOfflineRecognizerConfig c; | 198 | struct SherpaOnnxOfflineRecognizerConfig c; |
| 199 | memset(&c, 0, sizeof(c)); | 199 | memset(&c, 0, sizeof(c)); |
| @@ -256,6 +256,12 @@ OfflineRecognizer OfflineRecognizer::Create( | @@ -256,6 +256,12 @@ OfflineRecognizer OfflineRecognizer::Create( | ||
| 256 | c.model_config.zipformer_ctc.model = | 256 | c.model_config.zipformer_ctc.model = |
| 257 | config.model_config.zipformer_ctc.model.c_str(); | 257 | config.model_config.zipformer_ctc.model.c_str(); |
| 258 | 258 | ||
| 259 | + c.model_config.canary.encoder = config.model_config.canary.encoder.c_str(); | ||
| 260 | + c.model_config.canary.decoder = config.model_config.canary.decoder.c_str(); | ||
| 261 | + c.model_config.canary.src_lang = config.model_config.canary.src_lang.c_str(); | ||
| 262 | + c.model_config.canary.tgt_lang = config.model_config.canary.tgt_lang.c_str(); | ||
| 263 | + c.model_config.canary.use_pnc = config.model_config.canary.use_pnc; | ||
| 264 | + | ||
| 259 | c.lm_config.model = config.lm_config.model.c_str(); | 265 | c.lm_config.model = config.lm_config.model.c_str(); |
| 260 | c.lm_config.scale = config.lm_config.scale; | 266 | c.lm_config.scale = config.lm_config.scale; |
| 261 | 267 | ||
| @@ -273,10 +279,22 @@ OfflineRecognizer OfflineRecognizer::Create( | @@ -273,10 +279,22 @@ OfflineRecognizer OfflineRecognizer::Create( | ||
| 273 | c.hr.lexicon = config.hr.lexicon.c_str(); | 279 | c.hr.lexicon = config.hr.lexicon.c_str(); |
| 274 | c.hr.rule_fsts = config.hr.rule_fsts.c_str(); | 280 | c.hr.rule_fsts = config.hr.rule_fsts.c_str(); |
| 275 | 281 | ||
| 282 | + return c; | ||
| 283 | +} | ||
| 284 | + | ||
| 285 | +OfflineRecognizer OfflineRecognizer::Create( | ||
| 286 | + const OfflineRecognizerConfig &config) { | ||
| 287 | + auto c = Convert(config); | ||
| 288 | + | ||
| 276 | auto p = SherpaOnnxCreateOfflineRecognizer(&c); | 289 | auto p = SherpaOnnxCreateOfflineRecognizer(&c); |
| 277 | return OfflineRecognizer(p); | 290 | return OfflineRecognizer(p); |
| 278 | } | 291 | } |
| 279 | 292 | ||
| 293 | +void OfflineRecognizer::SetConfig(const OfflineRecognizerConfig &config) const { | ||
| 294 | + auto c = Convert(config); | ||
| 295 | + SherpaOnnxOfflineRecognizerSetConfig(p_, &c); | ||
| 296 | +} | ||
| 297 | + | ||
| 280 | OfflineRecognizer::OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p) | 298 | OfflineRecognizer::OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p) |
| 281 | : MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer>(p) {} | 299 | : MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer>(p) {} |
| 282 | 300 |
| @@ -223,6 +223,14 @@ struct SHERPA_ONNX_API OfflineWhisperModelConfig { | @@ -223,6 +223,14 @@ struct SHERPA_ONNX_API OfflineWhisperModelConfig { | ||
| 223 | int32_t tail_paddings = -1; | 223 | int32_t tail_paddings = -1; |
| 224 | }; | 224 | }; |
| 225 | 225 | ||
| 226 | +struct SHERPA_ONNX_API OfflineCanaryModelConfig { | ||
| 227 | + std::string encoder; | ||
| 228 | + std::string decoder; | ||
| 229 | + std::string src_lang; | ||
| 230 | + std::string tgt_lang; | ||
| 231 | + bool use_pnc = true; | ||
| 232 | +}; | ||
| 233 | + | ||
| 226 | struct SHERPA_ONNX_API OfflineFireRedAsrModelConfig { | 234 | struct SHERPA_ONNX_API OfflineFireRedAsrModelConfig { |
| 227 | std::string encoder; | 235 | std::string encoder; |
| 228 | std::string decoder; | 236 | std::string decoder; |
| @@ -273,6 +281,7 @@ struct SHERPA_ONNX_API OfflineModelConfig { | @@ -273,6 +281,7 @@ struct SHERPA_ONNX_API OfflineModelConfig { | ||
| 273 | OfflineFireRedAsrModelConfig fire_red_asr; | 281 | OfflineFireRedAsrModelConfig fire_red_asr; |
| 274 | OfflineDolphinModelConfig dolphin; | 282 | OfflineDolphinModelConfig dolphin; |
| 275 | OfflineZipformerCtcModelConfig zipformer_ctc; | 283 | OfflineZipformerCtcModelConfig zipformer_ctc; |
| 284 | + OfflineCanaryModelConfig canary; | ||
| 276 | }; | 285 | }; |
| 277 | 286 | ||
| 278 | struct SHERPA_ONNX_API OfflineLMConfig { | 287 | struct SHERPA_ONNX_API OfflineLMConfig { |
| @@ -335,6 +344,8 @@ class SHERPA_ONNX_API OfflineRecognizer | @@ -335,6 +344,8 @@ class SHERPA_ONNX_API OfflineRecognizer | ||
| 335 | 344 | ||
| 336 | OfflineRecognizerResult GetResult(const OfflineStream *s) const; | 345 | OfflineRecognizerResult GetResult(const OfflineStream *s) const; |
| 337 | 346 | ||
| 347 | + void SetConfig(const OfflineRecognizerConfig &config) const; | ||
| 348 | + | ||
| 338 | private: | 349 | private: |
| 339 | explicit OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p); | 350 | explicit OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p); |
| 340 | }; | 351 | }; |
| @@ -45,7 +45,7 @@ Usage: | @@ -45,7 +45,7 @@ Usage: | ||
| 45 | 45 | ||
| 46 | ./bin/sherpa-onnx \ | 46 | ./bin/sherpa-onnx \ |
| 47 | --debug=1 \ | 47 | --debug=1 \ |
| 48 | - --zipformer2-ctc-model=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \ | 48 | + --zipformer2-ctc-model=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \ |
| 49 | --tokens=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \ | 49 | --tokens=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \ |
| 50 | ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav \ | 50 | ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav \ |
| 51 | ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000001.wav \ | 51 | ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000001.wav \ |
| @@ -12,7 +12,6 @@ set(exported_functions | @@ -12,7 +12,6 @@ set(exported_functions | ||
| 12 | SherpaOnnxCreateOnlineRecognizer | 12 | SherpaOnnxCreateOnlineRecognizer |
| 13 | SherpaOnnxCreateOnlineStream | 13 | SherpaOnnxCreateOnlineStream |
| 14 | SherpaOnnxDecodeOnlineStream | 14 | SherpaOnnxDecodeOnlineStream |
| 15 | - SherpaOnnxDestroyOfflineStreamResultJson | ||
| 16 | SherpaOnnxDestroyOnlineRecognizer | 15 | SherpaOnnxDestroyOnlineRecognizer |
| 17 | SherpaOnnxDestroyOnlineRecognizerResult | 16 | SherpaOnnxDestroyOnlineRecognizerResult |
| 18 | SherpaOnnxDestroyOnlineStream | 17 | SherpaOnnxDestroyOnlineStream |
| @@ -59,6 +59,10 @@ function freeConfig(config, Module) { | @@ -59,6 +59,10 @@ function freeConfig(config, Module) { | ||
| 59 | freeConfig(config.senseVoice, Module) | 59 | freeConfig(config.senseVoice, Module) |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | + if ('canary' in config) { | ||
| 63 | + freeConfig(config.canary, Module) | ||
| 64 | + } | ||
| 65 | + | ||
| 62 | if ('lm' in config) { | 66 | if ('lm' in config) { |
| 63 | freeConfig(config.lm, Module) | 67 | freeConfig(config.lm, Module) |
| 64 | } | 68 | } |
| @@ -246,7 +250,7 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -246,7 +250,7 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 246 | Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider | 250 | Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider |
| 247 | offset += 4; | 251 | offset += 4; |
| 248 | 252 | ||
| 249 | - Module.setValue(ptr + offset, config.debug || 0, 'i32'); | 253 | + Module.setValue(ptr + offset, config.debug ?? 1, 'i32'); |
| 250 | offset += 4; | 254 | offset += 4; |
| 251 | 255 | ||
| 252 | Module.setValue( | 256 | Module.setValue( |
| @@ -692,6 +696,51 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { | @@ -692,6 +696,51 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { | ||
| 692 | } | 696 | } |
| 693 | } | 697 | } |
| 694 | 698 | ||
| 699 | +function initSherpaOnnxOfflineCanaryModelConfig(config, Module) { | ||
| 700 | + const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; | ||
| 701 | + const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1; | ||
| 702 | + const srcLangLen = Module.lengthBytesUTF8(config.srcLang || '') + 1; | ||
| 703 | + const tgtLangLen = Module.lengthBytesUTF8(config.tgtLang || '') + 1; | ||
| 704 | + | ||
| 705 | + const n = encoderLen + decoderLen + srcLangLen + tgtLangLen; | ||
| 706 | + const buffer = Module._malloc(n); | ||
| 707 | + | ||
| 708 | + const len = 5 * 4; // 4 pointers + 1 int32 | ||
| 709 | + const ptr = Module._malloc(len); | ||
| 710 | + | ||
| 711 | + let offset = 0; | ||
| 712 | + Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen); | ||
| 713 | + offset += encoderLen; | ||
| 714 | + | ||
| 715 | + Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen); | ||
| 716 | + offset += decoderLen; | ||
| 717 | + | ||
| 718 | + Module.stringToUTF8(config.srcLang || '', buffer + offset, srcLangLen); | ||
| 719 | + offset += srcLangLen; | ||
| 720 | + | ||
| 721 | + Module.stringToUTF8(config.tgtLang || '', buffer + offset, tgtLangLen); | ||
| 722 | + offset += tgtLangLen; | ||
| 723 | + | ||
| 724 | + offset = 0; | ||
| 725 | + Module.setValue(ptr, buffer + offset, 'i8*'); | ||
| 726 | + offset += encoderLen; | ||
| 727 | + | ||
| 728 | + Module.setValue(ptr + 4, buffer + offset, 'i8*'); | ||
| 729 | + offset += decoderLen; | ||
| 730 | + | ||
| 731 | + Module.setValue(ptr + 8, buffer + offset, 'i8*'); | ||
| 732 | + offset += srcLangLen; | ||
| 733 | + | ||
| 734 | + Module.setValue(ptr + 12, buffer + offset, 'i8*'); | ||
| 735 | + offset += tgtLangLen; | ||
| 736 | + | ||
| 737 | + Module.setValue(ptr + 16, config.usePnc ?? 1, 'i32'); | ||
| 738 | + | ||
| 739 | + return { | ||
| 740 | + buffer: buffer, ptr: ptr, len: len, | ||
| 741 | + } | ||
| 742 | +} | ||
| 743 | + | ||
| 695 | function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) { | 744 | function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) { |
| 696 | const preprocessorLen = Module.lengthBytesUTF8(config.preprocessor || '') + 1; | 745 | const preprocessorLen = Module.lengthBytesUTF8(config.preprocessor || '') + 1; |
| 697 | const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; | 746 | const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; |
| @@ -811,7 +860,7 @@ function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) { | @@ -811,7 +860,7 @@ function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) { | ||
| 811 | Module.setValue(ptr + 4, buffer + offset, 'i8*'); | 860 | Module.setValue(ptr + 4, buffer + offset, 'i8*'); |
| 812 | offset += languageLen; | 861 | offset += languageLen; |
| 813 | 862 | ||
| 814 | - Module.setValue(ptr + 8, config.useInverseTextNormalization || 0, 'i32'); | 863 | + Module.setValue(ptr + 8, config.useInverseTextNormalization ?? 0, 'i32'); |
| 815 | 864 | ||
| 816 | return { | 865 | return { |
| 817 | buffer: buffer, ptr: ptr, len: len, | 866 | buffer: buffer, ptr: ptr, len: len, |
| @@ -907,6 +956,16 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -907,6 +956,16 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 907 | }; | 956 | }; |
| 908 | } | 957 | } |
| 909 | 958 | ||
| 959 | + if (!('canary' in config)) { | ||
| 960 | + config.canary = { | ||
| 961 | + encoder: '', | ||
| 962 | + decoder: '', | ||
| 963 | + srcLang: '', | ||
| 964 | + tgtLang: '', | ||
| 965 | + usePnc: 1, | ||
| 966 | + }; | ||
| 967 | + } | ||
| 968 | + | ||
| 910 | const transducer = | 969 | const transducer = |
| 911 | initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module); | 970 | initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module); |
| 912 | 971 | ||
| @@ -936,9 +995,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -936,9 +995,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 936 | const zipformerCtc = | 995 | const zipformerCtc = |
| 937 | initSherpaOnnxOfflineZipformerCtcModelConfig(config.zipformerCtc, Module); | 996 | initSherpaOnnxOfflineZipformerCtcModelConfig(config.zipformerCtc, Module); |
| 938 | 997 | ||
| 998 | + const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module); | ||
| 999 | + | ||
| 939 | const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + | 1000 | const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + |
| 940 | tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len + | 1001 | tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len + |
| 941 | - dolphin.len + zipformerCtc.len; | 1002 | + dolphin.len + zipformerCtc.len + canary.len; |
| 942 | 1003 | ||
| 943 | const ptr = Module._malloc(len); | 1004 | const ptr = Module._malloc(len); |
| 944 | 1005 | ||
| @@ -1000,7 +1061,7 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -1000,7 +1061,7 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 1000 | Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); | 1061 | Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); |
| 1001 | offset += 4; | 1062 | offset += 4; |
| 1002 | 1063 | ||
| 1003 | - Module.setValue(ptr + offset, config.debug || 0, 'i32'); | 1064 | + Module.setValue(ptr + offset, config.debug ?? 1, 'i32'); |
| 1004 | offset += 4; | 1065 | offset += 4; |
| 1005 | 1066 | ||
| 1006 | Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider | 1067 | Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider |
| @@ -1043,11 +1104,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -1043,11 +1104,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 1043 | Module._CopyHeap(zipformerCtc.ptr, zipformerCtc.len, ptr + offset); | 1104 | Module._CopyHeap(zipformerCtc.ptr, zipformerCtc.len, ptr + offset); |
| 1044 | offset += zipformerCtc.len; | 1105 | offset += zipformerCtc.len; |
| 1045 | 1106 | ||
| 1107 | + Module._CopyHeap(canary.ptr, canary.len, ptr + offset); | ||
| 1108 | + offset += canary.len; | ||
| 1109 | + | ||
| 1046 | return { | 1110 | return { |
| 1047 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, | 1111 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, |
| 1048 | paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn, | 1112 | paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn, |
| 1049 | senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr, | 1113 | senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr, |
| 1050 | - dolphin: dolphin, zipformerCtc: zipformerCtc | 1114 | + dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary, |
| 1051 | } | 1115 | } |
| 1052 | } | 1116 | } |
| 1053 | 1117 | ||
| @@ -1189,6 +1253,13 @@ class OfflineRecognizer { | @@ -1189,6 +1253,13 @@ class OfflineRecognizer { | ||
| 1189 | this.Module = Module; | 1253 | this.Module = Module; |
| 1190 | } | 1254 | } |
| 1191 | 1255 | ||
| 1256 | + setConfig(configObj) { | ||
| 1257 | + const config = | ||
| 1258 | + initSherpaOnnxOfflineRecognizerConfig(configObj, this.Module); | ||
| 1259 | + this.Module._SherpaOnnxOfflineRecognizerSetConfig(this.handle, config.ptr); | ||
| 1260 | + freeConfig(config, this.Module); | ||
| 1261 | + } | ||
| 1262 | + | ||
| 1192 | free() { | 1263 | free() { |
| 1193 | this.Module._SherpaOnnxDestroyOfflineRecognizer(this.handle); | 1264 | this.Module._SherpaOnnxDestroyOfflineRecognizer(this.handle); |
| 1194 | this.handle = 0 | 1265 | this.handle = 0 |
| @@ -41,6 +41,7 @@ set(exported_functions | @@ -41,6 +41,7 @@ set(exported_functions | ||
| 41 | SherpaOnnxDestroyOfflineStreamResultJson | 41 | SherpaOnnxDestroyOfflineStreamResultJson |
| 42 | SherpaOnnxGetOfflineStreamResult | 42 | SherpaOnnxGetOfflineStreamResult |
| 43 | SherpaOnnxGetOfflineStreamResultAsJson | 43 | SherpaOnnxGetOfflineStreamResultAsJson |
| 44 | + SherpaOnnxOfflineRecognizerSetConfig | ||
| 44 | # online kws | 45 | # online kws |
| 45 | SherpaOnnxCreateKeywordSpotter | 46 | SherpaOnnxCreateKeywordSpotter |
| 46 | SherpaOnnxCreateKeywordStream | 47 | SherpaOnnxCreateKeywordStream |
| @@ -21,6 +21,7 @@ static_assert(sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) == 2 * 4, ""); | @@ -21,6 +21,7 @@ static_assert(sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) == 2 * 4, ""); | ||
| 21 | static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, ""); | 21 | static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, ""); |
| 22 | static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, ""); | 22 | static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, ""); |
| 23 | static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, ""); | 23 | static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, ""); |
| 24 | +static_assert(sizeof(SherpaOnnxOfflineCanaryModelConfig) == 5 * 4, ""); | ||
| 24 | static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, ""); | 25 | static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, ""); |
| 25 | 26 | ||
| 26 | static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | 27 | static_assert(sizeof(SherpaOnnxOfflineModelConfig) == |
| @@ -33,7 +34,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | @@ -33,7 +34,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | ||
| 33 | sizeof(SherpaOnnxOfflineMoonshineModelConfig) + | 34 | sizeof(SherpaOnnxOfflineMoonshineModelConfig) + |
| 34 | sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) + | 35 | sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) + |
| 35 | sizeof(SherpaOnnxOfflineDolphinModelConfig) + | 36 | sizeof(SherpaOnnxOfflineDolphinModelConfig) + |
| 36 | - sizeof(SherpaOnnxOfflineZipformerCtcModelConfig), | 37 | + sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) + |
| 38 | + sizeof(SherpaOnnxOfflineCanaryModelConfig), | ||
| 37 | 39 | ||
| 38 | ""); | 40 | ""); |
| 39 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); | 41 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); |
| @@ -80,6 +82,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | @@ -80,6 +82,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | ||
| 80 | auto fire_red_asr = &model_config->fire_red_asr; | 82 | auto fire_red_asr = &model_config->fire_red_asr; |
| 81 | auto dolphin = &model_config->dolphin; | 83 | auto dolphin = &model_config->dolphin; |
| 82 | auto zipformer_ctc = &model_config->zipformer_ctc; | 84 | auto zipformer_ctc = &model_config->zipformer_ctc; |
| 85 | + auto canary = &model_config->canary; | ||
| 83 | 86 | ||
| 84 | fprintf(stdout, "----------offline transducer model config----------\n"); | 87 | fprintf(stdout, "----------offline transducer model config----------\n"); |
| 85 | fprintf(stdout, "encoder: %s\n", transducer->encoder); | 88 | fprintf(stdout, "encoder: %s\n", transducer->encoder); |
| @@ -123,6 +126,13 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | @@ -123,6 +126,13 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | ||
| 123 | fprintf(stdout, "----------offline zipformer ctc model config----------\n"); | 126 | fprintf(stdout, "----------offline zipformer ctc model config----------\n"); |
| 124 | fprintf(stdout, "model: %s\n", zipformer_ctc->model); | 127 | fprintf(stdout, "model: %s\n", zipformer_ctc->model); |
| 125 | 128 | ||
| 129 | + fprintf(stdout, "----------offline NeMo Canary model config----------\n"); | ||
| 130 | + fprintf(stdout, "encoder: %s\n", canary->encoder); | ||
| 131 | + fprintf(stdout, "decoder: %s\n", canary->decoder); | ||
| 132 | + fprintf(stdout, "src_lang: %s\n", canary->src_lang); | ||
| 133 | + fprintf(stdout, "tgt_lang: %s\n", canary->tgt_lang); | ||
| 134 | + fprintf(stdout, "use_pnc: %d\n", canary->use_pnc); | ||
| 135 | + | ||
| 126 | fprintf(stdout, "tokens: %s\n", model_config->tokens); | 136 | fprintf(stdout, "tokens: %s\n", model_config->tokens); |
| 127 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); | 137 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); |
| 128 | fprintf(stdout, "provider: %s\n", model_config->provider); | 138 | fprintf(stdout, "provider: %s\n", model_config->provider); |
-
请 注册 或 登录 后发表评论