Fangjun Kuang
Committed by GitHub

Add C/CXX/JavaScript API for NeMo Canary models (#2357)

This PR introduces support for NeMo Canary models across the C, C++, and JavaScript APIs
by adding new Canary configuration structures, updating bindings, extending examples,
and enhancing CI workflows.

- Add OfflineCanaryModelConfig to all language bindings (C, C++, JS, ETS).
- Implement SetConfig methods and NAPI wrappers for updating the recognizer config at runtime (see the sketch below).
- Update examples and CI scripts to demonstrate and test NeMo Canary model usage.
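
As a rough orientation (not part of the diff), the sketch below shows how the new C pieces fit together: the `canary` member of `SherpaOnnxOfflineModelConfig`, and `SherpaOnnxOfflineRecognizerSetConfig` for switching `tgt_lang` without recreating the recognizer. Model paths here are placeholders; the complete, runnable version is added in `c-api-examples/nemo-canary-c-api.c` further down.

```c
#include <stdio.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int main() {
  // Offline model config: only the new `canary` member differs from
  // configuring other offline models.
  SherpaOnnxOfflineModelConfig model_config;
  memset(&model_config, 0, sizeof(model_config));
  model_config.num_threads = 1;
  model_config.provider = "cpu";
  model_config.tokens = "path/to/tokens.txt";                 // placeholder
  model_config.canary.encoder = "path/to/encoder.int8.onnx";  // placeholder
  model_config.canary.decoder = "path/to/decoder.int8.onnx";  // placeholder
  model_config.canary.src_lang = "de";  // language of the input audio
  model_config.canary.tgt_lang = "en";  // language of the output text
  model_config.canary.use_pnc = 1;      // keep punctuation and casing

  SherpaOnnxOfflineRecognizerConfig config;
  memset(&config, 0, sizeof(config));
  config.decoding_method = "greedy_search";
  config.model_config = model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&config);
  if (!recognizer) {
    fprintf(stderr, "Please check your config!\n");
    return -1;
  }

  // ... create a stream, accept a waveform, and decode as usual ...

  // New in this PR: update the config at runtime (e.g. switch the
  // output language) without recreating the recognizer.
  config.model_config.canary.tgt_lang = "de";
  SherpaOnnxOfflineRecognizerSetConfig(recognizer, &config);

  // ... streams created after this point are decoded with the updated config ...

  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  return 0;
}
```

The same pattern is mirrored by the C++ `OfflineRecognizer::SetConfig` method and the JavaScript/ETS `recognizer.setConfig()` wrappers added in this PR.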
@@ -10,6 +10,16 @@ arch=$(node -p "require('os').arch()") @@ -10,6 +10,16 @@ arch=$(node -p "require('os').arch()")
10 platform=$(node -p "require('os').platform()") 10 platform=$(node -p "require('os').platform()")
11 node_version=$(node -p "process.versions.node.split('.')[0]") 11 node_version=$(node -p "process.versions.node.split('.')[0]")
12 12
  13 +echo "----------non-streaming ASR NeMo Canary----------"
  14 +
  15 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  16 +tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  17 +rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  18 +
  19 +node ./test_asr_non_streaming_nemo_canary.js
  20 +
  21 +rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
  22 +
13 echo "----------non-streaming ASR Zipformer CTC----------" 23 echo "----------non-streaming ASR Zipformer CTC----------"
14 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2 24 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
15 25
@@ -9,6 +9,14 @@ git status @@ -9,6 +9,14 @@ git status
9 ls -lh 9 ls -lh
10 ls -lh node_modules 10 ls -lh node_modules
11 11
  12 +# asr with offline nemo canary
  13 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  14 +tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  15 +rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  16 +
  17 +node ./test-offline-nemo-canary.js
  18 +rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
  19 +
12 # asr with offline zipformer ctc 20 # asr with offline zipformer ctc
13 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2 21 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
14 22
@@ -77,16 +77,6 @@ time $EXE \ @@ -77,16 +77,6 @@ time $EXE \
77 $repo/test_wavs/DEV_T0000000001.wav \ 77 $repo/test_wavs/DEV_T0000000001.wav \
78 $repo/test_wavs/DEV_T0000000002.wav 78 $repo/test_wavs/DEV_T0000000002.wav
79 79
80 -log "test int8"  
81 -  
82 -time $EXE \  
83 - --debug=1 \  
84 - --zipformer2-ctc-model=$repo/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \  
85 - --tokens=$repo/tokens.txt \  
86 - $repo/test_wavs/DEV_T0000000000.wav \  
87 - $repo/test_wavs/DEV_T0000000001.wav \  
88 - $repo/test_wavs/DEV_T0000000002.wav  
89 -  
90 rm -rf $repo 80 rm -rf $repo
91 81
92 log "------------------------------------------------------------" 82 log "------------------------------------------------------------"
@@ -127,6 +127,36 @@ jobs: @@ -127,6 +127,36 @@ jobs:
127 rm -rf dict lexicon.txt test-hr.wav replace.fst 127 rm -rf dict lexicon.txt test-hr.wav replace.fst
128 rm -v $name 128 rm -v $name
129 129
  130 + - name: Test NeMo Canary
  131 + shell: bash
  132 + run: |
  133 + name=nemo-canary-c-api
  134 + gcc -o $name ./c-api-examples/$name.c \
  135 + -I ./build/install/include \
  136 + -L ./build/install/lib/ \
  137 + -l sherpa-onnx-c-api \
  138 + -l onnxruntime
  139 +
  140 + ls -lh $name
  141 +
  142 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  143 + ldd ./$name
  144 + echo "----"
  145 + readelf -d ./$name
  146 + fi
  147 +
  148 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  149 + tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  150 + rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  151 +
  152 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  153 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  154 +
  155 + ./$name
  156 +
  157 + rm $name
  158 + rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
  159 +
130 - name: Test Dolphin CTC 160 - name: Test Dolphin CTC
131 shell: bash 161 shell: bash
132 run: | 162 run: |
@@ -87,6 +87,40 @@ jobs: @@ -87,6 +87,40 @@ jobs:
87 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib 87 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
88 fi 88 fi
89 89
  90 + - name: Test NeMo Canary
  91 + shell: bash
  92 + run: |
  93 + name=nemo-canary-cxx-api
  94 + g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
  95 + -I ./build/install/include \
  96 + -L ./build/install/lib/ \
  97 + -l sherpa-onnx-cxx-api \
  98 + -l sherpa-onnx-c-api \
  99 + -l onnxruntime
  100 +
  101 + ls -lh $name
  102 +
  103 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  104 + ldd ./$name
  105 + echo "----"
  106 + readelf -d ./$name
  107 + fi
  108 +
  109 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  110 + tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  111 + rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  112 +
  113 + ls -lh sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
  114 + echo "---"
  115 +
  116 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  117 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  118 +
  119 + ./$name
  120 +
  121 + rm -rf sherpa-onnx-nemo-canary-*
  122 + rm -v ./$name
  123 +
90 - name: Test streaming zipformer with Homophone replacer 124 - name: Test streaming zipformer with Homophone replacer
91 shell: bash 125 shell: bash
92 run: | 126 run: |
@@ -53,6 +53,9 @@ target_link_libraries(whisper-c-api sherpa-onnx-c-api) @@ -53,6 +53,9 @@ target_link_libraries(whisper-c-api sherpa-onnx-c-api)
53 add_executable(fire-red-asr-c-api fire-red-asr-c-api.c) 53 add_executable(fire-red-asr-c-api fire-red-asr-c-api.c)
54 target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api) 54 target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api)
55 55
  56 +add_executable(nemo-canary-c-api nemo-canary-c-api.c)
  57 +target_link_libraries(nemo-canary-c-api sherpa-onnx-c-api)
  58 +
56 add_executable(sense-voice-c-api sense-voice-c-api.c) 59 add_executable(sense-voice-c-api sense-voice-c-api.c)
57 target_link_libraries(sense-voice-c-api sherpa-onnx-c-api) 60 target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)
58 61
  1 +// c-api-examples/nemo-canary-c-api.c
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +// We assume you have pre-downloaded the NeMo Canary model
  6 +// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  7 +// An example is given below:
  8 +//
  9 +// clang-format off
  10 +//
  11 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  12 +// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  13 +// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  14 +//
  15 +// clang-format on
  16 +//
  17 +// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
  18 +// for details
  19 +
  20 +#include <stdio.h>
  21 +#include <stdlib.h>
  22 +#include <string.h>
  23 +
  24 +#include "sherpa-onnx/c-api/c-api.h"
  25 +
  26 +int32_t main() {
  27 + const char *wav_filename =
  28 + "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav";
  29 + const char *encoder_filename =
  30 + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
  31 + const char *decoder_filename =
  32 + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
  33 + const char *tokens_filename =
  34 + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
  35 + const char *provider = "cpu";
  36 +
  37 + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  38 + if (wave == NULL) {
  39 + fprintf(stderr, "Failed to read %s\n", wav_filename);
  40 + return -1;
  41 + }
  42 +
  43 + // Offline model config
  44 + SherpaOnnxOfflineModelConfig offline_model_config;
  45 + memset(&offline_model_config, 0, sizeof(offline_model_config));
  46 +
  47 + // set debug to 1 to view more logs
  48 + offline_model_config.debug = 0;
  49 +
  50 + offline_model_config.num_threads = 1;
  51 + offline_model_config.provider = provider;
  52 + offline_model_config.tokens = tokens_filename;
  53 + offline_model_config.canary.encoder = encoder_filename;
  54 + offline_model_config.canary.decoder = decoder_filename;
  55 +
  56 + // so it outputs punctuation and casing
  57 + offline_model_config.canary.use_pnc = 1;
  58 +
  59 + offline_model_config.canary.src_lang = "de";
  60 +
  61 + // since the input audio is German, you can set tgt_lang to either en or de
  62 + offline_model_config.canary.tgt_lang = "en";
  63 +
  64 + // Recognizer config
  65 + SherpaOnnxOfflineRecognizerConfig recognizer_config;
  66 + memset(&recognizer_config, 0, sizeof(recognizer_config));
  67 + recognizer_config.decoding_method = "greedy_search";
  68 + recognizer_config.model_config = offline_model_config;
  69 +
  70 + const SherpaOnnxOfflineRecognizer *recognizer =
  71 + SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
  72 +
  73 + if (recognizer == NULL) {
  74 + fprintf(stderr, "Please check your config!\n");
  75 +
  76 + SherpaOnnxFreeWave(wave);
  77 +
  78 + return -1;
  79 + }
  80 +
  81 + const SherpaOnnxOfflineStream *stream =
  82 + SherpaOnnxCreateOfflineStream(recognizer);
  83 +
  84 + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
  85 + wave->num_samples);
  86 + SherpaOnnxDecodeOfflineStream(recognizer, stream);
  87 + const SherpaOnnxOfflineRecognizerResult *result =
  88 + SherpaOnnxGetOfflineStreamResult(stream);
  89 +
  90 + fprintf(stderr, "Decoded text (English): %s\n", result->text);
  91 +
  92 + SherpaOnnxDestroyOfflineRecognizerResult(result);
  93 + SherpaOnnxDestroyOfflineStream(stream);
  94 +
  95 + // now output German text
  96 + recognizer_config.model_config.canary.tgt_lang = "de";
  97 + SherpaOnnxOfflineRecognizerSetConfig(recognizer, &recognizer_config);
  98 +
  99 + stream = SherpaOnnxCreateOfflineStream(recognizer);
  100 +
  101 + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
  102 + wave->num_samples);
  103 + SherpaOnnxDecodeOfflineStream(recognizer, stream);
  104 + result = SherpaOnnxGetOfflineStreamResult(stream);
  105 +
  106 + fprintf(stderr, "Decoded text (German): %s\n", result->text);
  107 +
  108 + SherpaOnnxDestroyOfflineRecognizerResult(result);
  109 + SherpaOnnxDestroyOfflineStream(stream);
  110 +
  111 + SherpaOnnxDestroyOfflineRecognizer(recognizer);
  112 + SherpaOnnxFreeWave(wave);
  113 +
  114 + return 0;
  115 +}
@@ -54,7 +54,7 @@ int32_t main() { @@ -54,7 +54,7 @@ int32_t main() {
54 "DEV_T0000000000.wav"; 54 "DEV_T0000000000.wav";
55 const char *model_filename = 55 const char *model_filename =
56 "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/" 56 "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/"
57 - "ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx"; 57 + "ctc-epoch-20-avg-1-chunk-16-left-128.onnx";
58 const char *tokens_filename = 58 const char *tokens_filename =
59 "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt"; 59 "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt";
60 const char *provider = "cpu"; 60 const char *provider = "cpu";
@@ -27,6 +27,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) @@ -27,6 +27,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
27 add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) 27 add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
28 target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) 28 target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
29 29
  30 +add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc)
  31 +target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api)
  32 +
30 if(SHERPA_ONNX_ENABLE_PORTAUDIO) 33 if(SHERPA_ONNX_ENABLE_PORTAUDIO)
31 add_executable(sense-voice-simulate-streaming-microphone-cxx-api 34 add_executable(sense-voice-simulate-streaming-microphone-cxx-api
32 ./sense-voice-simulate-streaming-microphone-cxx-api.cc 35 ./sense-voice-simulate-streaming-microphone-cxx-api.cc
  1 +// cxx-api-examples/nemo-canary-cxx-api.cc
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +//
  6 +// This file demonstrates how to use NeMo Canary models with
  7 +// sherpa-onnx's C++ API.
  8 +//
  9 +// clang-format off
  10 +//
  11 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  12 +// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  13 +// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  14 +//
  15 +// clang-format on
  16 +//
  17 +// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
  18 +// for details
  19 +
  20 +#include <chrono> // NOLINT
  21 +#include <iostream>
  22 +#include <string>
  23 +
  24 +#include "sherpa-onnx/c-api/cxx-api.h"
  25 +
  26 +int32_t main() {
  27 + using namespace sherpa_onnx::cxx; // NOLINT
  28 + OfflineRecognizerConfig config;
  29 +
  30 + config.model_config.canary.encoder =
  31 + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
  32 + config.model_config.canary.decoder =
  33 + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
  34 +
  35 + // our input audio is German, so we set src_lang to "de"
  36 + config.model_config.canary.src_lang = "de";
  37 +
  38 + // we can set tgt_lang to either de or en in this specific case
  39 + config.model_config.canary.tgt_lang = "en";
  40 + config.model_config.tokens =
  41 + "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
  42 +
  43 + config.model_config.num_threads = 1;
  44 +
  45 + std::cout << "Loading model\n";
  46 + OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  47 + if (!recognizer.Get()) {
  48 + std::cerr << "Please check your config\n";
  49 + return -1;
  50 + }
  51 + std::cout << "Loading model done\n";
  52 +
  53 + std::string wave_filename =
  54 + "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav";
  55 +
  56 + Wave wave = ReadWave(wave_filename);
  57 + if (wave.samples.empty()) {
  58 + std::cerr << "Failed to read: '" << wave_filename << "'\n";
  59 + return -1;
  60 + }
  61 +
  62 + std::cout << "Start recognition\n";
  63 + const auto begin = std::chrono::steady_clock::now();
  64 +
  65 + OfflineStream stream = recognizer.CreateStream();
  66 + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
  67 + wave.samples.size());
  68 +
  69 + recognizer.Decode(&stream);
  70 +
  71 + OfflineRecognizerResult result = recognizer.GetResult(&stream);
  72 +
  73 + const auto end = std::chrono::steady_clock::now();
  74 + const float elapsed_seconds =
  75 + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
  76 + .count() /
  77 + 1000.;
  78 + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  79 + float rtf = elapsed_seconds / duration;
  80 +
  81 + std::cout << "text (English): " << result.text << "\n";
  82 + printf("Number of threads: %d\n", config.model_config.num_threads);
  83 + printf("Duration: %.3fs\n", duration);
  84 + printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  85 + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
  86 + duration, rtf);
  87 +
  88 + // now output text in German
  89 + config.model_config.canary.tgt_lang = "de";
  90 + recognizer.SetConfig(config);
  91 + stream = recognizer.CreateStream();
  92 + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
  93 + wave.samples.size());
  94 +
  95 + recognizer.Decode(&stream);
  96 +
  97 + result = recognizer.GetResult(&stream);
  98 + std::cout << "text (German): " << result.text << "\n";
  99 +
  100 + return 0;
  101 +}
@@ -7,6 +7,7 @@ export { Samples, @@ -7,6 +7,7 @@ export { Samples,
7 OfflineStream, 7 OfflineStream,
8 FeatureConfig, 8 FeatureConfig,
9 HomophoneReplacerConfig, 9 HomophoneReplacerConfig,
  10 + OfflineCanaryModelConfig,
10 OfflineDolphinModelConfig, 11 OfflineDolphinModelConfig,
11 OfflineTransducerModelConfig, 12 OfflineTransducerModelConfig,
12 OfflineParaformerModelConfig, 13 OfflineParaformerModelConfig,
@@ -93,6 +93,27 @@ static SherpaOnnxOfflineNemoEncDecCtcModelConfig GetOfflineNeMoCtcModelConfig( @@ -93,6 +93,27 @@ static SherpaOnnxOfflineNemoEncDecCtcModelConfig GetOfflineNeMoCtcModelConfig(
93 return c; 93 return c;
94 } 94 }
95 95
  96 +static SherpaOnnxOfflineCanaryModelConfig GetOfflineCanaryModelConfig(
  97 + Napi::Object obj) {
  98 + SherpaOnnxOfflineCanaryModelConfig c;
  99 + memset(&c, 0, sizeof(c));
  100 + c.use_pnc = 1; // Align default with JS default
  101 +
  102 + if (!obj.Has("canary") || !obj.Get("canary").IsObject()) {
  103 + return c;
  104 + }
  105 +
  106 + Napi::Object o = obj.Get("canary").As<Napi::Object>();
  107 +
  108 + SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  109 + SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);
  110 + SHERPA_ONNX_ASSIGN_ATTR_STR(src_lang, srcLang);
  111 + SHERPA_ONNX_ASSIGN_ATTR_STR(tgt_lang, tgtLang);
  112 + SHERPA_ONNX_ASSIGN_ATTR_INT32(use_pnc, usePnc);
  113 +
  114 + return c;
  115 +}
  116 +
96 static SherpaOnnxOfflineWhisperModelConfig GetOfflineWhisperModelConfig( 117 static SherpaOnnxOfflineWhisperModelConfig GetOfflineWhisperModelConfig(
97 Napi::Object obj) { 118 Napi::Object obj) {
98 SherpaOnnxOfflineWhisperModelConfig c; 119 SherpaOnnxOfflineWhisperModelConfig c;
@@ -203,6 +224,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) { @@ -203,6 +224,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
203 c.fire_red_asr = GetOfflineFireRedAsrModelConfig(o); 224 c.fire_red_asr = GetOfflineFireRedAsrModelConfig(o);
204 c.dolphin = GetOfflineDolphinModelConfig(o); 225 c.dolphin = GetOfflineDolphinModelConfig(o);
205 c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o); 226 c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o);
  227 + c.canary = GetOfflineCanaryModelConfig(o);
206 228
207 SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); 229 SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
208 SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); 230 SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
@@ -241,39 +263,7 @@ static SherpaOnnxOfflineLMConfig GetOfflineLMConfig(Napi::Object obj) { @@ -241,39 +263,7 @@ static SherpaOnnxOfflineLMConfig GetOfflineLMConfig(Napi::Object obj) {
241 return c; 263 return c;
242 } 264 }
243 265
244 -static Napi::External<SherpaOnnxOfflineRecognizer>  
245 -CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {  
246 - Napi::Env env = info.Env();  
247 -#if __OHOS__  
248 - // the last argument is the NativeResourceManager  
249 - if (info.Length() != 2) {  
250 - std::ostringstream os;  
251 - os << "Expect only 2 arguments. Given: " << info.Length();  
252 -  
253 - Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();  
254 -  
255 - return {};  
256 - }  
257 -#else  
258 - if (info.Length() != 1) {  
259 - std::ostringstream os;  
260 - os << "Expect only 1 argument. Given: " << info.Length();  
261 -  
262 - Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();  
263 -  
264 - return {};  
265 - }  
266 -#endif  
267 -  
268 - if (!info[0].IsObject()) {  
269 - Napi::TypeError::New(env, "Expect an object as the argument")  
270 - .ThrowAsJavaScriptException();  
271 -  
272 - return {};  
273 - }  
274 -  
275 - Napi::Object o = info[0].As<Napi::Object>();  
276 - 266 +static SherpaOnnxOfflineRecognizerConfig ParseConfig(Napi::Object o) {
277 SherpaOnnxOfflineRecognizerConfig c; 267 SherpaOnnxOfflineRecognizerConfig c;
278 memset(&c, 0, sizeof(c)); 268 memset(&c, 0, sizeof(c));
279 c.feat_config = GetFeatureConfig(o); 269 c.feat_config = GetFeatureConfig(o);
@@ -289,19 +279,10 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { @@ -289,19 +279,10 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
289 SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars); 279 SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
290 SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty); 280 SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty);
291 281
292 -#if __OHOS__  
293 - std::unique_ptr<NativeResourceManager,  
294 - decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>  
295 - mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),  
296 - &OH_ResourceManager_ReleaseNativeResourceManager);  
297 -  
298 - const SherpaOnnxOfflineRecognizer *recognizer =  
299 - SherpaOnnxCreateOfflineRecognizerOHOS(&c, mgr.get());  
300 -#else  
301 - const SherpaOnnxOfflineRecognizer *recognizer =  
302 - SherpaOnnxCreateOfflineRecognizer(&c);  
303 -#endif 282 + return c;
  283 +}
304 284
  285 +static void FreeConfig(const SherpaOnnxOfflineRecognizerConfig &c) {
305 SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder); 286 SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder);
306 SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder); 287 SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder);
307 SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner); 288 SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner);
@@ -331,6 +312,11 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { @@ -331,6 +312,11 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
331 SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model); 312 SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model);
332 SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer_ctc.model); 313 SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer_ctc.model);
333 314
  315 + SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.encoder);
  316 + SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.decoder);
  317 + SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.src_lang);
  318 + SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.tgt_lang);
  319 +
334 SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); 320 SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
335 SHERPA_ONNX_DELETE_C_STR(c.model_config.provider); 321 SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
336 SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type); 322 SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
@@ -347,6 +333,57 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { @@ -347,6 +333,57 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
347 SHERPA_ONNX_DELETE_C_STR(c.hr.dict_dir); 333 SHERPA_ONNX_DELETE_C_STR(c.hr.dict_dir);
348 SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon); 334 SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon);
349 SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts); 335 SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts);
  336 +}
  337 +
  338 +static Napi::External<SherpaOnnxOfflineRecognizer>
  339 +CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
  340 + Napi::Env env = info.Env();
  341 +#if __OHOS__
  342 + // the last argument is the NativeResourceManager
  343 + if (info.Length() != 2) {
  344 + std::ostringstream os;
  345 + os << "Expect only 2 arguments. Given: " << info.Length();
  346 +
  347 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  348 +
  349 + return {};
  350 + }
  351 +#else
  352 + if (info.Length() != 1) {
  353 + std::ostringstream os;
  354 + os << "Expect only 1 argument. Given: " << info.Length();
  355 +
  356 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  357 +
  358 + return {};
  359 + }
  360 +#endif
  361 +
  362 + if (!info[0].IsObject()) {
  363 + Napi::TypeError::New(env, "Expect an object as the argument")
  364 + .ThrowAsJavaScriptException();
  365 +
  366 + return {};
  367 + }
  368 +
  369 + Napi::Object o = info[0].As<Napi::Object>();
  370 +
  371 + SherpaOnnxOfflineRecognizerConfig c = ParseConfig(o);
  372 +
  373 +#if __OHOS__
  374 + std::unique_ptr<NativeResourceManager,
  375 + decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
  376 + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
  377 + &OH_ResourceManager_ReleaseNativeResourceManager);
  378 +
  379 + const SherpaOnnxOfflineRecognizer *recognizer =
  380 + SherpaOnnxCreateOfflineRecognizerOHOS(&c, mgr.get());
  381 +#else
  382 + const SherpaOnnxOfflineRecognizer *recognizer =
  383 + SherpaOnnxCreateOfflineRecognizer(&c);
  384 +#endif
  385 +
  386 + FreeConfig(c);
350 387
351 if (!recognizer) { 388 if (!recognizer) {
352 Napi::TypeError::New(env, "Please check your config!") 389 Napi::TypeError::New(env, "Please check your config!")
@@ -470,6 +507,43 @@ static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) { @@ -470,6 +507,43 @@ static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) {
470 #endif 507 #endif
471 } 508 }
472 509
  510 +static void OfflineRecognizerSetConfigWrapper(const Napi::CallbackInfo &info) {
  511 + Napi::Env env = info.Env();
  512 + if (info.Length() != 2) {
  513 + std::ostringstream os;
  514 + os << "Expect only 2 arguments. Given: " << info.Length();
  515 +
  516 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  517 +
  518 + return;
  519 + }
  520 +
  521 + if (!info[0].IsExternal()) {
  522 + Napi::TypeError::New(env,
  523 + "Argument 0 should be an offline recognizer pointer.")
  524 + .ThrowAsJavaScriptException();
  525 +
  526 + return;
  527 + }
  528 +
  529 + if (!info[1].IsObject()) {
  530 + Napi::TypeError::New(env, "Expect an object as the second argument")
  531 + .ThrowAsJavaScriptException();
  532 +
  533 + return;
  534 + }
  535 +
  536 + Napi::Object o = info[1].As<Napi::Object>();
  537 + SherpaOnnxOfflineRecognizerConfig c = ParseConfig(o);
  538 +
  539 + const SherpaOnnxOfflineRecognizer *recognizer =
  540 + info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();
  541 +
  542 + SherpaOnnxOfflineRecognizerSetConfig(recognizer, &c);
  543 +
  544 + FreeConfig(c);
  545 +}
  546 +
473 static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) { 547 static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) {
474 Napi::Env env = info.Env(); 548 Napi::Env env = info.Env();
475 if (info.Length() != 2) { 549 if (info.Length() != 2) {
@@ -548,6 +622,9 @@ void InitNonStreamingAsr(Napi::Env env, Napi::Object exports) { @@ -548,6 +622,9 @@ void InitNonStreamingAsr(Napi::Env env, Napi::Object exports) {
548 exports.Set(Napi::String::New(env, "decodeOfflineStream"), 622 exports.Set(Napi::String::New(env, "decodeOfflineStream"),
549 Napi::Function::New(env, DecodeOfflineStreamWrapper)); 623 Napi::Function::New(env, DecodeOfflineStreamWrapper));
550 624
  625 + exports.Set(Napi::String::New(env, "offlineRecognizerSetConfig"),
  626 + Napi::Function::New(env, OfflineRecognizerSetConfigWrapper));
  627 +
551 exports.Set(Napi::String::New(env, "getOfflineStreamResultAsJson"), 628 exports.Set(Napi::String::New(env, "getOfflineStreamResultAsJson"),
552 Napi::Function::New(env, GetOfflineStreamResultAsJsonWrapper)); 629 Napi::Function::New(env, GetOfflineStreamResultAsJsonWrapper));
553 } 630 }
@@ -22,6 +22,7 @@ export const voiceActivityDetectorFlush: (handle: object) => void; @@ -22,6 +22,7 @@ export const voiceActivityDetectorFlush: (handle: object) => void;
22 22
23 export const createOfflineRecognizer: (config: object, mgr?: object) => object; 23 export const createOfflineRecognizer: (config: object, mgr?: object) => object;
24 export const createOfflineStream: (handle: object) => object; 24 export const createOfflineStream: (handle: object) => object;
  25 +export const offlineRecognizerSetConfig: (handle: object, config: object) => void;
25 export const acceptWaveformOffline: (handle: object, audio: object) => void; 26 export const acceptWaveformOffline: (handle: object, audio: object) => void;
26 export const decodeOfflineStream: (handle: object, streamHandle: object) => void; 27 export const decodeOfflineStream: (handle: object, streamHandle: object) => void;
27 export const getOfflineStreamResultAsJson: (streamHandle: object) => string; 28 export const getOfflineStreamResultAsJson: (streamHandle: object) => string;
@@ -4,6 +4,7 @@ import { @@ -4,6 +4,7 @@ import {
4 createOfflineStream, 4 createOfflineStream,
5 decodeOfflineStream, 5 decodeOfflineStream,
6 getOfflineStreamResultAsJson, 6 getOfflineStreamResultAsJson,
  7 + offlineRecognizerSetConfig,
7 } from 'libsherpa_onnx.so'; 8 } from 'libsherpa_onnx.so';
8 9
9 export interface Samples { 10 export interface Samples {
@@ -67,6 +68,14 @@ export class OfflineWhisperModelConfig { @@ -67,6 +68,14 @@ export class OfflineWhisperModelConfig {
67 public tailPaddings: number = -1; 68 public tailPaddings: number = -1;
68 } 69 }
69 70
  71 +export class OfflineCanaryModelConfig {
  72 + public encoder: string = '';
  73 + public decoder: string = '';
  74 + public srcLang: string = '';
  75 + public tgtLang: string = '';
  76 + public usePnc: number = 1;
  77 +}
  78 +
70 export class OfflineTdnnModelConfig { 79 export class OfflineTdnnModelConfig {
71 public model: string = ''; 80 public model: string = '';
72 } 81 }
@@ -102,6 +111,7 @@ export class OfflineModelConfig { @@ -102,6 +111,7 @@ export class OfflineModelConfig {
102 public moonshine: OfflineMoonshineModelConfig = new OfflineMoonshineModelConfig(); 111 public moonshine: OfflineMoonshineModelConfig = new OfflineMoonshineModelConfig();
103 public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig(); 112 public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig();
104 public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig(); 113 public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig();
  114 + public canary: OfflineCanaryModelConfig = new OfflineCanaryModelConfig();
105 } 115 }
106 116
107 export class OfflineLMConfig { 117 export class OfflineLMConfig {
@@ -151,6 +161,10 @@ export class OfflineRecognizer { @@ -151,6 +161,10 @@ export class OfflineRecognizer {
151 this.config = config 161 this.config = config
152 } 162 }
153 163
  164 + setConfig(config: OfflineRecognizerConfig) {
  165 + offlineRecognizerSetConfig(this.handle, config);
  166 + }
  167 +
154 createStream(): OfflineStream { 168 createStream(): OfflineStream {
155 const handle: object = createOfflineStream(this.handle); 169 const handle: object = createOfflineStream(this.handle);
156 return new OfflineStream(handle); 170 return new OfflineStream(handle);
@@ -123,6 +123,7 @@ The following tables list the examples in this folder. @@ -123,6 +123,7 @@ The following tables list the examples in this folder.
123 |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)| 123 |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)|
124 |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)| 124 |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)|
125 |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search| 125 |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
  126 +|[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model|
126 |[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search| 127 |[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search|
127 |[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search| 128 |[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search|
128 |[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphinhttps://github.com/DataoceanAI/Dolphin]) CTC model with greedy search| 129 |[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphinhttps://github.com/DataoceanAI/Dolphin]) CTC model with greedy search|
@@ -389,6 +390,16 @@ npm install naudiodon2 @@ -389,6 +390,16 @@ npm install naudiodon2
389 node ./test_vad_asr_non_streaming_zipformer_ctc_microphone.js 390 node ./test_vad_asr_non_streaming_zipformer_ctc_microphone.js
390 ``` 391 ```
391 392
  393 +### Non-streaming speech recognition with NeMo Canary models
  394 +
  395 +```bash
  396 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  397 +tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  398 +rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  399 +
  400 +node ./test_asr_non_streaming_nemo_canary.js
  401 +```
  402 +
392 ### Non-streaming speech recognition with NeMo CTC models 403 ### Non-streaming speech recognition with NeMo CTC models
393 404
394 ```bash 405 ```bash
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +const sherpa_onnx = require('sherpa-onnx-node');
  3 +
  4 +// Please download test files from
  5 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  6 +const config = {
  7 + 'featConfig': {
  8 + 'sampleRate': 16000,
  9 + 'featureDim': 80,
  10 + },
  11 + 'modelConfig': {
  12 + 'canary': {
  13 + 'encoder':
  14 + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx',
  15 + 'decoder':
  16 + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx',
  17 + 'srcLang': 'en',
  18 + 'tgtLang': 'en',
  19 + 'usePnc': 1,
  20 + },
  21 + 'tokens':
  22 + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt',
  23 + 'numThreads': 2,
  24 + 'provider': 'cpu',
  25 + 'debug': 0,
  26 + }
  27 +};
  28 +
  29 +const waveFilename =
  30 + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';
  31 +
  32 +const recognizer = new sherpa_onnx.OfflineRecognizer(config);
  33 +console.log('Started')
  34 +let start = Date.now();
  35 +let stream = recognizer.createStream();
  36 +const wave = sherpa_onnx.readWave(waveFilename);
  37 +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  38 +
  39 +recognizer.decode(stream);
  40 +let result = recognizer.getResult(stream)
  41 +let stop = Date.now();
  42 +console.log('Done')
  43 +
  44 +const elapsed_seconds = (stop - start) / 1000;
  45 +const duration = wave.samples.length / wave.sampleRate;
  46 +const real_time_factor = elapsed_seconds / duration;
  47 +console.log('Wave duration', duration.toFixed(3), 'seconds')
  48 +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
  49 +console.log(
  50 + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
  51 + real_time_factor.toFixed(3))
  52 +console.log(waveFilename)
  53 +console.log('result (English)\n', result)
  54 +
  55 +stream = recognizer.createStream();
  56 +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  57 +recognizer.config.modelConfig.canary.tgtLang = 'de';
  58 +recognizer.setConfig(recognizer.config);
  59 +
  60 +recognizer.decode(stream);
  61 +result = recognizer.getResult(stream)
  62 +console.log('result (German)\n', result)
@@ -63,7 +63,7 @@ for text-to-speech. @@ -63,7 +63,7 @@ for text-to-speech.
63 You can use the following command to run it: 63 You can use the following command to run it:
64 64
65 ```bash 65 ```bash
66 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 66 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
67 tar xf kokoro-en-v0_19.tar.bz2 67 tar xf kokoro-en-v0_19.tar.bz2
68 rm kokoro-en-v0_19.tar.bz2 68 rm kokoro-en-v0_19.tar.bz2
69 69
@@ -154,6 +154,22 @@ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 @@ -154,6 +154,22 @@ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
154 node ./test-offline-dolphin-ctc.js 154 node ./test-offline-dolphin-ctc.js
155 ``` 155 ```
156 156
  157 +## ./test-offline-nemo-canary.js
  158 +
  159 +[./test-offline-nemo-canary.js](./test-offline-nemo-canary.js) demonstrates
  160 +how to decode a file with a NeMo Canary model. In the code we use
  161 +[sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french).
  162 +
  163 +You can use the following command to run it:
  164 +
  165 +```bash
  166 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  167 +tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  168 +rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  169 +
  170 +node ./test-offline-nemo-canary.js
  171 +```
  172 +
157 ## ./test-offline-zipformer-ctc.js 173 ## ./test-offline-zipformer-ctc.js
158 174
159 [./test-offline-zipformer-ctc.js](./test-offline-zipformer-ctc.js) demonstrates 175 [./test-offline-zipformer-ctc.js](./test-offline-zipformer-ctc.js) demonstrates
  1 +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createOfflineRecognizer() {
  10 + let config = {
  11 + modelConfig: {
  12 + canary: {
  13 + encoder:
  14 + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx',
  15 + decoder:
  16 + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx',
  17 + srcLang: 'en',
  18 + tgtLang: 'en',
  19 + usePnc: 1,
  20 + },
  21 + debug: 0,
  22 + tokens:
  23 + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt',
  24 + }
  25 + };
  26 +
  27 + return sherpa_onnx.createOfflineRecognizer(config);
  28 +}
  29 +
  30 +const recognizer = createOfflineRecognizer();
  31 +let stream = recognizer.createStream();
  32 +
  33 +const waveFilename =
  34 + './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';
  35 +const wave = sherpa_onnx.readWave(waveFilename);
  36 +stream.acceptWaveform(wave.sampleRate, wave.samples);
  37 +
  38 +recognizer.decode(stream);
  39 +let text = recognizer.getResult(stream).text;
  40 +console.log(`text in English: ${text}`);
  41 +
  42 +stream.free();
  43 +
  44 +// now output German text
  45 +recognizer.config.modelConfig.canary.tgtLang = 'de';
  46 +recognizer.setConfig(recognizer.config);
  47 +
  48 +stream = recognizer.createStream();
  49 +stream.acceptWaveform(wave.sampleRate, wave.samples);
  50 +recognizer.decode(stream);
  51 +text = recognizer.getResult(stream).text;
  52 +
  53 +console.log(`text in German: ${text}`);
  54 +
  55 +stream.free();
  56 +recognizer.free();
@@ -24,6 +24,10 @@ class OfflineRecognizer { @@ -24,6 +24,10 @@ class OfflineRecognizer {
24 return new OfflineStream(handle); 24 return new OfflineStream(handle);
25 } 25 }
26 26
  27 + setConfig(config) {
  28 + addon.offlineRecognizerSetConfig(this.handle, config);
  29 + }
  30 +
27 decode(stream) { 31 decode(stream) {
28 addon.decodeOfflineStream(this.handle, stream.handle); 32 addon.decodeOfflineStream(this.handle, stream.handle);
29 } 33 }
@@ -487,6 +487,21 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( @@ -487,6 +487,21 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
487 recognizer_config.model_config.zipformer_ctc.model = 487 recognizer_config.model_config.zipformer_ctc.model =
488 SHERPA_ONNX_OR(config->model_config.zipformer_ctc.model, ""); 488 SHERPA_ONNX_OR(config->model_config.zipformer_ctc.model, "");
489 489
  490 + recognizer_config.model_config.canary.encoder =
  491 + SHERPA_ONNX_OR(config->model_config.canary.encoder, "");
  492 +
  493 + recognizer_config.model_config.canary.decoder =
  494 + SHERPA_ONNX_OR(config->model_config.canary.decoder, "");
  495 +
  496 + recognizer_config.model_config.canary.src_lang =
  497 + SHERPA_ONNX_OR(config->model_config.canary.src_lang, "");
  498 +
  499 + recognizer_config.model_config.canary.tgt_lang =
  500 + SHERPA_ONNX_OR(config->model_config.canary.tgt_lang, "");
  501 +
  502 + recognizer_config.model_config.canary.use_pnc =
  503 + config->model_config.canary.use_pnc;
  504 +
490 recognizer_config.lm_config.model = 505 recognizer_config.lm_config.model =
491 SHERPA_ONNX_OR(config->lm_config.model, ""); 506 SHERPA_ONNX_OR(config->lm_config.model, "");
492 recognizer_config.lm_config.scale = 507 recognizer_config.lm_config.scale =
@@ -420,6 +420,14 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig { @@ -420,6 +420,14 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig {
420 int32_t tail_paddings; 420 int32_t tail_paddings;
421 } SherpaOnnxOfflineWhisperModelConfig; 421 } SherpaOnnxOfflineWhisperModelConfig;
422 422
  423 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineCanaryModelConfig {
  424 + const char *encoder;
  425 + const char *decoder;
  426 + const char *src_lang;
  427 + const char *tgt_lang;
  428 + int32_t use_pnc;
  429 +} SherpaOnnxOfflineCanaryModelConfig;
  430 +
423 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineFireRedAsrModelConfig { 431 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineFireRedAsrModelConfig {
424 const char *encoder; 432 const char *encoder;
425 const char *decoder; 433 const char *decoder;
@@ -479,6 +487,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { @@ -479,6 +487,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
479 SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr; 487 SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr;
480 SherpaOnnxOfflineDolphinModelConfig dolphin; 488 SherpaOnnxOfflineDolphinModelConfig dolphin;
481 SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc; 489 SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc;
  490 + SherpaOnnxOfflineCanaryModelConfig canary;
482 } SherpaOnnxOfflineModelConfig; 491 } SherpaOnnxOfflineModelConfig;
483 492
484 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { 493 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
@@ -193,7 +193,7 @@ void OfflineStream::AcceptWaveform(int32_t sample_rate, const float *samples, @@ -193,7 +193,7 @@ void OfflineStream::AcceptWaveform(int32_t sample_rate, const float *samples,
193 SherpaOnnxAcceptWaveformOffline(p_, sample_rate, samples, n); 193 SherpaOnnxAcceptWaveformOffline(p_, sample_rate, samples, n);
194 } 194 }
195 195
196 -OfflineRecognizer OfflineRecognizer::Create( 196 +static SherpaOnnxOfflineRecognizerConfig Convert(
197 const OfflineRecognizerConfig &config) { 197 const OfflineRecognizerConfig &config) {
198 struct SherpaOnnxOfflineRecognizerConfig c; 198 struct SherpaOnnxOfflineRecognizerConfig c;
199 memset(&c, 0, sizeof(c)); 199 memset(&c, 0, sizeof(c));
@@ -256,6 +256,12 @@ OfflineRecognizer OfflineRecognizer::Create( @@ -256,6 +256,12 @@ OfflineRecognizer OfflineRecognizer::Create(
256 c.model_config.zipformer_ctc.model = 256 c.model_config.zipformer_ctc.model =
257 config.model_config.zipformer_ctc.model.c_str(); 257 config.model_config.zipformer_ctc.model.c_str();
258 258
  259 + c.model_config.canary.encoder = config.model_config.canary.encoder.c_str();
  260 + c.model_config.canary.decoder = config.model_config.canary.decoder.c_str();
  261 + c.model_config.canary.src_lang = config.model_config.canary.src_lang.c_str();
  262 + c.model_config.canary.tgt_lang = config.model_config.canary.tgt_lang.c_str();
  263 + c.model_config.canary.use_pnc = config.model_config.canary.use_pnc;
  264 +
259 c.lm_config.model = config.lm_config.model.c_str(); 265 c.lm_config.model = config.lm_config.model.c_str();
260 c.lm_config.scale = config.lm_config.scale; 266 c.lm_config.scale = config.lm_config.scale;
261 267
@@ -273,10 +279,22 @@ OfflineRecognizer OfflineRecognizer::Create( @@ -273,10 +279,22 @@ OfflineRecognizer OfflineRecognizer::Create(
273 c.hr.lexicon = config.hr.lexicon.c_str(); 279 c.hr.lexicon = config.hr.lexicon.c_str();
274 c.hr.rule_fsts = config.hr.rule_fsts.c_str(); 280 c.hr.rule_fsts = config.hr.rule_fsts.c_str();
275 281
  282 + return c;
  283 +}
  284 +
  285 +OfflineRecognizer OfflineRecognizer::Create(
  286 + const OfflineRecognizerConfig &config) {
  287 + auto c = Convert(config);
  288 +
276 auto p = SherpaOnnxCreateOfflineRecognizer(&c); 289 auto p = SherpaOnnxCreateOfflineRecognizer(&c);
277 return OfflineRecognizer(p); 290 return OfflineRecognizer(p);
278 } 291 }
279 292
  293 +void OfflineRecognizer::SetConfig(const OfflineRecognizerConfig &config) const {
  294 + auto c = Convert(config);
  295 + SherpaOnnxOfflineRecognizerSetConfig(p_, &c);
  296 +}
  297 +
280 OfflineRecognizer::OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p) 298 OfflineRecognizer::OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p)
281 : MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer>(p) {} 299 : MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer>(p) {}
282 300
@@ -223,6 +223,14 @@ struct SHERPA_ONNX_API OfflineWhisperModelConfig { @@ -223,6 +223,14 @@ struct SHERPA_ONNX_API OfflineWhisperModelConfig {
223 int32_t tail_paddings = -1; 223 int32_t tail_paddings = -1;
224 }; 224 };
225 225
  226 +struct SHERPA_ONNX_API OfflineCanaryModelConfig {
  227 + std::string encoder;
  228 + std::string decoder;
  229 + std::string src_lang;
  230 + std::string tgt_lang;
  231 + bool use_pnc = true;
  232 +};
  233 +
226 struct SHERPA_ONNX_API OfflineFireRedAsrModelConfig { 234 struct SHERPA_ONNX_API OfflineFireRedAsrModelConfig {
227 std::string encoder; 235 std::string encoder;
228 std::string decoder; 236 std::string decoder;
@@ -273,6 +281,7 @@ struct SHERPA_ONNX_API OfflineModelConfig { @@ -273,6 +281,7 @@ struct SHERPA_ONNX_API OfflineModelConfig {
273 OfflineFireRedAsrModelConfig fire_red_asr; 281 OfflineFireRedAsrModelConfig fire_red_asr;
274 OfflineDolphinModelConfig dolphin; 282 OfflineDolphinModelConfig dolphin;
275 OfflineZipformerCtcModelConfig zipformer_ctc; 283 OfflineZipformerCtcModelConfig zipformer_ctc;
  284 + OfflineCanaryModelConfig canary;
276 }; 285 };
277 286
278 struct SHERPA_ONNX_API OfflineLMConfig { 287 struct SHERPA_ONNX_API OfflineLMConfig {
@@ -335,6 +344,8 @@ class SHERPA_ONNX_API OfflineRecognizer @@ -335,6 +344,8 @@ class SHERPA_ONNX_API OfflineRecognizer
335 344
336 OfflineRecognizerResult GetResult(const OfflineStream *s) const; 345 OfflineRecognizerResult GetResult(const OfflineStream *s) const;
337 346
  347 + void SetConfig(const OfflineRecognizerConfig &config) const;
  348 +
338 private: 349 private:
339 explicit OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p); 350 explicit OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p);
340 }; 351 };
@@ -45,7 +45,7 @@ Usage: @@ -45,7 +45,7 @@ Usage:
45 45
46 ./bin/sherpa-onnx \ 46 ./bin/sherpa-onnx \
47 --debug=1 \ 47 --debug=1 \
48 - --zipformer2-ctc-model=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \ 48 + --zipformer2-ctc-model=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
49 --tokens=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \ 49 --tokens=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
50 ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav \ 50 ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav \
51 ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000001.wav \ 51 ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000001.wav \
@@ -12,7 +12,6 @@ set(exported_functions @@ -12,7 +12,6 @@ set(exported_functions
12 SherpaOnnxCreateOnlineRecognizer 12 SherpaOnnxCreateOnlineRecognizer
13 SherpaOnnxCreateOnlineStream 13 SherpaOnnxCreateOnlineStream
14 SherpaOnnxDecodeOnlineStream 14 SherpaOnnxDecodeOnlineStream
15 - SherpaOnnxDestroyOfflineStreamResultJson  
16 SherpaOnnxDestroyOnlineRecognizer 15 SherpaOnnxDestroyOnlineRecognizer
17 SherpaOnnxDestroyOnlineRecognizerResult 16 SherpaOnnxDestroyOnlineRecognizerResult
18 SherpaOnnxDestroyOnlineStream 17 SherpaOnnxDestroyOnlineStream
@@ -59,6 +59,10 @@ function freeConfig(config, Module) { @@ -59,6 +59,10 @@ function freeConfig(config, Module) {
59 freeConfig(config.senseVoice, Module) 59 freeConfig(config.senseVoice, Module)
60 } 60 }
61 61
  62 + if ('canary' in config) {
  63 + freeConfig(config.canary, Module)
  64 + }
  65 +
62 if ('lm' in config) { 66 if ('lm' in config) {
63 freeConfig(config.lm, Module) 67 freeConfig(config.lm, Module)
64 } 68 }
@@ -246,7 +250,7 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -246,7 +250,7 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
246 Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider 250 Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
247 offset += 4; 251 offset += 4;
248 252
249 - Module.setValue(ptr + offset, config.debug || 0, 'i32'); 253 + Module.setValue(ptr + offset, config.debug ?? 1, 'i32');
250 offset += 4; 254 offset += 4;
251 255
252 Module.setValue( 256 Module.setValue(
@@ -692,6 +696,51 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { @@ -692,6 +696,51 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
692 } 696 }
693 } 697 }
694 698
  699 +function initSherpaOnnxOfflineCanaryModelConfig(config, Module) {
  700 + const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  701 + const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
  702 + const srcLangLen = Module.lengthBytesUTF8(config.srcLang || '') + 1;
  703 + const tgtLangLen = Module.lengthBytesUTF8(config.tgtLang || '') + 1;
  704 +
  705 + const n = encoderLen + decoderLen + srcLangLen + tgtLangLen;
  706 + const buffer = Module._malloc(n);
  707 +
  708 + const len = 5 * 4; // 4 pointers + 1 int32
  709 + const ptr = Module._malloc(len);
  710 +
  711 + let offset = 0;
  712 + Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
  713 + offset += encoderLen;
  714 +
  715 + Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
  716 + offset += decoderLen;
  717 +
  718 + Module.stringToUTF8(config.srcLang || '', buffer + offset, srcLangLen);
  719 + offset += srcLangLen;
  720 +
  721 + Module.stringToUTF8(config.tgtLang || '', buffer + offset, tgtLangLen);
  722 + offset += tgtLangLen;
  723 +
  724 + offset = 0;
  725 + Module.setValue(ptr, buffer + offset, 'i8*');
  726 + offset += encoderLen;
  727 +
  728 + Module.setValue(ptr + 4, buffer + offset, 'i8*');
  729 + offset += decoderLen;
  730 +
  731 + Module.setValue(ptr + 8, buffer + offset, 'i8*');
  732 + offset += srcLangLen;
  733 +
  734 + Module.setValue(ptr + 12, buffer + offset, 'i8*');
  735 + offset += tgtLangLen;
  736 +
  737 + Module.setValue(ptr + 16, config.usePnc ?? 1, 'i32');
  738 +
  739 + return {
  740 + buffer: buffer, ptr: ptr, len: len,
  741 + }
  742 +}
  743 +
695 function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) { 744 function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) {
696 const preprocessorLen = Module.lengthBytesUTF8(config.preprocessor || '') + 1; 745 const preprocessorLen = Module.lengthBytesUTF8(config.preprocessor || '') + 1;
697 const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; 746 const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
@@ -811,7 +860,7 @@ function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) { @@ -811,7 +860,7 @@ function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) {
811 Module.setValue(ptr + 4, buffer + offset, 'i8*'); 860 Module.setValue(ptr + 4, buffer + offset, 'i8*');
812 offset += languageLen; 861 offset += languageLen;
813 862
814 - Module.setValue(ptr + 8, config.useInverseTextNormalization || 0, 'i32'); 863 + Module.setValue(ptr + 8, config.useInverseTextNormalization ?? 0, 'i32');
815 864
816 return { 865 return {
817 buffer: buffer, ptr: ptr, len: len, 866 buffer: buffer, ptr: ptr, len: len,
@@ -907,6 +956,16 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { @@ -907,6 +956,16 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
907 }; 956 };
908 } 957 }
909 958
  959 + if (!('canary' in config)) {
  960 + config.canary = {
  961 + encoder: '',
  962 + decoder: '',
  963 + srcLang: '',
  964 + tgtLang: '',
  965 + usePnc: 1,
  966 + };
  967 + }
  968 +
910 const transducer = 969 const transducer =
911 initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module); 970 initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
912 971
@@ -936,9 +995,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { @@ -936,9 +995,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
936 const zipformerCtc = 995 const zipformerCtc =
937 initSherpaOnnxOfflineZipformerCtcModelConfig(config.zipformerCtc, Module); 996 initSherpaOnnxOfflineZipformerCtcModelConfig(config.zipformerCtc, Module);
938 997
  998 + const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module);
  999 +
939 const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + 1000 const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
940 tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len + 1001 tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
941 - dolphin.len + zipformerCtc.len; 1002 + dolphin.len + zipformerCtc.len + canary.len;
942 1003
943 const ptr = Module._malloc(len); 1004 const ptr = Module._malloc(len);
944 1005
@@ -1000,7 +1061,7 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { @@ -1000,7 +1061,7 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
1000 Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); 1061 Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
1001 offset += 4; 1062 offset += 4;
1002 1063
1003 - Module.setValue(ptr + offset, config.debug || 0, 'i32'); 1064 + Module.setValue(ptr + offset, config.debug ?? 1, 'i32');
1004 offset += 4; 1065 offset += 4;
1005 1066
1006 Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider 1067 Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
@@ -1043,11 +1104,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { @@ -1043,11 +1104,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
1043 Module._CopyHeap(zipformerCtc.ptr, zipformerCtc.len, ptr + offset); 1104 Module._CopyHeap(zipformerCtc.ptr, zipformerCtc.len, ptr + offset);
1044 offset += zipformerCtc.len; 1105 offset += zipformerCtc.len;
1045 1106
  1107 + Module._CopyHeap(canary.ptr, canary.len, ptr + offset);
  1108 + offset += canary.len;
  1109 +
1046 return { 1110 return {
1047 buffer: buffer, ptr: ptr, len: len, transducer: transducer, 1111 buffer: buffer, ptr: ptr, len: len, transducer: transducer,
1048 paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn, 1112 paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
1049 senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr, 1113 senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
1050 - dolphin: dolphin, zipformerCtc: zipformerCtc 1114 + dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary,
1051 } 1115 }
1052 } 1116 }
1053 1117
@@ -1189,6 +1253,13 @@ class OfflineRecognizer { @@ -1189,6 +1253,13 @@ class OfflineRecognizer {
1189 this.Module = Module; 1253 this.Module = Module;
1190 } 1254 }
1191 1255
  1256 + setConfig(configObj) {
  1257 + const config =
  1258 + initSherpaOnnxOfflineRecognizerConfig(configObj, this.Module);
  1259 + this.Module._SherpaOnnxOfflineRecognizerSetConfig(this.handle, config.ptr);
  1260 + freeConfig(config, this.Module);
  1261 + }
  1262 +
1192 free() { 1263 free() {
1193 this.Module._SherpaOnnxDestroyOfflineRecognizer(this.handle); 1264 this.Module._SherpaOnnxDestroyOfflineRecognizer(this.handle);
1194 this.handle = 0 1265 this.handle = 0
@@ -41,6 +41,7 @@ set(exported_functions @@ -41,6 +41,7 @@ set(exported_functions
41 SherpaOnnxDestroyOfflineStreamResultJson 41 SherpaOnnxDestroyOfflineStreamResultJson
42 SherpaOnnxGetOfflineStreamResult 42 SherpaOnnxGetOfflineStreamResult
43 SherpaOnnxGetOfflineStreamResultAsJson 43 SherpaOnnxGetOfflineStreamResultAsJson
  44 + SherpaOnnxOfflineRecognizerSetConfig
44 # online kws 45 # online kws
45 SherpaOnnxCreateKeywordSpotter 46 SherpaOnnxCreateKeywordSpotter
46 SherpaOnnxCreateKeywordStream 47 SherpaOnnxCreateKeywordStream
@@ -21,6 +21,7 @@ static_assert(sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) == 2 * 4, ""); @@ -21,6 +21,7 @@ static_assert(sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) == 2 * 4, "");
21 static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, ""); 21 static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, "");
22 static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, ""); 22 static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
23 static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, ""); 23 static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, "");
  24 +static_assert(sizeof(SherpaOnnxOfflineCanaryModelConfig) == 5 * 4, "");
24 static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, ""); 25 static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");
25 26
26 static_assert(sizeof(SherpaOnnxOfflineModelConfig) == 27 static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
@@ -33,7 +34,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) == @@ -33,7 +34,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
33 sizeof(SherpaOnnxOfflineMoonshineModelConfig) + 34 sizeof(SherpaOnnxOfflineMoonshineModelConfig) +
34 sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) + 35 sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) +
35 sizeof(SherpaOnnxOfflineDolphinModelConfig) + 36 sizeof(SherpaOnnxOfflineDolphinModelConfig) +
36 - sizeof(SherpaOnnxOfflineZipformerCtcModelConfig), 37 + sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) +
  38 + sizeof(SherpaOnnxOfflineCanaryModelConfig),
37 39
38 ""); 40 "");
39 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); 41 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
@@ -80,6 +82,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { @@ -80,6 +82,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
80 auto fire_red_asr = &model_config->fire_red_asr; 82 auto fire_red_asr = &model_config->fire_red_asr;
81 auto dolphin = &model_config->dolphin; 83 auto dolphin = &model_config->dolphin;
82 auto zipformer_ctc = &model_config->zipformer_ctc; 84 auto zipformer_ctc = &model_config->zipformer_ctc;
  85 + auto canary = &model_config->canary;
83 86
84 fprintf(stdout, "----------offline transducer model config----------\n"); 87 fprintf(stdout, "----------offline transducer model config----------\n");
85 fprintf(stdout, "encoder: %s\n", transducer->encoder); 88 fprintf(stdout, "encoder: %s\n", transducer->encoder);
@@ -123,6 +126,13 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { @@ -123,6 +126,13 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
123 fprintf(stdout, "----------offline zipformer ctc model config----------\n"); 126 fprintf(stdout, "----------offline zipformer ctc model config----------\n");
124 fprintf(stdout, "model: %s\n", zipformer_ctc->model); 127 fprintf(stdout, "model: %s\n", zipformer_ctc->model);
125 128
  129 + fprintf(stdout, "----------offline NeMo Canary model config----------\n");
  130 + fprintf(stdout, "encoder: %s\n", canary->encoder);
  131 + fprintf(stdout, "decoder: %s\n", canary->decoder);
  132 + fprintf(stdout, "src_lang: %s\n", canary->src_lang);
  133 + fprintf(stdout, "tgt_lang: %s\n", canary->tgt_lang);
  134 + fprintf(stdout, "use_pnc: %d\n", canary->use_pnc);
  135 +
126 fprintf(stdout, "tokens: %s\n", model_config->tokens); 136 fprintf(stdout, "tokens: %s\n", model_config->tokens);
127 fprintf(stdout, "num_threads: %d\n", model_config->num_threads); 137 fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
128 fprintf(stdout, "provider: %s\n", model_config->provider); 138 fprintf(stdout, "provider: %s\n", model_config->provider);