Fangjun Kuang
Committed by GitHub

Add Java and Kotlin API for NeMo Canary models (#2359)

Add support for the NeMo Canary model in both Java and Kotlin APIs, wiring it through
JNI and updating examples and CI.

- Introduce OfflineCanaryModelConfig in Kotlin and Java with builder patterns
- Extend OfflineRecognizer to accept and apply the new canary config via setConfig
- Update the JNI binding (GetOfflineConfig), add a getOfflineModelConfig mapping (type 32),
   and update the examples and CI workflows
@@ -117,6 +117,13 @@ jobs: @@ -117,6 +117,13 @@ jobs:
117 cd ./java-api-examples 117 cd ./java-api-examples
118 ./run-version-test.sh 118 ./run-version-test.sh
119 119
  120 + - name: Run java test (Nemo Canary)
  121 + shell: bash
  122 + run: |
  123 + cd ./java-api-examples
  124 + ./run-non-streaming-decode-file-nemo-canary.sh
  125 + rm -rf sherpa-onnx-nemo-*
  126 +
120 - name: Run java test (Non-streaming SenseVoice with homophone replacer) 127 - name: Run java test (Non-streaming SenseVoice with homophone replacer)
121 shell: bash 128 shell: bash
122 run: | 129 run: |
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +// This file shows how to use an offline NeMo Canary model, i.e.,
  4 +// non-streaming NeMo Canary model, to decode files.
  5 +import com.k2fsa.sherpa.onnx.*;
  6 +
  7 +public class NonStreamingDecodeFileNemoCanary {
  8 + public static void main(String[] args) {
  9 + // please refer to
  10 + // https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
  11 + // to download model files
  12 + String encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
  13 + String decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
  14 + String tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
  15 +
  16 + String waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav";
  17 +
  18 + WaveReader reader = new WaveReader(waveFilename);
  19 +
  20 + OfflineCanaryModelConfig canary =
  21 + OfflineCanaryModelConfig.builder()
  22 + .setEncoder(encoder)
  23 + .setDecoder(decoder)
  24 + .setSrcLang("en")
  25 + .setTgtLang("en")
  26 + .setUsePnc(true)
  27 + .build();
  28 +
  29 + OfflineModelConfig modelConfig =
  30 + OfflineModelConfig.builder()
  31 + .setCanary(canary)
  32 + .setTokens(tokens)
  33 + .setNumThreads(1)
  34 + .setDebug(true)
  35 + .build();
  36 +
  37 + OfflineRecognizerConfig config =
  38 + OfflineRecognizerConfig.builder()
  39 + .setOfflineModelConfig(modelConfig)
  40 + .setDecodingMethod("greedy_search")
  41 + .build();
  42 +
  43 + OfflineRecognizer recognizer = new OfflineRecognizer(config);
  44 + OfflineStream stream = recognizer.createStream();
  45 + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
  46 +
  47 + recognizer.decode(stream);
  48 +
  49 + String text = recognizer.getResult(stream).getText();
  50 +
  51 + System.out.printf("filename:%s\nresult(English):%s\n", waveFilename, text);
  52 +
  53 + stream.release();
  54 + recognizer.release();
  55 + }
  56 +}
@@ -24,11 +24,18 @@ This directory contains examples for the JAVA API of sherpa-onnx. @@ -24,11 +24,18 @@ This directory contains examples for the JAVA API of sherpa-onnx.
24 24
25 ```bash 25 ```bash
26 ./run-non-streaming-decode-file-dolphin-ctc.sh 26 ./run-non-streaming-decode-file-dolphin-ctc.sh
  27 +./run-non-streaming-decode-file-fire-red-asr.sh
  28 +./run-non-streaming-decode-file-moonshine.sh
  29 +./run-non-streaming-decode-file-nemo-canary.sh
  30 +./run-non-streaming-decode-file-nemo.sh
27 ./run-non-streaming-decode-file-paraformer.sh 31 ./run-non-streaming-decode-file-paraformer.sh
28 ./run-non-streaming-decode-file-sense-voice.sh 32 ./run-non-streaming-decode-file-sense-voice.sh
  33 +./run-non-streaming-decode-file-tele-speech-ctc.sh
  34 +./run-non-streaming-decode-file-transducer-hotwords.sh
29 ./run-non-streaming-decode-file-transducer.sh 35 ./run-non-streaming-decode-file-transducer.sh
  36 +./run-non-streaming-decode-file-whisper-multiple.sh
30 ./run-non-streaming-decode-file-whisper.sh 37 ./run-non-streaming-decode-file-whisper.sh
31 -./run-non-streaming-decode-file-nemo.sh 38 +./run-non-streaming-decode-file-zipformer-ctc.sh
32 ``` 39 ```
33 40
34 ## Non-Streaming Speech recognition with homophone replacer 41 ## Non-Streaming Speech recognition with homophone replacer
#!/usr/bin/env bash
#
# Runs NonStreamingDecodeFileNemoCanary.java against the int8 NeMo Canary model.
#
# Each preparation step is skipped when its output already exists:
#   1. build the sherpa-onnx JNI shared library in ../build
#   2. build ../sherpa-onnx/java-api/build/sherpa-onnx.jar
#   3. download and unpack the model archive into the current directory

set -ex

# Step 1: build the JNI library unless a .dylib or .so artifact is already there.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Step 2: build the Java API jar.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Step 3: fetch the model; the archive is deleted once it has been extracted.
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces is passed to the JVM
# as a single -Djava.library.path argument instead of being word-split.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileNemoCanary.java
@@ -455,8 +455,31 @@ function testOfflineSenseVoiceWithHr() { @@ -455,8 +455,31 @@ function testOfflineSenseVoiceWithHr() {
455 ls -lh $out_filename 455 ls -lh $out_filename
456 java -Djava.library.path=../build/lib -jar $out_filename 456 java -Djava.library.path=../build/lib -jar $out_filename
457 } 457 }
458 -testVersion  
459 458
# Downloads the int8 NeMo Canary model when it is missing, compiles
# test_offline_nemo_canary.kt together with the sources it needs into a
# runnable jar, and runs that jar with the JNI library from ../build/lib.
function testOfflineNeMoCanary() {
  # The encoder file doubles as the "model already downloaded" marker.
  if [[ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
    tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
    rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  fi

  # Test entry point first, followed by the API sources it is compiled with.
  local -a kotlin_sources=(
    test_offline_nemo_canary.kt
    FeatureConfig.kt
    HomophoneReplacerConfig.kt
    OfflineRecognizer.kt
    OfflineStream.kt
    WaveReader.kt
    faked-asset-manager.kt
  )

  out_filename=test_offline_nemo_canary.jar
  kotlinc-jvm -include-runtime -d $out_filename "${kotlin_sources[@]}"

  ls -lh $out_filename
  java -Djava.library.path=../build/lib -jar $out_filename
}
  479 +
  480 +# testVersion
  481 +
  482 +testOfflineNeMoCanary
460 testOfflineSenseVoiceWithHr 483 testOfflineSenseVoiceWithHr
461 testOfflineSpeechDenoiser 484 testOfflineSpeechDenoiser
462 testOfflineSpeakerDiarization 485 testOfflineSpeakerDiarization
  1 +package com.k2fsa.sherpa.onnx
  2 +
/**
 * Decodes the same English wave file twice with one recognizer: first with the
 * recognizer's initial configuration, then again after switching the Canary
 * target language to "de" through [OfflineRecognizer.setConfig].
 */
fun main() {
    val recognizer = createOfflineRecognizer()
    val waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"

    // readWaveFromFile returns an untyped array: element 0 holds the samples,
    // element 1 the sample rate.
    val waveData = WaveReader.readWaveFromFile(filename = waveFilename)
    val samples = waveData[0] as FloatArray
    val sampleRate = waveData[1] as Int

    // First pass: decode with the configuration the recognizer was created with.
    recognizer.createStream().also { englishStream ->
        englishStream.acceptWaveform(samples, sampleRate = sampleRate)
        recognizer.decode(englishStream)

        val result = recognizer.getResult(englishStream)
        println("English: $result")

        englishStream.release()
    }

    // now output text in German: only tgtLang differs from the initial configuration
    val initialConfig = recognizer.config
    val germanCanary = initialConfig.modelConfig.canary.copy(tgtLang = "de")
    val germanModelConfig = initialConfig.modelConfig.copy(canary = germanCanary)
    recognizer.setConfig(initialConfig.copy(modelConfig = germanModelConfig))

    // Second pass: a fresh stream over the same samples.
    recognizer.createStream().also { germanStream ->
        germanStream.acceptWaveform(samples, sampleRate = sampleRate)
        recognizer.decode(germanStream)

        val result = recognizer.getResult(germanStream)
        println("German: $result")

        germanStream.release()
    }

    recognizer.release()
}
  40 +
  41 +
/**
 * Creates an [OfflineRecognizer] from the model configuration registered as
 * type 32 in [getOfflineModelConfig].
 *
 * @return a recognizer built from that configuration; the caller releases it.
 * @throws IllegalStateException if [getOfflineModelConfig] has no entry for type 32.
 */
fun createOfflineRecognizer(): OfflineRecognizer {
    // getOfflineModelConfig is nullable; fail with a descriptive message
    // instead of the bare NullPointerException that `!!` would raise.
    val modelConfig = getOfflineModelConfig(type = 32)
        ?: error("getOfflineModelConfig(type = 32) returned null; cannot create the recognizer")

    return OfflineRecognizer(config = OfflineRecognizerConfig(modelConfig = modelConfig))
}
@@ -34,6 +34,7 @@ java_files += OfflineFireRedAsrModelConfig.java @@ -34,6 +34,7 @@ java_files += OfflineFireRedAsrModelConfig.java
34 java_files += OfflineMoonshineModelConfig.java 34 java_files += OfflineMoonshineModelConfig.java
35 java_files += OfflineNemoEncDecCtcModelConfig.java 35 java_files += OfflineNemoEncDecCtcModelConfig.java
36 java_files += OfflineZipformerCtcModelConfig.java 36 java_files += OfflineZipformerCtcModelConfig.java
  37 +java_files += OfflineCanaryModelConfig.java
37 java_files += OfflineSenseVoiceModelConfig.java 38 java_files += OfflineSenseVoiceModelConfig.java
38 java_files += OfflineDolphinModelConfig.java 39 java_files += OfflineDolphinModelConfig.java
39 java_files += OfflineModelConfig.java 40 java_files += OfflineModelConfig.java
  1 +// Copyright 2025 Xiaomi Corporation
  2 +
  3 +package com.k2fsa.sherpa.onnx;
  4 +
/**
 * Immutable configuration of a NeMo Canary model, created through {@link Builder}.
 *
 * <p>The native binding looks the fields up by name ("encoder", "decoder",
 * "srcLang", "tgtLang", "usePnc") and converts the string fields without a
 * null check, so the field names must stay as they are and the string values
 * must never be null. The builder enforces the latter.
 */
public class OfflineCanaryModelConfig {
    private final String encoder;
    private final String decoder;
    private final String srcLang;
    private final String tgtLang;
    private final boolean usePnc;

    private OfflineCanaryModelConfig(Builder builder) {
        this.encoder = builder.encoder;
        this.decoder = builder.decoder;
        this.srcLang = builder.srcLang;
        this.tgtLang = builder.tgtLang;
        this.usePnc = builder.usePnc;
    }

    /** Returns a builder pre-populated with the default values. */
    public static Builder builder() {
        return new Builder();
    }

    /** Returns the encoder model path; empty by default. */
    public String getEncoder() {
        return encoder;
    }

    /** Returns the decoder model path; empty by default. */
    public String getDecoder() {
        return decoder;
    }

    /** Returns the source language code; "en" by default. */
    public String getSrcLang() {
        return srcLang;
    }

    /** Returns the target language code; "en" by default. */
    public String getTgtLang() {
        return tgtLang;
    }

    /** Returns the usePnc flag; true by default. */
    public boolean isUsePnc() {
        return usePnc;
    }

    /** Fluent builder for {@link OfflineCanaryModelConfig}. */
    public static class Builder {
        private String encoder = "";
        private String decoder = "";
        private String srcLang = "en";
        private String tgtLang = "en";
        private boolean usePnc = true;

        /** Creates the immutable configuration from the current builder state. */
        public OfflineCanaryModelConfig build() {
            return new OfflineCanaryModelConfig(this);
        }

        /**
         * Sets the encoder model path.
         *
         * @throws NullPointerException if {@code encoder} is null
         */
        public Builder setEncoder(String encoder) {
            // Reject null here: the native side would dereference it.
            this.encoder = java.util.Objects.requireNonNull(encoder, "encoder must not be null");
            return this;
        }

        /**
         * Sets the decoder model path.
         *
         * @throws NullPointerException if {@code decoder} is null
         */
        public Builder setDecoder(String decoder) {
            this.decoder = java.util.Objects.requireNonNull(decoder, "decoder must not be null");
            return this;
        }

        /**
         * Sets the source language code.
         *
         * @throws NullPointerException if {@code srcLang} is null
         */
        public Builder setSrcLang(String srcLang) {
            this.srcLang = java.util.Objects.requireNonNull(srcLang, "srcLang must not be null");
            return this;
        }

        /**
         * Sets the target language code.
         *
         * @throws NullPointerException if {@code tgtLang} is null
         */
        public Builder setTgtLang(String tgtLang) {
            this.tgtLang = java.util.Objects.requireNonNull(tgtLang, "tgtLang must not be null");
            return this;
        }

        /** Sets the usePnc flag. */
        public Builder setUsePnc(boolean usePnc) {
            this.usePnc = usePnc;
            return this;
        }
    }
}
@@ -12,6 +12,7 @@ public class OfflineModelConfig { @@ -12,6 +12,7 @@ public class OfflineModelConfig {
12 private final OfflineSenseVoiceModelConfig senseVoice; 12 private final OfflineSenseVoiceModelConfig senseVoice;
13 private final OfflineDolphinModelConfig dolphin; 13 private final OfflineDolphinModelConfig dolphin;
14 private final OfflineZipformerCtcModelConfig zipformerCtc; 14 private final OfflineZipformerCtcModelConfig zipformerCtc;
  15 + private final OfflineCanaryModelConfig canary;
15 private final String teleSpeech; 16 private final String teleSpeech;
16 private final String tokens; 17 private final String tokens;
17 private final int numThreads; 18 private final int numThreads;
@@ -30,6 +31,7 @@ public class OfflineModelConfig { @@ -30,6 +31,7 @@ public class OfflineModelConfig {
30 this.moonshine = builder.moonshine; 31 this.moonshine = builder.moonshine;
31 this.nemo = builder.nemo; 32 this.nemo = builder.nemo;
32 this.zipformerCtc = builder.zipformerCtc; 33 this.zipformerCtc = builder.zipformerCtc;
  34 + this.canary = builder.canary;
33 this.senseVoice = builder.senseVoice; 35 this.senseVoice = builder.senseVoice;
34 this.dolphin = builder.dolphin; 36 this.dolphin = builder.dolphin;
35 this.teleSpeech = builder.teleSpeech; 37 this.teleSpeech = builder.teleSpeech;
@@ -78,6 +80,10 @@ public class OfflineModelConfig { @@ -78,6 +80,10 @@ public class OfflineModelConfig {
78 return zipformerCtc; 80 return zipformerCtc;
79 } 81 }
80 82
  83 + public OfflineCanaryModelConfig getCanary() {
  84 + return canary;
  85 + }
  86 +
81 public String getTokens() { 87 public String getTokens() {
82 return tokens; 88 return tokens;
83 } 89 }
@@ -120,6 +126,7 @@ public class OfflineModelConfig { @@ -120,6 +126,7 @@ public class OfflineModelConfig {
120 private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); 126 private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build();
121 private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build(); 127 private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build();
122 private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build(); 128 private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build();
  129 + private OfflineCanaryModelConfig canary = OfflineCanaryModelConfig.builder().build();
123 private String teleSpeech = ""; 130 private String teleSpeech = "";
124 private String tokens = ""; 131 private String tokens = "";
125 private int numThreads = 1; 132 private int numThreads = 1;
@@ -158,6 +165,11 @@ public class OfflineModelConfig { @@ -158,6 +165,11 @@ public class OfflineModelConfig {
158 return this; 165 return this;
159 } 166 }
160 167
  168 + public Builder setCanary(OfflineCanaryModelConfig canary) {
  169 + this.canary = canary;
  170 + return this;
  171 + }
  172 +
161 public Builder setTeleSpeech(String teleSpeech) { 173 public Builder setTeleSpeech(String teleSpeech) {
162 this.teleSpeech = teleSpeech; 174 this.teleSpeech = teleSpeech;
163 return this; 175 return this;
@@ -4,10 +4,22 @@ package com.k2fsa.sherpa.onnx; @@ -4,10 +4,22 @@ package com.k2fsa.sherpa.onnx;
4 4
5 public class OfflineRecognizer { 5 public class OfflineRecognizer {
6 private long ptr = 0; 6 private long ptr = 0;
  7 + private final OfflineRecognizerConfig config;
7 8
8 public OfflineRecognizer(OfflineRecognizerConfig config) { 9 public OfflineRecognizer(OfflineRecognizerConfig config) {
9 LibraryLoader.maybeLoad(); 10 LibraryLoader.maybeLoad();
10 ptr = newFromFile(config); 11 ptr = newFromFile(config);
  12 +
  13 + this.config = config;
  14 + }
  15 +
  16 + public void setConfig(OfflineRecognizerConfig config) {
  17 + setConfig(ptr, config);
  18 + // we don't update this.config
  19 + }
  20 +
  21 + public OfflineRecognizerConfig getConfig() {
  22 + return config;
11 } 23 }
12 24
13 public void decode(OfflineStream s) { 25 public void decode(OfflineStream s) {
@@ -60,6 +72,8 @@ public class OfflineRecognizer { @@ -60,6 +72,8 @@ public class OfflineRecognizer {
60 72
61 private native void decode(long ptr, long streamPtr); 73 private native void decode(long ptr, long streamPtr);
62 74
  75 + private native void setConfig(long ptr, OfflineRecognizerConfig config);
  76 +
63 private native void decodeStreams(long ptr, long[] streamPtrs); 77 private native void decodeStreams(long ptr, long[] streamPtrs);
64 78
65 private native Object[] getResult(long streamPtr); 79 private native Object[] getResult(long streamPtr);
@@ -284,6 +284,39 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { @@ -284,6 +284,39 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
284 ans.model_config.zipformer_ctc.model = p; 284 ans.model_config.zipformer_ctc.model = p;
285 env->ReleaseStringUTFChars(s, p); 285 env->ReleaseStringUTFChars(s, p);
286 286
  287 + // canary
  288 + fid = env->GetFieldID(model_config_cls, "canary",
  289 + "Lcom/k2fsa/sherpa/onnx/OfflineCanaryModelConfig;");
  290 + jobject canary_config = env->GetObjectField(model_config, fid);
  291 + jclass canary_config_cls = env->GetObjectClass(canary_config);
  292 +
  293 + fid = env->GetFieldID(canary_config_cls, "encoder", "Ljava/lang/String;");
  294 + s = (jstring)env->GetObjectField(canary_config, fid);
  295 + p = env->GetStringUTFChars(s, nullptr);
  296 + ans.model_config.canary.encoder = p;
  297 + env->ReleaseStringUTFChars(s, p);
  298 +
  299 + fid = env->GetFieldID(canary_config_cls, "decoder", "Ljava/lang/String;");
  300 + s = (jstring)env->GetObjectField(canary_config, fid);
  301 + p = env->GetStringUTFChars(s, nullptr);
  302 + ans.model_config.canary.decoder = p;
  303 + env->ReleaseStringUTFChars(s, p);
  304 +
  305 + fid = env->GetFieldID(canary_config_cls, "srcLang", "Ljava/lang/String;");
  306 + s = (jstring)env->GetObjectField(canary_config, fid);
  307 + p = env->GetStringUTFChars(s, nullptr);
  308 + ans.model_config.canary.src_lang = p;
  309 + env->ReleaseStringUTFChars(s, p);
  310 +
  311 + fid = env->GetFieldID(canary_config_cls, "tgtLang", "Ljava/lang/String;");
  312 + s = (jstring)env->GetObjectField(canary_config, fid);
  313 + p = env->GetStringUTFChars(s, nullptr);
  314 + ans.model_config.canary.tgt_lang = p;
  315 + env->ReleaseStringUTFChars(s, p);
  316 +
  317 + fid = env->GetFieldID(canary_config_cls, "usePnc", "Z");
  318 + ans.model_config.canary.use_pnc = env->GetBooleanField(canary_config, fid);
  319 +
287 // dolphin 320 // dolphin
288 fid = env->GetFieldID(model_config_cls, "dolphin", 321 fid = env->GetFieldID(model_config_cls, "dolphin",
289 "Lcom/k2fsa/sherpa/onnx/OfflineDolphinModelConfig;"); 322 "Lcom/k2fsa/sherpa/onnx/OfflineDolphinModelConfig;");
@@ -347,10 +380,12 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromAsset(JNIEnv *env, @@ -347,10 +380,12 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromAsset(JNIEnv *env,
347 #endif 380 #endif
348 auto config = sherpa_onnx::GetOfflineConfig(env, _config); 381 auto config = sherpa_onnx::GetOfflineConfig(env, _config);
349 382
350 - // logcat truncates long strings, so we split the string into chunks  
351 - auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);  
352 - for (const auto &s : str_vec) {  
353 - SHERPA_ONNX_LOGE("%s", s.c_str()); 383 + if (config.model_config.debug) {
  384 + // logcat truncates long strings, so we split the string into chunks
  385 + auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);
  386 + for (const auto &s : str_vec) {
  387 + SHERPA_ONNX_LOGE("%s", s.c_str());
  388 + }
354 } 389 }
355 390
356 auto model = new sherpa_onnx::OfflineRecognizer( 391 auto model = new sherpa_onnx::OfflineRecognizer(
@@ -369,9 +404,11 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromFile(JNIEnv *env, @@ -369,9 +404,11 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromFile(JNIEnv *env,
369 jobject _config) { 404 jobject _config) {
370 auto config = sherpa_onnx::GetOfflineConfig(env, _config); 405 auto config = sherpa_onnx::GetOfflineConfig(env, _config);
371 406
372 - auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);  
373 - for (const auto &s : str_vec) {  
374 - SHERPA_ONNX_LOGE("%s", s.c_str()); 407 + if (config.model_config.debug) {
  408 + auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);
  409 + for (const auto &s : str_vec) {
  410 + SHERPA_ONNX_LOGE("%s", s.c_str());
  411 + }
375 } 412 }
376 413
377 if (!config.Validate()) { 414 if (!config.Validate()) {
@@ -388,7 +425,10 @@ SHERPA_ONNX_EXTERN_C @@ -388,7 +425,10 @@ SHERPA_ONNX_EXTERN_C
388 JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_setConfig( 425 JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_setConfig(
389 JNIEnv *env, jobject /*obj*/, jlong ptr, jobject _config) { 426 JNIEnv *env, jobject /*obj*/, jlong ptr, jobject _config) {
390 auto config = sherpa_onnx::GetOfflineConfig(env, _config); 427 auto config = sherpa_onnx::GetOfflineConfig(env, _config);
391 - SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); 428 +
  429 + if (config.model_config.debug) {
  430 + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
  431 + }
392 432
393 auto recognizer = reinterpret_cast<sherpa_onnx::OfflineRecognizer *>(ptr); 433 auto recognizer = reinterpret_cast<sherpa_onnx::OfflineRecognizer *>(ptr);
394 recognizer->SetConfig(config); 434 recognizer->SetConfig(config);
@@ -41,6 +41,14 @@ data class OfflineWhisperModelConfig( @@ -41,6 +41,14 @@ data class OfflineWhisperModelConfig(
41 var tailPaddings: Int = 1000, // Padding added at the end of the samples 41 var tailPaddings: Int = 1000, // Padding added at the end of the samples
42 ) 42 )
43 43
/**
 * Configuration of a NeMo Canary model.
 *
 * The native binding reads these properties by name, so they must not be renamed.
 *
 * @property encoder path of the encoder model file; empty by default
 * @property decoder path of the decoder model file; empty by default
 * @property srcLang source language code, "en" by default (presumably the language of the input audio; confirm against the native implementation)
 * @property tgtLang target language code, "en" by default (presumably the language of the output text; confirm against the native implementation)
 * @property usePnc true by default (presumably toggles punctuation and capitalization in the output; confirm against the native implementation)
 */
data class OfflineCanaryModelConfig(
    var encoder: String = "",
    var decoder: String = "",
    var srcLang: String = "en",
    var tgtLang: String = "en",
    var usePnc: Boolean = true,
)
  51 +
44 data class OfflineFireRedAsrModelConfig( 52 data class OfflineFireRedAsrModelConfig(
45 var encoder: String = "", 53 var encoder: String = "",
46 var decoder: String = "", 54 var decoder: String = "",
@@ -69,6 +77,7 @@ data class OfflineModelConfig( @@ -69,6 +77,7 @@ data class OfflineModelConfig(
69 var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), 77 var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(),
70 var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(), 78 var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(),
71 var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(), 79 var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(),
  80 + var canary: OfflineCanaryModelConfig = OfflineCanaryModelConfig(),
72 var teleSpeech: String = "", 81 var teleSpeech: String = "",
73 var numThreads: Int = 1, 82 var numThreads: Int = 1,
74 var debug: Boolean = false, 83 var debug: Boolean = false,
@@ -95,7 +104,7 @@ data class OfflineRecognizerConfig( @@ -95,7 +104,7 @@ data class OfflineRecognizerConfig(
95 104
96 class OfflineRecognizer( 105 class OfflineRecognizer(
97 assetManager: AssetManager? = null, 106 assetManager: AssetManager? = null,
98 - config: OfflineRecognizerConfig, 107 + val config: OfflineRecognizerConfig,
99 ) { 108 ) {
100 private var ptr: Long 109 private var ptr: Long
101 110
@@ -142,10 +151,14 @@ class OfflineRecognizer( @@ -142,10 +151,14 @@ class OfflineRecognizer(
142 151
143 fun decode(stream: OfflineStream) = decode(ptr, stream.ptr) 152 fun decode(stream: OfflineStream) = decode(ptr, stream.ptr)
144 153
  154 + fun setConfig(config: OfflineRecognizerConfig) = setConfig(ptr, config)
  155 +
145 private external fun delete(ptr: Long) 156 private external fun delete(ptr: Long)
146 157
147 private external fun createStream(ptr: Long): Long 158 private external fun createStream(ptr: Long): Long
148 159
  160 + private external fun setConfig(ptr: Long, config: OfflineRecognizerConfig)
  161 +
149 private external fun newFromAsset( 162 private external fun newFromAsset(
150 assetManager: AssetManager, 163 assetManager: AssetManager,
151 config: OfflineRecognizerConfig, 164 config: OfflineRecognizerConfig,
@@ -574,6 +587,20 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { @@ -574,6 +587,20 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
574 tokens = "$modelDir/tokens.txt", 587 tokens = "$modelDir/tokens.txt",
575 ) 588 )
576 } 589 }
  590 +
  591 + 32 -> {
  592 + val modelDir = "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8"
  593 + return OfflineModelConfig(
  594 + canary = OfflineCanaryModelConfig(
  595 + encoder = "$modelDir/encoder.int8.onnx",
  596 + decoder = "$modelDir/decoder.int8.onnx",
  597 + srcLang = "en",
  598 + tgtLang = "en",
  599 + usePnc = true,
  600 + ),
  601 + tokens = "$modelDir/tokens.txt",
  602 + )
  603 + }
577 } 604 }
578 return null 605 return null
579 } 606 }