Committed by
GitHub
Add Java and Kotlin API for NeMo Canary models (#2359)
Add support for the NeMo Canary model in both Java and Kotlin APIs, wiring it through JNI and updating examples and CI. - Introduce OfflineCanaryModelConfig in Kotlin and Java with builder patterns - Extend OfflineRecognizer to accept and apply the new canary config via setConfig - Update JNI binding (GetOfflineConfig) and getOfflineModelConfig mapping (type 32), plus examples and CI workflows
正在显示
12 个修改的文件
包含
363 行增加
和
11 行删除
| @@ -117,6 +117,13 @@ jobs: | @@ -117,6 +117,13 @@ jobs: | ||
| 117 | cd ./java-api-examples | 117 | cd ./java-api-examples |
| 118 | ./run-version-test.sh | 118 | ./run-version-test.sh |
| 119 | 119 | ||
| 120 | + - name: Run java test (Nemo Canary) | ||
| 121 | + shell: bash | ||
| 122 | + run: | | ||
| 123 | + cd ./java-api-examples | ||
| 124 | + ./run-non-streaming-decode-file-nemo-canary.sh | ||
| 125 | + rm -rf sherpa-onnx-nemo-* | ||
| 126 | + | ||
| 120 | - name: Run java test (Non-streaming SenseVoice with homophone replacer) | 127 | - name: Run java test (Non-streaming SenseVoice with homophone replacer) |
| 121 | shell: bash | 128 | shell: bash |
| 122 | run: | | 129 | run: | |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use an offline NeMo Canary model, i.e., | ||
| 4 | +// non-streaming NeMo Canary model, to decode files. | ||
| 5 | +import com.k2fsa.sherpa.onnx.*; | ||
| 6 | + | ||
| 7 | +public class NonStreamingDecodeFileNemoCanary { | ||
| 8 | + public static void main(String[] args) { | ||
| 9 | + // please refer to | ||
| 10 | + // https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html | ||
| 11 | + // to download model files | ||
| 12 | + String encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx"; | ||
| 13 | + String decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx"; | ||
| 14 | + String tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt"; | ||
| 15 | + | ||
| 16 | + String waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"; | ||
| 17 | + | ||
| 18 | + WaveReader reader = new WaveReader(waveFilename); | ||
| 19 | + | ||
| 20 | + OfflineCanaryModelConfig canary = | ||
| 21 | + OfflineCanaryModelConfig.builder() | ||
| 22 | + .setEncoder(encoder) | ||
| 23 | + .setDecoder(decoder) | ||
| 24 | + .setSrcLang("en") | ||
| 25 | + .setTgtLang("en") | ||
| 26 | + .setUsePnc(true) | ||
| 27 | + .build(); | ||
| 28 | + | ||
| 29 | + OfflineModelConfig modelConfig = | ||
| 30 | + OfflineModelConfig.builder() | ||
| 31 | + .setCanary(canary) | ||
| 32 | + .setTokens(tokens) | ||
| 33 | + .setNumThreads(1) | ||
| 34 | + .setDebug(true) | ||
| 35 | + .build(); | ||
| 36 | + | ||
| 37 | + OfflineRecognizerConfig config = | ||
| 38 | + OfflineRecognizerConfig.builder() | ||
| 39 | + .setOfflineModelConfig(modelConfig) | ||
| 40 | + .setDecodingMethod("greedy_search") | ||
| 41 | + .build(); | ||
| 42 | + | ||
| 43 | + OfflineRecognizer recognizer = new OfflineRecognizer(config); | ||
| 44 | + OfflineStream stream = recognizer.createStream(); | ||
| 45 | + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
| 46 | + | ||
| 47 | + recognizer.decode(stream); | ||
| 48 | + | ||
| 49 | + String text = recognizer.getResult(stream).getText(); | ||
| 50 | + | ||
| 51 | + System.out.printf("filename:%s\nresult(English):%s\n", waveFilename, text); | ||
| 52 | + | ||
| 53 | + stream.release(); | ||
| 54 | + recognizer.release(); | ||
| 55 | + } | ||
| 56 | +} |
| @@ -24,11 +24,18 @@ This directory contains examples for the JAVA API of sherpa-onnx. | @@ -24,11 +24,18 @@ This directory contains examples for the JAVA API of sherpa-onnx. | ||
| 24 | 24 | ||
| 25 | ```bash | 25 | ```bash |
| 26 | ./run-non-streaming-decode-file-dolphin-ctc.sh | 26 | ./run-non-streaming-decode-file-dolphin-ctc.sh |
| 27 | +./run-non-streaming-decode-file-fire-red-asr.sh | ||
| 28 | +./run-non-streaming-decode-file-moonshine.sh | ||
| 29 | +./run-non-streaming-decode-file-nemo-canary.sh | ||
| 30 | +./run-non-streaming-decode-file-nemo.sh | ||
| 27 | ./run-non-streaming-decode-file-paraformer.sh | 31 | ./run-non-streaming-decode-file-paraformer.sh |
| 28 | ./run-non-streaming-decode-file-sense-voice.sh | 32 | ./run-non-streaming-decode-file-sense-voice.sh |
| 33 | +./run-non-streaming-decode-file-tele-speech-ctc.sh | ||
| 34 | +./run-non-streaming-decode-file-transducer-hotwords.sh | ||
| 29 | ./run-non-streaming-decode-file-transducer.sh | 35 | ./run-non-streaming-decode-file-transducer.sh |
| 36 | +./run-non-streaming-decode-file-whisper-multiple.sh | ||
| 30 | ./run-non-streaming-decode-file-whisper.sh | 37 | ./run-non-streaming-decode-file-whisper.sh |
| 31 | -./run-non-streaming-decode-file-nemo.sh | 38 | +./run-non-streaming-decode-file-zipformer-ctc.sh |
| 32 | ``` | 39 | ``` |
| 33 | 40 | ||
| 34 | ## Non-Streaming Speech recognition with homophone replacer | 41 | ## Non-Streaming Speech recognition with homophone replacer |
#!/usr/bin/env bash
#
# Runs the NonStreamingDecodeFileNemoCanary Java example.
#
# Steps, each skipped when its output already exists:
#   1. build the JNI shared library under ../build/lib
#   2. build ../sherpa-onnx/java-api/build/sherpa-onnx.jar
#   3. download and unpack the int8 NeMo Canary model
# and finally launch the example with the single-file source launcher.

set -ex

# Build the native JNI library if neither the macOS nor the Linux artifact exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Fetch the model; the archive is deleted once it has been extracted.
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces or glob
# characters is passed to the JVM as a single option.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileNemoCanary.java
| @@ -455,8 +455,31 @@ function testOfflineSenseVoiceWithHr() { | @@ -455,8 +455,31 @@ function testOfflineSenseVoiceWithHr() { | ||
| 455 | ls -lh $out_filename | 455 | ls -lh $out_filename |
| 456 | java -Djava.library.path=../build/lib -jar $out_filename | 456 | java -Djava.library.path=../build/lib -jar $out_filename |
| 457 | } | 457 | } |
| 458 | -testVersion | ||
| 459 | 458 | ||
# Compiles test_offline_nemo_canary.kt together with the API sources it needs
# into a runnable jar and executes it against the int8 NeMo Canary model,
# downloading the model first when its encoder file is not present.
function testOfflineNeMoCanary() {
  if ! [ -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
    tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
    rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  fi

  # Intentionally not declared local: the variable is set the same way as in
  # the original function.
  out_filename=test_offline_nemo_canary.jar

  kotlinc-jvm \
    -include-runtime \
    -d $out_filename \
    test_offline_nemo_canary.kt \
    FeatureConfig.kt \
    HomophoneReplacerConfig.kt \
    OfflineRecognizer.kt \
    OfflineStream.kt \
    WaveReader.kt \
    faked-asset-manager.kt

  ls -lh $out_filename

  java -Djava.library.path=../build/lib -jar $out_filename
}
| 479 | + | ||
| 480 | +# testVersion | ||
| 481 | + | ||
| 482 | +testOfflineNeMoCanary | ||
| 460 | testOfflineSenseVoiceWithHr | 483 | testOfflineSenseVoiceWithHr |
| 461 | testOfflineSpeechDenoiser | 484 | testOfflineSpeechDenoiser |
| 462 | testOfflineSpeakerDiarization | 485 | testOfflineSpeakerDiarization |
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
/**
 * Decodes the bundled English test wave twice with the NeMo Canary model:
 * first with the recognizer's initial configuration, then again after
 * switching the Canary target language to German via [OfflineRecognizer.setConfig].
 */
fun main() {
    val recognizer = createOfflineRecognizer()
    val waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"

    // readWaveFromFile returns an array: [0] holds the samples, [1] the sample rate.
    val wave = WaveReader.readWaveFromFile(
        filename = waveFilename,
    )
    val samples = wave[0] as FloatArray
    val sampleRate = wave[1] as Int

    // Feeds the whole wave into a fresh stream and decodes it with the
    // recognizer's current configuration. The caller releases the stream.
    fun decodeOnce(): OfflineStream {
        val fresh = recognizer.createStream()
        fresh.acceptWaveform(samples, sampleRate=sampleRate)
        recognizer.decode(fresh)
        return fresh
    }

    decodeOnce().let { stream ->
        val result = recognizer.getResult(stream)
        println("English: $result")
        stream.release()
    }

    // now output text in German
    val initial = recognizer.config
    val germanCanary = initial.modelConfig.canary.copy(tgtLang="de")
    val germanModel = initial.modelConfig.copy(canary=germanCanary)
    recognizer.setConfig(initial.copy(modelConfig=germanModel))

    decodeOnce().let { stream ->
        val result = recognizer.getResult(stream)
        println("German: $result")
        stream.release()
    }

    recognizer.release()
}
| 40 | + | ||
| 41 | + | ||
/**
 * Builds an [OfflineRecognizer] from the predefined model configuration
 * number 32. The non-null assertion makes this throw if
 * [getOfflineModelConfig] returns null for that type.
 */
fun createOfflineRecognizer(): OfflineRecognizer {
    val modelConfig = getOfflineModelConfig(type = 32)!!
    val recognizerConfig = OfflineRecognizerConfig(modelConfig = modelConfig)
    return OfflineRecognizer(config = recognizerConfig)
}
| @@ -34,6 +34,7 @@ java_files += OfflineFireRedAsrModelConfig.java | @@ -34,6 +34,7 @@ java_files += OfflineFireRedAsrModelConfig.java | ||
| 34 | java_files += OfflineMoonshineModelConfig.java | 34 | java_files += OfflineMoonshineModelConfig.java |
| 35 | java_files += OfflineNemoEncDecCtcModelConfig.java | 35 | java_files += OfflineNemoEncDecCtcModelConfig.java |
| 36 | java_files += OfflineZipformerCtcModelConfig.java | 36 | java_files += OfflineZipformerCtcModelConfig.java |
| 37 | +java_files += OfflineCanaryModelConfig.java | ||
| 37 | java_files += OfflineSenseVoiceModelConfig.java | 38 | java_files += OfflineSenseVoiceModelConfig.java |
| 38 | java_files += OfflineDolphinModelConfig.java | 39 | java_files += OfflineDolphinModelConfig.java |
| 39 | java_files += OfflineModelConfig.java | 40 | java_files += OfflineModelConfig.java |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
/**
 * Immutable configuration of an offline (non-streaming) NeMo Canary model.
 *
 * <p>Instances are created through {@link #builder()}. The private field
 * names ({@code encoder}, {@code decoder}, {@code srcLang}, {@code tgtLang},
 * {@code usePnc}) are looked up by name from the JNI layer, so they must not
 * be renamed.
 *
 * <p>All string values are guaranteed to be non-null: the builder rejects
 * {@code null} immediately, because the native side converts each string
 * field with {@code GetStringUTFChars} without a null check.
 */
public class OfflineCanaryModelConfig {
    private final String encoder;
    private final String decoder;
    private final String srcLang;
    private final String tgtLang;
    private final boolean usePnc;

    private OfflineCanaryModelConfig(Builder builder) {
        this.encoder = builder.encoder;
        this.decoder = builder.decoder;
        this.srcLang = builder.srcLang;
        this.tgtLang = builder.tgtLang;
        this.usePnc = builder.usePnc;
    }

    /**
     * Creates a builder pre-populated with the defaults: empty encoder and
     * decoder paths, source and target language {@code "en"}, and
     * {@code usePnc} set to {@code true}.
     *
     * @return a new builder
     */
    public static Builder builder() {
        return new Builder();
    }

    /** @return the encoder model path; never {@code null} */
    public String getEncoder() {
        return encoder;
    }

    /** @return the decoder model path; never {@code null} */
    public String getDecoder() {
        return decoder;
    }

    /** @return the source language code; never {@code null} */
    public String getSrcLang() {
        return srcLang;
    }

    /** @return the target language code; never {@code null} */
    public String getTgtLang() {
        return tgtLang;
    }

    /** @return the value of the {@code usePnc} flag */
    public boolean isUsePnc() {
        return usePnc;
    }

    /** Fluent builder for {@link OfflineCanaryModelConfig}. */
    public static class Builder {
        private String encoder = "";
        private String decoder = "";
        private String srcLang = "en";
        private String tgtLang = "en";
        private boolean usePnc = true;

        /**
         * Builds the configuration from the values set so far.
         *
         * @return a new immutable configuration
         */
        public OfflineCanaryModelConfig build() {
            return new OfflineCanaryModelConfig(this);
        }

        /**
         * @param encoder path of the encoder model file
         * @return this builder
         * @throws NullPointerException if {@code encoder} is {@code null}
         */
        public Builder setEncoder(String encoder) {
            this.encoder = java.util.Objects.requireNonNull(encoder, "encoder");
            return this;
        }

        /**
         * @param decoder path of the decoder model file
         * @return this builder
         * @throws NullPointerException if {@code decoder} is {@code null}
         */
        public Builder setDecoder(String decoder) {
            this.decoder = java.util.Objects.requireNonNull(decoder, "decoder");
            return this;
        }

        /**
         * @param srcLang source language code, e.g. {@code "en"}
         * @return this builder
         * @throws NullPointerException if {@code srcLang} is {@code null}
         */
        public Builder setSrcLang(String srcLang) {
            this.srcLang = java.util.Objects.requireNonNull(srcLang, "srcLang");
            return this;
        }

        /**
         * @param tgtLang target language code, e.g. {@code "en"}
         * @return this builder
         * @throws NullPointerException if {@code tgtLang} is {@code null}
         */
        public Builder setTgtLang(String tgtLang) {
            this.tgtLang = java.util.Objects.requireNonNull(tgtLang, "tgtLang");
            return this;
        }

        /**
         * @param usePnc value for the {@code usePnc} flag
         * @return this builder
         */
        public Builder setUsePnc(boolean usePnc) {
            this.usePnc = usePnc;
            return this;
        }
    }
}
| @@ -12,6 +12,7 @@ public class OfflineModelConfig { | @@ -12,6 +12,7 @@ public class OfflineModelConfig { | ||
| 12 | private final OfflineSenseVoiceModelConfig senseVoice; | 12 | private final OfflineSenseVoiceModelConfig senseVoice; |
| 13 | private final OfflineDolphinModelConfig dolphin; | 13 | private final OfflineDolphinModelConfig dolphin; |
| 14 | private final OfflineZipformerCtcModelConfig zipformerCtc; | 14 | private final OfflineZipformerCtcModelConfig zipformerCtc; |
| 15 | + private final OfflineCanaryModelConfig canary; | ||
| 15 | private final String teleSpeech; | 16 | private final String teleSpeech; |
| 16 | private final String tokens; | 17 | private final String tokens; |
| 17 | private final int numThreads; | 18 | private final int numThreads; |
| @@ -30,6 +31,7 @@ public class OfflineModelConfig { | @@ -30,6 +31,7 @@ public class OfflineModelConfig { | ||
| 30 | this.moonshine = builder.moonshine; | 31 | this.moonshine = builder.moonshine; |
| 31 | this.nemo = builder.nemo; | 32 | this.nemo = builder.nemo; |
| 32 | this.zipformerCtc = builder.zipformerCtc; | 33 | this.zipformerCtc = builder.zipformerCtc; |
| 34 | + this.canary = builder.canary; | ||
| 33 | this.senseVoice = builder.senseVoice; | 35 | this.senseVoice = builder.senseVoice; |
| 34 | this.dolphin = builder.dolphin; | 36 | this.dolphin = builder.dolphin; |
| 35 | this.teleSpeech = builder.teleSpeech; | 37 | this.teleSpeech = builder.teleSpeech; |
| @@ -78,6 +80,10 @@ public class OfflineModelConfig { | @@ -78,6 +80,10 @@ public class OfflineModelConfig { | ||
| 78 | return zipformerCtc; | 80 | return zipformerCtc; |
| 79 | } | 81 | } |
| 80 | 82 | ||
| 83 | + public OfflineCanaryModelConfig getCanary() { | ||
| 84 | + return canary; | ||
| 85 | + } | ||
| 86 | + | ||
| 81 | public String getTokens() { | 87 | public String getTokens() { |
| 82 | return tokens; | 88 | return tokens; |
| 83 | } | 89 | } |
| @@ -120,6 +126,7 @@ public class OfflineModelConfig { | @@ -120,6 +126,7 @@ public class OfflineModelConfig { | ||
| 120 | private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); | 126 | private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); |
| 121 | private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build(); | 127 | private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build(); |
| 122 | private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build(); | 128 | private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build(); |
| 129 | + private OfflineCanaryModelConfig canary = OfflineCanaryModelConfig.builder().build(); | ||
| 123 | private String teleSpeech = ""; | 130 | private String teleSpeech = ""; |
| 124 | private String tokens = ""; | 131 | private String tokens = ""; |
| 125 | private int numThreads = 1; | 132 | private int numThreads = 1; |
| @@ -158,6 +165,11 @@ public class OfflineModelConfig { | @@ -158,6 +165,11 @@ public class OfflineModelConfig { | ||
| 158 | return this; | 165 | return this; |
| 159 | } | 166 | } |
| 160 | 167 | ||
| 168 | + public Builder setCanary(OfflineCanaryModelConfig canary) { | ||
| 169 | + this.canary = canary; | ||
| 170 | + return this; | ||
| 171 | + } | ||
| 172 | + | ||
| 161 | public Builder setTeleSpeech(String teleSpeech) { | 173 | public Builder setTeleSpeech(String teleSpeech) { |
| 162 | this.teleSpeech = teleSpeech; | 174 | this.teleSpeech = teleSpeech; |
| 163 | return this; | 175 | return this; |
| @@ -4,10 +4,22 @@ package com.k2fsa.sherpa.onnx; | @@ -4,10 +4,22 @@ package com.k2fsa.sherpa.onnx; | ||
| 4 | 4 | ||
| 5 | public class OfflineRecognizer { | 5 | public class OfflineRecognizer { |
| 6 | private long ptr = 0; | 6 | private long ptr = 0; |
| 7 | + private final OfflineRecognizerConfig config; | ||
| 7 | 8 | ||
| 8 | public OfflineRecognizer(OfflineRecognizerConfig config) { | 9 | public OfflineRecognizer(OfflineRecognizerConfig config) { |
| 9 | LibraryLoader.maybeLoad(); | 10 | LibraryLoader.maybeLoad(); |
| 10 | ptr = newFromFile(config); | 11 | ptr = newFromFile(config); |
| 12 | + | ||
| 13 | + this.config = config; | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + public void setConfig(OfflineRecognizerConfig config) { | ||
| 17 | + setConfig(ptr, config); | ||
| 18 | + // we don't update this.config | ||
| 19 | + } | ||
| 20 | + | ||
| 21 | + public OfflineRecognizerConfig getConfig() { | ||
| 22 | + return config; | ||
| 11 | } | 23 | } |
| 12 | 24 | ||
| 13 | public void decode(OfflineStream s) { | 25 | public void decode(OfflineStream s) { |
| @@ -60,6 +72,8 @@ public class OfflineRecognizer { | @@ -60,6 +72,8 @@ public class OfflineRecognizer { | ||
| 60 | 72 | ||
| 61 | private native void decode(long ptr, long streamPtr); | 73 | private native void decode(long ptr, long streamPtr); |
| 62 | 74 | ||
| 75 | + private native void setConfig(long ptr, OfflineRecognizerConfig config); | ||
| 76 | + | ||
| 63 | private native void decodeStreams(long ptr, long[] streamPtrs); | 77 | private native void decodeStreams(long ptr, long[] streamPtrs); |
| 64 | 78 | ||
| 65 | private native Object[] getResult(long streamPtr); | 79 | private native Object[] getResult(long streamPtr); |
| @@ -284,6 +284,39 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | @@ -284,6 +284,39 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | ||
| 284 | ans.model_config.zipformer_ctc.model = p; | 284 | ans.model_config.zipformer_ctc.model = p; |
| 285 | env->ReleaseStringUTFChars(s, p); | 285 | env->ReleaseStringUTFChars(s, p); |
| 286 | 286 | ||
| 287 | + // canary | ||
| 288 | + fid = env->GetFieldID(model_config_cls, "canary", | ||
| 289 | + "Lcom/k2fsa/sherpa/onnx/OfflineCanaryModelConfig;"); | ||
| 290 | + jobject canary_config = env->GetObjectField(model_config, fid); | ||
| 291 | + jclass canary_config_cls = env->GetObjectClass(canary_config); | ||
| 292 | + | ||
| 293 | + fid = env->GetFieldID(canary_config_cls, "encoder", "Ljava/lang/String;"); | ||
| 294 | + s = (jstring)env->GetObjectField(canary_config, fid); | ||
| 295 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 296 | + ans.model_config.canary.encoder = p; | ||
| 297 | + env->ReleaseStringUTFChars(s, p); | ||
| 298 | + | ||
| 299 | + fid = env->GetFieldID(canary_config_cls, "decoder", "Ljava/lang/String;"); | ||
| 300 | + s = (jstring)env->GetObjectField(canary_config, fid); | ||
| 301 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 302 | + ans.model_config.canary.decoder = p; | ||
| 303 | + env->ReleaseStringUTFChars(s, p); | ||
| 304 | + | ||
| 305 | + fid = env->GetFieldID(canary_config_cls, "srcLang", "Ljava/lang/String;"); | ||
| 306 | + s = (jstring)env->GetObjectField(canary_config, fid); | ||
| 307 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 308 | + ans.model_config.canary.src_lang = p; | ||
| 309 | + env->ReleaseStringUTFChars(s, p); | ||
| 310 | + | ||
| 311 | + fid = env->GetFieldID(canary_config_cls, "tgtLang", "Ljava/lang/String;"); | ||
| 312 | + s = (jstring)env->GetObjectField(canary_config, fid); | ||
| 313 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 314 | + ans.model_config.canary.tgt_lang = p; | ||
| 315 | + env->ReleaseStringUTFChars(s, p); | ||
| 316 | + | ||
| 317 | + fid = env->GetFieldID(canary_config_cls, "usePnc", "Z"); | ||
| 318 | + ans.model_config.canary.use_pnc = env->GetBooleanField(canary_config, fid); | ||
| 319 | + | ||
| 287 | // dolphin | 320 | // dolphin |
| 288 | fid = env->GetFieldID(model_config_cls, "dolphin", | 321 | fid = env->GetFieldID(model_config_cls, "dolphin", |
| 289 | "Lcom/k2fsa/sherpa/onnx/OfflineDolphinModelConfig;"); | 322 | "Lcom/k2fsa/sherpa/onnx/OfflineDolphinModelConfig;"); |
| @@ -347,10 +380,12 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromAsset(JNIEnv *env, | @@ -347,10 +380,12 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromAsset(JNIEnv *env, | ||
| 347 | #endif | 380 | #endif |
| 348 | auto config = sherpa_onnx::GetOfflineConfig(env, _config); | 381 | auto config = sherpa_onnx::GetOfflineConfig(env, _config); |
| 349 | 382 | ||
| 350 | - // logcat truncates long strings, so we split the string into chunks | ||
| 351 | - auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128); | ||
| 352 | - for (const auto &s : str_vec) { | ||
| 353 | - SHERPA_ONNX_LOGE("%s", s.c_str()); | 383 | + if (config.model_config.debug) { |
| 384 | + // logcat truncates long strings, so we split the string into chunks | ||
| 385 | + auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128); | ||
| 386 | + for (const auto &s : str_vec) { | ||
| 387 | + SHERPA_ONNX_LOGE("%s", s.c_str()); | ||
| 388 | + } | ||
| 354 | } | 389 | } |
| 355 | 390 | ||
| 356 | auto model = new sherpa_onnx::OfflineRecognizer( | 391 | auto model = new sherpa_onnx::OfflineRecognizer( |
| @@ -369,9 +404,11 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromFile(JNIEnv *env, | @@ -369,9 +404,11 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromFile(JNIEnv *env, | ||
| 369 | jobject _config) { | 404 | jobject _config) { |
| 370 | auto config = sherpa_onnx::GetOfflineConfig(env, _config); | 405 | auto config = sherpa_onnx::GetOfflineConfig(env, _config); |
| 371 | 406 | ||
| 372 | - auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128); | ||
| 373 | - for (const auto &s : str_vec) { | ||
| 374 | - SHERPA_ONNX_LOGE("%s", s.c_str()); | 407 | + if (config.model_config.debug) { |
| 408 | + auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128); | ||
| 409 | + for (const auto &s : str_vec) { | ||
| 410 | + SHERPA_ONNX_LOGE("%s", s.c_str()); | ||
| 411 | + } | ||
| 375 | } | 412 | } |
| 376 | 413 | ||
| 377 | if (!config.Validate()) { | 414 | if (!config.Validate()) { |
| @@ -388,7 +425,10 @@ SHERPA_ONNX_EXTERN_C | @@ -388,7 +425,10 @@ SHERPA_ONNX_EXTERN_C | ||
| 388 | JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_setConfig( | 425 | JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_setConfig( |
| 389 | JNIEnv *env, jobject /*obj*/, jlong ptr, jobject _config) { | 426 | JNIEnv *env, jobject /*obj*/, jlong ptr, jobject _config) { |
| 390 | auto config = sherpa_onnx::GetOfflineConfig(env, _config); | 427 | auto config = sherpa_onnx::GetOfflineConfig(env, _config); |
| 391 | - SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | 428 | + |
| 429 | + if (config.model_config.debug) { | ||
| 430 | + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | ||
| 431 | + } | ||
| 392 | 432 | ||
| 393 | auto recognizer = reinterpret_cast<sherpa_onnx::OfflineRecognizer *>(ptr); | 433 | auto recognizer = reinterpret_cast<sherpa_onnx::OfflineRecognizer *>(ptr); |
| 394 | recognizer->SetConfig(config); | 434 | recognizer->SetConfig(config); |
| @@ -41,6 +41,14 @@ data class OfflineWhisperModelConfig( | @@ -41,6 +41,14 @@ data class OfflineWhisperModelConfig( | ||
| 41 | var tailPaddings: Int = 1000, // Padding added at the end of the samples | 41 | var tailPaddings: Int = 1000, // Padding added at the end of the samples |
| 42 | ) | 42 | ) |
| 43 | 43 | ||
| 44 | +data class OfflineCanaryModelConfig( | ||
| 45 | + var encoder: String = "", | ||
| 46 | + var decoder: String = "", | ||
| 47 | + var srcLang: String = "en", | ||
| 48 | + var tgtLang: String = "en", | ||
| 49 | + var usePnc: Boolean = true, | ||
| 50 | +) | ||
| 51 | + | ||
| 44 | data class OfflineFireRedAsrModelConfig( | 52 | data class OfflineFireRedAsrModelConfig( |
| 45 | var encoder: String = "", | 53 | var encoder: String = "", |
| 46 | var decoder: String = "", | 54 | var decoder: String = "", |
| @@ -69,6 +77,7 @@ data class OfflineModelConfig( | @@ -69,6 +77,7 @@ data class OfflineModelConfig( | ||
| 69 | var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), | 77 | var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), |
| 70 | var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(), | 78 | var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(), |
| 71 | var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(), | 79 | var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(), |
| 80 | + var canary: OfflineCanaryModelConfig = OfflineCanaryModelConfig(), | ||
| 72 | var teleSpeech: String = "", | 81 | var teleSpeech: String = "", |
| 73 | var numThreads: Int = 1, | 82 | var numThreads: Int = 1, |
| 74 | var debug: Boolean = false, | 83 | var debug: Boolean = false, |
| @@ -95,7 +104,7 @@ data class OfflineRecognizerConfig( | @@ -95,7 +104,7 @@ data class OfflineRecognizerConfig( | ||
| 95 | 104 | ||
| 96 | class OfflineRecognizer( | 105 | class OfflineRecognizer( |
| 97 | assetManager: AssetManager? = null, | 106 | assetManager: AssetManager? = null, |
| 98 | - config: OfflineRecognizerConfig, | 107 | + val config: OfflineRecognizerConfig, |
| 99 | ) { | 108 | ) { |
| 100 | private var ptr: Long | 109 | private var ptr: Long |
| 101 | 110 | ||
| @@ -142,10 +151,14 @@ class OfflineRecognizer( | @@ -142,10 +151,14 @@ class OfflineRecognizer( | ||
| 142 | 151 | ||
| 143 | fun decode(stream: OfflineStream) = decode(ptr, stream.ptr) | 152 | fun decode(stream: OfflineStream) = decode(ptr, stream.ptr) |
| 144 | 153 | ||
| 154 | + fun setConfig(config: OfflineRecognizerConfig) = setConfig(ptr, config) | ||
| 155 | + | ||
| 145 | private external fun delete(ptr: Long) | 156 | private external fun delete(ptr: Long) |
| 146 | 157 | ||
| 147 | private external fun createStream(ptr: Long): Long | 158 | private external fun createStream(ptr: Long): Long |
| 148 | 159 | ||
| 160 | + private external fun setConfig(ptr: Long, config: OfflineRecognizerConfig) | ||
| 161 | + | ||
| 149 | private external fun newFromAsset( | 162 | private external fun newFromAsset( |
| 150 | assetManager: AssetManager, | 163 | assetManager: AssetManager, |
| 151 | config: OfflineRecognizerConfig, | 164 | config: OfflineRecognizerConfig, |
| @@ -574,6 +587,20 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | @@ -574,6 +587,20 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | ||
| 574 | tokens = "$modelDir/tokens.txt", | 587 | tokens = "$modelDir/tokens.txt", |
| 575 | ) | 588 | ) |
| 576 | } | 589 | } |
| 590 | + | ||
| 591 | + 32 -> { | ||
| 592 | + val modelDir = "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8" | ||
| 593 | + return OfflineModelConfig( | ||
| 594 | + canary = OfflineCanaryModelConfig( | ||
| 595 | + encoder = "$modelDir/encoder.int8.onnx", | ||
| 596 | + decoder = "$modelDir/decoder.int8.onnx", | ||
| 597 | + srcLang = "en", | ||
| 598 | + tgtLang = "en", | ||
| 599 | + usePnc = true, | ||
| 600 | + ), | ||
| 601 | + tokens = "$modelDir/tokens.txt", | ||
| 602 | + ) | ||
| 603 | + } | ||
| 577 | } | 604 | } |
| 578 | return null | 605 | return null |
| 579 | } | 606 | } |
-
请 注册 或 登录 后发表评论