Committed by GitHub
Add Kotlin and Java API for FireRedAsr AED model (#1870)
正在显示 10 个修改的文件，包含 188 行增加和 1 行删除
| @@ -124,6 +124,9 @@ jobs: | @@ -124,6 +124,9 @@ jobs: | ||
| 124 | ./run-non-streaming-decode-file-transducer.sh | 124 | ./run-non-streaming-decode-file-transducer.sh |
| 125 | rm -rf sherpa-onnx-zipformer-* | 125 | rm -rf sherpa-onnx-zipformer-* |
| 126 | 126 | ||
| 127 | + ./run-non-streaming-decode-file-fire-red-asr.sh | ||
| 128 | + rm -rf sherpa-onnx-fire-red-* | ||
| 129 | + | ||
| 127 | ./run-non-streaming-decode-file-whisper.sh | 130 | ./run-non-streaming-decode-file-whisper.sh |
| 128 | rm -rf sherpa-onnx-whisper-* | 131 | rm -rf sherpa-onnx-whisper-* |
| 129 | 132 |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use an offline FireRedAsr AED model | ||
| 4 | +// to decode files. | ||
| 5 | +import com.k2fsa.sherpa.onnx.*; | ||
| 6 | + | ||
| 7 | +public class NonStreamingDecodeFileFireRedAsr { | ||
| 8 | + public static void main(String[] args) { | ||
| 9 | + // please refer to | ||
| 10 | + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/FireRedAsr/index.html | ||
| 11 | + // to download model files | ||
| 12 | + String encoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx"; | ||
| 13 | + String decoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx"; | ||
| 14 | + String tokens = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt"; | ||
| 15 | + | ||
| 16 | + String waveFilename = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav"; | ||
| 17 | + | ||
| 18 | + WaveReader reader = new WaveReader(waveFilename); | ||
| 19 | + | ||
| 20 | + OfflineFireRedAsrModelConfig fireRedAsr = | ||
| 21 | + OfflineFireRedAsrModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build(); | ||
| 22 | + | ||
| 23 | + OfflineModelConfig modelConfig = | ||
| 24 | + OfflineModelConfig.builder() | ||
| 25 | + .setFireRedAsr(fireRedAsr) | ||
| 26 | + .setTokens(tokens) | ||
| 27 | + .setNumThreads(2) | ||
| 28 | + .setDebug(true) | ||
| 29 | + .build(); | ||
| 30 | + | ||
| 31 | + OfflineRecognizerConfig config = | ||
| 32 | + OfflineRecognizerConfig.builder() | ||
| 33 | + .setOfflineModelConfig(modelConfig) | ||
| 34 | + .setDecodingMethod("greedy_search") | ||
| 35 | + .build(); | ||
| 36 | + | ||
| 37 | + OfflineRecognizer recognizer = new OfflineRecognizer(config); | ||
| 38 | + OfflineStream stream = recognizer.createStream(); | ||
| 39 | + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
| 40 | + | ||
| 41 | + recognizer.decode(stream); | ||
| 42 | + | ||
| 43 | + String text = recognizer.getResult(stream).getText(); | ||
| 44 | + | ||
| 45 | + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); | ||
| 46 | + | ||
| 47 | + stream.release(); | ||
| 48 | + recognizer.release(); | ||
| 49 | + } | ||
| 50 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 6 | + mkdir -p ../build | ||
| 7 | + pushd ../build | ||
| 8 | + cmake \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 11 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 12 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 14 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 15 | + .. | ||
| 16 | + | ||
| 17 | + make -j4 | ||
| 18 | + ls -lh lib | ||
| 19 | + popd | ||
| 20 | +fi | ||
| 21 | + | ||
| 22 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 23 | + pushd ../sherpa-onnx/java-api | ||
| 24 | + make | ||
| 25 | + popd | ||
| 26 | +fi | ||
| 27 | + | ||
| 28 | +if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then | ||
| 29 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 30 | + tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 31 | + rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 32 | + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 | ||
| 33 | +fi | ||
| 34 | + | ||
| 35 | +java \ | ||
| 36 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 37 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 38 | + NonStreamingDecodeFileFireRedAsr.java |
| @@ -190,6 +190,13 @@ function testSpokenLanguageIdentification() { | @@ -190,6 +190,13 @@ function testSpokenLanguageIdentification() { | ||
| 190 | } | 190 | } |
| 191 | 191 | ||
| 192 | function testOfflineAsr() { | 192 | function testOfflineAsr() { |
| 193 | + if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then | ||
| 194 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 195 | + tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 196 | + rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 197 | + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 | ||
| 198 | + fi | ||
| 199 | + | ||
| 193 | if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then | 200 | if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then |
| 194 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 | 201 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 |
| 195 | tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 | 202 | tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 |
| 1 | package com.k2fsa.sherpa.onnx | 1 | package com.k2fsa.sherpa.onnx |
| 2 | 2 | ||
| 3 | fun main() { | 3 | fun main() { |
| 4 | - val types = arrayOf(0, 2, 5, 6, 15, 21) | 4 | + val types = arrayOf(0, 2, 5, 6, 15, 21, 24) |
| 5 | for (type in types) { | 5 | for (type in types) { |
| 6 | test(type) | 6 | test(type) |
| 7 | } | 7 | } |
| @@ -17,6 +17,7 @@ fun test(type: Int) { | @@ -17,6 +17,7 @@ fun test(type: Int) { | ||
| 17 | 6 -> "./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/8k.wav" | 17 | 6 -> "./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/8k.wav" |
| 18 | 15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav" | 18 | 15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav" |
| 19 | 21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav" | 19 | 21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav" |
| 20 | + 24 -> "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav" | ||
| 20 | else -> null | 21 | else -> null |
| 21 | } | 22 | } |
| 22 | 23 |
| @@ -26,6 +26,7 @@ java_files += OnlineRecognizer.java | @@ -26,6 +26,7 @@ java_files += OnlineRecognizer.java | ||
| 26 | java_files += OfflineTransducerModelConfig.java | 26 | java_files += OfflineTransducerModelConfig.java |
| 27 | java_files += OfflineParaformerModelConfig.java | 27 | java_files += OfflineParaformerModelConfig.java |
| 28 | java_files += OfflineWhisperModelConfig.java | 28 | java_files += OfflineWhisperModelConfig.java |
| 29 | +java_files += OfflineFireRedAsrModelConfig.java | ||
| 29 | java_files += OfflineMoonshineModelConfig.java | 30 | java_files += OfflineMoonshineModelConfig.java |
| 30 | java_files += OfflineNemoEncDecCtcModelConfig.java | 31 | java_files += OfflineNemoEncDecCtcModelConfig.java |
| 31 | java_files += OfflineSenseVoiceModelConfig.java | 32 | java_files += OfflineSenseVoiceModelConfig.java |
| 1 | +package com.k2fsa.sherpa.onnx; | ||
| 2 | + | ||
| 3 | +public class OfflineFireRedAsrModelConfig { | ||
| 4 | + private final String encoder; | ||
| 5 | + private final String decoder; | ||
| 6 | + | ||
| 7 | + private OfflineFireRedAsrModelConfig(Builder builder) { | ||
| 8 | + this.encoder = builder.encoder; | ||
| 9 | + this.decoder = builder.decoder; | ||
| 10 | + } | ||
| 11 | + | ||
| 12 | + public static Builder builder() { | ||
| 13 | + return new Builder(); | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + public String getEncoder() { | ||
| 17 | + return encoder; | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + public String getDecoder() { | ||
| 21 | + return decoder; | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + public static class Builder { | ||
| 25 | + private String encoder = ""; | ||
| 26 | + private String decoder = ""; | ||
| 27 | + | ||
| 28 | + public OfflineFireRedAsrModelConfig build() { | ||
| 29 | + return new OfflineFireRedAsrModelConfig(this); | ||
| 30 | + } | ||
| 31 | + | ||
| 32 | + public Builder setEncoder(String encoder) { | ||
| 33 | + this.encoder = encoder; | ||
| 34 | + return this; | ||
| 35 | + } | ||
| 36 | + | ||
| 37 | + public Builder setDecoder(String decoder) { | ||
| 38 | + this.decoder = decoder; | ||
| 39 | + return this; | ||
| 40 | + } | ||
| 41 | + } | ||
| 42 | +} |
| @@ -6,6 +6,7 @@ public class OfflineModelConfig { | @@ -6,6 +6,7 @@ public class OfflineModelConfig { | ||
| 6 | private final OfflineTransducerModelConfig transducer; | 6 | private final OfflineTransducerModelConfig transducer; |
| 7 | private final OfflineParaformerModelConfig paraformer; | 7 | private final OfflineParaformerModelConfig paraformer; |
| 8 | private final OfflineWhisperModelConfig whisper; | 8 | private final OfflineWhisperModelConfig whisper; |
| 9 | + private final OfflineFireRedAsrModelConfig fireRedAsr; | ||
| 9 | private final OfflineMoonshineModelConfig moonshine; | 10 | private final OfflineMoonshineModelConfig moonshine; |
| 10 | private final OfflineNemoEncDecCtcModelConfig nemo; | 11 | private final OfflineNemoEncDecCtcModelConfig nemo; |
| 11 | private final OfflineSenseVoiceModelConfig senseVoice; | 12 | private final OfflineSenseVoiceModelConfig senseVoice; |
| @@ -23,6 +24,7 @@ public class OfflineModelConfig { | @@ -23,6 +24,7 @@ public class OfflineModelConfig { | ||
| 23 | this.transducer = builder.transducer; | 24 | this.transducer = builder.transducer; |
| 24 | this.paraformer = builder.paraformer; | 25 | this.paraformer = builder.paraformer; |
| 25 | this.whisper = builder.whisper; | 26 | this.whisper = builder.whisper; |
| 27 | + this.fireRedAsr = builder.fireRedAsr; | ||
| 26 | this.moonshine = builder.moonshine; | 28 | this.moonshine = builder.moonshine; |
| 27 | this.nemo = builder.nemo; | 29 | this.nemo = builder.nemo; |
| 28 | this.senseVoice = builder.senseVoice; | 30 | this.senseVoice = builder.senseVoice; |
| @@ -96,6 +98,7 @@ public class OfflineModelConfig { | @@ -96,6 +98,7 @@ public class OfflineModelConfig { | ||
| 96 | private OfflineParaformerModelConfig paraformer = OfflineParaformerModelConfig.builder().build(); | 98 | private OfflineParaformerModelConfig paraformer = OfflineParaformerModelConfig.builder().build(); |
| 97 | private OfflineTransducerModelConfig transducer = OfflineTransducerModelConfig.builder().build(); | 99 | private OfflineTransducerModelConfig transducer = OfflineTransducerModelConfig.builder().build(); |
| 98 | private OfflineWhisperModelConfig whisper = OfflineWhisperModelConfig.builder().build(); | 100 | private OfflineWhisperModelConfig whisper = OfflineWhisperModelConfig.builder().build(); |
| 101 | + private OfflineFireRedAsrModelConfig fireRedAsr = OfflineFireRedAsrModelConfig.builder().build(); | ||
| 99 | private OfflineMoonshineModelConfig moonshine = OfflineMoonshineModelConfig.builder().build(); | 102 | private OfflineMoonshineModelConfig moonshine = OfflineMoonshineModelConfig.builder().build(); |
| 100 | private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build(); | 103 | private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build(); |
| 101 | private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); | 104 | private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); |
| @@ -137,6 +140,11 @@ public class OfflineModelConfig { | @@ -137,6 +140,11 @@ public class OfflineModelConfig { | ||
| 137 | return this; | 140 | return this; |
| 138 | } | 141 | } |
| 139 | 142 | ||
| 143 | + public Builder setFireRedAsr(OfflineFireRedAsrModelConfig fireRedAsr) { | ||
| 144 | + this.fireRedAsr = fireRedAsr; | ||
| 145 | + return this; | ||
| 146 | + } | ||
| 147 | + | ||
| 140 | public Builder setSenseVoice(OfflineSenseVoiceModelConfig senseVoice) { | 148 | public Builder setSenseVoice(OfflineSenseVoiceModelConfig senseVoice) { |
| 141 | this.senseVoice = senseVoice; | 149 | this.senseVoice = senseVoice; |
| 142 | return this; | 150 | return this; |
| @@ -174,6 +174,26 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | @@ -174,6 +174,26 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | ||
| 174 | ans.model_config.whisper.tail_paddings = | 174 | ans.model_config.whisper.tail_paddings = |
| 175 | env->GetIntField(whisper_config, fid); | 175 | env->GetIntField(whisper_config, fid); |
| 176 | 176 | ||
| 177 | + // FireRedAsr | ||
| 178 | + fid = env->GetFieldID(model_config_cls, "fireRedAsr", | ||
| 179 | + "Lcom/k2fsa/sherpa/onnx/OfflineFireRedAsrModelConfig;"); | ||
| 180 | + jobject fire_red_asr_config = env->GetObjectField(model_config, fid); | ||
| 181 | + jclass fire_red_asr_config_cls = env->GetObjectClass(fire_red_asr_config); | ||
| 182 | + | ||
| 183 | + fid = | ||
| 184 | + env->GetFieldID(fire_red_asr_config_cls, "encoder", "Ljava/lang/String;"); | ||
| 185 | + s = (jstring)env->GetObjectField(fire_red_asr_config, fid); | ||
| 186 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 187 | + ans.model_config.fire_red_asr.encoder = p; | ||
| 188 | + env->ReleaseStringUTFChars(s, p); | ||
| 189 | + | ||
| 190 | + fid = | ||
| 191 | + env->GetFieldID(fire_red_asr_config_cls, "decoder", "Ljava/lang/String;"); | ||
| 192 | + s = (jstring)env->GetObjectField(fire_red_asr_config, fid); | ||
| 193 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 194 | + ans.model_config.fire_red_asr.decoder = p; | ||
| 195 | + env->ReleaseStringUTFChars(s, p); | ||
| 196 | + | ||
| 177 | // moonshine | 197 | // moonshine |
| 178 | fid = env->GetFieldID(model_config_cls, "moonshine", | 198 | fid = env->GetFieldID(model_config_cls, "moonshine", |
| 179 | "Lcom/k2fsa/sherpa/onnx/OfflineMoonshineModelConfig;"); | 199 | "Lcom/k2fsa/sherpa/onnx/OfflineMoonshineModelConfig;"); |
| @@ -33,6 +33,11 @@ data class OfflineWhisperModelConfig( | @@ -33,6 +33,11 @@ data class OfflineWhisperModelConfig( | ||
| 33 | var tailPaddings: Int = 1000, // Padding added at the end of the samples | 33 | var tailPaddings: Int = 1000, // Padding added at the end of the samples |
| 34 | ) | 34 | ) |
| 35 | 35 | ||
| 36 | +data class OfflineFireRedAsrModelConfig( | ||
| 37 | + var encoder: String = "", | ||
| 38 | + var decoder: String = "", | ||
| 39 | +) | ||
| 40 | + | ||
| 36 | data class OfflineMoonshineModelConfig( | 41 | data class OfflineMoonshineModelConfig( |
| 37 | var preprocessor: String = "", | 42 | var preprocessor: String = "", |
| 38 | var encoder: String = "", | 43 | var encoder: String = "", |
| @@ -50,6 +55,7 @@ data class OfflineModelConfig( | @@ -50,6 +55,7 @@ data class OfflineModelConfig( | ||
| 50 | var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(), | 55 | var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(), |
| 51 | var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(), | 56 | var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(), |
| 52 | var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(), | 57 | var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(), |
| 58 | + var fireRedAsr: OfflineFireRedAsrModelConfig = OfflineFireRedAsrModelConfig(), | ||
| 53 | var moonshine: OfflineMoonshineModelConfig = OfflineMoonshineModelConfig(), | 59 | var moonshine: OfflineMoonshineModelConfig = OfflineMoonshineModelConfig(), |
| 54 | var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(), | 60 | var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(), |
| 55 | var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), | 61 | var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), |
| @@ -464,6 +470,17 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | @@ -464,6 +470,17 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | ||
| 464 | modelType = "transducer", | 470 | modelType = "transducer", |
| 465 | ) | 471 | ) |
| 466 | } | 472 | } |
| 473 | + | ||
| 474 | + 24 -> { | ||
| 475 | + val modelDir = "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16" | ||
| 476 | + return OfflineModelConfig( | ||
| 477 | + fireRedAsr = OfflineFireRedAsrModelConfig( | ||
| 478 | + encoder = "$modelDir/encoder.int8.onnx", | ||
| 479 | + decoder = "$modelDir/decoder.int8.onnx", | ||
| 480 | + ), | ||
| 481 | + tokens = "$modelDir/tokens.txt", | ||
| 482 | + ) | ||
| 483 | + } | ||
| 467 | } | 484 | } |
| 468 | return null | 485 | return null |
| 469 | } | 486 | } |
- 请注册或登录后发表评论