Fangjun Kuang
Committed by GitHub

Add Kotlin and Java API for Dolphin CTC models (#2086)
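For orientation, here is a minimal Kotlin sketch of how the Dolphin CTC support added in this commit is intended to be used. It is illustrative only and not part of the diff below: it relies on the `OfflineDolphinModelConfig` data class introduced here, and it assumes the existing Kotlin API (`OfflineRecognizer`, `OfflineRecognizerConfig`, `OfflineModelConfig`, `OfflineStream`) behaves as in the repo's other kotlin-api examples; the silence buffer merely stands in for real 16 kHz audio.

```kotlin
// Hypothetical usage sketch (not part of this commit). It assumes the existing
// Kotlin API plus the OfflineDolphinModelConfig data class added in this PR.
package com.k2fsa.sherpa.onnx

fun main() {
    val modelDir = "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02"

    // Select the offline (non-streaming) Dolphin CTC model.
    val config = OfflineRecognizerConfig(
        modelConfig = OfflineModelConfig(
            dolphin = OfflineDolphinModelConfig(model = "$modelDir/model.int8.onnx"),
            tokens = "$modelDir/tokens.txt",
            numThreads = 1,
            debug = true,
        ),
    )

    val recognizer = OfflineRecognizer(config = config)

    // One second of silence stands in for real audio; a real program would read
    // samples from a 16 kHz WAV file instead (see the Java examples below).
    val samples = FloatArray(16000)

    val stream = recognizer.createStream()
    stream.acceptWaveform(samples, sampleRate = 16000)
    recognizer.decode(stream)
    println(recognizer.getResult(stream).text)

    stream.release()
    recognizer.release()
}
```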

@@ -23,8 +23,8 @@ jobs:
 fail-fast: false
 matrix:
 os: [ubuntu-latest]
- total: ["4"]
- index: ["0", "1", "2", "3"]
+ total: ["16"]
+ index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"]

 steps:
 - uses: actions/checkout@v4

@@ -23,8 +23,8 @@ jobs:
 fail-fast: false
 matrix:
 os: [ubuntu-latest]
- total: ["10"]
- index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
+ total: ["18"]
+ index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17"]

 steps:
 - uses: actions/checkout@v4
@@ -105,6 +105,16 @@ jobs:
 make -j4
 ls -lh lib

+ - name: Run java test (VAD + Non-streaming Dolphin CTC)
+ shell: bash
+ run: |
+ cd ./java-api-examples
+ ./run-vad-non-streaming-dolphin-ctc.sh
+ rm *.onnx
+ ls -lh *.wav
+ rm *.wav
+ rm -rf sherpa-onnx-dolphin-*
+
 - name: Run speech enhancement (GTCRN)
 shell: bash
 run: |

@@ -135,6 +145,9 @@ jobs:
 run: |
 cd ./java-api-examples

+ ./run-non-streaming-decode-file-dolphin-ctc.sh
+ rm -rf sherpa-onnx-dolphin-*
+
 ./run-non-streaming-decode-file-moonshine.sh
 rm -rf sherpa-onnx-moonshine-*
@@ -140,3 +140,4 @@ README-DEV.txt
 *.jit
 ##clion
 .idea
+sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
New file: NonStreamingDecodeFileDolphinCtc.java
+// Copyright 2025 Xiaomi Corporation
+
+// This file shows how to use an offline Dolphin CTC model, i.e.,
+// non-streaming Dolphin CTC model, to decode files.
+import com.k2fsa.sherpa.onnx.*;
+
+public class NonStreamingDecodeFileDolphinCtc {
+ public static void main(String[] args) {
+ // please refer to
+ // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+ // to download model files
+ String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
+ String tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";
+
+ String waveFilename =
+ "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav";
+
+ WaveReader reader = new WaveReader(waveFilename);
+
+ OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build();
+
+ OfflineModelConfig modelConfig =
+ OfflineModelConfig.builder()
+ .setDolphin(dolphin)
+ .setTokens(tokens)
+ .setNumThreads(1)
+ .setDebug(true)
+ .build();
+
+ OfflineRecognizerConfig config =
+ OfflineRecognizerConfig.builder()
+ .setOfflineModelConfig(modelConfig)
+ .setDecodingMethod("greedy_search")
+ .build();
+
+ OfflineRecognizer recognizer = new OfflineRecognizer(config);
+ OfflineStream stream = recognizer.createStream();
+ stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
+
+ recognizer.decode(stream);
+
+ String text = recognizer.getResult(stream).getText();
+
+ System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
+
+ stream.release();
+ recognizer.release();
+ }
+}
@@ -23,6 +23,7 @@ This directory contains examples for the JAVA API of sherpa-onnx.
 ## Non-Streaming Speech recognition

 ```bash
+./run-non-streaming-decode-file-dolphin-ctc.sh
 ./run-non-streaming-decode-file-paraformer.sh
 ./run-non-streaming-decode-file-sense-voice.sh
 ./run-non-streaming-decode-file-transducer.sh

@@ -102,6 +103,12 @@ The punctuation model supports both English and Chinese.
 ./run-vad-remove-slience.sh
 ```

+## VAD + Non-streaming Dolphin CTC for speech recognition
+
+```bash
+./run-vad-non-streaming-dolphin-ctc.sh
+```
+
 ## VAD + Non-streaming SenseVoice for speech recognition

 ```bash
New file: VadNonStreamingDolphinCtc.java
+// Copyright 2025 Xiaomi Corporation
+
+// This file shows how to use a silero_vad model with a non-streaming Dolphin
+// CTC model for speech recognition.
+
+import com.k2fsa.sherpa.onnx.*;
+import java.util.Arrays;
+
+public class VadNonStreamingDolphinCtc {
+ public static Vad createVad() {
+ // please download ./silero_vad.onnx from
+ // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+ String model = "./silero_vad.onnx";
+ SileroVadModelConfig sileroVad =
+ SileroVadModelConfig.builder()
+ .setModel(model)
+ .setThreshold(0.5f)
+ .setMinSilenceDuration(0.25f)
+ .setMinSpeechDuration(0.5f)
+ .setWindowSize(512)
+ .setMaxSpeechDuration(5.0f)
+ .build();
+
+ VadModelConfig config =
+ VadModelConfig.builder()
+ .setSileroVadModelConfig(sileroVad)
+ .setSampleRate(16000)
+ .setNumThreads(1)
+ .setDebug(true)
+ .setProvider("cpu")
+ .build();
+
+ return new Vad(config);
+ }
+
+ public static OfflineRecognizer createOfflineRecognizer() {
+ // please refer to
+ // https://k2-fsa.github.io/sherpa/onnx/dolphin/index.html
+ // to download model files
+ String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
+ String tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";
+
+ OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build();
+
+ OfflineModelConfig modelConfig =
+ OfflineModelConfig.builder()
+ .setDolphin(dolphin)
+ .setTokens(tokens)
+ .setNumThreads(1)
+ .setDebug(true)
+ .build();
+
+ OfflineRecognizerConfig config =
+ OfflineRecognizerConfig.builder()
+ .setOfflineModelConfig(modelConfig)
+ .setDecodingMethod("greedy_search")
+ .build();
+
+ return new OfflineRecognizer(config);
+ }
+
+ public static void main(String[] args) {
+
+ Vad vad = createVad();
+ OfflineRecognizer recognizer = createOfflineRecognizer();
+
+ // You can download the test file from
+ // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+ String testWaveFilename = "./lei-jun-test.wav";
+ WaveReader reader = new WaveReader(testWaveFilename);
+
+ int numSamples = reader.getSamples().length;
+ int numIter = numSamples / 512;
+
+ for (int i = 0; i != numIter; ++i) {
+ int start = i * 512;
+ int end = start + 512;
+ float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end);
+ vad.acceptWaveform(samples);
+ if (vad.isSpeechDetected()) {
+ while (!vad.empty()) {
+ SpeechSegment segment = vad.front();
+ float startTime = segment.getStart() / 16000.0f;
+ float duration = segment.getSamples().length / 16000.0f;
+
+ OfflineStream stream = recognizer.createStream();
+ stream.acceptWaveform(segment.getSamples(), 16000);
+ recognizer.decode(stream);
+ String text = recognizer.getResult(stream).getText();
+ stream.release();
+
+ if (!text.isEmpty()) {
+ System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
+ }
+
+ vad.pop();
+ }
+ }
+ }
+
+ vad.flush();
+ while (!vad.empty()) {
+ SpeechSegment segment = vad.front();
+ float startTime = segment.getStart() / 16000.0f;
+ float duration = segment.getSamples().length / 16000.0f;
+
+ OfflineStream stream = recognizer.createStream();
+ stream.acceptWaveform(segment.getSamples(), 16000);
+ recognizer.decode(stream);
+ String text = recognizer.getResult(stream).getText();
+ stream.release();
+
+ if (!text.isEmpty()) {
+ System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
+ }
+
+ vad.pop();
+ }
+
+ vad.release();
+ recognizer.release();
+ }
+}
New file: run-non-streaming-decode-file-dolphin-ctc.sh
+#!/usr/bin/env bash
+
+set -ex
+
+if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
+ mkdir -p ../build
+ pushd ../build
+ cmake \
+ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+ -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+ -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+ -DBUILD_SHARED_LIBS=ON \
+ -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+ -DSHERPA_ONNX_ENABLE_JNI=ON \
+ ..
+
+ make -j4
+ ls -lh lib
+ popd
+fi
+
+if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
+ pushd ../sherpa-onnx/java-api
+ make
+ popd
+fi
+
+if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+ tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+ ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
+fi
+
+java \
+ -Djava.library.path=$PWD/../build/lib \
+ -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
+ NonStreamingDecodeFileDolphinCtc.java
New file: run-vad-non-streaming-dolphin-ctc.sh
+#!/usr/bin/env bash
+
+set -ex
+
+if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
+ mkdir -p ../build
+ pushd ../build
+ cmake \
+ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+ -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+ -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+ -DBUILD_SHARED_LIBS=ON \
+ -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+ -DSHERPA_ONNX_ENABLE_JNI=ON \
+ ..
+
+ make -j4
+ ls -lh lib
+ popd
+fi
+
+if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
+ pushd ../sherpa-onnx/java-api
+ make
+ popd
+fi
+
+if [ ! -f ./silero_vad.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+if [ ! -f ./lei-jun-test.wav ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
+fi
+
+if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+ tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+ ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
+fi
+
+java \
+ -Djava.library.path=$PWD/../build/lib \
+ -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
+ ./VadNonStreamingDolphinCtc.java
@@ -190,6 +190,13 @@ function testSpokenLanguageIdentification() {
 }

 function testOfflineAsr() {
+ if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+ tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+ ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
+ fi
+
 if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
 tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
@@ -1,7 +1,7 @@
 package com.k2fsa.sherpa.onnx

 fun main() {
- val types = arrayOf(0, 2, 5, 6, 15, 21, 24)
+ val types = arrayOf(0, 2, 5, 6, 15, 21, 24, 25)
 for (type in types) {
 test(type)
 }

@@ -18,6 +18,7 @@ fun test(type: Int) {
 15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav"
 21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav"
 24 -> "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav"
+ 25 -> "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"
 else -> null
 }
@@ -160,6 +160,21 @@ def get_2nd_models():
 popd
 """,
 ),
+ Model(
+ model_name="sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
+ idx=25,
+ lang="multi_lang",
+ short_name="dolphin_base_ctc",
+ cmd="""
+ pushd $model_name
+
+ rm -rfv test_wavs
+
+ ls -lh
+
+ popd
+ """,
+ ),
 ]
 return models
@@ -304,6 +319,48 @@ def get_1st_models():
 popd
 """,
 ),
+ Model(
+ model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01",
+ idx=15,
+ lang="zh",
+ short_name="int8_small_zipformer",
+ rule_fsts="itn_zh_number.fst",
+ cmd="""
+ if [ ! -f itn_zh_number.fst ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+ fi
+ pushd $model_name
+ rm -f bpe.model
+
+ rm -rf test_wavs
+ rm README.md
+
+ ls -lh
+
+ popd
+ """,
+ ),
+ Model(
+ model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01",
+ idx=16,
+ lang="zh",
+ short_name="small_zipformer",
+ rule_fsts="itn_zh_number.fst",
+ cmd="""
+ if [ ! -f itn_zh_number.fst ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+ fi
+ pushd $model_name
+ rm -f bpe.model
+
+ rm -rf test_wavs
+ rm README.md
+
+ ls -lh
+
+ popd
+ """,
+ ),
 ]

 return models
@@ -313,19 +370,25 @@ def get_models():
 first = get_1st_models()
 second = get_2nd_models()

- combinations = [
- (
- "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
- "sherpa-onnx-paraformer-zh-2023-09-14",
- ),
- (
- "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
- "icefall-asr-zipformer-wenetspeech-20230615",
- ),
- (
- "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
- "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
- ),
+ combinations = []
+
+ first_zh = [
+ "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
+ "sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01",
+ "sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01",
+ ]
+
+ second_zh = [
+ "sherpa-onnx-paraformer-zh-2023-09-14",
+ "icefall-asr-zipformer-wenetspeech-20230615",
+ "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
+ "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
+ ]
+ for first_m in first_zh:
+ for second_m in second_zh:
+ combinations.append((first_m, second_m))
+
+ combinations += [
 (
 "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
 "sherpa-onnx-whisper-tiny.en",
@@ -263,6 +263,48 @@ def get_models():
 popd
 """,
 ),
+ Model(
+ model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01",
+ idx=15,
+ lang="zh",
+ short_name="int8_small_zipformer",
+ rule_fsts="itn_zh_number.fst",
+ cmd="""
+ if [ ! -f itn_zh_number.fst ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+ fi
+ pushd $model_name
+ rm -f bpe.model
+
+ rm -rf test_wavs
+ rm README.md
+
+ ls -lh
+
+ popd
+ """,
+ ),
+ Model(
+ model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01",
+ idx=16,
+ lang="zh",
+ short_name="small_zipformer",
+ rule_fsts="itn_zh_number.fst",
+ cmd="""
+ if [ ! -f itn_zh_number.fst ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+ fi
+ pushd $model_name
+ rm -f bpe.model
+
+ rm -rf test_wavs
+ rm README.md
+
+ ls -lh
+
+ popd
+ """,
+ ),
 ]

 return models
@@ -443,6 +443,22 @@ def get_models():
 popd
 """,
 ),
+ Model(
+ model_name="sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
+ idx=25,
+ lang="multi_lang",
+ lang2="multi_lang",
+ short_name="multi_lang",
+ cmd="""
+ pushd $model_name
+
+ rm -rfv test_wavs
+
+ ls -lh
+
+ popd
+ """,
+ ),
 ]
 return models
@@ -30,6 +30,7 @@ java_files += OfflineFireRedAsrModelConfig.java
 java_files += OfflineMoonshineModelConfig.java
 java_files += OfflineNemoEncDecCtcModelConfig.java
 java_files += OfflineSenseVoiceModelConfig.java
+java_files += OfflineDolphinModelConfig.java
 java_files += OfflineModelConfig.java
 java_files += OfflineRecognizerConfig.java
 java_files += OfflineRecognizerResult.java
New file: OfflineDolphinModelConfig.java
+// Copyright 2025 Xiaomi Corporation
+
+package com.k2fsa.sherpa.onnx;
+
+public class OfflineDolphinModelConfig {
+ private final String model;
+
+ private OfflineDolphinModelConfig(Builder builder) {
+ this.model = builder.model;
+ }
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ public String getModel() {
+ return model;
+ }
+
+ public static class Builder {
+ private String model = "";
+
+ public OfflineDolphinModelConfig build() {
+ return new OfflineDolphinModelConfig(this);
+ }
+
+ public Builder setModel(String model) {
+ this.model = model;
+ return this;
+ }
+ }
+}
@@ -10,6 +10,7 @@ public class OfflineModelConfig {
 private final OfflineMoonshineModelConfig moonshine;
 private final OfflineNemoEncDecCtcModelConfig nemo;
 private final OfflineSenseVoiceModelConfig senseVoice;
+ private final OfflineDolphinModelConfig dolphin;
 private final String teleSpeech;
 private final String tokens;
 private final int numThreads;

@@ -28,6 +29,7 @@ public class OfflineModelConfig {
 this.moonshine = builder.moonshine;
 this.nemo = builder.nemo;
 this.senseVoice = builder.senseVoice;
+ this.dolphin = builder.dolphin;
 this.teleSpeech = builder.teleSpeech;
 this.tokens = builder.tokens;
 this.numThreads = builder.numThreads;

@@ -62,6 +64,10 @@ public class OfflineModelConfig {
 return senseVoice;
 }

+ public OfflineDolphinModelConfig getDolphin() {
+ return dolphin;
+ }
+
 public String getTokens() {
 return tokens;
 }

@@ -102,6 +108,7 @@ public class OfflineModelConfig {
 private OfflineMoonshineModelConfig moonshine = OfflineMoonshineModelConfig.builder().build();
 private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build();
 private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build();
+ private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build();
 private String teleSpeech = "";
 private String tokens = "";
 private int numThreads = 1;

@@ -120,6 +127,11 @@ public class OfflineModelConfig {
 return this;
 }

+ public Builder setDolphin(OfflineDolphinModelConfig dolphin) {
+ this.dolphin = dolphin;
+ return this;
+ }
+
 public Builder setParaformer(OfflineParaformerModelConfig paraformer) {
 this.paraformer = paraformer;
 return this;
@@ -265,6 +265,19 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
 ans.model_config.nemo_ctc.model = p;
 env->ReleaseStringUTFChars(s, p);

+ // dolphin
+ fid = env->GetFieldID(model_config_cls, "dolphin",
+ "Lcom/k2fsa/sherpa/onnx/OfflineDolphinModelConfig;");
+ jobject dolphin_config = env->GetObjectField(model_config, fid);
+ jclass dolphin_config_cls = env->GetObjectClass(dolphin_config);
+
+ fid = env->GetFieldID(dolphin_config_cls, "model", "Ljava/lang/String;");
+
+ s = (jstring)env->GetObjectField(dolphin_config, fid);
+ p = env->GetStringUTFChars(s, nullptr);
+ ans.model_config.dolphin.model = p;
+ env->ReleaseStringUTFChars(s, p);
+
 fid = env->GetFieldID(model_config_cls, "teleSpeech", "Ljava/lang/String;");
 s = (jstring)env->GetObjectField(model_config, fid);
 p = env->GetStringUTFChars(s, nullptr);
@@ -25,6 +25,10 @@ data class OfflineNemoEncDecCtcModelConfig(
 var model: String = "",
 )

+data class OfflineDolphinModelConfig(
+ var model: String = "",
+)
+
 data class OfflineWhisperModelConfig(
 var encoder: String = "",
 var decoder: String = "",

@@ -59,6 +63,7 @@ data class OfflineModelConfig(
 var moonshine: OfflineMoonshineModelConfig = OfflineMoonshineModelConfig(),
 var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(),
 var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(),
+ var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(),
 var teleSpeech: String = "",
 var numThreads: Int = 1,
 var debug: Boolean = false,

@@ -481,6 +486,16 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
 tokens = "$modelDir/tokens.txt",
 )
 }
+
+ 25 -> {
+ val modelDir = "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02"
+ return OfflineModelConfig(
+ dolphin = OfflineDolphinModelConfig(
+ model = "$modelDir/model.int8.onnx",
+ ),
+ tokens = "$modelDir/tokens.txt",
+ )
+ }
 }
 return null
 }
@@ -374,6 +374,26 @@ fun getModelConfig(type: Int): OnlineModelConfig? {
 modelType = "zipformer",
 )
 }
+
+ 15 -> {
+ val modelDir = "sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01"
+ return OnlineModelConfig(
+ zipformer2Ctc = OnlineZipformer2CtcModelConfig(
+ model = "$modelDir/model.int8.onnx",
+ ),
+ tokens = "$modelDir/tokens.txt",
+ )
+ }
+
+ 16 -> {
+ val modelDir = "sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01"
+ return OnlineModelConfig(
+ zipformer2Ctc = OnlineZipformer2CtcModelConfig(
+ model = "$modelDir/model.onnx",
+ ),
+ tokens = "$modelDir/tokens.txt",
+ )
+ }
 }
 return null
 }
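The two new streaming zipformer2 CTC entries (types 15 and 16) are consumed the same way as the other `getModelConfig()` branches. Below is a hedged Kotlin sketch, not part of this commit, assuming the existing streaming API (`OnlineRecognizer`, `OnlineRecognizerConfig`, `OnlineStream`) works as in the repo's other kotlin-api examples; silence is fed purely as placeholder audio.

```kotlin
// Hypothetical usage sketch (not part of this commit). It assumes the existing
// Kotlin streaming API and the getModelConfig() branches for types 15/16 above.
package com.k2fsa.sherpa.onnx

fun main() {
    // Type 16 selects sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01.
    val modelConfig = getModelConfig(type = 16) ?: return

    val recognizer = OnlineRecognizer(config = OnlineRecognizerConfig(modelConfig = modelConfig))
    val stream = recognizer.createStream()

    // Feed 100 ms chunks of 16 kHz audio; silence stands in for a real recording.
    val chunk = FloatArray(1600)
    repeat(10) {
        stream.acceptWaveform(chunk, sampleRate = 16000)
        while (recognizer.isReady(stream)) {
            recognizer.decode(stream)
        }
    }
    println(recognizer.getResult(stream).text)

    stream.release()
    recognizer.release()
}
```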