Committed by
GitHub
Add Kotlin and Java API for Dolphin CTC models (#2086)
正在显示
20 个修改的文件
包含
517 行增加
和
18 行删除
| @@ -23,8 +23,8 @@ jobs: | @@ -23,8 +23,8 @@ jobs: | ||
| 23 | fail-fast: false | 23 | fail-fast: false |
| 24 | matrix: | 24 | matrix: |
| 25 | os: [ubuntu-latest] | 25 | os: [ubuntu-latest] |
| 26 | - total: ["4"] | ||
| 27 | - index: ["0", "1", "2", "3"] | 26 | + total: ["16"] |
| 27 | + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"] | ||
| 28 | 28 | ||
| 29 | steps: | 29 | steps: |
| 30 | - uses: actions/checkout@v4 | 30 | - uses: actions/checkout@v4 |
| @@ -23,8 +23,8 @@ jobs: | @@ -23,8 +23,8 @@ jobs: | ||
| 23 | fail-fast: false | 23 | fail-fast: false |
| 24 | matrix: | 24 | matrix: |
| 25 | os: [ubuntu-latest] | 25 | os: [ubuntu-latest] |
| 26 | - total: ["10"] | ||
| 27 | - index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] | 26 | + total: ["18"] |
| 27 | + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17"] | ||
| 28 | 28 | ||
| 29 | steps: | 29 | steps: |
| 30 | - uses: actions/checkout@v4 | 30 | - uses: actions/checkout@v4 |
| @@ -105,6 +105,16 @@ jobs: | @@ -105,6 +105,16 @@ jobs: | ||
| 105 | make -j4 | 105 | make -j4 |
| 106 | ls -lh lib | 106 | ls -lh lib |
| 107 | 107 | ||
| 108 | + - name: Run java test (VAD + Non-streaming Dolphin CTC) | ||
| 109 | + shell: bash | ||
| 110 | + run: | | ||
| 111 | + cd ./java-api-examples | ||
| 112 | + ./run-vad-non-streaming-dolphin-ctc.sh | ||
| 113 | + rm *.onnx | ||
| 114 | + ls -lh *.wav | ||
| 115 | + rm *.wav | ||
| 116 | + rm -rf sherpa-onnx-dolphin-* | ||
| 117 | + | ||
| 108 | - name: Run speech enhancement (GTCRN) | 118 | - name: Run speech enhancement (GTCRN) |
| 109 | shell: bash | 119 | shell: bash |
| 110 | run: | | 120 | run: | |
| @@ -135,6 +145,9 @@ jobs: | @@ -135,6 +145,9 @@ jobs: | ||
| 135 | run: | | 145 | run: | |
| 136 | cd ./java-api-examples | 146 | cd ./java-api-examples |
| 137 | 147 | ||
| 148 | + ./run-non-streaming-decode-file-dolphin-ctc.sh | ||
| 149 | + rm -rf sherpa-onnx-dolphin-* | ||
| 150 | + | ||
| 138 | ./run-non-streaming-decode-file-moonshine.sh | 151 | ./run-non-streaming-decode-file-moonshine.sh |
| 139 | rm -rf sherpa-onnx-moonshine-* | 152 | rm -rf sherpa-onnx-moonshine-* |
| 140 | 153 |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use an offline Dolphin CTC model, i.e., | ||
| 4 | +// non-streaming Dolphin CTC model, to decode files. | ||
| 5 | +import com.k2fsa.sherpa.onnx.*; | ||
| 6 | + | ||
| 7 | +public class NonStreamingDecodeFileDolphinCtc { | ||
| 8 | +  /** | ||
| 9 | +   * Decodes one test wave file with a non-streaming Dolphin CTC model and prints the text. | ||
| 10 | +   * | ||
| 11 | +   * <p>Model files can be downloaded from | ||
| 12 | +   * https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 13 | +   * | ||
| 14 | +   * @param args unused | ||
| 15 | +   */ | ||
| 16 | +  public static void main(String[] args) { | ||
| 17 | +    String modelPath = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"; | ||
| 18 | +    String tokensPath = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"; | ||
| 19 | +    String wavePath = | ||
| 20 | +        "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"; | ||
| 21 | + | ||
| 22 | +    // Read the input audio. | ||
| 23 | +    WaveReader wave = new WaveReader(wavePath); | ||
| 24 | + | ||
| 25 | +    // Dolphin model config -> offline model config -> recognizer config, built in one chain. | ||
| 26 | +    OfflineRecognizerConfig recognizerConfig = | ||
| 27 | +        OfflineRecognizerConfig.builder() | ||
| 28 | +            .setOfflineModelConfig( | ||
| 29 | +                OfflineModelConfig.builder() | ||
| 30 | +                    .setDolphin(OfflineDolphinModelConfig.builder().setModel(modelPath).build()) | ||
| 31 | +                    .setTokens(tokensPath) | ||
| 32 | +                    .setNumThreads(1) | ||
| 33 | +                    .setDebug(true) | ||
| 34 | +                    .build()) | ||
| 35 | +            .setDecodingMethod("greedy_search") | ||
| 36 | +            .build(); | ||
| 37 | + | ||
| 38 | +    OfflineRecognizer asr = new OfflineRecognizer(recognizerConfig); | ||
| 39 | +    OfflineStream utterance = asr.createStream(); | ||
| 40 | + | ||
| 41 | +    // Feed the whole file at once, then decode it in a single pass. | ||
| 42 | +    utterance.acceptWaveform(wave.getSamples(), wave.getSampleRate()); | ||
| 43 | +    asr.decode(utterance); | ||
| 44 | + | ||
| 45 | +    String recognized = asr.getResult(utterance).getText(); | ||
| 46 | +    System.out.printf("filename:%s\nresult:%s\n", wavePath, recognized); | ||
| 47 | + | ||
| 48 | +    // Release the stream and the recognizer explicitly once we are done with them. | ||
| 49 | +    utterance.release(); | ||
| 50 | +    asr.release(); | ||
| 51 | +  } | ||
| 52 | +} |
| @@ -23,6 +23,7 @@ This directory contains examples for the JAVA API of sherpa-onnx. | @@ -23,6 +23,7 @@ This directory contains examples for the JAVA API of sherpa-onnx. | ||
| 23 | ## Non-Streaming Speech recognition | 23 | ## Non-Streaming Speech recognition |
| 24 | 24 | ||
| 25 | ```bash | 25 | ```bash |
| 26 | +./run-non-streaming-decode-file-dolphin-ctc.sh | ||
| 26 | ./run-non-streaming-decode-file-paraformer.sh | 27 | ./run-non-streaming-decode-file-paraformer.sh |
| 27 | ./run-non-streaming-decode-file-sense-voice.sh | 28 | ./run-non-streaming-decode-file-sense-voice.sh |
| 28 | ./run-non-streaming-decode-file-transducer.sh | 29 | ./run-non-streaming-decode-file-transducer.sh |
| @@ -102,6 +103,12 @@ The punctuation model supports both English and Chinese. | @@ -102,6 +103,12 @@ The punctuation model supports both English and Chinese. | ||
| 102 | ./run-vad-remove-slience.sh | 103 | ./run-vad-remove-slience.sh |
| 103 | ``` | 104 | ``` |
| 104 | 105 | ||
| 106 | +## VAD + Non-streaming Dolphin CTC for speech recognition | ||
| 107 | + | ||
| 108 | +```bash | ||
| 109 | +./run-vad-non-streaming-dolphin-ctc.sh | ||
| 110 | +``` | ||
| 111 | + | ||
| 105 | ## VAD + Non-streaming SenseVoice for speech recognition | 112 | ## VAD + Non-streaming SenseVoice for speech recognition |
| 106 | 113 | ||
| 107 | ```bash | 114 | ```bash |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a silero_vad model with a non-streaming Dolphin | ||
| 4 | +// CTC model for speech recognition. | ||
| 5 | + | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | +import java.util.Arrays; | ||
| 8 | + | ||
| 9 | +public class VadNonStreamingSenseVoice { | ||
| 10 | + public static Vad createVad() { | ||
| 11 | + // please download ./silero_vad.onnx from | ||
| 12 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 13 | + String model = "./silero_vad.onnx"; | ||
| 14 | + SileroVadModelConfig sileroVad = | ||
| 15 | + SileroVadModelConfig.builder() | ||
| 16 | + .setModel(model) | ||
| 17 | + .setThreshold(0.5f) | ||
| 18 | + .setMinSilenceDuration(0.25f) | ||
| 19 | + .setMinSpeechDuration(0.5f) | ||
| 20 | + .setWindowSize(512) | ||
| 21 | + .setMaxSpeechDuration(5.0f) | ||
| 22 | + .build(); | ||
| 23 | + | ||
| 24 | + VadModelConfig config = | ||
| 25 | + VadModelConfig.builder() | ||
| 26 | + .setSileroVadModelConfig(sileroVad) | ||
| 27 | + .setSampleRate(16000) | ||
| 28 | + .setNumThreads(1) | ||
| 29 | + .setDebug(true) | ||
| 30 | + .setProvider("cpu") | ||
| 31 | + .build(); | ||
| 32 | + | ||
| 33 | + return new Vad(config); | ||
| 34 | + } | ||
| 35 | + | ||
| 36 | + public static OfflineRecognizer createOfflineRecognizer() { | ||
| 37 | + // please refer to | ||
| 38 | + // https://k2-fsa.github.io/sherpa/onnx/dolphin/index.html | ||
| 39 | + // to download model files | ||
| 40 | + String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"; | ||
| 41 | + String tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"; | ||
| 42 | + | ||
| 43 | + OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build(); | ||
| 44 | + | ||
| 45 | + OfflineModelConfig modelConfig = | ||
| 46 | + OfflineModelConfig.builder() | ||
| 47 | + .setDolphin(dolphin) | ||
| 48 | + .setTokens(tokens) | ||
| 49 | + .setNumThreads(1) | ||
| 50 | + .setDebug(true) | ||
| 51 | + .build(); | ||
| 52 | + | ||
| 53 | + OfflineRecognizerConfig config = | ||
| 54 | + OfflineRecognizerConfig.builder() | ||
| 55 | + .setOfflineModelConfig(modelConfig) | ||
| 56 | + .setDecodingMethod("greedy_search") | ||
| 57 | + .build(); | ||
| 58 | + | ||
| 59 | + return new OfflineRecognizer(config); | ||
| 60 | + } | ||
| 61 | + | ||
| 62 | + public static void main(String[] args) { | ||
| 63 | + | ||
| 64 | + Vad vad = createVad(); | ||
| 65 | + OfflineRecognizer recognizer = createOfflineRecognizer(); | ||
| 66 | + | ||
| 67 | + // You can download the test file from | ||
| 68 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 69 | + String testWaveFilename = "./lei-jun-test.wav"; | ||
| 70 | + WaveReader reader = new WaveReader(testWaveFilename); | ||
| 71 | + | ||
| 72 | + int numSamples = reader.getSamples().length; | ||
| 73 | + int numIter = numSamples / 512; | ||
| 74 | + | ||
| 75 | + for (int i = 0; i != numIter; ++i) { | ||
| 76 | + int start = i * 512; | ||
| 77 | + int end = start + 512; | ||
| 78 | + float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end); | ||
| 79 | + vad.acceptWaveform(samples); | ||
| 80 | + if (vad.isSpeechDetected()) { | ||
| 81 | + while (!vad.empty()) { | ||
| 82 | + SpeechSegment segment = vad.front(); | ||
| 83 | + float startTime = segment.getStart() / 16000.0f; | ||
| 84 | + float duration = segment.getSamples().length / 16000.0f; | ||
| 85 | + | ||
| 86 | + OfflineStream stream = recognizer.createStream(); | ||
| 87 | + stream.acceptWaveform(segment.getSamples(), 16000); | ||
| 88 | + recognizer.decode(stream); | ||
| 89 | + String text = recognizer.getResult(stream).getText(); | ||
| 90 | + stream.release(); | ||
| 91 | + | ||
| 92 | + if (!text.isEmpty()) { | ||
| 93 | + System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text); | ||
| 94 | + } | ||
| 95 | + | ||
| 96 | + vad.pop(); | ||
| 97 | + } | ||
| 98 | + } | ||
| 99 | + } | ||
| 100 | + | ||
| 101 | + vad.flush(); | ||
| 102 | + while (!vad.empty()) { | ||
| 103 | + SpeechSegment segment = vad.front(); | ||
| 104 | + float startTime = segment.getStart() / 16000.0f; | ||
| 105 | + float duration = segment.getSamples().length / 16000.0f; | ||
| 106 | + | ||
| 107 | + OfflineStream stream = recognizer.createStream(); | ||
| 108 | + stream.acceptWaveform(segment.getSamples(), 16000); | ||
| 109 | + recognizer.decode(stream); | ||
| 110 | + String text = recognizer.getResult(stream).getText(); | ||
| 111 | + stream.release(); | ||
| 112 | + | ||
| 113 | + if (!text.isEmpty()) { | ||
| 114 | + System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text); | ||
| 115 | + } | ||
| 116 | + | ||
| 117 | + vad.pop(); | ||
| 118 | + } | ||
| 119 | + | ||
| 120 | + vad.release(); | ||
| 121 | + recognizer.release(); | ||
| 122 | + } | ||
| 123 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# | ||
| 3 | +# Runs the non-streaming Dolphin CTC Java example. Builds the JNI library and the | ||
| 4 | +# Java API jar when they are missing and downloads the model on first use. | ||
| 5 | + | ||
| 6 | +set -ex | ||
| 7 | + | ||
| 8 | +# Build the JNI shared library unless a .dylib or .so already exists. | ||
| 9 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 10 | +  mkdir -p ../build | ||
| 11 | +  pushd ../build | ||
| 12 | +  cmake \ | ||
| 13 | +    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 14 | +    -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 15 | +    -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 16 | +    -DBUILD_SHARED_LIBS=ON \ | ||
| 17 | +    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 18 | +    -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 19 | +    .. | ||
| 20 | + | ||
| 21 | +  make -j4 | ||
| 22 | +  ls -lh lib | ||
| 23 | +  popd | ||
| 24 | +fi | ||
| 25 | + | ||
| 26 | +# Build the Java API jar if it does not exist yet. | ||
| 27 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 28 | +  pushd ../sherpa-onnx/java-api | ||
| 29 | +  make | ||
| 30 | +  popd | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +# Download and unpack the model if it is not already present. | ||
| 34 | +if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then | ||
| 35 | +  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 36 | +  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 37 | +  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 38 | +  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 | ||
| 39 | +fi | ||
| 40 | + | ||
| 41 | +# The library path is quoted so that a working directory containing spaces still works. | ||
| 42 | +java \ | ||
| 43 | +  -Djava.library.path="$PWD/../build/lib" \ | ||
| 44 | +  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 45 | +  NonStreamingDecodeFileDolphinCtc.java |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# | ||
| 3 | +# Runs the VAD + non-streaming Dolphin CTC Java example. Builds the JNI library and | ||
| 4 | +# the Java API jar when they are missing and downloads the VAD model, the test wave | ||
| 5 | +# and the Dolphin CTC model on first use. | ||
| 6 | + | ||
| 7 | +set -ex | ||
| 8 | + | ||
| 9 | +# Build the JNI shared library unless a .dylib or .so already exists. | ||
| 10 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 11 | +  mkdir -p ../build | ||
| 12 | +  pushd ../build | ||
| 13 | +  cmake \ | ||
| 14 | +    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 15 | +    -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 16 | +    -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 17 | +    -DBUILD_SHARED_LIBS=ON \ | ||
| 18 | +    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 19 | +    -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 20 | +    .. | ||
| 21 | + | ||
| 22 | +  make -j4 | ||
| 23 | +  ls -lh lib | ||
| 24 | +  popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +# Build the Java API jar if it does not exist yet. | ||
| 28 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 29 | +  pushd ../sherpa-onnx/java-api | ||
| 30 | +  make | ||
| 31 | +  popd | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +# Download the silero VAD model if it is not already present. | ||
| 35 | +if [ ! -f ./silero_vad.onnx ]; then | ||
| 36 | +  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 37 | +fi | ||
| 38 | + | ||
| 39 | +# Download the test recording if it is not already present. | ||
| 40 | +if [ ! -f ./lei-jun-test.wav ]; then | ||
| 41 | +  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav | ||
| 42 | +fi | ||
| 43 | + | ||
| 44 | +# Download and unpack the Dolphin CTC model if it is not already present. | ||
| 45 | +if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then | ||
| 46 | +  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 47 | +  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 48 | +  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 49 | +  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 | ||
| 50 | +fi | ||
| 51 | + | ||
| 52 | +# The library path is quoted so that a working directory containing spaces still works. | ||
| 53 | +java \ | ||
| 54 | +  -Djava.library.path="$PWD/../build/lib" \ | ||
| 55 | +  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 56 | +  ./VadNonStreamingDolphinCtc.java |
| @@ -190,6 +190,13 @@ function testSpokenLanguageIdentification() { | @@ -190,6 +190,13 @@ function testSpokenLanguageIdentification() { | ||
| 190 | } | 190 | } |
| 191 | 191 | ||
| 192 | function testOfflineAsr() { | 192 | function testOfflineAsr() { |
| 193 | + if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then | ||
| 194 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 195 | + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 196 | + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 197 | + ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 | ||
| 198 | + fi | ||
| 199 | + | ||
| 193 | if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then | 200 | if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then |
| 194 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | 201 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 |
| 195 | tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | 202 | tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 |
| 1 | package com.k2fsa.sherpa.onnx | 1 | package com.k2fsa.sherpa.onnx |
| 2 | 2 | ||
| 3 | fun main() { | 3 | fun main() { |
| 4 | - val types = arrayOf(0, 2, 5, 6, 15, 21, 24) | 4 | + val types = arrayOf(0, 2, 5, 6, 15, 21, 24, 25) |
| 5 | for (type in types) { | 5 | for (type in types) { |
| 6 | test(type) | 6 | test(type) |
| 7 | } | 7 | } |
| @@ -18,6 +18,7 @@ fun test(type: Int) { | @@ -18,6 +18,7 @@ fun test(type: Int) { | ||
| 18 | 15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav" | 18 | 15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav" |
| 19 | 21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav" | 19 | 21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav" |
| 20 | 24 -> "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav" | 20 | 24 -> "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav" |
| 21 | + 25 -> "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav" | ||
| 21 | else -> null | 22 | else -> null |
| 22 | } | 23 | } |
| 23 | 24 |
| @@ -160,6 +160,21 @@ def get_2nd_models(): | @@ -160,6 +160,21 @@ def get_2nd_models(): | ||
| 160 | popd | 160 | popd |
| 161 | """, | 161 | """, |
| 162 | ), | 162 | ), |
| 163 | + Model( | ||
| 164 | + model_name="sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", | ||
| 165 | + idx=25, | ||
| 166 | + lang="multi_lang", | ||
| 167 | + short_name="dolphin_base_ctc", | ||
| 168 | + cmd=""" | ||
| 169 | + pushd $model_name | ||
| 170 | + | ||
| 171 | + rm -rfv test_wavs | ||
| 172 | + | ||
| 173 | + ls -lh | ||
| 174 | + | ||
| 175 | + popd | ||
| 176 | + """, | ||
| 177 | + ), | ||
| 163 | ] | 178 | ] |
| 164 | return models | 179 | return models |
| 165 | 180 | ||
| @@ -304,6 +319,48 @@ def get_1st_models(): | @@ -304,6 +319,48 @@ def get_1st_models(): | ||
| 304 | popd | 319 | popd |
| 305 | """, | 320 | """, |
| 306 | ), | 321 | ), |
| 322 | + Model( | ||
| 323 | + model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01", | ||
| 324 | + idx=15, | ||
| 325 | + lang="zh", | ||
| 326 | + short_name="int8_small_zipformer", | ||
| 327 | + rule_fsts="itn_zh_number.fst", | ||
| 328 | + cmd=""" | ||
| 329 | + if [ ! -f itn_zh_number.fst ]; then | ||
| 330 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | ||
| 331 | + fi | ||
| 332 | + pushd $model_name | ||
| 333 | + rm -f bpe.model | ||
| 334 | + | ||
| 335 | + rm -rf test_wavs | ||
| 336 | + rm README.md | ||
| 337 | + | ||
| 338 | + ls -lh | ||
| 339 | + | ||
| 340 | + popd | ||
| 341 | + """, | ||
| 342 | + ), | ||
| 343 | + Model( | ||
| 344 | + model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01", | ||
| 345 | + idx=16, | ||
| 346 | + lang="zh", | ||
| 347 | + short_name="small_zipformer", | ||
| 348 | + rule_fsts="itn_zh_number.fst", | ||
| 349 | + cmd=""" | ||
| 350 | + if [ ! -f itn_zh_number.fst ]; then | ||
| 351 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | ||
| 352 | + fi | ||
| 353 | + pushd $model_name | ||
| 354 | + rm -f bpe.model | ||
| 355 | + | ||
| 356 | + rm -rf test_wavs | ||
| 357 | + rm README.md | ||
| 358 | + | ||
| 359 | + ls -lh | ||
| 360 | + | ||
| 361 | + popd | ||
| 362 | + """, | ||
| 363 | + ), | ||
| 307 | ] | 364 | ] |
| 308 | 365 | ||
| 309 | return models | 366 | return models |
| @@ -313,19 +370,25 @@ def get_models(): | @@ -313,19 +370,25 @@ def get_models(): | ||
| 313 | first = get_1st_models() | 370 | first = get_1st_models() |
| 314 | second = get_2nd_models() | 371 | second = get_2nd_models() |
| 315 | 372 | ||
| 316 | - combinations = [ | ||
| 317 | - ( | ||
| 318 | - "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23", | ||
| 319 | - "sherpa-onnx-paraformer-zh-2023-09-14", | ||
| 320 | - ), | ||
| 321 | - ( | ||
| 322 | - "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23", | ||
| 323 | - "icefall-asr-zipformer-wenetspeech-20230615", | ||
| 324 | - ), | ||
| 325 | - ( | ||
| 326 | - "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23", | ||
| 327 | - "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", | ||
| 328 | - ), | 373 | + combinations = [] |
| 374 | + | ||
| 375 | + first_zh = [ | ||
| 376 | + "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23", | ||
| 377 | + "sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01", | ||
| 378 | + "sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01", | ||
| 379 | + ] | ||
| 380 | + | ||
| 381 | + second_zh = [ | ||
| 382 | + "sherpa-onnx-paraformer-zh-2023-09-14", | ||
| 383 | + "icefall-asr-zipformer-wenetspeech-20230615", | ||
| 384 | + "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", | ||
| 385 | + "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", | ||
| 386 | + ] | ||
| 387 | + for first_m in first_zh: | ||
| 388 | + for second_m in second_zh: | ||
| 389 | + combinations.append((first_m, second_m)) | ||
| 390 | + | ||
| 391 | + combinations += [ | ||
| 329 | ( | 392 | ( |
| 330 | "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17", | 393 | "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17", |
| 331 | "sherpa-onnx-whisper-tiny.en", | 394 | "sherpa-onnx-whisper-tiny.en", |
| @@ -263,6 +263,48 @@ def get_models(): | @@ -263,6 +263,48 @@ def get_models(): | ||
| 263 | popd | 263 | popd |
| 264 | """, | 264 | """, |
| 265 | ), | 265 | ), |
| 266 | + Model( | ||
| 267 | + model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01", | ||
| 268 | + idx=15, | ||
| 269 | + lang="zh", | ||
| 270 | + short_name="int8_small_zipformer", | ||
| 271 | + rule_fsts="itn_zh_number.fst", | ||
| 272 | + cmd=""" | ||
| 273 | + if [ ! -f itn_zh_number.fst ]; then | ||
| 274 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | ||
| 275 | + fi | ||
| 276 | + pushd $model_name | ||
| 277 | + rm -f bpe.model | ||
| 278 | + | ||
| 279 | + rm -rf test_wavs | ||
| 280 | + rm README.md | ||
| 281 | + | ||
| 282 | + ls -lh | ||
| 283 | + | ||
| 284 | + popd | ||
| 285 | + """, | ||
| 286 | + ), | ||
| 287 | + Model( | ||
| 288 | + model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01", | ||
| 289 | + idx=16, | ||
| 290 | + lang="zh", | ||
| 291 | + short_name="small_zipformer", | ||
| 292 | + rule_fsts="itn_zh_number.fst", | ||
| 293 | + cmd=""" | ||
| 294 | + if [ ! -f itn_zh_number.fst ]; then | ||
| 295 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | ||
| 296 | + fi | ||
| 297 | + pushd $model_name | ||
| 298 | + rm -f bpe.model | ||
| 299 | + | ||
| 300 | + rm -rf test_wavs | ||
| 301 | + rm README.md | ||
| 302 | + | ||
| 303 | + ls -lh | ||
| 304 | + | ||
| 305 | + popd | ||
| 306 | + """, | ||
| 307 | + ), | ||
| 266 | ] | 308 | ] |
| 267 | 309 | ||
| 268 | return models | 310 | return models |
| @@ -443,6 +443,22 @@ def get_models(): | @@ -443,6 +443,22 @@ def get_models(): | ||
| 443 | popd | 443 | popd |
| 444 | """, | 444 | """, |
| 445 | ), | 445 | ), |
| 446 | + Model( | ||
| 447 | + model_name="sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", | ||
| 448 | + idx=25, | ||
| 449 | + lang="multi_lang", | ||
| 450 | + lang2="multi_lang", | ||
| 451 | + short_name="multi_lang", | ||
| 452 | + cmd=""" | ||
| 453 | + pushd $model_name | ||
| 454 | + | ||
| 455 | + rm -rfv test_wavs | ||
| 456 | + | ||
| 457 | + ls -lh | ||
| 458 | + | ||
| 459 | + popd | ||
| 460 | + """, | ||
| 461 | + ), | ||
| 446 | ] | 462 | ] |
| 447 | return models | 463 | return models |
| 448 | 464 |
| @@ -30,6 +30,7 @@ java_files += OfflineFireRedAsrModelConfig.java | @@ -30,6 +30,7 @@ java_files += OfflineFireRedAsrModelConfig.java | ||
| 30 | java_files += OfflineMoonshineModelConfig.java | 30 | java_files += OfflineMoonshineModelConfig.java |
| 31 | java_files += OfflineNemoEncDecCtcModelConfig.java | 31 | java_files += OfflineNemoEncDecCtcModelConfig.java |
| 32 | java_files += OfflineSenseVoiceModelConfig.java | 32 | java_files += OfflineSenseVoiceModelConfig.java |
| 33 | +java_files += OfflineDolphinModelConfig.java | ||
| 33 | java_files += OfflineModelConfig.java | 34 | java_files += OfflineModelConfig.java |
| 34 | java_files += OfflineRecognizerConfig.java | 35 | java_files += OfflineRecognizerConfig.java |
| 35 | java_files += OfflineRecognizerResult.java | 36 | java_files += OfflineRecognizerResult.java |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +/** Immutable configuration for an offline Dolphin CTC model; it holds the model file path. */ | ||
| 6 | +public class OfflineDolphinModelConfig { | ||
| 7 | +  // The JNI code reads a String field named "model" from this object, so keep the | ||
| 8 | +  // field name and type as they are. | ||
| 9 | +  private final String model; | ||
| 10 | + | ||
| 11 | +  private OfflineDolphinModelConfig(Builder source) { | ||
| 12 | +    model = source.model; | ||
| 13 | +  } | ||
| 14 | + | ||
| 15 | +  /** Returns the model value given to the builder, or the empty string if none was set. */ | ||
| 16 | +  public String getModel() { | ||
| 17 | +    return this.model; | ||
| 18 | +  } | ||
| 19 | + | ||
| 20 | +  /** Creates a new builder whose model defaults to the empty string. */ | ||
| 21 | +  public static Builder builder() { | ||
| 22 | +    return new Builder(); | ||
| 23 | +  } | ||
| 24 | + | ||
| 25 | +  /** Fluent builder for {@link OfflineDolphinModelConfig}. */ | ||
| 26 | +  public static class Builder { | ||
| 27 | +    private String model = ""; | ||
| 28 | + | ||
| 29 | +    /** Sets the model value and returns this builder for chaining. */ | ||
| 30 | +    public Builder setModel(String model) { | ||
| 31 | +      this.model = model; | ||
| 32 | +      return this; | ||
| 33 | +    } | ||
| 34 | + | ||
| 35 | +    /** Builds a config from the current builder state. */ | ||
| 36 | +    public OfflineDolphinModelConfig build() { | ||
| 37 | +      return new OfflineDolphinModelConfig(this); | ||
| 38 | +    } | ||
| 39 | +  } | ||
| 40 | +} |
| @@ -10,6 +10,7 @@ public class OfflineModelConfig { | @@ -10,6 +10,7 @@ public class OfflineModelConfig { | ||
| 10 | private final OfflineMoonshineModelConfig moonshine; | 10 | private final OfflineMoonshineModelConfig moonshine; |
| 11 | private final OfflineNemoEncDecCtcModelConfig nemo; | 11 | private final OfflineNemoEncDecCtcModelConfig nemo; |
| 12 | private final OfflineSenseVoiceModelConfig senseVoice; | 12 | private final OfflineSenseVoiceModelConfig senseVoice; |
| 13 | + private final OfflineDolphinModelConfig dolphin; | ||
| 13 | private final String teleSpeech; | 14 | private final String teleSpeech; |
| 14 | private final String tokens; | 15 | private final String tokens; |
| 15 | private final int numThreads; | 16 | private final int numThreads; |
| @@ -28,6 +29,7 @@ public class OfflineModelConfig { | @@ -28,6 +29,7 @@ public class OfflineModelConfig { | ||
| 28 | this.moonshine = builder.moonshine; | 29 | this.moonshine = builder.moonshine; |
| 29 | this.nemo = builder.nemo; | 30 | this.nemo = builder.nemo; |
| 30 | this.senseVoice = builder.senseVoice; | 31 | this.senseVoice = builder.senseVoice; |
| 32 | + this.dolphin = builder.dolphin; | ||
| 31 | this.teleSpeech = builder.teleSpeech; | 33 | this.teleSpeech = builder.teleSpeech; |
| 32 | this.tokens = builder.tokens; | 34 | this.tokens = builder.tokens; |
| 33 | this.numThreads = builder.numThreads; | 35 | this.numThreads = builder.numThreads; |
| @@ -62,6 +64,10 @@ public class OfflineModelConfig { | @@ -62,6 +64,10 @@ public class OfflineModelConfig { | ||
| 62 | return senseVoice; | 64 | return senseVoice; |
| 63 | } | 65 | } |
| 64 | 66 | ||
| 67 | + public OfflineDolphinModelConfig getDolphin() { | ||
| 68 | + return dolphin; | ||
| 69 | + } | ||
| 70 | + | ||
| 65 | public String getTokens() { | 71 | public String getTokens() { |
| 66 | return tokens; | 72 | return tokens; |
| 67 | } | 73 | } |
| @@ -102,6 +108,7 @@ public class OfflineModelConfig { | @@ -102,6 +108,7 @@ public class OfflineModelConfig { | ||
| 102 | private OfflineMoonshineModelConfig moonshine = OfflineMoonshineModelConfig.builder().build(); | 108 | private OfflineMoonshineModelConfig moonshine = OfflineMoonshineModelConfig.builder().build(); |
| 103 | private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build(); | 109 | private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build(); |
| 104 | private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); | 110 | private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); |
| 111 | + private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build(); | ||
| 105 | private String teleSpeech = ""; | 112 | private String teleSpeech = ""; |
| 106 | private String tokens = ""; | 113 | private String tokens = ""; |
| 107 | private int numThreads = 1; | 114 | private int numThreads = 1; |
| @@ -120,6 +127,11 @@ public class OfflineModelConfig { | @@ -120,6 +127,11 @@ public class OfflineModelConfig { | ||
| 120 | return this; | 127 | return this; |
| 121 | } | 128 | } |
| 122 | 129 | ||
| 130 | + public Builder setDolphin(OfflineDolphinModelConfig dolphin) { | ||
| 131 | + this.dolphin = dolphin; | ||
| 132 | + return this; | ||
| 133 | + } | ||
| 134 | + | ||
| 123 | public Builder setParaformer(OfflineParaformerModelConfig paraformer) { | 135 | public Builder setParaformer(OfflineParaformerModelConfig paraformer) { |
| 124 | this.paraformer = paraformer; | 136 | this.paraformer = paraformer; |
| 125 | return this; | 137 | return this; |
| @@ -265,6 +265,19 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | @@ -265,6 +265,19 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | ||
| 265 | ans.model_config.nemo_ctc.model = p; | 265 | ans.model_config.nemo_ctc.model = p; |
| 266 | env->ReleaseStringUTFChars(s, p); | 266 | env->ReleaseStringUTFChars(s, p); |
| 267 | 267 | ||
| 268 | +  // dolphin: copy the model string out of the nested OfflineDolphinModelConfig object | ||
| 269 | +  fid = env->GetFieldID(model_config_cls, "dolphin", | ||
| 270 | +                        "Lcom/k2fsa/sherpa/onnx/OfflineDolphinModelConfig;"); | ||
| 271 | +  jobject dolphin_config = env->GetObjectField(model_config, fid); | ||
| 272 | +  jclass dolphin_config_cls = env->GetObjectClass(dolphin_config); | ||
| 273 | +  // A field ID is tied to the class it was looked up on, so query the dolphin class itself. | ||
| 274 | +  fid = env->GetFieldID(dolphin_config_cls, "model", "Ljava/lang/String;"); | ||
| 275 | + | ||
| 276 | +  s = (jstring)env->GetObjectField(dolphin_config, fid); | ||
| 277 | +  p = env->GetStringUTFChars(s, nullptr); | ||
| 278 | +  ans.model_config.dolphin.model = p; | ||
| 279 | +  env->ReleaseStringUTFChars(s, p); | ||
| 280 | + | ||
| 268 | fid = env->GetFieldID(model_config_cls, "teleSpeech", "Ljava/lang/String;"); | 281 | fid = env->GetFieldID(model_config_cls, "teleSpeech", "Ljava/lang/String;"); |
| 269 | s = (jstring)env->GetObjectField(model_config, fid); | 282 | s = (jstring)env->GetObjectField(model_config, fid); |
| 270 | p = env->GetStringUTFChars(s, nullptr); | 283 | p = env->GetStringUTFChars(s, nullptr); |
| @@ -25,6 +25,10 @@ data class OfflineNemoEncDecCtcModelConfig( | @@ -25,6 +25,10 @@ data class OfflineNemoEncDecCtcModelConfig( | ||
| 25 | var model: String = "", | 25 | var model: String = "", |
| 26 | ) | 26 | ) |
| 27 | 27 | ||
| 28 | +data class OfflineDolphinModelConfig( | ||
| 29 | + var model: String = "", | ||
| 30 | +) | ||
| 31 | + | ||
| 28 | data class OfflineWhisperModelConfig( | 32 | data class OfflineWhisperModelConfig( |
| 29 | var encoder: String = "", | 33 | var encoder: String = "", |
| 30 | var decoder: String = "", | 34 | var decoder: String = "", |
| @@ -59,6 +63,7 @@ data class OfflineModelConfig( | @@ -59,6 +63,7 @@ data class OfflineModelConfig( | ||
| 59 | var moonshine: OfflineMoonshineModelConfig = OfflineMoonshineModelConfig(), | 63 | var moonshine: OfflineMoonshineModelConfig = OfflineMoonshineModelConfig(), |
| 60 | var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(), | 64 | var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(), |
| 61 | var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), | 65 | var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), |
| 66 | + var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(), | ||
| 62 | var teleSpeech: String = "", | 67 | var teleSpeech: String = "", |
| 63 | var numThreads: Int = 1, | 68 | var numThreads: Int = 1, |
| 64 | var debug: Boolean = false, | 69 | var debug: Boolean = false, |
| @@ -481,6 +486,16 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | @@ -481,6 +486,16 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | ||
| 481 | tokens = "$modelDir/tokens.txt", | 486 | tokens = "$modelDir/tokens.txt", |
| 482 | ) | 487 | ) |
| 483 | } | 488 | } |
| 489 | + | ||
| 490 | + 25 -> { | ||
| 491 | + val modelDir = "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02" | ||
| 492 | + return OfflineModelConfig( | ||
| 493 | + dolphin = OfflineDolphinModelConfig( | ||
| 494 | + model = "$modelDir/model.int8.onnx", | ||
| 495 | + ), | ||
| 496 | + tokens = "$modelDir/tokens.txt", | ||
| 497 | + ) | ||
| 498 | + } | ||
| 484 | } | 499 | } |
| 485 | return null | 500 | return null |
| 486 | } | 501 | } |
| @@ -374,6 +374,26 @@ fun getModelConfig(type: Int): OnlineModelConfig? { | @@ -374,6 +374,26 @@ fun getModelConfig(type: Int): OnlineModelConfig? { | ||
| 374 | modelType = "zipformer", | 374 | modelType = "zipformer", |
| 375 | ) | 375 | ) |
| 376 | } | 376 | } |
| 377 | + | ||
| 378 | + 15 -> { | ||
| 379 | + val modelDir = "sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01" | ||
| 380 | + return OnlineModelConfig( | ||
| 381 | + zipformer2Ctc = OnlineZipformer2CtcModelConfig( | ||
| 382 | + model = "$modelDir/model.int8.onnx", | ||
| 383 | + ), | ||
| 384 | + tokens = "$modelDir/tokens.txt", | ||
| 385 | + ) | ||
| 386 | + } | ||
| 387 | + | ||
| 388 | + 16 -> { | ||
| 389 | + val modelDir = "sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01" | ||
| 390 | + return OnlineModelConfig( | ||
| 391 | + zipformer2Ctc = OnlineZipformer2CtcModelConfig( | ||
| 392 | + model = "$modelDir/model.onnx", | ||
| 393 | + ), | ||
| 394 | + tokens = "$modelDir/tokens.txt", | ||
| 395 | + ) | ||
| 396 | + } | ||
| 377 | } | 397 | } |
| 378 | return null | 398 | return null |
| 379 | } | 399 | } |
-
请 注册 或 登录 后发表评论