Committed by
GitHub
Add Kotlin and Java API for Moonshine models (#1474)
正在显示
15 个修改的文件
包含
480 行增加
和
25 行删除
| @@ -23,8 +23,8 @@ jobs: | @@ -23,8 +23,8 @@ jobs: | ||
| 23 | fail-fast: false | 23 | fail-fast: false |
| 24 | matrix: | 24 | matrix: |
| 25 | os: [ubuntu-latest] | 25 | os: [ubuntu-latest] |
| 26 | - total: ["5"] | ||
| 27 | - index: ["0", "1", "2", "3", "4"] | 26 | + total: ["10"] |
| 27 | + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] | ||
| 28 | 28 | ||
| 29 | steps: | 29 | steps: |
| 30 | - uses: actions/checkout@v4 | 30 | - uses: actions/checkout@v4 |
| @@ -165,6 +165,7 @@ jobs: | @@ -165,6 +165,7 @@ jobs: | ||
| 165 | 165 | ||
| 166 | git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface | 166 | git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface |
| 167 | cd huggingface | 167 | cd huggingface |
| 168 | + du -h -d1 . | ||
| 168 | git fetch | 169 | git fetch |
| 169 | git pull | 170 | git pull |
| 170 | git merge -m "merge remote" --ff origin main | 171 | git merge -m "merge remote" --ff origin main |
| @@ -107,6 +107,31 @@ jobs: | @@ -107,6 +107,31 @@ jobs: | ||
| 107 | make -j4 | 107 | make -j4 |
| 108 | ls -lh lib | 108 | ls -lh lib |
| 109 | 109 | ||
| 110 | + - name: Run java test (Non-Streaming ASR) | ||
| 111 | + shell: bash | ||
| 112 | + run: | | ||
| 113 | + cd ./java-api-examples | ||
| 114 | + | ||
| 115 | + ./run-non-streaming-decode-file-moonshine.sh | ||
| 116 | + rm -rf sherpa-onnx-moonshine-* | ||
| 117 | + | ||
| 118 | + ./run-non-streaming-decode-file-sense-voice.sh | ||
| 119 | + rm -rf sherpa-onnx-sense-voice-* | ||
| 120 | + | ||
| 121 | + ./run-inverse-text-normalization-paraformer.sh | ||
| 122 | + | ||
| 123 | + ./run-non-streaming-decode-file-paraformer.sh | ||
| 124 | + rm -rf sherpa-onnx-paraformer-zh-* | ||
| 125 | + | ||
| 126 | + ./run-non-streaming-decode-file-transducer.sh | ||
| 127 | + rm -rf sherpa-onnx-zipformer-* | ||
| 128 | + | ||
| 129 | + ./run-non-streaming-decode-file-whisper.sh | ||
| 130 | + rm -rf sherpa-onnx-whisper-* | ||
| 131 | + | ||
| 132 | + ./run-non-streaming-decode-file-nemo.sh | ||
| 133 | + rm -rf sherpa-onnx-nemo-* | ||
| 134 | + | ||
| 110 | - name: Run java test (speaker diarization) | 135 | - name: Run java test (speaker diarization) |
| 111 | shell: bash | 136 | shell: bash |
| 112 | run: | | 137 | run: | |
| @@ -206,28 +231,6 @@ jobs: | @@ -206,28 +231,6 @@ jobs: | ||
| 206 | ./run-streaming-decode-file-transducer.sh | 231 | ./run-streaming-decode-file-transducer.sh |
| 207 | rm -rf sherpa-onnx-streaming-* | 232 | rm -rf sherpa-onnx-streaming-* |
| 208 | 233 | ||
| 209 | - - name: Run java test (Non-Streaming ASR) | ||
| 210 | - shell: bash | ||
| 211 | - run: | | ||
| 212 | - cd ./java-api-examples | ||
| 213 | - | ||
| 214 | - ./run-non-streaming-decode-file-sense-voice.sh | ||
| 215 | - rm -rf sherpa-onnx-sense-voice-* | ||
| 216 | - | ||
| 217 | - ./run-inverse-text-normalization-paraformer.sh | ||
| 218 | - | ||
| 219 | - ./run-non-streaming-decode-file-paraformer.sh | ||
| 220 | - rm -rf sherpa-onnx-paraformer-zh-* | ||
| 221 | - | ||
| 222 | - ./run-non-streaming-decode-file-transducer.sh | ||
| 223 | - rm -rf sherpa-onnx-zipformer-* | ||
| 224 | - | ||
| 225 | - ./run-non-streaming-decode-file-whisper.sh | ||
| 226 | - rm -rf sherpa-onnx-whisper-* | ||
| 227 | - | ||
| 228 | - ./run-non-streaming-decode-file-nemo.sh | ||
| 229 | - rm -rf sherpa-onnx-nemo-* | ||
| 230 | - | ||
| 231 | - name: Run java test (Non-Streaming TTS) | 234 | - name: Run java test (Non-Streaming TTS) |
| 232 | shell: bash | 235 | shell: bash |
| 233 | run: | | 236 | run: | |
| @@ -121,3 +121,5 @@ sherpa-onnx-online-punct-en-2024-08-06 | @@ -121,3 +121,5 @@ sherpa-onnx-online-punct-en-2024-08-06 | ||
| 121 | *.mp4 | 121 | *.mp4 |
| 122 | *.mp3 | 122 | *.mp3 |
| 123 | sherpa-onnx-pyannote-segmentation-3-0 | 123 | sherpa-onnx-pyannote-segmentation-3-0 |
| 124 | +sherpa-onnx-moonshine-tiny-en-int8 | ||
| 125 | +sherpa-onnx-moonshine-base-en-int8 |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use an offline Moonshine, | ||
| 4 | +// i.e., non-streaming Moonshine model, | ||
| 5 | +// to decode files. | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | + | ||
| 8 | +public class NonStreamingDecodeFileMoonshine { | ||
| 9 | + public static void main(String[] args) { | ||
| 10 | + // please refer to | ||
| 11 | + // https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html | ||
| 12 | + // to download model files | ||
| 13 | + | ||
| 14 | + String preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx"; | ||
| 15 | + String encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx"; | ||
| 16 | + String uncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx"; | ||
| 17 | + String cachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx"; | ||
| 18 | + | ||
| 19 | + String tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt"; | ||
| 20 | + | ||
| 21 | + String waveFilename = "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav"; | ||
| 22 | + | ||
| 23 | + WaveReader reader = new WaveReader(waveFilename); | ||
| 24 | + | ||
| 25 | + OfflineMoonshineModelConfig moonshine = | ||
| 26 | + OfflineMoonshineModelConfig.builder() | ||
| 27 | + .setPreprocessor(preprocessor) | ||
| 28 | + .setEncoder(encoder) | ||
| 29 | + .setUncachedDecoder(uncachedDecoder) | ||
| 30 | + .setCachedDecoder(cachedDecoder) | ||
| 31 | + .build(); | ||
| 32 | + | ||
| 33 | + OfflineModelConfig modelConfig = | ||
| 34 | + OfflineModelConfig.builder() | ||
| 35 | + .setMoonshine(moonshine) | ||
| 36 | + .setTokens(tokens) | ||
| 37 | + .setNumThreads(1) | ||
| 38 | + .setDebug(true) | ||
| 39 | + .build(); | ||
| 40 | + | ||
| 41 | + OfflineRecognizerConfig config = | ||
| 42 | + OfflineRecognizerConfig.builder() | ||
| 43 | + .setOfflineModelConfig(modelConfig) | ||
| 44 | + .setDecodingMethod("greedy_search") | ||
| 45 | + .build(); | ||
| 46 | + | ||
| 47 | + OfflineRecognizer recognizer = new OfflineRecognizer(config); | ||
| 48 | + OfflineStream stream = recognizer.createStream(); | ||
| 49 | + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
| 50 | + | ||
| 51 | + recognizer.decode(stream); | ||
| 52 | + | ||
| 53 | + String text = recognizer.getResult(stream).getText(); | ||
| 54 | + | ||
| 55 | + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); | ||
| 56 | + | ||
| 57 | + stream.release(); | ||
| 58 | + recognizer.release(); | ||
| 59 | + } | ||
| 60 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a silero_vad model with a non-streaming | ||
| 4 | +// Moonshine tiny for speech recognition. | ||
| 5 | + | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | +import javax.sound.sampled.*; | ||
| 8 | + | ||
| 9 | +public class VadFromMicNonStreamingMoonshine { | ||
| 10 | + private static final int sampleRate = 16000; | ||
| 11 | + private static final int windowSize = 512; | ||
| 12 | + | ||
| 13 | + public static Vad createVad() { | ||
| 14 | + // please download ./silero_vad.onnx from | ||
| 15 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 16 | + String model = "./silero_vad.onnx"; | ||
| 17 | + SileroVadModelConfig sileroVad = | ||
| 18 | + SileroVadModelConfig.builder() | ||
| 19 | + .setModel(model) | ||
| 20 | + .setThreshold(0.5f) | ||
| 21 | + .setMinSilenceDuration(0.25f) | ||
| 22 | + .setMinSpeechDuration(0.5f) | ||
| 23 | + .setWindowSize(windowSize) | ||
| 24 | + .build(); | ||
| 25 | + | ||
| 26 | + VadModelConfig config = | ||
| 27 | + VadModelConfig.builder() | ||
| 28 | + .setSileroVadModelConfig(sileroVad) | ||
| 29 | + .setSampleRate(sampleRate) | ||
| 30 | + .setNumThreads(1) | ||
| 31 | + .setDebug(true) | ||
| 32 | + .setProvider("cpu") | ||
| 33 | + .build(); | ||
| 34 | + | ||
| 35 | + return new Vad(config); | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + public static OfflineRecognizer createOfflineRecognizer() { | ||
| 39 | + // please refer to | ||
| 40 | + // https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html | ||
| 41 | + // to download model files | ||
| 42 | + | ||
| 43 | + String preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx"; | ||
| 44 | + String encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx"; | ||
| 45 | + String uncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx"; | ||
| 46 | + String cachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx"; | ||
| 47 | + | ||
| 48 | + String tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt"; | ||
| 49 | + | ||
| 50 | + OfflineMoonshineModelConfig moonshine = | ||
| 51 | + OfflineMoonshineModelConfig.builder() | ||
| 52 | + .setPreprocessor(preprocessor) | ||
| 53 | + .setEncoder(encoder) | ||
| 54 | + .setUncachedDecoder(uncachedDecoder) | ||
| 55 | + .setCachedDecoder(cachedDecoder) | ||
| 56 | + .build(); | ||
| 57 | + | ||
| 58 | + OfflineModelConfig modelConfig = | ||
| 59 | + OfflineModelConfig.builder() | ||
| 60 | + .setMoonshine(moonshine) | ||
| 61 | + .setTokens(tokens) | ||
| 62 | + .setNumThreads(1) | ||
| 63 | + .setDebug(true) | ||
| 64 | + .build(); | ||
| 65 | + | ||
| 66 | + OfflineRecognizerConfig config = | ||
| 67 | + OfflineRecognizerConfig.builder() | ||
| 68 | + .setOfflineModelConfig(modelConfig) | ||
| 69 | + .setDecodingMethod("greedy_search") | ||
| 70 | + .build(); | ||
| 71 | + | ||
| 72 | + return new OfflineRecognizer(config); | ||
| 73 | + } | ||
| 74 | + | ||
| 75 | + public static void main(String[] args) { | ||
| 76 | + Vad vad = createVad(); | ||
| 77 | + OfflineRecognizer recognizer = createOfflineRecognizer(); | ||
| 78 | + | ||
| 79 | + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html | ||
| 80 | + // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian | ||
| 81 | + AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false); | ||
| 82 | + | ||
| 83 | + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int- | ||
| 84 | + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); | ||
| 85 | + TargetDataLine targetDataLine; | ||
| 86 | + try { | ||
| 87 | + targetDataLine = (TargetDataLine) AudioSystem.getLine(info); | ||
| 88 | + targetDataLine.open(format); | ||
| 89 | + targetDataLine.start(); | ||
| 90 | + } catch (LineUnavailableException e) { | ||
| 91 | + System.out.println("Failed to open target data line: " + e.getMessage()); | ||
| 92 | + vad.release(); | ||
| 93 | + recognizer.release(); | ||
| 94 | + return; | ||
| 95 | + } | ||
| 96 | + | ||
| 97 | + boolean printed = false; | ||
| 98 | + byte[] buffer = new byte[windowSize * 2]; | ||
| 99 | + float[] samples = new float[windowSize]; | ||
| 100 | + | ||
| 101 | + System.out.println("Started. Please speak"); | ||
| 102 | + boolean running = true; | ||
| 103 | + while (targetDataLine.isOpen() && running) { | ||
| 104 | + int n = targetDataLine.read(buffer, 0, buffer.length); | ||
| 105 | + if (n <= 0) { | ||
| 106 | + System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length); | ||
| 107 | + continue; | ||
| 108 | + } | ||
| 109 | + for (int i = 0; i != windowSize; ++i) { | ||
| 110 | + short low = buffer[2 * i]; | ||
| 111 | + short high = buffer[2 * i + 1]; | ||
| 112 | + int s = (high << 8) + low; | ||
| 113 | + samples[i] = (float) s / 32768; | ||
| 114 | + } | ||
| 115 | + | ||
| 116 | + vad.acceptWaveform(samples); | ||
| 117 | + if (vad.isSpeechDetected() && !printed) { | ||
| 118 | + System.out.println("Detected speech"); | ||
| 119 | + printed = true; | ||
| 120 | + } | ||
| 121 | + | ||
| 122 | + if (!vad.isSpeechDetected()) { | ||
| 123 | + printed = false; | ||
| 124 | + } | ||
| 125 | + | ||
| 126 | + while (!vad.empty()) { | ||
| 127 | + SpeechSegment segment = vad.front(); | ||
| 128 | + float startTime = segment.getStart() / (float) sampleRate; | ||
| 129 | + float duration = segment.getSamples().length / (float) sampleRate; | ||
| 130 | + | ||
| 131 | + OfflineStream stream = recognizer.createStream(); | ||
| 132 | + stream.acceptWaveform(segment.getSamples(), sampleRate); | ||
| 133 | + recognizer.decode(stream); | ||
| 134 | + String text = recognizer.getResult(stream).getText(); | ||
| 135 | + stream.release(); | ||
| 136 | + | ||
| 137 | + if (!text.isEmpty()) { | ||
| 138 | + System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text); | ||
| 139 | + } | ||
| 140 | + | ||
| 141 | + if (text.contains("exit the program")) { | ||
| 142 | + running = false; | ||
| 143 | + } | ||
| 144 | + | ||
| 145 | + vad.pop(); | ||
| 146 | + } | ||
| 147 | + } | ||
| 148 | + | ||
| 149 | + vad.release(); | ||
| 150 | + recognizer.release(); | ||
| 151 | + } | ||
| 152 | +} |
#!/usr/bin/env bash
#
# Runs NonStreamingDecodeFileMoonshine.java.
#
# Builds the JNI library and the Java API jar when they are missing and
# downloads the Moonshine tiny int8 model on first use. All paths are relative
# to the current working directory.

set -ex

# Build the JNI shared library only if neither the .dylib nor the .so
# variant is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it does not exist yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the model; tokens.txt marks an unpacked model directory.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces is not word-split.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileMoonshine.java
#!/usr/bin/env bash
#
# Runs the microphone + silero_vad + Moonshine example.
#
# Builds the JNI library and the Java API jar when they are missing and
# downloads silero_vad.onnx and the Moonshine tiny int8 model on first use.
# All paths are relative to the current working directory.

set -ex

# Build the JNI shared library only if neither the .dylib nor the .so
# variant is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it does not exist yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Voice activity detection model.
if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# Download and unpack the ASR model; tokens.txt marks an unpacked model directory.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces is not word-split.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMicWithNonStreamingMoonshine.java
| @@ -168,6 +168,12 @@ function testSpokenLanguageIdentification() { | @@ -168,6 +168,12 @@ function testSpokenLanguageIdentification() { | ||
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | function testOfflineAsr() { | 170 | function testOfflineAsr() { |
| 171 | + if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then | ||
| 172 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 | ||
| 173 | + tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 | ||
| 174 | + rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 | ||
| 175 | + fi | ||
| 176 | + | ||
| 171 | if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then | 177 | if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then |
| 172 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | 178 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 |
| 173 | tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | 179 | tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 |
| 1 | package com.k2fsa.sherpa.onnx | 1 | package com.k2fsa.sherpa.onnx |
| 2 | 2 | ||
| 3 | fun main() { | 3 | fun main() { |
| 4 | - val types = arrayOf(0, 2, 5, 6, 15) | 4 | + val types = arrayOf(0, 2, 5, 6, 15, 21) |
| 5 | for (type in types) { | 5 | for (type in types) { |
| 6 | test(type) | 6 | test(type) |
| 7 | } | 7 | } |
| @@ -16,6 +16,7 @@ fun test(type: Int) { | @@ -16,6 +16,7 @@ fun test(type: Int) { | ||
| 16 | 5 -> "./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/1.wav" | 16 | 5 -> "./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/1.wav" |
| 17 | 6 -> "./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/8k.wav" | 17 | 6 -> "./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/8k.wav" |
| 18 | 15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav" | 18 | 15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav" |
| 19 | + 21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav" | ||
| 19 | else -> null | 20 | else -> null |
| 20 | } | 21 | } |
| 21 | 22 |
| @@ -372,6 +372,21 @@ def get_models(): | @@ -372,6 +372,21 @@ def get_models(): | ||
| 372 | popd | 372 | popd |
| 373 | """, | 373 | """, |
| 374 | ), | 374 | ), |
| 375 | + Model( | ||
| 376 | + model_name="sherpa-onnx-moonshine-tiny-en-int8", | ||
| 377 | + idx=21, | ||
| 378 | + lang="en", | ||
| 379 | + short_name="moonshine_tiny_int8", | ||
| 380 | + cmd=""" | ||
| 381 | + pushd $model_name | ||
| 382 | + | ||
| 383 | + rm -rfv test_wavs | ||
| 384 | + | ||
| 385 | + ls -lh | ||
| 386 | + | ||
| 387 | + popd | ||
| 388 | + """, | ||
| 389 | + ), | ||
| 375 | ] | 390 | ] |
| 376 | return models | 391 | return models |
| 377 | 392 |
| @@ -26,6 +26,7 @@ java_files += OnlineRecognizer.java | @@ -26,6 +26,7 @@ java_files += OnlineRecognizer.java | ||
| 26 | java_files += OfflineTransducerModelConfig.java | 26 | java_files += OfflineTransducerModelConfig.java |
| 27 | java_files += OfflineParaformerModelConfig.java | 27 | java_files += OfflineParaformerModelConfig.java |
| 28 | java_files += OfflineWhisperModelConfig.java | 28 | java_files += OfflineWhisperModelConfig.java |
| 29 | +java_files += OfflineMoonshineModelConfig.java | ||
| 29 | java_files += OfflineNemoEncDecCtcModelConfig.java | 30 | java_files += OfflineNemoEncDecCtcModelConfig.java |
| 30 | java_files += OfflineSenseVoiceModelConfig.java | 31 | java_files += OfflineSenseVoiceModelConfig.java |
| 31 | java_files += OfflineModelConfig.java | 32 | java_files += OfflineModelConfig.java |
| @@ -6,6 +6,7 @@ public class OfflineModelConfig { | @@ -6,6 +6,7 @@ public class OfflineModelConfig { | ||
| 6 | private final OfflineTransducerModelConfig transducer; | 6 | private final OfflineTransducerModelConfig transducer; |
| 7 | private final OfflineParaformerModelConfig paraformer; | 7 | private final OfflineParaformerModelConfig paraformer; |
| 8 | private final OfflineWhisperModelConfig whisper; | 8 | private final OfflineWhisperModelConfig whisper; |
| 9 | + private final OfflineMoonshineModelConfig moonshine; | ||
| 9 | private final OfflineNemoEncDecCtcModelConfig nemo; | 10 | private final OfflineNemoEncDecCtcModelConfig nemo; |
| 10 | private final OfflineSenseVoiceModelConfig senseVoice; | 11 | private final OfflineSenseVoiceModelConfig senseVoice; |
| 11 | private final String teleSpeech; | 12 | private final String teleSpeech; |
| @@ -22,6 +23,7 @@ public class OfflineModelConfig { | @@ -22,6 +23,7 @@ public class OfflineModelConfig { | ||
| 22 | this.transducer = builder.transducer; | 23 | this.transducer = builder.transducer; |
| 23 | this.paraformer = builder.paraformer; | 24 | this.paraformer = builder.paraformer; |
| 24 | this.whisper = builder.whisper; | 25 | this.whisper = builder.whisper; |
| 26 | + this.moonshine = builder.moonshine; | ||
| 25 | this.nemo = builder.nemo; | 27 | this.nemo = builder.nemo; |
| 26 | this.senseVoice = builder.senseVoice; | 28 | this.senseVoice = builder.senseVoice; |
| 27 | this.teleSpeech = builder.teleSpeech; | 29 | this.teleSpeech = builder.teleSpeech; |
| @@ -50,6 +52,10 @@ public class OfflineModelConfig { | @@ -50,6 +52,10 @@ public class OfflineModelConfig { | ||
| 50 | return whisper; | 52 | return whisper; |
| 51 | } | 53 | } |
| 52 | 54 | ||
| 55 | + public OfflineMoonshineModelConfig getMoonshine() { | ||
| 56 | + return moonshine; | ||
| 57 | + } | ||
| 58 | + | ||
| 53 | public OfflineSenseVoiceModelConfig getSenseVoice() { | 59 | public OfflineSenseVoiceModelConfig getSenseVoice() { |
| 54 | return senseVoice; | 60 | return senseVoice; |
| 55 | } | 61 | } |
| @@ -90,6 +96,7 @@ public class OfflineModelConfig { | @@ -90,6 +96,7 @@ public class OfflineModelConfig { | ||
| 90 | private OfflineParaformerModelConfig paraformer = OfflineParaformerModelConfig.builder().build(); | 96 | private OfflineParaformerModelConfig paraformer = OfflineParaformerModelConfig.builder().build(); |
| 91 | private OfflineTransducerModelConfig transducer = OfflineTransducerModelConfig.builder().build(); | 97 | private OfflineTransducerModelConfig transducer = OfflineTransducerModelConfig.builder().build(); |
| 92 | private OfflineWhisperModelConfig whisper = OfflineWhisperModelConfig.builder().build(); | 98 | private OfflineWhisperModelConfig whisper = OfflineWhisperModelConfig.builder().build(); |
| 99 | + private OfflineMoonshineModelConfig moonshine = OfflineMoonshineModelConfig.builder().build(); | ||
| 93 | private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build(); | 100 | private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build(); |
| 94 | private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); | 101 | private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); |
| 95 | private String teleSpeech = ""; | 102 | private String teleSpeech = ""; |
| @@ -135,6 +142,11 @@ public class OfflineModelConfig { | @@ -135,6 +142,11 @@ public class OfflineModelConfig { | ||
| 135 | return this; | 142 | return this; |
| 136 | } | 143 | } |
| 137 | 144 | ||
| 145 | + public Builder setMoonshine(OfflineMoonshineModelConfig moonshine) { | ||
| 146 | + this.moonshine = moonshine; | ||
| 147 | + return this; | ||
| 148 | + } | ||
| 149 | + | ||
| 138 | public Builder setTokens(String tokens) { | 150 | public Builder setTokens(String tokens) { |
| 139 | this.tokens = tokens; | 151 | this.tokens = tokens; |
| 140 | return this; | 152 | return this; |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
/**
 * Immutable set of model file paths for an offline (non-streaming) Moonshine
 * model. Every path defaults to the empty string.
 *
 * <p>The native layer reads the private fields of this class by name, so the
 * field names and their {@code String} type must not be changed.
 */
public class OfflineMoonshineModelConfig {
  private final String preprocessor;
  private final String encoder;
  private final String uncachedDecoder;
  private final String cachedDecoder;

  private OfflineMoonshineModelConfig(Builder builder) {
    this.preprocessor = builder.preprocessor;
    this.encoder = builder.encoder;
    this.uncachedDecoder = builder.uncachedDecoder;
    this.cachedDecoder = builder.cachedDecoder;
  }

  /**
   * Starts a new configuration with every path set to the empty string.
   *
   * @return a fresh builder
   */
  public static Builder builder() {
    return new Builder();
  }

  /** @return path of the preprocessor model; never {@code null} */
  public String getPreprocessor() {
    return preprocessor;
  }

  /** @return path of the encoder model; never {@code null} */
  public String getEncoder() {
    return encoder;
  }

  /** @return path of the uncached decoder model; never {@code null} */
  public String getUncachedDecoder() {
    return uncachedDecoder;
  }

  /** @return path of the cached decoder model; never {@code null} */
  public String getCachedDecoder() {
    return cachedDecoder;
  }

  /**
   * Fluent builder for {@link OfflineMoonshineModelConfig}.
   *
   * <p>Setters reject {@code null}: the native layer converts every field to a
   * C string, so a {@code null} must be caught here rather than there.
   */
  public static class Builder {
    private String preprocessor = "";
    private String encoder = "";
    private String uncachedDecoder = "";
    private String cachedDecoder = "";

    /** @return a configuration holding the paths set so far */
    public OfflineMoonshineModelConfig build() {
      return new OfflineMoonshineModelConfig(this);
    }

    /**
     * @param preprocessor path of the preprocessor model
     * @return this builder
     * @throws NullPointerException if {@code preprocessor} is {@code null}
     */
    public Builder setPreprocessor(String preprocessor) {
      this.preprocessor = java.util.Objects.requireNonNull(preprocessor, "preprocessor");
      return this;
    }

    /**
     * @param encoder path of the encoder model
     * @return this builder
     * @throws NullPointerException if {@code encoder} is {@code null}
     */
    public Builder setEncoder(String encoder) {
      this.encoder = java.util.Objects.requireNonNull(encoder, "encoder");
      return this;
    }

    /**
     * @param uncachedDecoder path of the uncached decoder model
     * @return this builder
     * @throws NullPointerException if {@code uncachedDecoder} is {@code null}
     */
    public Builder setUncachedDecoder(String uncachedDecoder) {
      this.uncachedDecoder = java.util.Objects.requireNonNull(uncachedDecoder, "uncachedDecoder");
      return this;
    }

    /**
     * @param cachedDecoder path of the cached decoder model
     * @return this builder
     * @throws NullPointerException if {@code cachedDecoder} is {@code null}
     */
    public Builder setCachedDecoder(String cachedDecoder) {
      this.cachedDecoder = java.util.Objects.requireNonNull(cachedDecoder, "cachedDecoder");
      return this;
    }
  }
}
| @@ -174,6 +174,39 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | @@ -174,6 +174,39 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | ||
| 174 | ans.model_config.whisper.tail_paddings = | 174 | ans.model_config.whisper.tail_paddings = |
| 175 | env->GetIntField(whisper_config, fid); | 175 | env->GetIntField(whisper_config, fid); |
| 176 | 176 | ||
| 177 | + // moonshine | ||
| 178 | + fid = env->GetFieldID(model_config_cls, "moonshine", | ||
| 179 | + "Lcom/k2fsa/sherpa/onnx/OfflineMoonshineModelConfig;"); | ||
| 180 | + jobject moonshine_config = env->GetObjectField(model_config, fid); | ||
| 181 | + jclass moonshine_config_cls = env->GetObjectClass(moonshine_config); | ||
| 182 | + | ||
| 183 | + fid = env->GetFieldID(moonshine_config_cls, "preprocessor", | ||
| 184 | + "Ljava/lang/String;"); | ||
| 185 | + s = (jstring)env->GetObjectField(moonshine_config, fid); | ||
| 186 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 187 | + ans.model_config.moonshine.preprocessor = p; | ||
| 188 | + env->ReleaseStringUTFChars(s, p); | ||
| 189 | + | ||
| 190 | + fid = env->GetFieldID(moonshine_config_cls, "encoder", "Ljava/lang/String;"); | ||
| 191 | + s = (jstring)env->GetObjectField(moonshine_config, fid); | ||
| 192 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 193 | + ans.model_config.moonshine.encoder = p; | ||
| 194 | + env->ReleaseStringUTFChars(s, p); | ||
| 195 | + | ||
| 196 | + fid = env->GetFieldID(moonshine_config_cls, "uncachedDecoder", | ||
| 197 | + "Ljava/lang/String;"); | ||
| 198 | + s = (jstring)env->GetObjectField(moonshine_config, fid); | ||
| 199 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 200 | + ans.model_config.moonshine.uncached_decoder = p; | ||
| 201 | + env->ReleaseStringUTFChars(s, p); | ||
| 202 | + | ||
| 203 | + fid = env->GetFieldID(moonshine_config_cls, "cachedDecoder", | ||
| 204 | + "Ljava/lang/String;"); | ||
| 205 | + s = (jstring)env->GetObjectField(moonshine_config, fid); | ||
| 206 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 207 | + ans.model_config.moonshine.cached_decoder = p; | ||
| 208 | + env->ReleaseStringUTFChars(s, p); | ||
| 209 | + | ||
| 177 | // sense voice | 210 | // sense voice |
| 178 | fid = env->GetFieldID(model_config_cls, "senseVoice", | 211 | fid = env->GetFieldID(model_config_cls, "senseVoice", |
| 179 | "Lcom/k2fsa/sherpa/onnx/OfflineSenseVoiceModelConfig;"); | 212 | "Lcom/k2fsa/sherpa/onnx/OfflineSenseVoiceModelConfig;"); |
| @@ -33,6 +33,13 @@ data class OfflineWhisperModelConfig( | @@ -33,6 +33,13 @@ data class OfflineWhisperModelConfig( | ||
| 33 | var tailPaddings: Int = 1000, // Padding added at the end of the samples | 33 | var tailPaddings: Int = 1000, // Padding added at the end of the samples |
| 34 | ) | 34 | ) |
| 35 | 35 | ||
| 36 | +data class OfflineMoonshineModelConfig( | ||
| 37 | + var preprocessor: String = "", | ||
| 38 | + var encoder: String = "", | ||
| 39 | + var uncachedDecoder: String = "", | ||
| 40 | + var cachedDecoder: String = "", | ||
| 41 | +) | ||
| 42 | + | ||
| 36 | data class OfflineSenseVoiceModelConfig( | 43 | data class OfflineSenseVoiceModelConfig( |
| 37 | var model: String = "", | 44 | var model: String = "", |
| 38 | var language: String = "", | 45 | var language: String = "", |
| @@ -43,6 +50,7 @@ data class OfflineModelConfig( | @@ -43,6 +50,7 @@ data class OfflineModelConfig( | ||
| 43 | var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(), | 50 | var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(), |
| 44 | var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(), | 51 | var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(), |
| 45 | var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(), | 52 | var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(), |
| 53 | + var moonshine: OfflineMoonshineModelConfig = OfflineMoonshineModelConfig(), | ||
| 46 | var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(), | 54 | var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(), |
| 47 | var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), | 55 | var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), |
| 48 | var teleSpeech: String = "", | 56 | var teleSpeech: String = "", |
| @@ -417,6 +425,19 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | @@ -417,6 +425,19 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | ||
| 417 | modelType = "nemo_transducer", | 425 | modelType = "nemo_transducer", |
| 418 | ) | 426 | ) |
| 419 | } | 427 | } |
| 428 | + | ||
| 429 | + 21 -> { | ||
| 430 | + val modelDir = "sherpa-onnx-moonshine-tiny-en-int8" | ||
| 431 | + return OfflineModelConfig( | ||
| 432 | + moonshine = OfflineMoonshineModelConfig( | ||
| 433 | + preprocessor = "$modelDir/preprocess.onnx", | ||
| 434 | + encoder = "$modelDir/encode.int8.onnx", | ||
| 435 | + uncachedDecoder = "$modelDir/uncached_decode.int8.onnx", | ||
| 436 | + cachedDecoder = "$modelDir/cached_decode.int8.onnx", | ||
| 437 | + ), | ||
| 438 | + tokens = "$modelDir/tokens.txt", | ||
| 439 | + ) | ||
| 440 | + } | ||
| 420 | } | 441 | } |
| 421 | return null | 442 | return null |
| 422 | } | 443 | } |
-
请 注册 或 登录 后发表评论