Committed by
GitHub
Add Java API for spoken language identification with whisper multilingual models (#817)
正在显示
12 个修改的文件
包含
405 行增加
和
10 行删除
| @@ -57,6 +57,7 @@ jobs: | @@ -57,6 +57,7 @@ jobs: | ||
| 57 | ./build-android-arm64-v8a.sh | 57 | ./build-android-arm64-v8a.sh |
| 58 | mkdir -p jniLibs/arm64-v8a/ | 58 | mkdir -p jniLibs/arm64-v8a/ |
| 59 | cp -v ./build-android-arm64-v8a/install/lib/*.so ./jniLibs/arm64-v8a/ | 59 | cp -v ./build-android-arm64-v8a/install/lib/*.so ./jniLibs/arm64-v8a/ |
| 60 | + rm -rf ./build-android-arm64-v8a/ | ||
| 60 | 61 | ||
| 61 | - name: build android armv7-eabi | 62 | - name: build android armv7-eabi |
| 62 | shell: bash | 63 | shell: bash |
| @@ -65,6 +66,7 @@ jobs: | @@ -65,6 +66,7 @@ jobs: | ||
| 65 | ./build-android-armv7-eabi.sh | 66 | ./build-android-armv7-eabi.sh |
| 66 | mkdir -p ./jniLibs/armeabi-v7a/ | 67 | mkdir -p ./jniLibs/armeabi-v7a/ |
| 67 | cp -v ./build-android-armv7-eabi/install/lib/*.so ./jniLibs/armeabi-v7a/ | 68 | cp -v ./build-android-armv7-eabi/install/lib/*.so ./jniLibs/armeabi-v7a/ |
| 69 | + rm -rf ./build-android-armv7-eabi | ||
| 68 | 70 | ||
| 69 | - name: build android x86_64 | 71 | - name: build android x86_64 |
| 70 | shell: bash | 72 | shell: bash |
| @@ -73,6 +75,7 @@ jobs: | @@ -73,6 +75,7 @@ jobs: | ||
| 73 | ./build-android-x86-64.sh | 75 | ./build-android-x86-64.sh |
| 74 | mkdir -p ./jniLibs/x86_64 | 76 | mkdir -p ./jniLibs/x86_64 |
| 75 | cp -v ./build-android-x86-64/install/lib/*.so ./jniLibs/x86_64 | 77 | cp -v ./build-android-x86-64/install/lib/*.so ./jniLibs/x86_64 |
| 78 | + rm -rf ./build-android-x86-64 | ||
| 76 | 79 | ||
| 77 | - name: build android x86 | 80 | - name: build android x86 |
| 78 | shell: bash | 81 | shell: bash |
| @@ -81,6 +84,7 @@ jobs: | @@ -81,6 +84,7 @@ jobs: | ||
| 81 | ./build-android-x86.sh | 84 | ./build-android-x86.sh |
| 82 | mkdir -p ./jniLibs/x86 | 85 | mkdir -p ./jniLibs/x86 |
| 83 | cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86 | 86 | cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86 |
| 87 | + rm -rf ./build-android-x86 | ||
| 84 | 88 | ||
| 85 | - name: Copy files | 89 | - name: Copy files |
| 86 | shell: bash | 90 | shell: bash |
| @@ -112,6 +116,8 @@ jobs: | @@ -112,6 +116,8 @@ jobs: | ||
| 112 | command: | | 116 | command: | |
| 113 | git config --global user.email "csukuangfj@gmail.com" | 117 | git config --global user.email "csukuangfj@gmail.com" |
| 114 | git config --global user.name "Fangjun Kuang" | 118 | git config --global user.name "Fangjun Kuang" |
| 119 | + du -h -d1 . | ||
| 120 | + ls -lh | ||
| 115 | 121 | ||
| 116 | rm -rf huggingface | 122 | rm -rf huggingface |
| 117 | GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface | 123 | GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface |
| @@ -44,6 +44,23 @@ jobs: | @@ -44,6 +44,23 @@ jobs: | ||
| 44 | echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" | 44 | echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" |
| 45 | ls -lh ${ANDROID_NDK_LATEST_HOME} | 45 | ls -lh ${ANDROID_NDK_LATEST_HOME} |
| 46 | 46 | ||
| 47 | + - name: Setup build tool version variable | ||
| 48 | + shell: bash | ||
| 49 | + run: | | ||
| 50 | + echo "---" | ||
| 51 | + ls -lh /usr/local/lib/android/ | ||
| 52 | + echo "---" | ||
| 53 | + | ||
| 54 | + ls -lh /usr/local/lib/android/sdk | ||
| 55 | + echo "---" | ||
| 56 | + | ||
| 57 | + ls -lh /usr/local/lib/android/sdk/build-tools | ||
| 58 | + echo "---" | ||
| 59 | + | ||
| 60 | + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) | ||
| 61 | + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV | ||
| 62 | + echo "Last build tool version is: $BUILD_TOOL_VERSION" | ||
| 63 | + | ||
| 47 | - name: build APK | 64 | - name: build APK |
| 48 | shell: bash | 65 | shell: bash |
| 49 | run: | | 66 | run: | |
| @@ -59,13 +76,77 @@ jobs: | @@ -59,13 +76,77 @@ jobs: | ||
| 59 | run: | | 76 | run: | |
| 60 | ls -lh ./apks/ | 77 | ls -lh ./apks/ |
| 61 | 78 | ||
| 62 | - - uses: actions/upload-artifact@v4 | 79 | + |
| 80 | + # https://github.com/marketplace/actions/sign-android-release | ||
| 81 | + - uses: r0adkll/sign-android-release@v1 | ||
| 82 | + name: Sign app APK | ||
| 63 | with: | 83 | with: |
| 64 | - path: ./apks/*.apk | 84 | + releaseDirectory: ./apks |
| 85 | + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} | ||
| 86 | + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} | ||
| 87 | + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} | ||
| 88 | + env: | ||
| 89 | + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} | ||
| 90 | + | ||
| 91 | + - name: Display APK after signing | ||
| 92 | + shell: bash | ||
| 93 | + run: | | ||
| 94 | + ls -lh ./apks/ | ||
| 95 | + du -h -d1 . | ||
| 96 | + | ||
| 97 | + - name: Rename APK after signing | ||
| 98 | + shell: bash | ||
| 99 | + run: | | ||
| 100 | + cd apks | ||
| 101 | + rm -fv signingKey.jks | ||
| 102 | + rm -fv *.apk.idsig | ||
| 103 | + rm -fv *-aligned.apk | ||
| 104 | + | ||
| 105 | + all_apks=$(ls -1 *-signed.apk) | ||
| 106 | + echo "----" | ||
| 107 | + echo $all_apks | ||
| 108 | + echo "----" | ||
| 109 | + for apk in ${all_apks[@]}; do | ||
| 110 | + n=$(echo $apk | sed -e s/-signed//) | ||
| 111 | + mv -v $apk $n | ||
| 112 | + done | ||
| 65 | 113 | ||
| 66 | - - name: Release APK | ||
| 67 | - uses: svenstaro/upload-release-action@v2 | 114 | + cd .. |
| 115 | + | ||
| 116 | + ls -lh ./apks/ | ||
| 117 | + du -h -d1 . | ||
| 118 | + | ||
| 119 | + - name: Display APK after rename | ||
| 120 | + shell: bash | ||
| 121 | + run: | | ||
| 122 | + ls -lh ./apks/ | ||
| 123 | + du -h -d1 . | ||
| 124 | + | ||
| 125 | + - name: Publish to huggingface | ||
| 126 | + env: | ||
| 127 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
| 128 | + uses: nick-fields/retry@v3 | ||
| 68 | with: | 129 | with: |
| 69 | - file_glob: true | ||
| 70 | - file: apks/*.apk | ||
| 71 | - overwrite: true | 130 | + max_attempts: 20 |
| 131 | + timeout_seconds: 200 | ||
| 132 | + shell: bash | ||
| 133 | + command: | | ||
| 134 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 135 | + git config --global user.name "Fangjun Kuang" | ||
| 136 | + | ||
| 137 | + rm -rf huggingface | ||
| 138 | + export GIT_LFS_SKIP_SMUDGE=1 | ||
| 139 | + | ||
| 140 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface | ||
| 141 | + cd huggingface | ||
| 142 | + git fetch | ||
| 143 | + git pull | ||
| 144 | + git merge -m "merge remote" --ff origin main | ||
| 145 | + | ||
| 146 | + mkdir -p kws | ||
| 147 | + cp -v ../apks/*.apk ./kws/ | ||
| 148 | + git status | ||
| 149 | + git lfs track "*.apk" | ||
| 150 | + git add . | ||
| 151 | + git commit -m "add more apks" | ||
| 152 | + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main |
| @@ -106,6 +106,14 @@ jobs: | @@ -106,6 +106,14 @@ jobs: | ||
| 106 | make -j4 | 106 | make -j4 |
| 107 | ls -lh lib | 107 | ls -lh lib |
| 108 | 108 | ||
| 109 | + - name: Run java test (Spoken language identification) | ||
| 110 | + shell: bash | ||
| 111 | + run: | | ||
| 112 | + cd ./java-api-examples | ||
| 113 | + ./run-spoken-language-identification-whisper.sh | ||
| 114 | + # Delete model files to save space | ||
| 115 | + rm -rf sherpa-onnx-whisper-* | ||
| 116 | + | ||
| 109 | - name: Run java test (Streaming ASR) | 117 | - name: Run java test (Streaming ASR) |
| 110 | shell: bash | 118 | shell: bash |
| 111 | run: | | 119 | run: | |
| @@ -200,7 +200,7 @@ class MainActivity : AppCompatActivity() { | @@ -200,7 +200,7 @@ class MainActivity : AppCompatActivity() { | ||
| 200 | val config = OnlineRecognizerConfig( | 200 | val config = OnlineRecognizerConfig( |
| 201 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), | 201 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), |
| 202 | modelConfig = getModelConfig(type = type)!!, | 202 | modelConfig = getModelConfig(type = type)!!, |
| 203 | - lmConfig = getOnlineLMConfig(type = type), | 203 | + // lmConfig = getOnlineLMConfig(type = type), |
| 204 | endpointConfig = getEndpointConfig(), | 204 | endpointConfig = getEndpointConfig(), |
| 205 | enableEndpoint = true, | 205 | enableEndpoint = true, |
| 206 | ) | 206 | ) |
| @@ -29,3 +29,9 @@ This directory contains examples for the JAVA API of sherpa-onnx. | @@ -29,3 +29,9 @@ This directory contains examples for the JAVA API of sherpa-onnx. | ||
| 29 | ./run-non-streaming-tts-coqui-de.sh | 29 | ./run-non-streaming-tts-coqui-de.sh |
| 30 | ./run-non-streaming-tts-vits-zh.sh | 30 | ./run-non-streaming-tts-vits-zh.sh |
| 31 | ``` | 31 | ``` |
| 32 | + | ||
| 33 | +## Spoken language identification | ||
| 34 | + | ||
| 35 | +```bash | ||
| 36 | +./run-spoken-language-identification-whisper.sh | ||
| 37 | +``` |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a multilingual whisper model for | ||
| 4 | +// spoken language identification. | ||
| 5 | +// | ||
| 6 | +// Note that it needs a multilingual whisper model. For instance, | ||
| 7 | +// tiny works, but tiny.en doesn't. | ||
| 8 | +import com.k2fsa.sherpa.onnx.*; | ||
| 9 | + | ||
| 10 | +public class SpokenLanguageIdentificationWhisper { | ||
| 11 | + public static void main(String[] args) { | ||
| 12 | + // Please download the model and test files from | ||
| 13 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 14 | + String encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"; | ||
| 15 | + String decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"; | ||
| 16 | + | ||
| 17 | + String[] testFiles = | ||
| 18 | + new String[] { | ||
| 19 | + "./spoken-language-identification-test-wavs/en-english.wav", | ||
| 20 | + "./spoken-language-identification-test-wavs/de-german.wav", | ||
| 21 | + "./spoken-language-identification-test-wavs/zh-chinese.wav", | ||
| 22 | + "./spoken-language-identification-test-wavs/es-spanish.wav", | ||
| 23 | + "./spoken-language-identification-test-wavs/fa-persian.wav", | ||
| 24 | + "./spoken-language-identification-test-wavs/ko-korean.wav", | ||
| 25 | + "./spoken-language-identification-test-wavs/ja-japanese.wav", | ||
| 26 | + "./spoken-language-identification-test-wavs/ru-russian.wav", | ||
| 27 | + "./spoken-language-identification-test-wavs/uk-ukrainian.wav", | ||
| 28 | + }; | ||
| 29 | + | ||
| 30 | + SpokenLanguageIdentificationWhisperConfig whisper = | ||
| 31 | + SpokenLanguageIdentificationWhisperConfig.builder() | ||
| 32 | + .setEncoder(encoder) | ||
| 33 | + .setDecoder(decoder) | ||
| 34 | + .build(); | ||
| 35 | + | ||
| 36 | + SpokenLanguageIdentificationConfig config = | ||
| 37 | + SpokenLanguageIdentificationConfig.builder() | ||
| 38 | + .setWhisper(whisper) | ||
| 39 | + .setNumThreads(1) | ||
| 40 | + .setDebug(true) | ||
| 41 | + .build(); | ||
| 42 | + | ||
| 43 | + SpokenLanguageIdentification slid = new SpokenLanguageIdentification(config); | ||
| 44 | + for (String filename : testFiles) { | ||
| 45 | + WaveReader reader = new WaveReader(filename); | ||
| 46 | + | ||
| 47 | + OfflineStream stream = slid.createStream(); | ||
| 48 | + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
| 49 | + | ||
| 50 | + String lang = slid.compute(stream); | ||
| 51 | + System.out.println("---"); | ||
| 52 | + System.out.printf("filename: %s\n", filename); | ||
| 53 | + System.out.printf("lang: %s\n", lang); | ||
| 54 | + | ||
| 55 | + stream.release(); | ||
| 56 | + } | ||
| 57 | + System.out.println("---"); | ||
| 58 | + | ||
| 59 | + slid.release(); | ||
| 60 | + } | ||
| 61 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 6 | + mkdir -p ../build | ||
| 7 | + pushd ../build | ||
| 8 | + cmake \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 11 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 12 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 14 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 15 | + .. | ||
| 16 | + | ||
| 17 | + make -j4 | ||
| 18 | + ls -lh lib | ||
| 19 | + popd | ||
| 20 | +fi | ||
| 21 | + | ||
| 22 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 23 | + pushd ../sherpa-onnx/java-api | ||
| 24 | + make | ||
| 25 | + popd | ||
| 26 | +fi | ||
| 27 | + | ||
| 28 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 29 | + cmake \ | ||
| 30 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 31 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 32 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 33 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 34 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 35 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 36 | + .. | ||
| 37 | + | ||
| 38 | + make -j4 | ||
| 39 | + ls -lh lib | ||
| 40 | +fi | ||
| 41 | + | ||
| 42 | +# Note: a multilingual whisper model is required; for example, tiny works, but tiny.en does not. | ||
| 43 | +# https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 | ||
| 44 | +if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then | ||
| 45 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 | ||
| 46 | + tar xvf sherpa-onnx-whisper-tiny.tar.bz2 | ||
| 47 | + rm sherpa-onnx-whisper-tiny.tar.bz2 | ||
| 48 | +fi | ||
| 49 | + | ||
| 50 | +if [ ! -f ./spoken-language-identification-test-wavs/en-english.wav ]; then | ||
| 51 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2 | ||
| 52 | + tar xvf spoken-language-identification-test-wavs.tar.bz2 | ||
| 53 | + rm spoken-language-identification-test-wavs.tar.bz2 | ||
| 54 | +fi | ||
| 55 | + | ||
| 56 | +java \ | ||
| 57 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 58 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 59 | + ./SpokenLanguageIdentificationWhisper.java |
| @@ -36,6 +36,10 @@ java_files += OfflineTtsConfig.java | @@ -36,6 +36,10 @@ java_files += OfflineTtsConfig.java | ||
| 36 | java_files += GeneratedAudio.java | 36 | java_files += GeneratedAudio.java |
| 37 | java_files += OfflineTts.java | 37 | java_files += OfflineTts.java |
| 38 | 38 | ||
| 39 | +java_files += SpokenLanguageIdentificationWhisperConfig.java | ||
| 40 | +java_files += SpokenLanguageIdentificationConfig.java | ||
| 41 | +java_files += SpokenLanguageIdentification.java | ||
| 42 | + | ||
| 39 | class_files := $(java_files:%.java=%.class) | 43 | class_files := $(java_files:%.java=%.class) |
| 40 | 44 | ||
| 41 | java_files := $(addprefix src/$(package_dir)/,$(java_files)) | 45 | java_files := $(addprefix src/$(package_dir)/,$(java_files)) |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +import java.util.HashMap; | ||
| 6 | +import java.util.Locale; | ||
| 7 | +import java.util.Map; | ||
| 8 | + | ||
| 9 | +public class SpokenLanguageIdentification { | ||
| 10 | + static { | ||
| 11 | + System.loadLibrary("sherpa-onnx-jni"); | ||
| 12 | + } | ||
| 13 | + | ||
| 14 | + private final Map<String, String> localeMap; | ||
| 15 | + private long ptr = 0; // handle of the native engine returned by newFromFile(); 0 once released | ||
| 16 | + | ||
| 17 | + public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config) { | ||
| 18 | + ptr = newFromFile(config); | ||
| 19 | + | ||
| 20 | + String[] languages = Locale.getISOLanguages(); | ||
| 21 | + localeMap = new HashMap<String, String>(languages.length); | ||
| 22 | + for (String language : languages) { | ||
| 23 | + Locale locale = new Locale(language); | ||
| 24 | + localeMap.put(language, locale.getDisplayName()); | ||
| 25 | + } | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + public String compute(OfflineStream stream) { | ||
| 29 | + String lang = compute(ptr, stream.getPtr()); | ||
| 30 | + return localeMap.getOrDefault(lang, lang); | ||
| 31 | + } | ||
| 32 | + | ||
| 33 | + public OfflineStream createStream() { | ||
| 34 | + long p = createStream(ptr); | ||
| 35 | + return new OfflineStream(p); | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + @Override | ||
| 39 | + protected void finalize() throws Throwable { | ||
| 40 | + release(); | ||
| 41 | + } | ||
| 42 | + | ||
| 43 | + // Call this explicitly once the object is no longer needed instead of relying on finalize() | ||
| 44 | + public void release() { | ||
| 45 | + if (this.ptr == 0) { | ||
| 46 | + return; | ||
| 47 | + } | ||
| 48 | + delete(this.ptr); | ||
| 49 | + this.ptr = 0; | ||
| 50 | + } | ||
| 51 | + | ||
| 52 | + private native void delete(long ptr); | ||
| 53 | + | ||
| 54 | + private native long newFromFile(SpokenLanguageIdentificationConfig config); | ||
| 55 | + | ||
| 56 | + private native long createStream(long ptr); | ||
| 57 | + | ||
| 58 | + private native String compute(long ptr, long streamPtr); | ||
| 59 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class SpokenLanguageIdentificationConfig { | ||
| 6 | + private final SpokenLanguageIdentificationWhisperConfig whisper; | ||
| 7 | + private final int numThreads; | ||
| 8 | + private final boolean debug; | ||
| 9 | + private final String provider; | ||
| 10 | + | ||
| 11 | + private SpokenLanguageIdentificationConfig(Builder builder) { | ||
| 12 | + this.whisper = builder.whisper; | ||
| 13 | + this.numThreads = builder.numThreads; | ||
| 14 | + this.debug = builder.debug; | ||
| 15 | + this.provider = builder.provider; | ||
| 16 | + } | ||
| 17 | + | ||
| 18 | + public static Builder builder() { | ||
| 19 | + return new Builder(); | ||
| 20 | + } | ||
| 21 | + | ||
| 22 | + public SpokenLanguageIdentificationWhisperConfig getWhisper() { | ||
| 23 | + return whisper; | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + public static class Builder { | ||
| 27 | + private SpokenLanguageIdentificationWhisperConfig whisper = SpokenLanguageIdentificationWhisperConfig.builder().build(); | ||
| 28 | + private int numThreads = 1; | ||
| 29 | + private boolean debug = true; | ||
| 30 | + private String provider = "cpu"; | ||
| 31 | + | ||
| 32 | + public SpokenLanguageIdentificationConfig build() { | ||
| 33 | + return new SpokenLanguageIdentificationConfig(this); | ||
| 34 | + } | ||
| 35 | + | ||
| 36 | + public Builder setWhisper(SpokenLanguageIdentificationWhisperConfig whisper) { | ||
| 37 | + this.whisper = whisper; | ||
| 38 | + return this; | ||
| 39 | + } | ||
| 40 | + | ||
| 41 | + public Builder setNumThreads(int numThreads) { | ||
| 42 | + this.numThreads = numThreads; | ||
| 43 | + return this; | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + public Builder setDebug(boolean debug) { | ||
| 47 | + this.debug = debug; | ||
| 48 | + return this; | ||
| 49 | + } | ||
| 50 | + | ||
| 51 | + public Builder setProvider(String provider) { | ||
| 52 | + this.provider = provider; | ||
| 53 | + return this; | ||
| 54 | + } | ||
| 55 | + } | ||
| 56 | +} |
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentificationWhisperConfig.java
0 → 100644
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class SpokenLanguageIdentificationWhisperConfig { | ||
| 6 | + private final String encoder; | ||
| 7 | + private final String decoder; | ||
| 8 | + private final int tailPaddings; | ||
| 9 | + | ||
| 10 | + private SpokenLanguageIdentificationWhisperConfig(Builder builder) { | ||
| 11 | + this.encoder = builder.encoder; | ||
| 12 | + this.decoder = builder.decoder; | ||
| 13 | + this.tailPaddings = builder.tailPaddings; | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + public static Builder builder() { | ||
| 17 | + return new Builder(); | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + public String getEncoder() { | ||
| 21 | + return encoder; | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + public String getDecoder() { | ||
| 25 | + return decoder; | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + public int getTailPaddings() { | ||
| 29 | + return tailPaddings; | ||
| 30 | + } | ||
| 31 | + | ||
| 32 | + public static class Builder { | ||
| 33 | + private String encoder = ""; | ||
| 34 | + private String decoder = ""; | ||
| 35 | + private int tailPaddings = 1000; // number of frames to pad | ||
| 36 | + | ||
| 37 | + public SpokenLanguageIdentificationWhisperConfig build() { | ||
| 38 | + return new SpokenLanguageIdentificationWhisperConfig(this); | ||
| 39 | + } | ||
| 40 | + | ||
| 41 | + public Builder setEncoder(String encoder) { | ||
| 42 | + this.encoder = encoder; | ||
| 43 | + return this; | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + public Builder setDecoder(String decoder) { | ||
| 47 | + this.decoder = decoder; | ||
| 48 | + return this; | ||
| 49 | + } | ||
| 50 | + | ||
| 51 | + public Builder setTailPaddings(int tailPaddings) { | ||
| 52 | + this.tailPaddings = tailPaddings; | ||
| 53 | + return this; | ||
| 54 | + } | ||
| 55 | + } | ||
| 56 | +} |
-
请 注册 或 登录 后发表评论