Add JNI support for spoken language identification (#782)
Committed by GitHub

Showing 8 changed files with 189 additions and 42 deletions
@@ -161,10 +161,12 @@ jobs:
 ./run-vits-vctk.sh
 rm -rf vits-vctk

-echo "Test vits-zh-aishell3"
-git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
+echo "Test vits-icefall-zh-aishell3"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
+tar xvf vits-icefall-zh-aishell3.tar.bz2
+rm vits-icefall-zh-aishell3.tar.bz2
 ./run-vits-zh-aishell3.sh
-rm -rf vits-zh-aishell3
+rm -rf vits-icefall-zh-aishell3*

 echo "Test vits-piper-en_US-lessac-medium"
 git clone https://huggingface.co/csukuangfj/vits-piper-en_US-lessac-medium
android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt
@@ -6,7 +6,7 @@ import android.util.Log
 private val TAG = "sherpa-onnx"

 data class OfflineZipformerAudioTaggingModelConfig(
-    val model: String,
+    var model: String,
 )

 data class AudioTaggingModelConfig(

@@ -134,4 +134,4 @@ fun getAudioTaggingConfig(type: Int, numThreads: Int=1): AudioTaggingConfig? {
     }

     return null
-}
+}
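Aside from the trailing-newline fix, the only change to this file is that `model` becomes a `var`. A minimal sketch of what that permits is below; the file names are placeholders, not paths taken from this PR.

```kotlin
// Sketch only (placeholder file names): with `model` declared as `var`,
// a config object can be adjusted after construction instead of being rebuilt.
val taggerModel = OfflineZipformerAudioTaggingModelConfig(
    model = "zipformer-audio-tagging.int8.onnx",
)
// This assignment is only legal now that `model` is a `var`:
taggerModel.model = "zipformer-audio-tagging.onnx"
```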
@@ -7,6 +7,7 @@ fun callback(samples: FloatArray): Unit {
 }

 fun main() {
+    testSpokenLanguageIdentifcation()
     testAudioTagging()
     testSpeakerRecognition()
     testTts()

@@ -14,6 +15,41 @@ fun main() {
     testAsr("zipformer2-ctc")
 }

+fun testSpokenLanguageIdentifcation() {
+    val config = SpokenLanguageIdentificationConfig(
+        whisper = SpokenLanguageIdentificationWhisperConfig(
+            encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",
+            decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx",
+            tailPaddings = 33,
+        ),
+        numThreads=1,
+        debug=true,
+        provider="cpu",
+    )
+    val slid = SpokenLanguageIdentification(assetManager=null, config=config)
+
+    val testFiles = arrayOf(
+        "./spoken-language-identification-test-wavs/ar-arabic.wav",
+        "./spoken-language-identification-test-wavs/bg-bulgarian.wav",
+        "./spoken-language-identification-test-wavs/de-german.wav",
+    )
+
+    for (waveFilename in testFiles) {
+        val objArray = WaveReader.readWaveFromFile(
+            filename = waveFilename,
+        )
+        val samples: FloatArray = objArray[0] as FloatArray
+        val sampleRate: Int = objArray[1] as Int
+
+        val stream = slid.createStream()
+        stream.acceptWaveform(samples, sampleRate = sampleRate)
+        val lang = slid.compute(stream)
+        stream.release()
+        println(waveFilename)
+        println(lang)
+    }
+}
+
 fun testAudioTagging() {
     val config = AudioTaggingConfig(
         model=AudioTaggingModelConfig(
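Per the JNI signature later in this diff, `compute()` returns the detected language as a plain string; judging from the test file names, it is a short ISO-639-style code such as "ar" or "de". If a human-readable label is wanted, a small helper along these lines could be layered on top. This is a sketch only; `describeLanguage` is not part of this PR.

```kotlin
import java.util.Locale

// Sketch only: turn a language code returned by compute() (e.g. "de") into a
// readable name using the JDK's built-in Locale data.
fun describeLanguage(code: String): String {
    val name = Locale.forLanguageTag(code).getDisplayLanguage(Locale.ENGLISH)
    // Fall back to the raw code if the JDK has no display name for it.
    return if (name.isEmpty() || name == code) code else "$code ($name)"
}
```

In the test loop above, `println(lang)` could then become `println(describeLanguage(lang))`.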
@@ -5,32 +5,22 @@ import android.util.Log

 private val TAG = "sherpa-onnx"

-data class OfflineZipformerAudioTaggingModelConfig (
-    val model: String,
+data class SpokenLanguageIdentificationWhisperConfig (
+    var encoder: String,
+    var decoder: String,
+    var tailPaddings: Int = -1,
 )

-data class AudioTaggingModelConfig (
-    var zipformer: OfflineZipformerAudioTaggingModelConfig,
+data class SpokenLanguageIdentificationConfig (
+    var whisper: SpokenLanguageIdentificationWhisperConfig,
     var numThreads: Int = 1,
     var debug: Boolean = false,
     var provider: String = "cpu",
 )

-data class AudioTaggingConfig (
-    var model: AudioTaggingModelConfig,
-    var labels: String,
-    var topK: Int = 5,
-)
-
-data class AudioEvent (
-    val name: String,
-    val index: Int,
-    val prob: Float,
-)
-
-class AudioTagging(
+class SpokenLanguageIdentification (
     assetManager: AssetManager? = null,
-    config: AudioTaggingConfig,
+    config: SpokenLanguageIdentificationConfig,
 ) {
     private var ptr: Long

@@ -43,10 +33,10 @@ class AudioTagging(
     }

     protected fun finalize() {
-        if(ptr != 0) {
-            delete(ptr)
-            ptr = 0
-        }
+        if (ptr != 0L) {
+            delete(ptr)
+            ptr = 0
+        }
     }

     fun release() = finalize()

@@ -56,25 +46,22 @@ class AudioTagging(
         return OfflineStream(p)
     }

-    // fun compute(stream: OfflineStream, topK: Int=-1): Array<AudioEvent> {
-    fun compute(stream: OfflineStream, topK: Int=-1): Array<Any> {
-        var events :Array<Any> = compute(ptr, stream.ptr, topK)
-    }
+    fun compute(stream: OfflineStream) = compute(ptr, stream.ptr)

     private external fun newFromAsset(
         assetManager: AssetManager,
-        config: AudioTaggingConfig,
+        config: SpokenLanguageIdentificationConfig,
     ): Long

     private external fun newFromFile(
-        config: AudioTaggingConfig,
+        config: SpokenLanguageIdentificationConfig,
     ): Long

     private external fun delete(ptr: Long)

     private external fun createStream(ptr: Long): Long

-    private external fun compute(ptr: Long, streamPtr: Long, topK: Int): Array<Any>
+    private external fun compute(ptr: Long, streamPtr: Long): String

     companion object {
         init {
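Like the other Kotlin wrappers in this repo, the class accepts an optional AssetManager. Assuming the constructor (not visible in these hunks) calls `newFromAsset()` when an AssetManager is supplied, following the AudioTagging pattern, Android usage might look like the sketch below; the asset paths and `createSlidFromAssets` name are placeholders, not part of this PR.

```kotlin
import android.content.res.AssetManager

// Sketch only (placeholder asset paths): build the identifier from model files
// bundled under the app's assets/ directory instead of the filesystem.
fun createSlidFromAssets(assetManager: AssetManager): SpokenLanguageIdentification {
    val config = SpokenLanguageIdentificationConfig(
        whisper = SpokenLanguageIdentificationWhisperConfig(
            encoder = "sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",
            decoder = "sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx",
        ),
        numThreads = 1,
    )
    // With a non-null assetManager the wrapper is expected to go through newFromAsset().
    return SpokenLanguageIdentification(assetManager = assetManager, config = config)
}
```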
@@ -30,19 +30,19 @@ cd ../kotlin-api-examples

 function testSpeakerEmbeddingExtractor() {
   if [ ! -f ./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx ]; then
-    wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx
+    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx
   fi

   if [ ! -f ./speaker1_a_cn_16k.wav ]; then
-    wget -q https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker1_a_cn_16k.wav
+    curl -SL -O https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker1_a_cn_16k.wav
   fi

   if [ ! -f ./speaker1_b_cn_16k.wav ]; then
-    wget -q https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker1_b_cn_16k.wav
+    curl -SL -O https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker1_b_cn_16k.wav
   fi

   if [ ! -f ./speaker2_a_cn_16k.wav ]; then
-    wget -q https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker2_a_cn_16k.wav
+    curl -SL -O https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker2_a_cn_16k.wav
   fi
 }

@@ -53,7 +53,7 @@ function testAsr() {
   fi

   if [ ! -d ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]; then
-    wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
+    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
     tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
     rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
   fi

@@ -61,7 +61,7 @@ function testAsr() {

 function testTts() {
   if [ ! -f ./vits-piper-en_US-amy-low/en_US-amy-low.onnx ]; then
-    wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
+    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
     tar xf vits-piper-en_US-amy-low.tar.bz2
     rm vits-piper-en_US-amy-low.tar.bz2
   fi

@@ -75,7 +75,22 @@ function testAudioTagging() {
   fi
 }

+function testSpokenLanguageIdentification() {
+  if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
+    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
+    tar xvf sherpa-onnx-whisper-tiny.tar.bz2
+    rm sherpa-onnx-whisper-tiny.tar.bz2
+  fi
+
+  if [ ! -f ./spoken-language-identification-test-wavs/ar-arabic.wav ]; then
+    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
+    tar xvf spoken-language-identification-test-wavs.tar.bz2
+    rm spoken-language-identification-test-wavs.tar.bz2
+  fi
+}
+
 function test() {
+  testSpokenLanguageIdentification
   testAudioTagging
   testSpeakerEmbeddingExtractor
   testAsr

@@ -90,6 +105,7 @@ kotlinc-jvm -include-runtime -d main.jar \
   OfflineStream.kt \
   SherpaOnnx.kt \
   Speaker.kt \
+  SpokenLanguageIdentification.kt \
   Tts.kt \
   WaveReader.kt \
   faked-asset-manager.kt \

@@ -101,13 +117,13 @@ java -Djava.library.path=../build/lib -jar main.jar

 function testTwoPass() {
   if [ ! -f ./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/encoder-epoch-99-avg-1.int8.onnx ]; then
-    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
+    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
     tar xvf sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
     rm sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
   fi

   if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then
-    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
     tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
     rm sherpa-onnx-whisper-tiny.en.tar.bz2
   fi
@@ -13,6 +13,7 @@ add_library(sherpa-onnx-jni
   audio-tagging.cc
   jni.cc
   offline-stream.cc
+  spoken-language-identification.cc
 )
 target_link_libraries(sherpa-onnx-jni sherpa-onnx-core)
 install(TARGETS sherpa-onnx-jni DESTINATION lib)
+// sherpa-onnx/jni/spoken-language-identification.cc
+//
+// Copyright (c)  2024  Xiaomi Corporation
+
+#include "sherpa-onnx/csrc/spoken-language-identification.h"
+
+#include "sherpa-onnx/csrc/macros.h"
+#include "sherpa-onnx/jni/common.h"
+
+namespace sherpa_onnx {
+
+static SpokenLanguageIdentificationConfig GetSpokenLanguageIdentificationConfig(
+    JNIEnv *env, jobject config) {
+  SpokenLanguageIdentificationConfig ans;
+
+  jclass cls = env->GetObjectClass(config);
+  jfieldID fid = env->GetFieldID(
+      cls, "whisper",
+      "Lcom/k2fsa/sherpa/onnx/SpokenLanguageIdentificationWhisperConfig;");
+
+  jobject whisper = env->GetObjectField(config, fid);
+  jclass whisper_cls = env->GetObjectClass(whisper);
+
+  fid = env->GetFieldID(whisper_cls, "encoder", "Ljava/lang/String;");
+
+  jstring s = (jstring)env->GetObjectField(whisper, fid);
+  const char *p = env->GetStringUTFChars(s, nullptr);
+  ans.whisper.encoder = p;
+  env->ReleaseStringUTFChars(s, p);
+
+  fid = env->GetFieldID(whisper_cls, "decoder", "Ljava/lang/String;");
+  s = (jstring)env->GetObjectField(whisper, fid);
+  p = env->GetStringUTFChars(s, nullptr);
+  ans.whisper.decoder = p;
+  env->ReleaseStringUTFChars(s, p);
+
+  fid = env->GetFieldID(whisper_cls, "tailPaddings", "I");
+  ans.whisper.tail_paddings = env->GetIntField(whisper, fid);
+
+  fid = env->GetFieldID(cls, "numThreads", "I");
+  ans.num_threads = env->GetIntField(config, fid);
+
+  fid = env->GetFieldID(cls, "debug", "Z");
+  ans.debug = env->GetBooleanField(config, fid);
+
+  fid = env->GetFieldID(cls, "provider", "Ljava/lang/String;");
+  s = (jstring)env->GetObjectField(config, fid);
+  p = env->GetStringUTFChars(s, nullptr);
+  ans.provider = p;
+  env->ReleaseStringUTFChars(s, p);
+
+  return ans;
+}
+
+}  // namespace sherpa_onnx
+
+SHERPA_ONNX_EXTERN_C
+JNIEXPORT jlong JNICALL
+Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_newFromFile(
+    JNIEnv *env, jobject /*obj*/, jobject _config) {
+  auto config =
+      sherpa_onnx::GetSpokenLanguageIdentificationConfig(env, _config);
+  SHERPA_ONNX_LOGE("SpokenLanguageIdentification newFromFile config:\n%s",
+                   config.ToString().c_str());
+
+  if (!config.Validate()) {
+    SHERPA_ONNX_LOGE("Errors found in config!");
+    return 0;
+  }
+
+  auto tagger = new sherpa_onnx::SpokenLanguageIdentification(config);
+
+  return (jlong)tagger;
+}
+
+SHERPA_ONNX_EXTERN_C
+JNIEXPORT jlong JNICALL
+Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_createStream(
+    JNIEnv *env, jobject /*obj*/, jlong ptr) {
+  auto slid =
+      reinterpret_cast<sherpa_onnx::SpokenLanguageIdentification *>(ptr);
+  std::unique_ptr<sherpa_onnx::OfflineStream> s = slid->CreateStream();
+
+  // The user is responsible to free the returned pointer.
+  //
+  // See Java_com_k2fsa_sherpa_onnx_OfflineStream_delete() from
+  // ./offline-stream.cc
+  sherpa_onnx::OfflineStream *p = s.release();
+  return (jlong)p;
+}
+
+SHERPA_ONNX_EXTERN_C
+JNIEXPORT jstring JNICALL
+Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_compute(JNIEnv *env,
+                                                                jobject /*obj*/,
+                                                                jlong ptr,
+                                                                jlong s_ptr) {
+  sherpa_onnx::SpokenLanguageIdentification *slid =
+      reinterpret_cast<sherpa_onnx::SpokenLanguageIdentification *>(ptr);
+  sherpa_onnx::OfflineStream *s =
+      reinterpret_cast<sherpa_onnx::OfflineStream *>(s_ptr);
+  std::string lang = slid->Compute(s);
+  return env->NewStringUTF(lang.c_str());
+}
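As the comment in createStream() notes, the JNI side hands ownership of the native OfflineStream to the caller, so the Kotlin code should release it even when compute() throws. A minimal sketch of that pattern, using only the API added in this PR (`identifyLanguage` is a hypothetical helper name, not part of the change):

```kotlin
// Sketch only: createStream() returns ownership of a native OfflineStream,
// so release() must run on every path, including exceptions from compute().
fun identifyLanguage(
    slid: SpokenLanguageIdentification,
    samples: FloatArray,
    sampleRate: Int,
): String {
    val stream = slid.createStream()
    try {
        stream.acceptWaveform(samples, sampleRate = sampleRate)
        return slid.compute(stream)
    } finally {
        stream.release()  // frees the native pointer created by createStream()
    }
}
```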