Committed by
GitHub
Add Kotlin and Java API for homophone replacer (#2166)
* Add Kotlin API for homophone replacer * Add Java API for homophone replacer
正在显示
21 个修改的文件
包含
325 行增加
和
1 行删除
| @@ -105,6 +105,14 @@ jobs: | @@ -105,6 +105,14 @@ jobs: | ||
| 105 | make -j4 | 105 | make -j4 |
| 106 | ls -lh lib | 106 | ls -lh lib |
| 107 | 107 | ||
| 108 | + - name: Run java test (Non-streaming SenseVoice with homophone replacer) | ||
| 109 | + shell: bash | ||
| 110 | + run: | | ||
| 111 | + cd ./java-api-examples | ||
| 112 | + ./run-non-streaming-decode-file-sense-voice-with-hr.sh | ||
| 113 | + rm -rf sherpa-onnx-sense-* | ||
| 114 | + rm -rf dict lexicon.txt replace.fst | ||
| 115 | + | ||
| 108 | - name: Run java test (VAD + Non-streaming Dolphin CTC) | 116 | - name: Run java test (VAD + Non-streaming Dolphin CTC) |
| 109 | shell: bash | 117 | shell: bash |
| 110 | run: | | 118 | run: | |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/HomophoneReplacerConfig.kt |
android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/HomophoneReplacerConfig.kt
0 → 120000
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/HomophoneReplacerConfig.kt |
android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/HomophoneReplacerConfig.kt
0 → 120000
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/HomophoneReplacerConfig.kt |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/HomophoneReplacerConfig.kt |
android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/HomophoneReplacerConfig.kt
0 → 120000
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/HomophoneReplacerConfig.kt |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use an offline SenseVoice model, | ||
| 4 | +// i.e., non-streaming SenseVoice model | ||
| 5 | +// to decode files with homophone replacer. | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | + | ||
| 8 | +public class NonStreamingDecodeFileSenseVoiceWithHr { | ||
| 9 | + public static void main(String[] args) { | ||
| 10 | + // please refer to | ||
| 11 | + // https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html | ||
| 12 | + // to download model files | ||
| 13 | + String model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"; | ||
| 14 | + String tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"; | ||
| 15 | + | ||
| 16 | + String waveFilename = "./test-hr.wav"; | ||
| 17 | + | ||
| 18 | + WaveReader reader = new WaveReader(waveFilename); | ||
| 19 | + | ||
| 20 | + OfflineSenseVoiceModelConfig senseVoice = | ||
| 21 | + OfflineSenseVoiceModelConfig.builder().setModel(model).build(); | ||
| 22 | + | ||
| 23 | + OfflineModelConfig modelConfig = | ||
| 24 | + OfflineModelConfig.builder() | ||
| 25 | + .setSenseVoice(senseVoice) | ||
| 26 | + .setTokens(tokens) | ||
| 27 | + .setNumThreads(1) | ||
| 28 | + .setDebug(true) | ||
| 29 | + .build(); | ||
| 30 | + | ||
| 31 | + HomophoneReplacerConfig hr = | ||
| 32 | + HomophoneReplacerConfig.builder() | ||
| 33 | + .setDictDir("./dict") | ||
| 34 | + .setLexicon("./lexicon.txt") | ||
| 35 | + .setRuleFsts("./replace.fst") | ||
| 36 | + .build(); | ||
| 37 | + | ||
| 38 | + OfflineRecognizerConfig config = | ||
| 39 | + OfflineRecognizerConfig.builder() | ||
| 40 | + .setOfflineModelConfig(modelConfig) | ||
| 41 | + .setDecodingMethod("greedy_search") | ||
| 42 | + .setHr(hr) | ||
| 43 | + .build(); | ||
| 44 | + | ||
| 45 | + OfflineRecognizer recognizer = new OfflineRecognizer(config); | ||
| 46 | + OfflineStream stream = recognizer.createStream(); | ||
| 47 | + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
| 48 | + | ||
| 49 | + recognizer.decode(stream); | ||
| 50 | + | ||
| 51 | + String text = recognizer.getResult(stream).getText(); | ||
| 52 | + | ||
| 53 | + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); | ||
| 54 | + | ||
| 55 | + stream.release(); | ||
| 56 | + recognizer.release(); | ||
| 57 | + } | ||
| 58 | +} |
| @@ -31,6 +31,11 @@ This directory contains examples for the JAVA API of sherpa-onnx. | @@ -31,6 +31,11 @@ This directory contains examples for the JAVA API of sherpa-onnx. | ||
| 31 | ./run-non-streaming-decode-file-nemo.sh | 31 | ./run-non-streaming-decode-file-nemo.sh |
| 32 | ``` | 32 | ``` |
| 33 | 33 | ||
| 34 | +## Non-Streaming Speech recognition with homophone replacer | ||
| 35 | + | ||
| 36 | +```bash | ||
| 37 | +./run-non-streaming-decode-file-sense-voice-with-hr.sh | ||
| 38 | +``` | ||
| 34 | 39 | ||
| 35 | ## Non-Streaming text-to-speech | 40 | ## Non-Streaming text-to-speech |
| 36 | 41 |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 6 | + mkdir -p ../build | ||
| 7 | + pushd ../build | ||
| 8 | + cmake \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 11 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 12 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 14 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 15 | + .. | ||
| 16 | + | ||
| 17 | + make -j4 | ||
| 18 | + ls -lh lib | ||
| 19 | + popd | ||
| 20 | +fi | ||
| 21 | + | ||
| 22 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 23 | + pushd ../sherpa-onnx/java-api | ||
| 24 | + make | ||
| 25 | + popd | ||
| 26 | +fi | ||
| 27 | + | ||
| 28 | +if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then | ||
| 29 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 30 | + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 31 | + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +if [ ! -d dict ]; then | ||
| 35 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2 | ||
| 36 | + tar xf dict.tar.bz2 | ||
| 37 | + rm dict.tar.bz2 | ||
| 38 | + | ||
| 39 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst | ||
| 40 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav | ||
| 41 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt | ||
| 42 | +fi | ||
| 43 | + | ||
| 44 | +java \ | ||
| 45 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 46 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 47 | + NonStreamingDecodeFileSenseVoiceWithHr.java |
| 1 | +../sherpa-onnx/kotlin-api/HomophoneReplacerConfig.kt |
| @@ -87,6 +87,7 @@ function testOnlineAsr() { | @@ -87,6 +87,7 @@ function testOnlineAsr() { | ||
| 87 | kotlinc-jvm -include-runtime -d $out_filename \ | 87 | kotlinc-jvm -include-runtime -d $out_filename \ |
| 88 | test_online_asr.kt \ | 88 | test_online_asr.kt \ |
| 89 | FeatureConfig.kt \ | 89 | FeatureConfig.kt \ |
| 90 | + HomophoneReplacerConfig.kt \ | ||
| 90 | OnlineRecognizer.kt \ | 91 | OnlineRecognizer.kt \ |
| 91 | OnlineStream.kt \ | 92 | OnlineStream.kt \ |
| 92 | WaveReader.kt \ | 93 | WaveReader.kt \ |
| @@ -244,6 +245,7 @@ function testOfflineAsr() { | @@ -244,6 +245,7 @@ function testOfflineAsr() { | ||
| 244 | kotlinc-jvm -include-runtime -d $out_filename \ | 245 | kotlinc-jvm -include-runtime -d $out_filename \ |
| 245 | test_offline_asr.kt \ | 246 | test_offline_asr.kt \ |
| 246 | FeatureConfig.kt \ | 247 | FeatureConfig.kt \ |
| 248 | + HomophoneReplacerConfig.kt \ | ||
| 247 | OfflineRecognizer.kt \ | 249 | OfflineRecognizer.kt \ |
| 248 | OfflineStream.kt \ | 250 | OfflineStream.kt \ |
| 249 | WaveReader.kt \ | 251 | WaveReader.kt \ |
| @@ -272,6 +274,7 @@ function testInverseTextNormalizationOfflineAsr() { | @@ -272,6 +274,7 @@ function testInverseTextNormalizationOfflineAsr() { | ||
| 272 | kotlinc-jvm -include-runtime -d $out_filename \ | 274 | kotlinc-jvm -include-runtime -d $out_filename \ |
| 273 | test_itn_offline_asr.kt \ | 275 | test_itn_offline_asr.kt \ |
| 274 | FeatureConfig.kt \ | 276 | FeatureConfig.kt \ |
| 277 | + HomophoneReplacerConfig.kt \ | ||
| 275 | OfflineRecognizer.kt \ | 278 | OfflineRecognizer.kt \ |
| 276 | OfflineStream.kt \ | 279 | OfflineStream.kt \ |
| 277 | WaveReader.kt \ | 280 | WaveReader.kt \ |
| @@ -300,6 +303,7 @@ function testInverseTextNormalizationOnlineAsr() { | @@ -300,6 +303,7 @@ function testInverseTextNormalizationOnlineAsr() { | ||
| 300 | kotlinc-jvm -include-runtime -d $out_filename \ | 303 | kotlinc-jvm -include-runtime -d $out_filename \ |
| 301 | test_itn_online_asr.kt \ | 304 | test_itn_online_asr.kt \ |
| 302 | FeatureConfig.kt \ | 305 | FeatureConfig.kt \ |
| 306 | + HomophoneReplacerConfig.kt \ | ||
| 303 | OnlineRecognizer.kt \ | 307 | OnlineRecognizer.kt \ |
| 304 | OnlineStream.kt \ | 308 | OnlineStream.kt \ |
| 305 | WaveReader.kt \ | 309 | WaveReader.kt \ |
| @@ -402,6 +406,38 @@ function testOfflineSpeechDenoiser() { | @@ -402,6 +406,38 @@ function testOfflineSpeechDenoiser() { | ||
| 402 | ls -lh *.wav | 406 | ls -lh *.wav |
| 403 | } | 407 | } |
| 404 | 408 | ||
| 409 | +function testOfflineSenseVoiceWithHr() { | ||
| 410 | + if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then | ||
| 411 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 412 | + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 413 | + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 414 | + fi | ||
| 415 | + | ||
| 416 | + if [ ! -d dict ]; then | ||
| 417 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2 | ||
| 418 | + tar xf dict.tar.bz2 | ||
| 419 | + rm dict.tar.bz2 | ||
| 420 | + | ||
| 421 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst | ||
| 422 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav | ||
| 423 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt | ||
| 424 | + fi | ||
| 425 | + | ||
| 426 | + out_filename=test_offline_sense_voice_with_hr.jar | ||
| 427 | + kotlinc-jvm -include-runtime -d $out_filename \ | ||
| 428 | + test_offline_sense_voice_with_hr.kt \ | ||
| 429 | + FeatureConfig.kt \ | ||
| 430 | + HomophoneReplacerConfig.kt \ | ||
| 431 | + OfflineRecognizer.kt \ | ||
| 432 | + OfflineStream.kt \ | ||
| 433 | + WaveReader.kt \ | ||
| 434 | + faked-asset-manager.kt | ||
| 435 | + | ||
| 436 | + ls -lh $out_filename | ||
| 437 | + java -Djava.library.path=../build/lib -jar $out_filename | ||
| 438 | +} | ||
| 439 | + | ||
| 440 | +testOfflineSenseVoiceWithHr | ||
| 405 | testOfflineSpeechDenoiser | 441 | testOfflineSpeechDenoiser |
| 406 | testOfflineSpeakerDiarization | 442 | testOfflineSpeakerDiarization |
| 407 | testSpeakerEmbeddingExtractor | 443 | testSpeakerEmbeddingExtractor |
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +fun main() { | ||
| 4 | + val recognizer = createOfflineRecognizer() | ||
| 5 | + val waveFilename = "./test-hr.wav" | ||
| 6 | + | ||
| 7 | + val objArray = WaveReader.readWaveFromFile( | ||
| 8 | + filename = waveFilename, | ||
| 9 | + ) | ||
| 10 | + val samples: FloatArray = objArray[0] as FloatArray | ||
| 11 | + val sampleRate: Int = objArray[1] as Int | ||
| 12 | + | ||
| 13 | + val stream = recognizer.createStream() | ||
| 14 | + stream.acceptWaveform(samples, sampleRate=sampleRate) | ||
| 15 | + recognizer.decode(stream) | ||
| 16 | + | ||
| 17 | + val result = recognizer.getResult(stream) | ||
| 18 | + println(result) | ||
| 19 | + | ||
| 20 | + stream.release() | ||
| 21 | + recognizer.release() | ||
| 22 | +} | ||
| 23 | + | ||
| 24 | +fun createOfflineRecognizer(): OfflineRecognizer { | ||
| 25 | + val config = OfflineRecognizerConfig( | ||
| 26 | + featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80), | ||
| 27 | + modelConfig = getOfflineModelConfig(type = 15)!!, | ||
| 28 | + hr = HomophoneReplacerConfig( | ||
| 29 | + dictDir = "./dict", | ||
| 30 | + lexicon = "./lexicon.txt", | ||
| 31 | + ruleFsts = "./replace.fst"), | ||
| 32 | + ) | ||
| 33 | + | ||
| 34 | + return OfflineRecognizer(config = config) | ||
| 35 | +} |
| @@ -11,6 +11,7 @@ java_files += WaveWriter.java | @@ -11,6 +11,7 @@ java_files += WaveWriter.java | ||
| 11 | java_files += EndpointRule.java | 11 | java_files += EndpointRule.java |
| 12 | java_files += EndpointConfig.java | 12 | java_files += EndpointConfig.java |
| 13 | java_files += FeatureConfig.java | 13 | java_files += FeatureConfig.java |
| 14 | +java_files += HomophoneReplacerConfig.java | ||
| 14 | java_files += OnlineLMConfig.java | 15 | java_files += OnlineLMConfig.java |
| 15 | java_files += OnlineParaformerModelConfig.java | 16 | java_files += OnlineParaformerModelConfig.java |
| 16 | java_files += OnlineZipformer2CtcModelConfig.java | 17 | java_files += OnlineZipformer2CtcModelConfig.java |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class HomophoneReplacerConfig { | ||
| 6 | + private final String dictDir; | ||
| 7 | + private final String lexicon; | ||
| 8 | + private final String ruleFsts; | ||
| 9 | + | ||
| 10 | + private HomophoneReplacerConfig(Builder builder) { | ||
| 11 | + this.dictDir = builder.dictDir; | ||
| 12 | + this.lexicon = builder.lexicon; | ||
| 13 | + this.ruleFsts = builder.ruleFsts; | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + public static Builder builder() { | ||
| 17 | + return new Builder(); | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + public String getDictDir() { | ||
| 21 | + return dictDir; | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + public String getLexicon() { | ||
| 25 | + return lexicon; | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + public String getRuleFsts() { | ||
| 29 | + return ruleFsts; | ||
| 30 | + } | ||
| 31 | + | ||
| 32 | + public static class Builder { | ||
| 33 | + private String dictDir = ""; | ||
| 34 | + private String lexicon = ""; | ||
| 35 | + private String ruleFsts = ""; | ||
| 36 | + | ||
| 37 | + public HomophoneReplacerConfig build() { | ||
| 38 | + return new HomophoneReplacerConfig(this); | ||
| 39 | + } | ||
| 40 | + | ||
| 41 | + public Builder setDictDir(String dictDir) { | ||
| 42 | + this.dictDir = dictDir; | ||
| 43 | + return this; | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + public Builder setLexicon(String lexicon) { | ||
| 47 | + this.lexicon = lexicon; | ||
| 48 | + return this; | ||
| 49 | + } | ||
| 50 | + | ||
| 51 | + public Builder setRuleFsts(String ruleFsts) { | ||
| 52 | + this.ruleFsts = ruleFsts; | ||
| 53 | + return this; | ||
| 54 | + } | ||
| 55 | + } | ||
| 56 | +} |
| @@ -5,6 +5,7 @@ package com.k2fsa.sherpa.onnx; | @@ -5,6 +5,7 @@ package com.k2fsa.sherpa.onnx; | ||
| 5 | public class OfflineRecognizerConfig { | 5 | public class OfflineRecognizerConfig { |
| 6 | private final FeatureConfig featConfig; | 6 | private final FeatureConfig featConfig; |
| 7 | private final OfflineModelConfig modelConfig; | 7 | private final OfflineModelConfig modelConfig; |
| 8 | + private final HomophoneReplacerConfig hr; | ||
| 8 | private final String decodingMethod; | 9 | private final String decodingMethod; |
| 9 | private final int maxActivePaths; | 10 | private final int maxActivePaths; |
| 10 | private final String hotwordsFile; | 11 | private final String hotwordsFile; |
| @@ -16,6 +17,7 @@ public class OfflineRecognizerConfig { | @@ -16,6 +17,7 @@ public class OfflineRecognizerConfig { | ||
| 16 | private OfflineRecognizerConfig(Builder builder) { | 17 | private OfflineRecognizerConfig(Builder builder) { |
| 17 | this.featConfig = builder.featConfig; | 18 | this.featConfig = builder.featConfig; |
| 18 | this.modelConfig = builder.modelConfig; | 19 | this.modelConfig = builder.modelConfig; |
| 20 | + this.hr = builder.hr; | ||
| 19 | this.decodingMethod = builder.decodingMethod; | 21 | this.decodingMethod = builder.decodingMethod; |
| 20 | this.maxActivePaths = builder.maxActivePaths; | 22 | this.maxActivePaths = builder.maxActivePaths; |
| 21 | this.hotwordsFile = builder.hotwordsFile; | 23 | this.hotwordsFile = builder.hotwordsFile; |
| @@ -36,6 +38,7 @@ public class OfflineRecognizerConfig { | @@ -36,6 +38,7 @@ public class OfflineRecognizerConfig { | ||
| 36 | public static class Builder { | 38 | public static class Builder { |
| 37 | private FeatureConfig featConfig = FeatureConfig.builder().build(); | 39 | private FeatureConfig featConfig = FeatureConfig.builder().build(); |
| 38 | private OfflineModelConfig modelConfig = OfflineModelConfig.builder().build(); | 40 | private OfflineModelConfig modelConfig = OfflineModelConfig.builder().build(); |
| 41 | + private HomophoneReplacerConfig hr = HomophoneReplacerConfig.builder().build(); | ||
| 39 | private String decodingMethod = "greedy_search"; | 42 | private String decodingMethod = "greedy_search"; |
| 40 | private int maxActivePaths = 4; | 43 | private int maxActivePaths = 4; |
| 41 | private String hotwordsFile = ""; | 44 | private String hotwordsFile = ""; |
| @@ -58,6 +61,11 @@ public class OfflineRecognizerConfig { | @@ -58,6 +61,11 @@ public class OfflineRecognizerConfig { | ||
| 58 | return this; | 61 | return this; |
| 59 | } | 62 | } |
| 60 | 63 | ||
| 64 | + public Builder setHr(HomophoneReplacerConfig hr) { | ||
| 65 | + this.hr = hr; | ||
| 66 | + return this; | ||
| 67 | + } | ||
| 68 | + | ||
| 61 | public Builder setDecodingMethod(String decodingMethod) { | 69 | public Builder setDecodingMethod(String decodingMethod) { |
| 62 | this.decodingMethod = decodingMethod; | 70 | this.decodingMethod = decodingMethod; |
| 63 | return this; | 71 | return this; |
| @@ -10,6 +10,7 @@ public class OnlineRecognizerConfig { | @@ -10,6 +10,7 @@ public class OnlineRecognizerConfig { | ||
| 10 | 10 | ||
| 11 | private final OnlineCtcFstDecoderConfig ctcFstDecoderConfig; | 11 | private final OnlineCtcFstDecoderConfig ctcFstDecoderConfig; |
| 12 | private final EndpointConfig endpointConfig; | 12 | private final EndpointConfig endpointConfig; |
| 13 | + private final HomophoneReplacerConfig hr; | ||
| 13 | private final boolean enableEndpoint; | 14 | private final boolean enableEndpoint; |
| 14 | private final String decodingMethod; | 15 | private final String decodingMethod; |
| 15 | private final int maxActivePaths; | 16 | private final int maxActivePaths; |
| @@ -25,6 +26,7 @@ public class OnlineRecognizerConfig { | @@ -25,6 +26,7 @@ public class OnlineRecognizerConfig { | ||
| 25 | this.lmConfig = builder.lmConfig; | 26 | this.lmConfig = builder.lmConfig; |
| 26 | this.ctcFstDecoderConfig = builder.ctcFstDecoderConfig; | 27 | this.ctcFstDecoderConfig = builder.ctcFstDecoderConfig; |
| 27 | this.endpointConfig = builder.endpointConfig; | 28 | this.endpointConfig = builder.endpointConfig; |
| 29 | + this.hr = builder.hr; | ||
| 28 | this.enableEndpoint = builder.enableEndpoint; | 30 | this.enableEndpoint = builder.enableEndpoint; |
| 29 | this.decodingMethod = builder.decodingMethod; | 31 | this.decodingMethod = builder.decodingMethod; |
| 30 | this.maxActivePaths = builder.maxActivePaths; | 32 | this.maxActivePaths = builder.maxActivePaths; |
| @@ -49,6 +51,7 @@ public class OnlineRecognizerConfig { | @@ -49,6 +51,7 @@ public class OnlineRecognizerConfig { | ||
| 49 | private OnlineLMConfig lmConfig = OnlineLMConfig.builder().build(); | 51 | private OnlineLMConfig lmConfig = OnlineLMConfig.builder().build(); |
| 50 | private OnlineCtcFstDecoderConfig ctcFstDecoderConfig = OnlineCtcFstDecoderConfig.builder().build(); | 52 | private OnlineCtcFstDecoderConfig ctcFstDecoderConfig = OnlineCtcFstDecoderConfig.builder().build(); |
| 51 | private EndpointConfig endpointConfig = EndpointConfig.builder().build(); | 53 | private EndpointConfig endpointConfig = EndpointConfig.builder().build(); |
| 54 | + private HomophoneReplacerConfig hr = HomophoneReplacerConfig.builder().build(); | ||
| 52 | private boolean enableEndpoint = true; | 55 | private boolean enableEndpoint = true; |
| 53 | private String decodingMethod = "greedy_search"; | 56 | private String decodingMethod = "greedy_search"; |
| 54 | private int maxActivePaths = 4; | 57 | private int maxActivePaths = 4; |
| @@ -87,6 +90,11 @@ public class OnlineRecognizerConfig { | @@ -87,6 +90,11 @@ public class OnlineRecognizerConfig { | ||
| 87 | return this; | 90 | return this; |
| 88 | } | 91 | } |
| 89 | 92 | ||
| 93 | + public Builder setHr(HomophoneReplacerConfig hr) { | ||
| 94 | + this.hr = hr; | ||
| 95 | + return this; | ||
| 96 | + } | ||
| 97 | + | ||
| 90 | public Builder setEnableEndpoint(boolean enableEndpoint) { | 98 | public Builder setEnableEndpoint(boolean enableEndpoint) { |
| 91 | this.enableEndpoint = enableEndpoint; | 99 | this.enableEndpoint = enableEndpoint; |
| 92 | return this; | 100 | return this; |
| @@ -284,6 +284,30 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | @@ -284,6 +284,30 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | ||
| 284 | ans.model_config.telespeech_ctc = p; | 284 | ans.model_config.telespeech_ctc = p; |
| 285 | env->ReleaseStringUTFChars(s, p); | 285 | env->ReleaseStringUTFChars(s, p); |
| 286 | 286 | ||
| 287 | + // homophone replacer config | ||
| 288 | + fid = env->GetFieldID(cls, "hr", | ||
| 289 | + "Lcom/k2fsa/sherpa/onnx/HomophoneReplacerConfig;"); | ||
| 290 | + jobject hr_config = env->GetObjectField(config, fid); | ||
| 291 | + jclass hr_config_cls = env->GetObjectClass(hr_config); | ||
| 292 | + | ||
| 293 | + fid = env->GetFieldID(hr_config_cls, "dictDir", "Ljava/lang/String;"); | ||
| 294 | + s = (jstring)env->GetObjectField(hr_config, fid); | ||
| 295 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 296 | + ans.hr.dict_dir = p; | ||
| 297 | + env->ReleaseStringUTFChars(s, p); | ||
| 298 | + | ||
| 299 | + fid = env->GetFieldID(hr_config_cls, "lexicon", "Ljava/lang/String;"); | ||
| 300 | + s = (jstring)env->GetObjectField(hr_config, fid); | ||
| 301 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 302 | + ans.hr.lexicon = p; | ||
| 303 | + env->ReleaseStringUTFChars(s, p); | ||
| 304 | + | ||
| 305 | + fid = env->GetFieldID(hr_config_cls, "ruleFsts", "Ljava/lang/String;"); | ||
| 306 | + s = (jstring)env->GetObjectField(hr_config, fid); | ||
| 307 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 308 | + ans.hr.rule_fsts = p; | ||
| 309 | + env->ReleaseStringUTFChars(s, p); | ||
| 310 | + | ||
| 287 | return ans; | 311 | return ans; |
| 288 | } | 312 | } |
| 289 | 313 |
| @@ -253,6 +253,30 @@ static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { | @@ -253,6 +253,30 @@ static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { | ||
| 253 | ans.ctc_fst_decoder_config.max_active = | 253 | ans.ctc_fst_decoder_config.max_active = |
| 254 | env->GetIntField(fst_decoder_config, fid); | 254 | env->GetIntField(fst_decoder_config, fid); |
| 255 | 255 | ||
| 256 | + // homophone replacer config | ||
| 257 | + fid = env->GetFieldID(cls, "hr", | ||
| 258 | + "Lcom/k2fsa/sherpa/onnx/HomophoneReplacerConfig;"); | ||
| 259 | + jobject hr_config = env->GetObjectField(config, fid); | ||
| 260 | + jclass hr_config_cls = env->GetObjectClass(hr_config); | ||
| 261 | + | ||
| 262 | + fid = env->GetFieldID(hr_config_cls, "dictDir", "Ljava/lang/String;"); | ||
| 263 | + s = (jstring)env->GetObjectField(hr_config, fid); | ||
| 264 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 265 | + ans.hr.dict_dir = p; | ||
| 266 | + env->ReleaseStringUTFChars(s, p); | ||
| 267 | + | ||
| 268 | + fid = env->GetFieldID(hr_config_cls, "lexicon", "Ljava/lang/String;"); | ||
| 269 | + s = (jstring)env->GetObjectField(hr_config, fid); | ||
| 270 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 271 | + ans.hr.lexicon = p; | ||
| 272 | + env->ReleaseStringUTFChars(s, p); | ||
| 273 | + | ||
| 274 | + fid = env->GetFieldID(hr_config_cls, "ruleFsts", "Ljava/lang/String;"); | ||
| 275 | + s = (jstring)env->GetObjectField(hr_config, fid); | ||
| 276 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 277 | + ans.hr.rule_fsts = p; | ||
| 278 | + env->ReleaseStringUTFChars(s, p); | ||
| 279 | + | ||
| 256 | return ans; | 280 | return ans; |
| 257 | } | 281 | } |
| 258 | } // namespace sherpa_onnx | 282 | } // namespace sherpa_onnx |
| @@ -78,6 +78,7 @@ data class OfflineRecognizerConfig( | @@ -78,6 +78,7 @@ data class OfflineRecognizerConfig( | ||
| 78 | var featConfig: FeatureConfig = FeatureConfig(), | 78 | var featConfig: FeatureConfig = FeatureConfig(), |
| 79 | var modelConfig: OfflineModelConfig = OfflineModelConfig(), | 79 | var modelConfig: OfflineModelConfig = OfflineModelConfig(), |
| 80 | // var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it | 80 | // var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it |
| 81 | + var hr: HomophoneReplacerConfig = HomophoneReplacerConfig(), | ||
| 81 | var decodingMethod: String = "greedy_search", | 82 | var decodingMethod: String = "greedy_search", |
| 82 | var maxActivePaths: Int = 4, | 83 | var maxActivePaths: Int = 4, |
| 83 | var hotwordsFile: String = "", | 84 | var hotwordsFile: String = "", |
| @@ -57,12 +57,12 @@ data class OnlineCtcFstDecoderConfig( | @@ -57,12 +57,12 @@ data class OnlineCtcFstDecoderConfig( | ||
| 57 | var maxActive: Int = 3000, | 57 | var maxActive: Int = 3000, |
| 58 | ) | 58 | ) |
| 59 | 59 | ||
| 60 | - | ||
| 61 | data class OnlineRecognizerConfig( | 60 | data class OnlineRecognizerConfig( |
| 62 | var featConfig: FeatureConfig = FeatureConfig(), | 61 | var featConfig: FeatureConfig = FeatureConfig(), |
| 63 | var modelConfig: OnlineModelConfig = OnlineModelConfig(), | 62 | var modelConfig: OnlineModelConfig = OnlineModelConfig(), |
| 64 | var lmConfig: OnlineLMConfig = OnlineLMConfig(), | 63 | var lmConfig: OnlineLMConfig = OnlineLMConfig(), |
| 65 | var ctcFstDecoderConfig: OnlineCtcFstDecoderConfig = OnlineCtcFstDecoderConfig(), | 64 | var ctcFstDecoderConfig: OnlineCtcFstDecoderConfig = OnlineCtcFstDecoderConfig(), |
| 65 | + var hr: HomophoneReplacerConfig = HomophoneReplacerConfig(), | ||
| 66 | var endpointConfig: EndpointConfig = EndpointConfig(), | 66 | var endpointConfig: EndpointConfig = EndpointConfig(), |
| 67 | var enableEndpoint: Boolean = true, | 67 | var enableEndpoint: Boolean = true, |
| 68 | var decodingMethod: String = "greedy_search", | 68 | var decodingMethod: String = "greedy_search", |
-
请 注册 或 登录 后发表评论