Committed by
GitHub
Add Kotlin and Java API for online punctuation models (#1936)
正在显示
16 个修改的文件
包含
474 行增加
和
13 行删除
| @@ -105,6 +105,22 @@ jobs: | @@ -105,6 +105,22 @@ jobs: | ||
| 105 | make -j4 | 105 | make -j4 |
| 106 | ls -lh lib | 106 | ls -lh lib |
| 107 | 107 | ||
| 108 | + - name: Run java test (Online add punctuations) | ||
| 109 | + shell: bash | ||
| 110 | + run: | | ||
| 111 | + cd ./java-api-examples | ||
| 112 | + ./run-online-add-punctuation-zh-en.sh | ||
| 113 | + # Delete model files to save space | ||
| 114 | + rm -rf sherpa-onnx-online-* | ||
| 115 | + | ||
| 116 | + - name: Run java test (Offline add punctuations) | ||
| 117 | + shell: bash | ||
| 118 | + run: | | ||
| 119 | + cd ./java-api-examples | ||
| 120 | + ./run-offline-add-punctuation-zh-en.sh | ||
| 121 | + # Delete model files to save space | ||
| 122 | + rm -rf sherpa-onnx-punct-* | ||
| 123 | + | ||
| 108 | - name: Run java test (Non-Streaming ASR) | 124 | - name: Run java test (Non-Streaming ASR) |
| 109 | shell: bash | 125 | shell: bash |
| 110 | run: | | 126 | run: | |
| @@ -196,13 +212,6 @@ jobs: | @@ -196,13 +212,6 @@ jobs: | ||
| 196 | ./run-audio-tagging-ced-from-file.sh | 212 | ./run-audio-tagging-ced-from-file.sh |
| 197 | rm -rf sherpa-onnx-ced-* | 213 | rm -rf sherpa-onnx-ced-* |
| 198 | 214 | ||
| 199 | - - name: Run java test (add punctuations) | ||
| 200 | - shell: bash | ||
| 201 | - run: | | ||
| 202 | - cd ./java-api-examples | ||
| 203 | - ./run-add-punctuation-zh-en.sh | ||
| 204 | - # Delete model files to save space | ||
| 205 | - rm -rf sherpa-onnx-punct-* | ||
| 206 | 215 | ||
| 207 | - name: Run java test (Spoken language identification) | 216 | - name: Run java test (Spoken language identification) |
| 208 | shell: bash | 217 | shell: bash |
| @@ -5,7 +5,7 @@ | @@ -5,7 +5,7 @@ | ||
| 5 | // The model supports both English and Chinese. | 5 | // The model supports both English and Chinese. |
| 6 | import com.k2fsa.sherpa.onnx.*; | 6 | import com.k2fsa.sherpa.onnx.*; |
| 7 | 7 | ||
| 8 | -public class AddPunctuation { | 8 | +public class OfflineAddPunctuation { |
| 9 | public static void main(String[] args) { | 9 | public static void main(String[] args) { |
| 10 | // please download the model from | 10 | // please download the model from |
| 11 | // https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models | 11 | // https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models |
java-api-examples/OnlineAddPunctuation.java
0 → 100644
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use an online punctuation model to add punctuation to text. | ||
| 4 | +// | ||
| 5 | +// The model supports ONLY English. | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | + | ||
| 8 | +public class OnlineAddPunctuation { | ||
| 9 | + public static void main(String[] args) { | ||
| 10 | + // please download the model from | ||
| 11 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
| 12 | + String model = "./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx"; | ||
| 13 | + String bpeVocab = "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab"; | ||
| 14 | + OnlinePunctuationModelConfig modelConfig = | ||
| 15 | + OnlinePunctuationModelConfig.builder() | ||
| 16 | + .setCnnBilstm(model) | ||
| 17 | + .setBpeVocab(bpeVocab) | ||
| 18 | + .setNumThreads(1) | ||
| 19 | + .setDebug(true) | ||
| 20 | + .build(); | ||
| 21 | + OnlinePunctuationConfig config = | ||
| 22 | + OnlinePunctuationConfig.builder().setModel(modelConfig).build(); | ||
| 23 | + | ||
| 24 | + OnlinePunctuation punct = new OnlinePunctuation(config); | ||
| 25 | + | ||
| 26 | + String[] sentences = | ||
| 27 | + new String[] { | ||
| 28 | + "how are you doing fantastic thank you how about you", | ||
| 29 | + "The African blogosphere is rapidly expanding bringing more voices online in the form of" | ||
| 30 | + + " commentaries opinions analyses rants and poetry", | ||
| 31 | + }; | ||
| 32 | + | ||
| 33 | + System.out.println("---"); | ||
| 34 | + for (String text : sentences) { | ||
| 35 | + String out = punct.addPunctuation(text); | ||
| 36 | + System.out.printf("Input: %s\n", text); | ||
| 37 | + System.out.printf("Output: %s\n", out); | ||
| 38 | + System.out.println("---"); | ||
| 39 | + } | ||
| 40 | + } | ||
| 41 | +} |
| @@ -34,4 +34,4 @@ fi | @@ -34,4 +34,4 @@ fi | ||
| 34 | java \ | 34 | java \ |
| 35 | -Djava.library.path=$PWD/../build/lib \ | 35 | -Djava.library.path=$PWD/../build/lib \ |
| 36 | -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | 36 | -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ |
| 37 | - ./AddPunctuation.java | 37 | + ./OfflineAddPunctuation.java |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 6 | + mkdir -p ../build | ||
| 7 | + pushd ../build | ||
| 8 | + cmake \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 11 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 12 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 14 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 15 | + .. | ||
| 16 | + | ||
| 17 | + make -j4 | ||
| 18 | + ls -lh lib | ||
| 19 | + popd | ||
| 20 | +fi | ||
| 21 | + | ||
| 22 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 23 | + pushd ../sherpa-onnx/java-api | ||
| 24 | + make | ||
| 25 | + popd | ||
| 26 | +fi | ||
| 27 | + | ||
| 28 | +if [ ! -f ./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx ]; then | ||
| 29 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
| 30 | + tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
| 31 | + rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +java \ | ||
| 35 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 36 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 37 | + ./OnlineAddPunctuation.java |
kotlin-api-examples/OnlinePunctuation.kt
0 → 120000
| 1 | +../sherpa-onnx/kotlin-api/OnlinePunctuation.kt |
| @@ -302,16 +302,16 @@ function testInverseTextNormalizationOnlineAsr() { | @@ -302,16 +302,16 @@ function testInverseTextNormalizationOnlineAsr() { | ||
| 302 | java -Djava.library.path=../build/lib -jar $out_filename | 302 | java -Djava.library.path=../build/lib -jar $out_filename |
| 303 | } | 303 | } |
| 304 | 304 | ||
| 305 | -function testPunctuation() { | 305 | +function testOfflinePunctuation() { |
| 306 | if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then | 306 | if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then |
| 307 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 | 307 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 |
| 308 | tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 | 308 | tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 |
| 309 | rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 | 309 | rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 |
| 310 | fi | 310 | fi |
| 311 | 311 | ||
| 312 | - out_filename=test_punctuation.jar | 312 | + out_filename=test_offline_punctuation.jar |
| 313 | kotlinc-jvm -include-runtime -d $out_filename \ | 313 | kotlinc-jvm -include-runtime -d $out_filename \ |
| 314 | - ./test_punctuation.kt \ | 314 | + ./test_offline_punctuation.kt \ |
| 315 | ./OfflinePunctuation.kt \ | 315 | ./OfflinePunctuation.kt \ |
| 316 | faked-asset-manager.kt \ | 316 | faked-asset-manager.kt \ |
| 317 | faked-log.kt | 317 | faked-log.kt |
| @@ -321,6 +321,25 @@ function testPunctuation() { | @@ -321,6 +321,25 @@ function testPunctuation() { | ||
| 321 | java -Djava.library.path=../build/lib -jar $out_filename | 321 | java -Djava.library.path=../build/lib -jar $out_filename |
| 322 | } | 322 | } |
| 323 | 323 | ||
| 324 | +function testOnlinePunctuation() { | ||
| 325 | + if [ ! -f ./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx ]; then | ||
| 326 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
| 327 | + tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
| 328 | + rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
| 329 | + fi | ||
| 330 | + | ||
| 331 | + out_filename=test_online_punctuation.jar | ||
| 332 | + kotlinc-jvm -include-runtime -d $out_filename \ | ||
| 333 | + ./test_online_punctuation.kt \ | ||
| 334 | + ./OnlinePunctuation.kt \ | ||
| 335 | + faked-asset-manager.kt \ | ||
| 336 | + faked-log.kt | ||
| 337 | + | ||
| 338 | + ls -lh $out_filename | ||
| 339 | + | ||
| 340 | + java -Djava.library.path=../build/lib -jar $out_filename | ||
| 341 | +} | ||
| 342 | + | ||
| 324 | function testOfflineSpeakerDiarization() { | 343 | function testOfflineSpeakerDiarization() { |
| 325 | if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then | 344 | if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then |
| 326 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | 345 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 |
| @@ -359,6 +378,7 @@ testTts | @@ -359,6 +378,7 @@ testTts | ||
| 359 | testAudioTagging | 378 | testAudioTagging |
| 360 | testSpokenLanguageIdentification | 379 | testSpokenLanguageIdentification |
| 361 | testOfflineAsr | 380 | testOfflineAsr |
| 362 | -testPunctuation | 381 | +testOfflinePunctuation |
| 382 | +testOnlinePunctuation | ||
| 363 | testInverseTextNormalizationOfflineAsr | 383 | testInverseTextNormalizationOfflineAsr |
| 364 | testInverseTextNormalizationOnlineAsr | 384 | testInverseTextNormalizationOnlineAsr |
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +fun main() { | ||
| 4 | + testPunctuation() | ||
| 5 | +} | ||
| 6 | + | ||
| 7 | +// Download the model from https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
| 8 | +fun testPunctuation() { | ||
| 9 | + val config = OnlinePunctuationConfig( | ||
| 10 | + model=OnlinePunctuationModelConfig( | ||
| 11 | + cnnBilstm="./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx", | ||
| 12 | + bpeVocab="./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab", | ||
| 13 | + numThreads=1, | ||
| 14 | + debug=true, | ||
| 15 | + provider="cpu", | ||
| 16 | + ) | ||
| 17 | + ) | ||
| 18 | + val punct = OnlinePunctuation(config = config) | ||
| 19 | + val sentences = arrayOf( | ||
| 20 | + "how are you doing fantastic thank you what is about you", | ||
| 21 | + "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry", | ||
| 22 | + ) | ||
| 23 | + println("---") | ||
| 24 | + for (text in sentences) { | ||
| 25 | + val out = punct.addPunctuation(text) | ||
| 26 | + println("Input: $text") | ||
| 27 | + println("Output: $out") | ||
| 28 | + println("---") | ||
| 29 | + } | ||
| 30 | +} |
| @@ -53,6 +53,10 @@ java_files += OfflinePunctuationModelConfig.java | @@ -53,6 +53,10 @@ java_files += OfflinePunctuationModelConfig.java | ||
| 53 | java_files += OfflinePunctuationConfig.java | 53 | java_files += OfflinePunctuationConfig.java |
| 54 | java_files += OfflinePunctuation.java | 54 | java_files += OfflinePunctuation.java |
| 55 | 55 | ||
| 56 | +java_files += OnlinePunctuationModelConfig.java | ||
| 57 | +java_files += OnlinePunctuationConfig.java | ||
| 58 | +java_files += OnlinePunctuation.java | ||
| 59 | + | ||
| 56 | java_files += OfflineZipformerAudioTaggingModelConfig.java | 60 | java_files += OfflineZipformerAudioTaggingModelConfig.java |
| 57 | java_files += AudioTaggingModelConfig.java | 61 | java_files += AudioTaggingModelConfig.java |
| 58 | java_files += AudioTaggingConfig.java | 62 | java_files += AudioTaggingConfig.java |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class OnlinePunctuation { | ||
| 6 | + static { | ||
| 7 | + System.loadLibrary("sherpa-onnx-jni"); | ||
| 8 | + } | ||
| 9 | + | ||
| 10 | + private long ptr = 0; | ||
| 11 | + | ||
| 12 | + public OnlinePunctuation(OnlinePunctuationConfig config) { | ||
| 13 | + ptr = newFromFile(config); | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + public String addPunctuation(String text) { | ||
| 17 | + return addPunctuation(ptr, text); | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + @Override | ||
| 21 | + protected void finalize() throws Throwable { | ||
| 22 | + release(); | ||
| 23 | + } | ||
| 24 | + | ||
| 25 | + // Call release() explicitly once this object is no longer needed instead of relying on finalize() | ||
| 26 | + public void release() { | ||
| 27 | + if (this.ptr == 0) { | ||
| 28 | + return; | ||
| 29 | + } | ||
| 30 | + delete(this.ptr); | ||
| 31 | + this.ptr = 0; | ||
| 32 | + } | ||
| 33 | + | ||
| 34 | + private native void delete(long ptr); | ||
| 35 | + | ||
| 36 | + private native long newFromFile(OnlinePunctuationConfig config); | ||
| 37 | + | ||
| 38 | + private native String addPunctuation(long ptr, String text); | ||
| 39 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class OnlinePunctuationConfig { | ||
| 6 | + private final OnlinePunctuationModelConfig model; | ||
| 7 | + | ||
| 8 | + private OnlinePunctuationConfig(Builder builder) { | ||
| 9 | + this.model = builder.model; | ||
| 10 | + } | ||
| 11 | + | ||
| 12 | + public static Builder builder() { | ||
| 13 | + return new Builder(); | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + public OnlinePunctuationModelConfig getModel() { | ||
| 17 | + return model; | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + | ||
| 21 | + public static class Builder { | ||
| 22 | + private OnlinePunctuationModelConfig model = OnlinePunctuationModelConfig.builder().build(); | ||
| 23 | + | ||
| 24 | + public OnlinePunctuationConfig build() { | ||
| 25 | + return new OnlinePunctuationConfig(this); | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + public Builder setModel(OnlinePunctuationModelConfig model) { | ||
| 29 | + this.model = model; | ||
| 30 | + return this; | ||
| 31 | + } | ||
| 32 | + } | ||
| 33 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class OnlinePunctuationModelConfig { | ||
| 6 | + private final String cnnBilstm; | ||
| 7 | + private final String bpeVocab; | ||
| 8 | + private final int numThreads; | ||
| 9 | + private final boolean debug; | ||
| 10 | + private final String provider; | ||
| 11 | + | ||
| 12 | + private OnlinePunctuationModelConfig(Builder builder) { | ||
| 13 | + this.cnnBilstm = builder.cnnBilstm; | ||
| 14 | + this.bpeVocab = builder.bpeVocab; | ||
| 15 | + this.numThreads = builder.numThreads; | ||
| 16 | + this.debug = builder.debug; | ||
| 17 | + this.provider = builder.provider; | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + public static Builder builder() { | ||
| 21 | + return new Builder(); | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + public String getCnnBilstm() { | ||
| 25 | + return cnnBilstm; | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + public String getBpeVocab() { | ||
| 29 | + return bpeVocab; | ||
| 30 | + } | ||
| 31 | + | ||
| 32 | + public static class Builder { | ||
| 33 | + private String cnnBilstm = ""; | ||
| 34 | + private String bpeVocab = ""; | ||
| 35 | + private int numThreads = 1; | ||
| 36 | + private boolean debug = true; | ||
| 37 | + private String provider = "cpu"; | ||
| 38 | + | ||
| 39 | + public OnlinePunctuationModelConfig build() { | ||
| 40 | + return new OnlinePunctuationModelConfig(this); | ||
| 41 | + } | ||
| 42 | + | ||
| 43 | + public Builder setCnnBilstm(String cnnBilstm) { | ||
| 44 | + this.cnnBilstm = cnnBilstm; | ||
| 45 | + return this; | ||
| 46 | + } | ||
| 47 | + | ||
| 48 | + public Builder setBpeVocab(String bpeVocab) { | ||
| 49 | + this.bpeVocab = bpeVocab; | ||
| 50 | + return this; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + public Builder setNumThreads(int numThreads) { | ||
| 54 | + this.numThreads = numThreads; | ||
| 55 | + return this; | ||
| 56 | + } | ||
| 57 | + | ||
| 58 | + public Builder setDebug(boolean debug) { | ||
| 59 | + this.debug = debug; | ||
| 60 | + return this; | ||
| 61 | + } | ||
| 62 | + | ||
| 63 | + public Builder setProvider(String provider) { | ||
| 64 | + this.provider = provider; | ||
| 65 | + return this; | ||
| 66 | + } | ||
| 67 | + } | ||
| 68 | +} |
| @@ -17,6 +17,7 @@ set(sources | @@ -17,6 +17,7 @@ set(sources | ||
| 17 | offline-punctuation.cc | 17 | offline-punctuation.cc |
| 18 | offline-recognizer.cc | 18 | offline-recognizer.cc |
| 19 | offline-stream.cc | 19 | offline-stream.cc |
| 20 | + online-punctuation.cc | ||
| 20 | online-recognizer.cc | 21 | online-recognizer.cc |
| 21 | online-stream.cc | 22 | online-stream.cc |
| 22 | speaker-embedding-extractor.cc | 23 | speaker-embedding-extractor.cc |
sherpa-onnx/jni/online-punctuation.cc
0 → 100644
| 1 | +// sherpa-onnx/jni/online-punctuation.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#include "sherpa-onnx/csrc/online-punctuation.h" | ||
| 6 | + | ||
| 7 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 8 | +#include "sherpa-onnx/jni/common.h" | ||
| 9 | + | ||
| 10 | +namespace sherpa_onnx { | ||
| 11 | + | ||
| 12 | +static OnlinePunctuationConfig GetOnlinePunctuationConfig(JNIEnv *env, | ||
| 13 | + jobject config) { | ||
| 14 | + OnlinePunctuationConfig ans; | ||
| 15 | + | ||
| 16 | + jclass cls = env->GetObjectClass(config); | ||
| 17 | + jfieldID fid; | ||
| 18 | + | ||
| 19 | + fid = env->GetFieldID(cls, "model", | ||
| 20 | + "Lcom/k2fsa/sherpa/onnx/OnlinePunctuationModelConfig;"); | ||
| 21 | + jobject model_config = env->GetObjectField(config, fid); | ||
| 22 | + jclass model_config_cls = env->GetObjectClass(model_config); | ||
| 23 | + | ||
| 24 | + fid = env->GetFieldID(model_config_cls, "cnnBilstm", "Ljava/lang/String;"); | ||
| 25 | + jstring s = (jstring)env->GetObjectField(model_config, fid); | ||
| 26 | + const char *p = env->GetStringUTFChars(s, nullptr); | ||
| 27 | + ans.model.cnn_bilstm = p; | ||
| 28 | + env->ReleaseStringUTFChars(s, p); | ||
| 29 | + | ||
| 30 | + fid = env->GetFieldID(model_config_cls, "bpeVocab", "Ljava/lang/String;"); | ||
| 31 | + s = (jstring)env->GetObjectField(model_config, fid); | ||
| 32 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 33 | + ans.model.bpe_vocab = p; | ||
| 34 | + env->ReleaseStringUTFChars(s, p); | ||
| 35 | + | ||
| 36 | + fid = env->GetFieldID(model_config_cls, "numThreads", "I"); | ||
| 37 | + ans.model.num_threads = env->GetIntField(model_config, fid); | ||
| 38 | + | ||
| 39 | + fid = env->GetFieldID(model_config_cls, "debug", "Z"); | ||
| 40 | + ans.model.debug = env->GetBooleanField(model_config, fid); | ||
| 41 | + | ||
| 42 | + fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;"); | ||
| 43 | + s = (jstring)env->GetObjectField(model_config, fid); | ||
| 44 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 45 | + ans.model.provider = p; | ||
| 46 | + env->ReleaseStringUTFChars(s, p); | ||
| 47 | + | ||
| 48 | + return ans; | ||
| 49 | +} | ||
| 50 | + | ||
| 51 | +} // namespace sherpa_onnx | ||
| 52 | + | ||
| 53 | +SHERPA_ONNX_EXTERN_C | ||
| 54 | +JNIEXPORT jlong JNICALL | ||
| 55 | +Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_newFromAsset(JNIEnv *env, | ||
| 56 | + jobject /*obj*/, | ||
| 57 | + jobject asset_manager, | ||
| 58 | + jobject _config) { | ||
| 59 | +#if __ANDROID_API__ >= 9 | ||
| 60 | + AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); | ||
| 61 | + if (!mgr) { | ||
| 62 | + SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); | ||
| 63 | + return 0; | ||
| 64 | + } | ||
| 65 | +#endif | ||
| 66 | + auto config = sherpa_onnx::GetOnlinePunctuationConfig(env, _config); | ||
| 67 | + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | ||
| 68 | + | ||
| 69 | + auto model = new sherpa_onnx::OnlinePunctuation( | ||
| 70 | +#if __ANDROID_API__ >= 9 | ||
| 71 | + mgr, | ||
| 72 | +#endif | ||
| 73 | + config); | ||
| 74 | + | ||
| 75 | + return (jlong)model; | ||
| 76 | +} | ||
| 77 | + | ||
| 78 | +SHERPA_ONNX_EXTERN_C | ||
| 79 | +JNIEXPORT jlong JNICALL | ||
| 80 | +Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_newFromFile(JNIEnv *env, | ||
| 81 | + jobject /*obj*/, | ||
| 82 | + jobject _config) { | ||
| 83 | + auto config = sherpa_onnx::GetOnlinePunctuationConfig(env, _config); | ||
| 84 | + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | ||
| 85 | + | ||
| 86 | + if (!config.Validate()) { | ||
| 87 | + SHERPA_ONNX_LOGE("Errors found in config!"); | ||
| 88 | + return 0; | ||
| 89 | + } | ||
| 90 | + | ||
| 91 | + auto model = new sherpa_onnx::OnlinePunctuation(config); | ||
| 92 | + | ||
| 93 | + return (jlong)model; | ||
| 94 | +} | ||
| 95 | + | ||
| 96 | +SHERPA_ONNX_EXTERN_C | ||
| 97 | +JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_delete( | ||
| 98 | + JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) { | ||
| 99 | + delete reinterpret_cast<sherpa_onnx::OnlinePunctuation *>(ptr); | ||
| 100 | +} | ||
| 101 | + | ||
| 102 | +SHERPA_ONNX_EXTERN_C | ||
| 103 | +JNIEXPORT jstring JNICALL | ||
| 104 | +Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_addPunctuation(JNIEnv *env, | ||
| 105 | + jobject /*obj*/, | ||
| 106 | + jlong ptr, | ||
| 107 | + jstring text) { | ||
| 108 | + auto punct = reinterpret_cast<const sherpa_onnx::OnlinePunctuation *>(ptr); | ||
| 109 | + | ||
| 110 | + const char *ptext = env->GetStringUTFChars(text, nullptr); | ||
| 111 | + | ||
| 112 | + std::string result = punct->AddPunctuationWithCase(ptext); | ||
| 113 | + | ||
| 114 | + env->ReleaseStringUTFChars(text, ptext); | ||
| 115 | + | ||
| 116 | + return env->NewStringUTF(result.c_str()); | ||
| 117 | +} |
sherpa-onnx/kotlin-api/OnlinePunctuation.kt
0 → 100644
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +import android.content.res.AssetManager | ||
| 4 | + | ||
| 5 | +data class OnlinePunctuationModelConfig( | ||
| 6 | + var cnnBilstm: String = "", | ||
| 7 | + var bpeVocab: String = "", | ||
| 8 | + var numThreads: Int = 1, | ||
| 9 | + var debug: Boolean = false, | ||
| 10 | + var provider: String = "cpu", | ||
| 11 | +) | ||
| 12 | + | ||
| 13 | + | ||
| 14 | +data class OnlinePunctuationConfig( | ||
| 15 | + var model: OnlinePunctuationModelConfig, | ||
| 16 | +) | ||
| 17 | + | ||
| 18 | +class OnlinePunctuation( | ||
| 19 | + assetManager: AssetManager? = null, | ||
| 20 | + config: OnlinePunctuationConfig, | ||
| 21 | +) { | ||
| 22 | + private var ptr: Long | ||
| 23 | + | ||
| 24 | + init { | ||
| 25 | + ptr = if (assetManager != null) { | ||
| 26 | + newFromAsset(assetManager, config) | ||
| 27 | + } else { | ||
| 28 | + newFromFile(config) | ||
| 29 | + } | ||
| 30 | + } | ||
| 31 | + | ||
| 32 | + protected fun finalize() { | ||
| 33 | + if (ptr != 0L) { | ||
| 34 | + delete(ptr) | ||
| 35 | + ptr = 0 | ||
| 36 | + } | ||
| 37 | + } | ||
| 38 | + | ||
| 39 | + fun release() = finalize() | ||
| 40 | + | ||
| 41 | + fun addPunctuation(text: String) = addPunctuation(ptr, text) | ||
| 42 | + | ||
| 43 | + private external fun delete(ptr: Long) | ||
| 44 | + | ||
| 45 | + private external fun addPunctuation(ptr: Long, text: String): String | ||
| 46 | + | ||
| 47 | + private external fun newFromAsset( | ||
| 48 | + assetManager: AssetManager, | ||
| 49 | + config: OnlinePunctuationConfig, | ||
| 50 | + ): Long | ||
| 51 | + | ||
| 52 | + private external fun newFromFile( | ||
| 53 | + config: OnlinePunctuationConfig, | ||
| 54 | + ): Long | ||
| 55 | + | ||
| 56 | + companion object { | ||
| 57 | + init { | ||
| 58 | + System.loadLibrary("sherpa-onnx-jni") | ||
| 59 | + } | ||
| 60 | + } | ||
| 61 | +} |
-
请 注册 或 登录 后发表评论