正在显示
42 个修改的文件
包含
1004 行增加
和
964 行删除
| @@ -11,6 +11,7 @@ on: | @@ -11,6 +11,7 @@ on: | ||
| 11 | - 'java-api-examples/**' | 11 | - 'java-api-examples/**' |
| 12 | - 'sherpa-onnx/csrc/*' | 12 | - 'sherpa-onnx/csrc/*' |
| 13 | - 'sherpa-onnx/jni/*' | 13 | - 'sherpa-onnx/jni/*' |
| 14 | + - 'sherpa-onnx/java-api/**' | ||
| 14 | pull_request: | 15 | pull_request: |
| 15 | branches: | 16 | branches: |
| 16 | - master | 17 | - master |
| @@ -21,6 +22,7 @@ on: | @@ -21,6 +22,7 @@ on: | ||
| 21 | - 'java-api-examples/**' | 22 | - 'java-api-examples/**' |
| 22 | - 'sherpa-onnx/csrc/*' | 23 | - 'sherpa-onnx/csrc/*' |
| 23 | - 'sherpa-onnx/jni/*' | 24 | - 'sherpa-onnx/jni/*' |
| 25 | + - 'sherpa-onnx/java-api/**' | ||
| 24 | workflow_dispatch: | 26 | workflow_dispatch: |
| 25 | 27 | ||
| 26 | concurrency: | 28 | concurrency: |
| @@ -46,7 +48,7 @@ jobs: | @@ -46,7 +48,7 @@ jobs: | ||
| 46 | - name: ccache | 48 | - name: ccache |
| 47 | uses: hendrikmuhs/ccache-action@v1.2 | 49 | uses: hendrikmuhs/ccache-action@v1.2 |
| 48 | with: | 50 | with: |
| 49 | - key: ${{ matrix.os }} | 51 | + key: ${{ matrix.os }}-java |
| 50 | 52 | ||
| 51 | - name: Display java version | 53 | - name: Display java version |
| 52 | shell: bash | 54 | shell: bash |
| @@ -54,6 +56,42 @@ jobs: | @@ -54,6 +56,42 @@ jobs: | ||
| 54 | java -version | 56 | java -version |
| 55 | echo "JAVA_HOME is: ${JAVA_HOME}" | 57 | echo "JAVA_HOME is: ${JAVA_HOME}" |
| 56 | 58 | ||
| 59 | + cmake --version | ||
| 60 | + | ||
| 61 | + - name: Build sherpa-onnx (jar) | ||
| 62 | + shell: bash | ||
| 63 | + run: | | ||
| 64 | + cd sherpa-onnx/java-api/ | ||
| 65 | + make | ||
| 66 | + ls -lh | ||
| 67 | + | ||
| 68 | + - uses: actions/upload-artifact@v4 | ||
| 69 | + with: | ||
| 70 | + name: sherpa-onnx-jar-${{ matrix.os }} | ||
| 71 | + path: sherpa-onnx/java-api/build | ||
| 72 | + | ||
| 73 | + - name: Build sherpa-onnx (C++) | ||
| 74 | + shell: bash | ||
| 75 | + run: | | ||
| 76 | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache | ||
| 77 | + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" | ||
| 78 | + | ||
| 79 | + mkdir build | ||
| 80 | + cd build | ||
| 81 | + | ||
| 82 | + cmake \ | ||
| 83 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 84 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 85 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 86 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 87 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 88 | + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ | ||
| 89 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 90 | + .. | ||
| 91 | + | ||
| 92 | + make -j4 | ||
| 93 | + ls -lh lib | ||
| 94 | + | ||
| 57 | - name: Run java test | 95 | - name: Run java test |
| 58 | shell: bash | 96 | shell: bash |
| 59 | run: | | 97 | run: | |
| @@ -62,4 +100,12 @@ jobs: | @@ -62,4 +100,12 @@ jobs: | ||
| 62 | cmake --version | 100 | cmake --version |
| 63 | 101 | ||
| 64 | cd ./java-api-examples | 102 | cd ./java-api-examples |
| 65 | - ./runtest.sh | 103 | + ./run-streaming-decode-file-ctc.sh |
| 104 | + # Delete model files to save space | ||
| 105 | + rm -rf sherpa-onnx-streaming-* | ||
| 106 | + | ||
| 107 | + ./run-streaming-decode-file-paraformer.sh | ||
| 108 | + rm -rf sherpa-onnx-streaming-* | ||
| 109 | + | ||
| 110 | + ./run-streaming-decode-file-transducer.sh | ||
| 111 | + rm -rf sherpa-onnx-streaming-* |
| @@ -66,11 +66,11 @@ jobs: | @@ -66,11 +66,11 @@ jobs: | ||
| 66 | - os: macos-14 | 66 | - os: macos-14 |
| 67 | python-version: "3.12" | 67 | python-version: "3.12" |
| 68 | 68 | ||
| 69 | - - os: windows-2019 | 69 | + - os: windows-2022 |
| 70 | python-version: "3.7" | 70 | python-version: "3.7" |
| 71 | - - os: windows-2019 | 71 | + - os: windows-2022 |
| 72 | python-version: "3.8" | 72 | python-version: "3.8" |
| 73 | - - os: windows-2019 | 73 | + - os: windows-2022 |
| 74 | python-version: "3.9" | 74 | python-version: "3.9" |
| 75 | 75 | ||
| 76 | - os: windows-2022 | 76 | - os: windows-2022 |
java-api-examples/Makefile
已删除
100755 → 0
| 1 | -ENTRY_POINT = ./ | ||
| 2 | - | ||
| 3 | -LIB_SRC_DIR := ../sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx | ||
| 4 | - | ||
| 5 | -LIB_FILES = \ | ||
| 6 | - $(LIB_SRC_DIR)/EndpointRule.java \ | ||
| 7 | - $(LIB_SRC_DIR)/EndpointConfig.java \ | ||
| 8 | - $(LIB_SRC_DIR)/FeatureConfig.java \ | ||
| 9 | - $(LIB_SRC_DIR)/OnlineLMConfig.java \ | ||
| 10 | - $(LIB_SRC_DIR)/OnlineTransducerModelConfig.java \ | ||
| 11 | - $(LIB_SRC_DIR)/OnlineParaformerModelConfig.java \ | ||
| 12 | - $(LIB_SRC_DIR)/OnlineZipformer2CtcModelConfig.java \ | ||
| 13 | - $(LIB_SRC_DIR)/OnlineModelConfig.java \ | ||
| 14 | - $(LIB_SRC_DIR)/OnlineRecognizerConfig.java \ | ||
| 15 | - $(LIB_SRC_DIR)/OnlineStream.java \ | ||
| 16 | - $(LIB_SRC_DIR)/OnlineRecognizer.java | ||
| 17 | - | ||
| 18 | -WEBSOCKET_DIR:= ./src/websocketsrv | ||
| 19 | -WEBSOCKET_FILES = \ | ||
| 20 | - $(WEBSOCKET_DIR)/ConnectionData.java \ | ||
| 21 | - $(WEBSOCKET_DIR)/DecoderThreadHandler.java \ | ||
| 22 | - $(WEBSOCKET_DIR)/StreamThreadHandler.java \ | ||
| 23 | - $(WEBSOCKET_DIR)/AsrWebsocketServer.java \ | ||
| 24 | - $(WEBSOCKET_DIR)/AsrWebsocketClient.java \ | ||
| 25 | - | ||
| 26 | - | ||
| 27 | -LIB_BUILD_DIR = ./lib | ||
| 28 | - | ||
| 29 | - | ||
| 30 | -EXAMPLE_FILE = DecodeFile.java | ||
| 31 | - | ||
| 32 | -EXAMPLE_Mic = DecodeMic.java | ||
| 33 | - | ||
| 34 | -JAVAC = javac | ||
| 35 | - | ||
| 36 | -BUILD_DIR = build | ||
| 37 | - | ||
| 38 | - | ||
| 39 | -RUNJFLAGS = -Dfile.encoding=utf-8 | ||
| 40 | - | ||
| 41 | -vpath %.class $(BUILD_DIR) | ||
| 42 | -vpath %.java src | ||
| 43 | - | ||
| 44 | - | ||
| 45 | -buildfile: | ||
| 46 | - $(JAVAC) -cp lib/sherpaonnx.jar -d $(BUILD_DIR) -encoding UTF-8 src/$(EXAMPLE_FILE) | ||
| 47 | - | ||
| 48 | -buildmic: | ||
| 49 | - $(JAVAC) -cp lib/sherpaonnx.jar -d $(BUILD_DIR) -encoding UTF-8 src/$(EXAMPLE_Mic) | ||
| 50 | - | ||
| 51 | -rebuild: clean all | ||
| 52 | - | ||
| 53 | -.PHONY: clean run downjar | ||
| 54 | - | ||
| 55 | -downjar: | ||
| 56 | - wget https://repo1.maven.org/maven2/org/slf4j/slf4j-api/1.7.25/slf4j-api-1.7.25.jar -P ./lib/ | ||
| 57 | - wget https://repo1.maven.org/maven2/org/slf4j/slf4j-simple/1.7.25/slf4j-simple-1.7.25.jar -P ./lib/ | ||
| 58 | - wget https://github.com/TooTallNate/Java-WebSocket/releases/download/v1.5.3/Java-WebSocket-1.5.3.jar -P ./lib/ | ||
| 59 | - | ||
| 60 | - | ||
| 61 | -clean: | ||
| 62 | - rm -frv $(BUILD_DIR)/* | ||
| 63 | - rm -frv $(LIB_BUILD_DIR)/* | ||
| 64 | - mkdir -p $(BUILD_DIR) | ||
| 65 | - mkdir -p ./lib | ||
| 66 | - | ||
| 67 | -runfile: packjar buildfile | ||
| 68 | - java -cp ./lib/sherpaonnx.jar:build $(RUNJFLAGS) DecodeFile test.wav | ||
| 69 | - | ||
| 70 | -runhotwords: | ||
| 71 | - java -cp ./lib/sherpaonnx.jar:build $(RUNJFLAGS) DecodeFile hotwords.wav | ||
| 72 | - | ||
| 73 | -runmic: | ||
| 74 | - java -cp ./lib/sherpaonnx.jar:build $(RUNJFLAGS) DecodeMic | ||
| 75 | - | ||
| 76 | -runsrv: | ||
| 77 | - java -cp $(BUILD_DIR):lib/Java-WebSocket-1.5.3.jar:lib/slf4j-simple-1.7.25.jar:lib/slf4j-api-1.7.25.jar:../lib/sherpaonnx.jar $(RUNJFLAGS) websocketsrv.AsrWebsocketServer $(shell pwd)/../build/lib/libsherpa-onnx-jni.so ./modeltest.cfg | ||
| 78 | - | ||
| 79 | -runclient: | ||
| 80 | - java -cp $(BUILD_DIR):lib/Java-WebSocket-1.5.3.jar:lib/slf4j-simple-1.7.25.jar:lib/slf4j-api-1.7.25.jar:../lib/sherpaonnx.jar $(RUNJFLAGS) websocketsrv.AsrWebsocketClient $(shell pwd)/../build/lib/libsherpa-onnx-jni.so 127.0.0.1 8890 ./test.wav 32 | ||
| 81 | - | ||
| 82 | -runclienthotwords: | ||
| 83 | - java -cp $(BUILD_DIR):lib/Java-WebSocket-1.5.3.jar:lib/slf4j-simple-1.7.25.jar:lib/slf4j-api-1.7.25.jar:../lib/sherpaonnx.jar $(RUNJFLAGS) websocketsrv.AsrWebsocketClient $(shell pwd)/../build/lib/libsherpa-onnx-jni.so 127.0.0.1 8890 ./hotwords.wav 32 | ||
| 84 | - | ||
| 85 | -buildlib: $(LIB_FILES:.java=.class) | ||
| 86 | - | ||
| 87 | - | ||
| 88 | -%.class: %.java | ||
| 89 | - $(JAVAC) -cp $(BUILD_DIR) -d $(BUILD_DIR) -encoding UTF-8 $< | ||
| 90 | - | ||
| 91 | -buildwebsocket: $(WEBSOCKET_FILES:.java=.class) | ||
| 92 | - | ||
| 93 | - | ||
| 94 | -%.class: %.java | ||
| 95 | - | ||
| 96 | - $(JAVAC) -cp $(BUILD_DIR):lib/slf4j-simple-1.7.25.jar:lib/slf4j-api-1.7.25.jar:lib/Java-WebSocket-1.5.3.jar:../lib/sherpaonnx.jar -d $(BUILD_DIR) -encoding UTF-8 $< | ||
| 97 | - | ||
| 98 | -packjar: buildlib | ||
| 99 | - jar cvfe lib/sherpaonnx.jar . -C $(BUILD_DIR) . | ||
| 100 | - | ||
| 101 | -all: clean buildlib packjar buildfile buildmic downjar buildwebsocket |
| 1 | -0.Introduction | ||
| 2 | --------------- | 1 | +# Introduction |
| 3 | 2 | ||
| 4 | -Java wrapper `com.k2fsa.sherpa.onnx.OnlineRecognizer` for `sherpa-onnx`. Java is a cross-platform language; you can build the JNI .so library for your system and then use the same Java API on all your platforms. | ||
| 5 | -It now supports multiple threads for the WebSocket server. | 3 | +This directory contains examples for the Java API of sherpa-onnx. |
| 6 | 4 | ||
| 7 | -```xml | ||
| 8 | -Depend on: | ||
| 9 | - Openjdk 1.8 | ||
| 10 | -``` | ||
| 11 | - | ||
| 12 | ---- | ||
| 13 | - | ||
| 14 | -1.Compile libsherpa-onnx-jni.so | ||
| 15 | -------------------------------- | ||
| 16 | - | ||
| 17 | -Compile sherpa-onnx/jni/jni.cc according to your system. | ||
| 18 | -Example for Ubuntu 18.04 LTS, Openjdk 1.8.0_362: | ||
| 19 | - | ||
| 20 | -```xml | ||
| 21 | - git clone https://github.com/k2-fsa/sherpa-onnx | ||
| 22 | - cd sherpa-onnx | ||
| 23 | - mkdir build | ||
| 24 | - cd build | ||
| 25 | - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DSHERPA_ONNX_ENABLE_JNI=ON .. | ||
| 26 | - make -j6 | ||
| 27 | -``` | ||
| 28 | - | ||
| 29 | ---- | ||
| 30 | - | ||
| 31 | -2.Download asr model files | ||
| 32 | --------------------------- | ||
| 33 | - | ||
| 34 | -[click here for more detail](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html) | ||
| 35 | --------------------------- | ||
| 36 | - | ||
| 37 | -3.Config model config.cfg | ||
| 38 | -------------------------- | ||
| 39 | -/**change model path in config.cfg according to your env**/ | ||
| 40 | -```xml | ||
| 41 | - #model config | ||
| 42 | - sample_rate=16000 | ||
| 43 | - feature_dim=80 | ||
| 44 | - rule1_min_trailing_silence=2.4 | ||
| 45 | - rule2_min_trailing_silence=1.2 | ||
| 46 | - rule3_min_utterance_length=20 | ||
| 47 | - encoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx | ||
| 48 | - decoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx | ||
| 49 | - joiner=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx | ||
| 50 | - tokens=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt | ||
| 51 | - num_threads=4 | ||
| 52 | - enable_endpoint_detection=false | ||
| 53 | - decoding_method=greedy_search | ||
| 54 | - max_active_paths=4 | ||
| 55 | - | ||
| 56 | - #websocket server config | ||
| 57 | - port=8890 | ||
| 58 | - #number of threads pool for network io | ||
| 59 | - connection_thread_num=16 | ||
| 60 | - #number of threads pool for stream | ||
| 61 | - stream_thread_num=16 | ||
| 62 | - #number of threads pool for decoder | ||
| 63 | - decoder_thread_num=16 | ||
| 64 | - #size of streams for parallel decoding | ||
| 65 | - parallel_decoder_num=16 | ||
| 66 | - #time(ms) idle for decoder thread when no job | ||
| 67 | - decoder_time_idle=10 | ||
| 68 | - #time(ms) out for connection data | ||
| 69 | - deocder_time_out=3000 | ||
| 70 | -``` | 5 | +# Usage |
| 71 | 6 | ||
| 72 | ---- | ||
| 73 | - | ||
| 74 | -4.A simple java example | ||
| 75 | ------------------------ | ||
| 76 | - | ||
| 77 | -refer to [java_api_example](https://github.com/k2-fsa/sherpa-onnx/blob/master/java-api-examples/src/DecodeFile.java) for more detail. | ||
| 78 | - | ||
| 79 | -```java | ||
| 80 | - import com.k2fsa.sherpa.onnx.OnlineRecognizer; | ||
| 81 | - import com.k2fsa.sherpa.onnx.OnlineStream; | ||
| 82 | - String cfgpath=appdir+"/modelconfig.cfg"; | ||
| 83 | - OnlineRecognizer.setSoPath(soPath); //set so lib path | ||
| 84 | - | ||
| 85 | - OnlineRecognizer rcgOjb = new OnlineRecognizer(); //create a recognizer | ||
| 86 | - rcgOjb = new OnlineRecognizer(cfgFile); //set model config file | ||
| 87 | - CreateStream streamObj=rcgOjb.CreateStream(); //create a stream for read wav data | ||
| 88 | - float[] buffer = rcgOjb.readWavFile(wavfilename); // read data from file | ||
| 89 | - streamObj.acceptWaveform(buffer); // feed stream with data | ||
| 90 | - streamObj.inputFinished(); // tell engine you done with all data | ||
| 91 | - OnlineStream ssObj[] = new OnlineStream[1]; | ||
| 92 | - while (rcgOjb.isReady(streamObj)) { // engine is ready for unprocessed data | ||
| 93 | - ssObj[0] = streamObj; | ||
| 94 | - rcgOjb.decodeStreams(ssObj); // decode for multiple stream | ||
| 95 | - // rcgOjb.DecodeStream(streamObj); // decode for single stream | ||
| 96 | - } | ||
| 97 | - | ||
| 98 | - String recText = "simple:" + rcgOjb.getResult(streamObj) + "\n"; | ||
| 99 | - byte[] utf8Data = recText.getBytes(StandardCharsets.UTF_8); | ||
| 100 | - System.out.println(new String(utf8Data)); | ||
| 101 | - rcgOjb.reSet(streamObj); | ||
| 102 | - rcgOjb.releaseStream(streamObj); // release stream | ||
| 103 | - rcgOjb.release(); // release recognizer | ||
| 104 | ``` | 7 | ``` |
| 105 | - | ||
| 106 | ---- | ||
| 107 | - | ||
| 108 | -5.Makefile | ||
| 109 | ----------- | ||
| 110 | - | ||
| 111 | -OS Ubuntu 18.04 LTS | ||
| 112 | -Build package path: /sherpa-onnx/java-api-examples/lib/sherpaonnx.jar | ||
| 113 | - | ||
| 114 | -5.1 Build | ||
| 115 | - | ||
| 116 | -```bash | ||
| 117 | - cd sherpa-onnx/java-api-examples | ||
| 118 | - make all | 8 | +./run-streaming-decode-file-ctc.sh |
| 9 | +./run-streaming-decode-file-paraformer.sh | ||
| 10 | +./run-streaming-decode-file-transducer.sh | ||
| 119 | ``` | 11 | ``` |
| 120 | - | ||
| 121 | -5.2 Run DecodeFile example | ||
| 122 | - | ||
| 123 | -```bash | ||
| 124 | - make runfile | ||
| 125 | -``` | ||
| 126 | - | ||
| 127 | -5.3 Run DecodeMic example | ||
| 128 | - | ||
| 129 | -```bash | ||
| 130 | - make runmic | ||
| 131 | -``` | ||
| 132 | - | ||
| 133 | ---- | ||
| 134 | - | ||
| 135 | -6.WebSocket Server | ||
| 136 | ----------- | ||
| 137 | - | ||
| 138 | -support multiple threads for websocket server | ||
| 139 | -6.0 Protocol for communication | ||
| 140 | -1) client connect to server | ||
| 141 | -```shell | ||
| 142 | - ws client -> srv ws address | ||
| 143 | - ws address example: ws://127.0.0.1:8889/ | ||
| 144 | -``` | ||
| 145 | -2) client send 16k pcm_s16le binary stream data to server | ||
| 146 | -```shell | ||
| 147 | - PCM sampleRate 16000 | ||
| 148 | - single channel | ||
| 149 | - sampleSize 16bit | ||
| 150 | - little endian | ||
| 151 | - type short | ||
| 152 | -``` | ||
| 153 | -3) client send "Done" text to server when all data is sent | ||
| 154 | -```shell | ||
| 155 | - ws_socket.send("Done") | ||
| 156 | -``` | ||
| 157 | -4) client will receive json message from server whenever asr engine decoded new text | ||
| 158 | -```shell | ||
| 159 | - json example: {"text":"甚至出现交易几乎停滞的情况","eof":"false"} | ||
| 160 | -``` | ||
| 161 | - | ||
| 162 | - | ||
| 163 | -6.1 Build | ||
| 164 | - | ||
| 165 | -```bash | ||
| 166 | - cd sherpa-onnx/java-api-examples | ||
| 167 | - make all | ||
| 168 | -``` | ||
| 169 | - | ||
| 170 | -6.2 Run srv example | ||
| 171 | - | ||
| 172 | -usage: AsrWebsocketServer soPath modelCfgPath | ||
| 173 | - | ||
| 174 | -```bash | ||
| 175 | - make runsrv /**change path in Makefile according to your env**/ | ||
| 176 | -``` | ||
| 177 | - | ||
| 178 | -6.3 Run multiple threads client example | ||
| 179 | - | ||
| 180 | -usage: AsrWebsocketClient soPath srvIp srvPort wavPath numThreads | ||
| 181 | - | ||
| 182 | -json result example: {"text":"甚至出现交易几乎停滞的情况","eof":"true"} | ||
| 183 | - | ||
| 184 | -```bash | ||
| 185 | - make runclient /**change path in Makefile according to your env**/ | ||
| 186 | -``` | ||
| 187 | - | ||
| 188 | -7 runtest | ||
| 189 | -This script downloads the model, compiles the code, and runs the test. | ||
| 190 | -```bash | ||
| 191 | - cd sherpa-onnx/java-api-examples | ||
| 192 | - runtest.sh | ||
| 193 | -``` |
| 1 | +// Copyright 2022-2023 by zhaoming | ||
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +// This file shows how to use an online CTC model, i.e., streaming CTC model, | ||
| 5 | +// to decode files. | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | + | ||
| 8 | +public class StreamingDecodeFileCtc { | ||
| 9 | + public static void main(String[] args) { | ||
| 10 | + // please refer to | ||
| 11 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 12 | + // to download model files | ||
| 13 | + String model = | ||
| 14 | + "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"; | ||
| 15 | + String tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"; | ||
| 16 | + String waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"; | ||
| 17 | + | ||
| 18 | + WaveReader reader = new WaveReader(waveFilename); | ||
| 19 | + System.out.println(reader.getSampleRate()); | ||
| 20 | + System.out.println(reader.getSamples().length); | ||
| 21 | + | ||
| 22 | + OnlineZipformer2CtcModelConfig ctc = | ||
| 23 | + OnlineZipformer2CtcModelConfig.builder().setModel(model).build(); | ||
| 24 | + | ||
| 25 | + OnlineModelConfig modelConfig = | ||
| 26 | + OnlineModelConfig.builder() | ||
| 27 | + .setZipformer2Ctc(ctc) | ||
| 28 | + .setTokens(tokens) | ||
| 29 | + .setNumThreads(1) | ||
| 30 | + .setDebug(true) | ||
| 31 | + .build(); | ||
| 32 | + | ||
| 33 | + OnlineRecognizerConfig config = | ||
| 34 | + OnlineRecognizerConfig.builder() | ||
| 35 | + .setOnlineModelConfig(modelConfig) | ||
| 36 | + .setDecodingMethod("greedy_search") | ||
| 37 | + .build(); | ||
| 38 | + | ||
| 39 | + OnlineRecognizer recognizer = new OnlineRecognizer(config); | ||
| 40 | + OnlineStream stream = recognizer.createStream(); | ||
| 41 | + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
| 42 | + | ||
| 43 | + float[] tailPaddings = new float[(int) (0.3 * reader.getSampleRate())]; | ||
| 44 | + stream.acceptWaveform(tailPaddings, reader.getSampleRate()); | ||
| 45 | + | ||
| 46 | + while (recognizer.isReady(stream)) { | ||
| 47 | + recognizer.decode(stream); | ||
| 48 | + } | ||
| 49 | + | ||
| 50 | + String text = recognizer.getResult(stream).getText(); | ||
| 51 | + | ||
| 52 | + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); | ||
| 53 | + | ||
| 54 | + stream.release(); | ||
| 55 | + recognizer.release(); | ||
| 56 | + } | ||
| 57 | +} |
| 1 | +// Copyright 2022-2023 by zhaoming | ||
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +// This file shows how to use an online paraformer, i.e., streaming paraformer, | ||
| 5 | +// to decode files. | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | + | ||
| 8 | +public class StreamingDecodeFileParaformer { | ||
| 9 | + public static void main(String[] args) { | ||
| 10 | + // please refer to | ||
| 11 | + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english | ||
| 12 | + // to download model files | ||
| 13 | + String encoder = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"; | ||
| 14 | + String decoder = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"; | ||
| 15 | + String tokens = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"; | ||
| 16 | + String waveFilename = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav"; | ||
| 17 | + | ||
| 18 | + WaveReader reader = new WaveReader(waveFilename); | ||
| 19 | + System.out.println(reader.getSampleRate()); | ||
| 20 | + System.out.println(reader.getSamples().length); | ||
| 21 | + | ||
| 22 | + OnlineParaformerModelConfig paraformer = | ||
| 23 | + OnlineParaformerModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build(); | ||
| 24 | + | ||
| 25 | + OnlineModelConfig modelConfig = | ||
| 26 | + OnlineModelConfig.builder() | ||
| 27 | + .setParaformer(paraformer) | ||
| 28 | + .setTokens(tokens) | ||
| 29 | + .setNumThreads(1) | ||
| 30 | + .setDebug(true) | ||
| 31 | + .build(); | ||
| 32 | + | ||
| 33 | + OnlineRecognizerConfig config = | ||
| 34 | + OnlineRecognizerConfig.builder() | ||
| 35 | + .setOnlineModelConfig(modelConfig) | ||
| 36 | + .setDecodingMethod("greedy_search") | ||
| 37 | + .build(); | ||
| 38 | + | ||
| 39 | + OnlineRecognizer recognizer = new OnlineRecognizer(config); | ||
| 40 | + OnlineStream stream = recognizer.createStream(); | ||
| 41 | + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
| 42 | + | ||
| 43 | + float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())]; | ||
| 44 | + stream.acceptWaveform(tailPaddings, reader.getSampleRate()); | ||
| 45 | + | ||
| 46 | + while (recognizer.isReady(stream)) { | ||
| 47 | + recognizer.decode(stream); | ||
| 48 | + } | ||
| 49 | + | ||
| 50 | + String text = recognizer.getResult(stream).getText(); | ||
| 51 | + | ||
| 52 | + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); | ||
| 53 | + | ||
| 54 | + stream.release(); | ||
| 55 | + recognizer.release(); | ||
| 56 | + } | ||
| 57 | +} |
| 1 | +// Copyright 2022-2023 by zhaoming | ||
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +// This file shows how to use an online transducer, i.e., streaming transducer, | ||
| 5 | +// to decode files. | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | + | ||
| 8 | +public class StreamingDecodeFileTransducer { | ||
| 9 | + public static void main(String[] args) { | ||
| 10 | + // please refer to | ||
| 11 | + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english | ||
| 12 | + // to download model files | ||
| 13 | + String encoder = | ||
| 14 | + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx"; | ||
| 15 | + String decoder = | ||
| 16 | + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"; | ||
| 17 | + String joiner = | ||
| 18 | + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"; | ||
| 19 | + String tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"; | ||
| 20 | + | ||
| 21 | + String waveFilename = | ||
| 22 | + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav"; | ||
| 23 | + | ||
| 24 | + WaveReader reader = new WaveReader(waveFilename); | ||
| 25 | + System.out.println(reader.getSampleRate()); | ||
| 26 | + System.out.println(reader.getSamples().length); | ||
| 27 | + | ||
| 28 | + OnlineTransducerModelConfig transducer = | ||
| 29 | + OnlineTransducerModelConfig.builder() | ||
| 30 | + .setEncoder(encoder) | ||
| 31 | + .setDecoder(decoder) | ||
| 32 | + .setJoiner(joiner) | ||
| 33 | + .build(); | ||
| 34 | + | ||
| 35 | + OnlineModelConfig modelConfig = | ||
| 36 | + OnlineModelConfig.builder() | ||
| 37 | + .setTransducer(transducer) | ||
| 38 | + .setTokens(tokens) | ||
| 39 | + .setNumThreads(1) | ||
| 40 | + .setDebug(true) | ||
| 41 | + .build(); | ||
| 42 | + | ||
| 43 | + OnlineRecognizerConfig config = | ||
| 44 | + OnlineRecognizerConfig.builder() | ||
| 45 | + .setOnlineModelConfig(modelConfig) | ||
| 46 | + .setDecodingMethod("greedy_search") | ||
| 47 | + .build(); | ||
| 48 | + | ||
| 49 | + OnlineRecognizer recognizer = new OnlineRecognizer(config); | ||
| 50 | + OnlineStream stream = recognizer.createStream(); | ||
| 51 | + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
| 52 | + | ||
| 53 | + float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())]; | ||
| 54 | + stream.acceptWaveform(tailPaddings, reader.getSampleRate()); | ||
| 55 | + | ||
| 56 | + while (recognizer.isReady(stream)) { | ||
| 57 | + recognizer.decode(stream); | ||
| 58 | + } | ||
| 59 | + | ||
| 60 | + String text = recognizer.getResult(stream).getText(); | ||
| 61 | + | ||
| 62 | + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); | ||
| 63 | + | ||
| 64 | + stream.release(); | ||
| 65 | + recognizer.release(); | ||
| 66 | + } | ||
| 67 | +} |
java-api-examples/modelconfig.cfg
已删除
100755 → 0
| 1 | -#model config | ||
| 2 | -sample_rate=16000 | ||
| 3 | -feature_dim=80 | ||
| 4 | -rule1_min_trailing_silence=2.4 | ||
| 5 | -rule2_min_trailing_silence=1.2 | ||
| 6 | -rule3_min_utterance_length=20 | ||
| 7 | -encoder=/sherpa/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx | ||
| 8 | -decoder=/sherpa/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx | ||
| 9 | -joiner=/sherpa/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx | ||
| 10 | -tokens=/sherpa/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt | ||
| 11 | -num_threads=4 | ||
| 12 | -enable_endpoint_detection=true | ||
| 13 | -decoding_method=modified_beam_search | ||
| 14 | -max_active_paths=4 | ||
| 15 | -hotwords_file= | ||
| 16 | -hotwords_score=1.5 | ||
| 17 | -lm_model= | ||
| 18 | -lm_scale=0.5 | ||
| 19 | -model_type=zipformer | ||
| 20 | - | ||
| 21 | -#websocket server config | ||
| 22 | -port=8890 | ||
| 23 | -connection_thread_num=16 | ||
| 24 | -stream_thread_num=16 | ||
| 25 | -decoder_thread_num=16 | ||
| 26 | -parallel_decoder_num=16 | ||
| 27 | -decoder_time_idle=200 | ||
| 28 | -deocder_time_out=30000 |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +set -ex | ||
| 3 | + | ||
| 4 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 5 | + mkdir -p ../build | ||
| 6 | + pushd ../build | ||
| 7 | + cmake \ | ||
| 8 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 11 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 12 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 14 | + .. | ||
| 15 | + | ||
| 16 | + make -j4 | ||
| 17 | + ls -lh lib | ||
| 18 | + popd | ||
| 19 | +fi | ||
| 20 | + | ||
| 21 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 22 | + pushd ../sherpa-onnx/java-api | ||
| 23 | + make | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 30 | + rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +java \ | ||
| 34 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 35 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 36 | + StreamingDecodeFileCtc.java |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +set -ex | ||
| 3 | + | ||
| 4 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 5 | + mkdir -p ../build | ||
| 6 | + pushd ../build | ||
| 7 | + cmake \ | ||
| 8 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 11 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 12 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 14 | + .. | ||
| 15 | + | ||
| 16 | + make -j4 | ||
| 17 | + ls -lh lib | ||
| 18 | + popd | ||
| 19 | +fi | ||
| 20 | + | ||
| 21 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 22 | + pushd ../sherpa-onnx/java-api | ||
| 23 | + make | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 30 | + rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +java \ | ||
| 34 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 35 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 36 | + StreamingDecodeFileParaformer.java |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 6 | + mkdir -p ../build | ||
| 7 | + pushd ../build | ||
| 8 | + cmake \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 11 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 12 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 14 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 15 | + .. | ||
| 16 | + | ||
| 17 | + make -j4 | ||
| 18 | + ls -lh lib | ||
| 19 | + popd | ||
| 20 | +fi | ||
| 21 | + | ||
| 22 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 23 | + pushd ../sherpa-onnx/java-api | ||
| 24 | + make | ||
| 25 | + popd | ||
| 26 | +fi | ||
| 27 | + | ||
| 28 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 29 | + cmake \ | ||
| 30 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 31 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 32 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 33 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 34 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 35 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 36 | + .. | ||
| 37 | + | ||
| 38 | + make -j4 | ||
| 39 | + ls -lh lib | ||
| 40 | +fi | ||
| 41 | + | ||
| 42 | +if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then | ||
| 43 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 44 | + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 45 | + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 46 | +fi | ||
| 47 | + | ||
| 48 | +java \ | ||
| 49 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 50 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 51 | + StreamingDecodeFileTransducer.java |
java-api-examples/runtest.sh
已删除
100755 → 0
| 1 | -#!/usr/bin/env bash | ||
| 2 | -# | ||
| 3 | -# This script shows how to test the Java API of sherpa-onnx | ||
| 4 | -# Note: This script runs only on Linux and macOS | ||
| 5 | - | ||
| 6 | -set -e | ||
| 7 | - | ||
| 8 | -log() { | ||
| 9 | - # This function is from espnet | ||
| 10 | - local fname=${BASH_SOURCE[1]##*/} | ||
| 11 | - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
| 12 | -} | ||
| 13 | - | ||
| 14 | - | ||
| 15 | - | ||
| 16 | - | ||
| 17 | -echo "PATH: $PATH" | ||
| 18 | - | ||
| 19 | - | ||
| 20 | - | ||
| 21 | - | ||
| 22 | - | ||
| 23 | -log "------------------------------------------------------------" | ||
| 24 | -log "Run download model" | ||
| 25 | -log "------------------------------------------------------------" | ||
| 26 | - | ||
| 27 | -repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 28 | -log "Start testing ${repo_url}" | ||
| 29 | -repo=$(basename $repo_url) | ||
| 30 | -log "download dir is $(basename $repo_url)" | ||
| 31 | -if [ ! -d $repo ];then | ||
| 32 | - log "Download pretrained model and test-data from $repo_url" | ||
| 33 | - | ||
| 34 | - GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url | ||
| 35 | - pushd $repo | ||
| 36 | - git lfs pull --include "*.onnx" | ||
| 37 | - ls -lh *.onnx | ||
| 38 | - popd | ||
| 39 | - ln -s $repo/test_wavs/0.wav hotwords.wav | ||
| 40 | - | ||
| 41 | -fi | ||
| 42 | - | ||
| 43 | -log $(pwd) | ||
| 44 | - | ||
| 45 | -sed -e 's?/sherpa/?'$(pwd)'/?g' modelconfig.cfg > modeltest.cfg | ||
| 46 | - | ||
| 47 | -log "display model cfg" | ||
| 48 | -cat modeltest.cfg | ||
| 49 | - | ||
| 50 | -cd .. | ||
| 51 | - | ||
| 52 | -export JAVA_HOME=$(readlink -f /usr/bin/javac | sed "s:/bin/javac::") | ||
| 53 | - | ||
| 54 | -mkdir -p build | ||
| 55 | -cd build | ||
| 56 | - | ||
| 57 | -cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DSHERPA_ONNX_ENABLE_JNI=ON .. | ||
| 58 | - | ||
| 59 | -make -j4 | ||
| 60 | -ls -lh lib | ||
| 61 | - | ||
| 62 | -export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH | ||
| 63 | - | ||
| 64 | -cd ../java-api-examples | ||
| 65 | - | ||
| 66 | -make all | ||
| 67 | - | ||
| 68 | -make runfile | ||
| 69 | - | ||
| 70 | -echo "礼 拜 二" > hotwords.txt | ||
| 71 | - | ||
| 72 | -sed -i 's/hotwords_file=/hotwords_file=hotwords.txt/g' modeltest.cfg | ||
| 73 | - | ||
| 74 | -make runhotwords |
java-api-examples/test.wav
已删除
100644 → 0
不能预览此文件类型
| @@ -6,11 +6,9 @@ | @@ -6,11 +6,9 @@ | ||
| 6 | 6 | ||
| 7 | set -ex | 7 | set -ex |
| 8 | 8 | ||
| 9 | -cd .. | ||
| 10 | -mkdir -p build | ||
| 11 | -cd build | ||
| 12 | - | ||
| 13 | if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | 9 | if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then |
| 10 | + mkdir -p ../build | ||
| 11 | + pushd ../build | ||
| 14 | cmake \ | 12 | cmake \ |
| 15 | -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | 13 | -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ |
| 16 | -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | 14 | -DSHERPA_ONNX_ENABLE_TESTS=OFF \ |
| @@ -22,12 +20,11 @@ if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa | @@ -22,12 +20,11 @@ if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa | ||
| 22 | 20 | ||
| 23 | make -j4 | 21 | make -j4 |
| 24 | ls -lh lib | 22 | ls -lh lib |
| 23 | + popd | ||
| 25 | fi | 24 | fi |
| 26 | 25 | ||
| 27 | export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH | 26 | export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH |
| 28 | 27 | ||
| 29 | -cd ../kotlin-api-examples | ||
| 30 | - | ||
| 31 | function testSpeakerEmbeddingExtractor() { | 28 | function testSpeakerEmbeddingExtractor() { |
| 32 | if [ ! -f ./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx ]; then | 29 | if [ ! -f ./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx ]; then |
| 33 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx | 30 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx |
| @@ -253,7 +253,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -253,7 +253,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 253 | sherpa_onnx::ReadWave(wave_filename, &actual_sample_rate, &is_ok); | 253 | sherpa_onnx::ReadWave(wave_filename, &actual_sample_rate, &is_ok); |
| 254 | 254 | ||
| 255 | if (!is_ok) { | 255 | if (!is_ok) { |
| 256 | - SHERPA_ONNX_LOGE("Failed to read %s", wave_filename.c_str()); | 256 | + SHERPA_ONNX_LOGE("Failed to read '%s'", wave_filename.c_str()); |
| 257 | return -1; | 257 | return -1; |
| 258 | } | 258 | } |
| 259 | 259 |
| @@ -96,7 +96,7 @@ static std::vector<std::vector<float>> ComputeEmbeddings( | @@ -96,7 +96,7 @@ static std::vector<std::vector<float>> ComputeEmbeddings( | ||
| 96 | sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok); | 96 | sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok); |
| 97 | 97 | ||
| 98 | if (!is_ok) { | 98 | if (!is_ok) { |
| 99 | - fprintf(stderr, "Failed to read %s\n", f.c_str()); | 99 | + fprintf(stderr, "Failed to read '%s'\n", f.c_str()); |
| 100 | exit(-1); | 100 | exit(-1); |
| 101 | } | 101 | } |
| 102 | 102 |
| @@ -78,7 +78,7 @@ for a list of pre-trained models to download. | @@ -78,7 +78,7 @@ for a list of pre-trained models to download. | ||
| 78 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); | 78 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); |
| 79 | 79 | ||
| 80 | if (!is_ok) { | 80 | if (!is_ok) { |
| 81 | - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); | 81 | + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str()); |
| 82 | return -1; | 82 | return -1; |
| 83 | } | 83 | } |
| 84 | 84 |
| @@ -93,7 +93,7 @@ static std::vector<std::vector<float>> ComputeEmbeddings( | @@ -93,7 +93,7 @@ static std::vector<std::vector<float>> ComputeEmbeddings( | ||
| 93 | sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok); | 93 | sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok); |
| 94 | 94 | ||
| 95 | if (!is_ok) { | 95 | if (!is_ok) { |
| 96 | - fprintf(stderr, "Failed to read %s\n", f.c_str()); | 96 | + fprintf(stderr, "Failed to read '%s'\n", f.c_str()); |
| 97 | exit(-1); | 97 | exit(-1); |
| 98 | } | 98 | } |
| 99 | 99 |
| @@ -58,7 +58,7 @@ for more models. | @@ -58,7 +58,7 @@ for more models. | ||
| 58 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); | 58 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); |
| 59 | 59 | ||
| 60 | if (!is_ok) { | 60 | if (!is_ok) { |
| 61 | - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); | 61 | + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str()); |
| 62 | return -1; | 62 | return -1; |
| 63 | } | 63 | } |
| 64 | 64 |
| @@ -73,7 +73,7 @@ for a list of pre-trained models to download. | @@ -73,7 +73,7 @@ for a list of pre-trained models to download. | ||
| 73 | const std::vector<float> samples = | 73 | const std::vector<float> samples = |
| 74 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); | 74 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); |
| 75 | if (!is_ok) { | 75 | if (!is_ok) { |
| 76 | - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); | 76 | + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str()); |
| 77 | return -1; | 77 | return -1; |
| 78 | } | 78 | } |
| 79 | float duration = samples.size() / static_cast<float>(sampling_rate); | 79 | float duration = samples.size() / static_cast<float>(sampling_rate); |
| @@ -69,7 +69,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths, | @@ -69,7 +69,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths, | ||
| 69 | const std::vector<float> samples = | 69 | const std::vector<float> samples = |
| 70 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); | 70 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); |
| 71 | if (!is_ok) { | 71 | if (!is_ok) { |
| 72 | - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); | 72 | + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str()); |
| 73 | continue; | 73 | continue; |
| 74 | } | 74 | } |
| 75 | duration += samples.size() / static_cast<float>(sampling_rate); | 75 | duration += samples.size() / static_cast<float>(sampling_rate); |
| @@ -96,7 +96,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths, | @@ -96,7 +96,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths, | ||
| 96 | const std::vector<float> samples = | 96 | const std::vector<float> samples = |
| 97 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); | 97 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); |
| 98 | if (!is_ok) { | 98 | if (!is_ok) { |
| 99 | - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); | 99 | + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str()); |
| 100 | continue; | 100 | continue; |
| 101 | } | 101 | } |
| 102 | duration += samples.size() / static_cast<float>(sampling_rate); | 102 | duration += samples.size() / static_cast<float>(sampling_rate); |
| @@ -124,7 +124,7 @@ for a list of pre-trained models to download. | @@ -124,7 +124,7 @@ for a list of pre-trained models to download. | ||
| 124 | const std::vector<float> samples = | 124 | const std::vector<float> samples = |
| 125 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); | 125 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); |
| 126 | if (!is_ok) { | 126 | if (!is_ok) { |
| 127 | - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); | 127 | + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str()); |
| 128 | return -1; | 128 | return -1; |
| 129 | } | 129 | } |
| 130 | duration += samples.size() / static_cast<float>(sampling_rate); | 130 | duration += samples.size() / static_cast<float>(sampling_rate); |
| @@ -109,7 +109,7 @@ for a list of pre-trained models to download. | @@ -109,7 +109,7 @@ for a list of pre-trained models to download. | ||
| 109 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); | 109 | sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); |
| 110 | 110 | ||
| 111 | if (!is_ok) { | 111 | if (!is_ok) { |
| 112 | - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); | 112 | + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str()); |
| 113 | return -1; | 113 | return -1; |
| 114 | } | 114 | } |
| 115 | 115 |
sherpa-onnx/java-api/Makefile
0 → 100644
| 1 | + | ||
| 2 | +# all .class and .jar files are put inside out_dir | ||
| 3 | +out_dir := build | ||
| 4 | +out_jar := $(out_dir)/sherpa-onnx.jar | ||
| 5 | + | ||
| 6 | +package_dir := com/k2fsa/sherpa/onnx | ||
| 7 | + | ||
| 8 | +java_files := WaveReader.java | ||
| 9 | +java_files += EndpointRule.java | ||
| 10 | +java_files += EndpointConfig.java | ||
| 11 | +java_files += FeatureConfig.java | ||
| 12 | +java_files += OnlineLMConfig.java | ||
| 13 | +java_files += OnlineParaformerModelConfig.java | ||
| 14 | +java_files += OnlineZipformer2CtcModelConfig.java | ||
| 15 | +java_files += OnlineTransducerModelConfig.java | ||
| 16 | +java_files += OnlineModelConfig.java | ||
| 17 | +java_files += OnlineStream.java | ||
| 18 | +java_files += OnlineRecognizerConfig.java | ||
| 19 | +java_files += OnlineRecognizerResult.java | ||
| 20 | +java_files += OnlineRecognizer.java | ||
| 21 | + | ||
| 22 | +class_files := $(java_files:%.java=%.class) | ||
| 23 | + | ||
| 24 | +java_files := $(addprefix src/$(package_dir)/,$(java_files)) | ||
| 25 | +class_files := $(addprefix $(out_dir)/$(package_dir)/,$(class_files)) | ||
| 26 | + | ||
| 27 | +$(info -- java files $(java_files)) | ||
| 28 | +$(info --) | ||
| 29 | +$(info -- class files $(class_files)) | ||
| 30 | + | ||
| 31 | +.PHONY: all clean | ||
| 32 | + | ||
| 33 | +all: $(out_jar) | ||
| 34 | + | ||
| 35 | +$(out_jar): $(class_files) | ||
| 36 | + jar --create --verbose --file $(out_jar) -C $(out_dir) . | ||
| 37 | + | ||
| 38 | +clean: | ||
| 39 | + $(RM) -rfv $(out_dir) | ||
| 40 | + | ||
| 41 | +$(class_files): $(out_dir)/$(package_dir)/%.class: src/$(package_dir)/%.java | ||
| 42 | + javac -d $(out_dir) --class-path $(out_dir) $< |
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | 1 | +// Copyright 2022-2023 by zhaoming |
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 4 | 3 | ||
| 5 | package com.k2fsa.sherpa.onnx; | 4 | package com.k2fsa.sherpa.onnx; |
| 6 | 5 | ||
| 7 | public class EndpointConfig { | 6 | public class EndpointConfig { |
| 7 | + | ||
| 8 | private final EndpointRule rule1; | 8 | private final EndpointRule rule1; |
| 9 | private final EndpointRule rule2; | 9 | private final EndpointRule rule2; |
| 10 | private final EndpointRule rule3; | 10 | private final EndpointRule rule3; |
| 11 | 11 | ||
| 12 | - public EndpointConfig(EndpointRule rule1, EndpointRule rule2, EndpointRule rule3) { | ||
| 13 | - this.rule1 = rule1; | ||
| 14 | - this.rule2 = rule2; | ||
| 15 | - this.rule3 = rule3; | 12 | + private EndpointConfig(Builder builder) { |
| 13 | + this.rule1 = builder.rule1; | ||
| 14 | + this.rule2 = builder.rule2; | ||
| 15 | + this.rule3 = builder.rule3; | ||
| 16 | + } | ||
| 17 | + | ||
| 18 | + public static Builder builder() { | ||
| 19 | + return new Builder(); | ||
| 16 | } | 20 | } |
| 17 | 21 | ||
| 18 | public EndpointRule getRule1() { | 22 | public EndpointRule getRule1() { |
| @@ -26,4 +30,42 @@ public class EndpointConfig { | @@ -26,4 +30,42 @@ public class EndpointConfig { | ||
| 26 | public EndpointRule getRule3() { | 30 | public EndpointRule getRule3() { |
| 27 | return rule3; | 31 | return rule3; |
| 28 | } | 32 | } |
| 33 | + | ||
| 34 | + public static class Builder { | ||
| 35 | + | ||
| 36 | + private EndpointRule rule1 = EndpointRule.builder(). | ||
| 37 | + setMustContainNonSilence(false). | ||
| 38 | + setMinTrailingSilence(2.4f). | ||
| 39 | + setMinUtteranceLength(0). | ||
| 40 | + build(); | ||
| 41 | + private EndpointRule rule2 = EndpointRule.builder(). | ||
| 42 | + setMustContainNonSilence(true). | ||
| 43 | + setMinTrailingSilence(1.4f). | ||
| 44 | + setMinUtteranceLength(0). | ||
| 45 | + build(); | ||
| 46 | + private EndpointRule rule3 = EndpointRule.builder(). | ||
| 47 | + setMustContainNonSilence(false). | ||
| 48 | + setMinTrailingSilence(0.0f). | ||
| 49 | + setMinUtteranceLength(20.0f). | ||
| 50 | + build(); | ||
| 51 | + | ||
| 52 | + public EndpointConfig build() { | ||
| 53 | + return new EndpointConfig(this); | ||
| 54 | + } | ||
| 55 | + | ||
| 56 | + public Builder setRule1(EndpointRule rule) { | ||
| 57 | + this.rule1 = rule; | ||
| 58 | + return this; | ||
| 59 | + } | ||
| 60 | + | ||
| 61 | + public Builder setRule2(EndpointRule rule) { | ||
| 62 | + this.rule2 = rule; | ||
| 63 | + return this; | ||
| 64 | + } | ||
| 65 | + | ||
| | + /** Sets the third endpoint rule and returns this builder for chaining. */ | ||
| | + public Builder setRule3(EndpointRule rule) { | ||
| | + this.rule3 = rule; | ||
| | + return this; | ||
| | + } | ||
| | + | ||
| | + /** Misspelled alias of setRule3; kept so existing callers still compile. */ | ||
| | + @Deprecated | ||
| 66 | + public Builder setRul3(EndpointRule rule) { | ||
| 67 | + this.rule3 = rule; | ||
| 68 | + return this; | ||
| 69 | + } | ||
| 70 | + } | ||
| 29 | } | 71 | } |
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | ||
| 4 | - | 1 | +// Copyright 2022-2023 by zhaoming |
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 5 | package com.k2fsa.sherpa.onnx; | 3 | package com.k2fsa.sherpa.onnx; |
| 6 | 4 | ||
| 7 | public class EndpointRule { | 5 | public class EndpointRule { |
| 6 | + | ||
| 8 | private final boolean mustContainNonSilence; | 7 | private final boolean mustContainNonSilence; |
| 9 | private final float minTrailingSilence; | 8 | private final float minTrailingSilence; |
| 10 | private final float minUtteranceLength; | 9 | private final float minUtteranceLength; |
| 11 | 10 | ||
| 12 | - public EndpointRule( | ||
| 13 | - boolean mustContainNonSilence, float minTrailingSilence, float minUtteranceLength) { | ||
| 14 | - this.mustContainNonSilence = mustContainNonSilence; | ||
| 15 | - this.minTrailingSilence = minTrailingSilence; | ||
| 16 | - this.minUtteranceLength = minUtteranceLength; | 11 | + private EndpointRule(Builder builder) { |
| 12 | + this.mustContainNonSilence = builder.mustContainNonSilence; | ||
| 13 | + this.minTrailingSilence = builder.minTrailingSilence; | ||
| 14 | + this.minUtteranceLength = builder.minUtteranceLength; | ||
| 15 | + } | ||
| 16 | + | ||
| 17 | + public static Builder builder() { | ||
| 18 | + return new Builder(); | ||
| 17 | } | 19 | } |
| 18 | 20 | ||
| 19 | public float getMinTrailingSilence() { | 21 | public float getMinTrailingSilence() { |
| @@ -27,4 +29,29 @@ public class EndpointRule { | @@ -27,4 +29,29 @@ public class EndpointRule { | ||
| 27 | public boolean getMustContainNonSilence() { | 29 | public boolean getMustContainNonSilence() { |
| 28 | return mustContainNonSilence; | 30 | return mustContainNonSilence; |
| 29 | } | 31 | } |
| 30 | -} | 32 | + |
| 33 | + public static class Builder { | ||
| 34 | + private boolean mustContainNonSilence = false; | ||
| 35 | + private float minTrailingSilence = 0; | ||
| 36 | + private float minUtteranceLength = 0; | ||
| 37 | + | ||
| 38 | + public EndpointRule build() { | ||
| 39 | + return new EndpointRule(this); | ||
| 40 | + } | ||
| 41 | + | ||
| 42 | + public Builder setMustContainNonSilence(boolean mustContainNonSilence) { | ||
| 43 | + this.mustContainNonSilence = mustContainNonSilence; | ||
| 44 | + return this; | ||
| 45 | + } | ||
| 46 | + | ||
| 47 | + public Builder setMinTrailingSilence(float minTrailingSilence) { | ||
| 48 | + this.minTrailingSilence = minTrailingSilence; | ||
| 49 | + return this; | ||
| 50 | + } | ||
| 51 | + | ||
| 52 | + public Builder setMinUtteranceLength(float minUtteranceLength) { | ||
| 53 | + this.minUtteranceLength = minUtteranceLength; | ||
| 54 | + return this; | ||
| 55 | + } | ||
| 56 | + } | ||
| 57 | +} |
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | 1 | +// Copyright 2022-2023 by zhaoming |
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 4 | 3 | ||
| 5 | package com.k2fsa.sherpa.onnx; | 4 | package com.k2fsa.sherpa.onnx; |
| 6 | 5 | ||
| @@ -8,9 +7,13 @@ public class FeatureConfig { | @@ -8,9 +7,13 @@ public class FeatureConfig { | ||
| 8 | private final int sampleRate; | 7 | private final int sampleRate; |
| 9 | private final int featureDim; | 8 | private final int featureDim; |
| 10 | 9 | ||
| 11 | - public FeatureConfig(int sampleRate, int featureDim) { | ||
| 12 | - this.sampleRate = sampleRate; | ||
| 13 | - this.featureDim = featureDim; | 10 | + private FeatureConfig(Builder builder) { |
| 11 | + this.sampleRate = builder.sampleRate; | ||
| 12 | + this.featureDim = builder.featureDim; | ||
| 13 | + } | ||
| 14 | + | ||
| 15 | + public static Builder builder() { | ||
| 16 | + return new Builder(); | ||
| 14 | } | 17 | } |
| 15 | 18 | ||
| 16 | public int getSampleRate() { | 19 | public int getSampleRate() { |
| @@ -20,4 +23,23 @@ public class FeatureConfig { | @@ -20,4 +23,23 @@ public class FeatureConfig { | ||
| 20 | public int getFeatureDim() { | 23 | public int getFeatureDim() { |
| 21 | return featureDim; | 24 | return featureDim; |
| 22 | } | 25 | } |
| 26 | + | ||
| 27 | + public static class Builder { | ||
| 28 | + private int sampleRate = 16000; | ||
| 29 | + private int featureDim = 80; | ||
| 30 | + | ||
| 31 | + public FeatureConfig build() { | ||
| 32 | + return new FeatureConfig(this); | ||
| 33 | + } | ||
| 34 | + | ||
| 35 | + public Builder setSampleRate(int sampleRate) { | ||
| 36 | + this.sampleRate = sampleRate; | ||
| 37 | + return this; | ||
| 38 | + } | ||
| 39 | + | ||
| 40 | + public Builder setFeatureDim(int featureDim) { | ||
| 41 | + this.featureDim = featureDim; | ||
| 42 | + return this; | ||
| 43 | + } | ||
| 44 | + } | ||
| 23 | } | 45 | } |
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | 1 | +// Copyright 2022-2023 by zhaoming |
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 4 | 3 | ||
| 5 | package com.k2fsa.sherpa.onnx; | 4 | package com.k2fsa.sherpa.onnx; |
| 6 | 5 | ||
| 7 | public class OnlineLMConfig { | 6 | public class OnlineLMConfig { |
| 7 | + | ||
| 8 | private final String model; | 8 | private final String model; |
| 9 | private final float scale; | 9 | private final float scale; |
| 10 | 10 | ||
| 11 | - public OnlineLMConfig(String model, float scale) { | ||
| 12 | - this.model = model; | ||
| 13 | - this.scale = scale; | 11 | + private OnlineLMConfig(Builder builder) { |
| 12 | + this.model = builder.model; | ||
| 13 | + this.scale = builder.scale; | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + public static Builder builder() { | ||
| 17 | + return new Builder(); | ||
| 14 | } | 18 | } |
| 15 | 19 | ||
| 16 | public String getModel() { | 20 | public String getModel() { |
| @@ -20,4 +24,23 @@ public class OnlineLMConfig { | @@ -20,4 +24,23 @@ public class OnlineLMConfig { | ||
| 20 | public float getScale() { | 24 | public float getScale() { |
| 21 | return scale; | 25 | return scale; |
| 22 | } | 26 | } |
| 23 | -} | 27 | + |
| 28 | + public static class Builder { | ||
| 29 | + private String model = ""; | ||
| 30 | + private float scale = 1.0f; | ||
| 31 | + | ||
| 32 | + public OnlineLMConfig build() { | ||
| 33 | + return new OnlineLMConfig(this); | ||
| 34 | + } | ||
| 35 | + | ||
| 36 | + public Builder setModel(String model) { | ||
| 37 | + this.model = model; | ||
| 38 | + return this; | ||
| 39 | + } | ||
| 40 | + | ||
| 41 | + public Builder setScale(float scale) { | ||
| 42 | + this.scale = scale; | ||
| 43 | + return this; | ||
| 44 | + } | ||
| 45 | + } | ||
| 46 | +} |
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | 1 | +// Copyright 2022-2023 by zhaoming |
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 4 | 3 | ||
| 5 | package com.k2fsa.sherpa.onnx; | 4 | package com.k2fsa.sherpa.onnx; |
| 6 | 5 | ||
| 7 | public class OnlineModelConfig { | 6 | public class OnlineModelConfig { |
| 8 | - private final OnlineParaformerModelConfig paraformer; | ||
| 9 | private final OnlineTransducerModelConfig transducer; | 7 | private final OnlineTransducerModelConfig transducer; |
| 8 | + private final OnlineParaformerModelConfig paraformer; | ||
| 10 | private final OnlineZipformer2CtcModelConfig zipformer2Ctc; | 9 | private final OnlineZipformer2CtcModelConfig zipformer2Ctc; |
| 11 | private final String tokens; | 10 | private final String tokens; |
| 12 | private final int numThreads; | 11 | private final int numThreads; |
| 13 | private final boolean debug; | 12 | private final boolean debug; |
| 14 | - private final String provider = "cpu"; | ||
| 15 | - private String modelType = ""; | ||
| 16 | - | ||
| 17 | - public OnlineModelConfig( | ||
| 18 | - String tokens, | ||
| 19 | - int numThreads, | ||
| 20 | - boolean debug, | ||
| 21 | - String modelType, | ||
| 22 | - OnlineParaformerModelConfig paraformer, | ||
| 23 | - OnlineTransducerModelConfig transducer, | ||
| 24 | - OnlineZipformer2CtcModelConfig zipformer2Ctc | ||
| 25 | - ) { | ||
| 26 | - | ||
| 27 | - this.tokens = tokens; | ||
| 28 | - this.numThreads = numThreads; | ||
| 29 | - this.debug = debug; | ||
| 30 | - this.modelType = modelType; | ||
| 31 | - this.paraformer = paraformer; | ||
| 32 | - this.transducer = transducer; | ||
| 33 | - this.zipformer2Ctc = zipformer2Ctc; | 13 | + private final String provider; |
| 14 | + private final String modelType; | ||
| 15 | + private OnlineModelConfig(Builder builder) { | ||
| 16 | + this.transducer = builder.transducer; | ||
| 17 | + this.paraformer = builder.paraformer; | ||
| 18 | + this.zipformer2Ctc = builder.zipformer2Ctc; | ||
| 19 | + this.tokens = builder.tokens; | ||
| 20 | + this.numThreads = builder.numThreads; | ||
| 21 | + this.debug = builder.debug; | ||
| 22 | + this.provider = builder.provider; | ||
| 23 | + this.modelType = builder.modelType; | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + public static Builder builder() { | ||
| 27 | + return new Builder(); | ||
| 34 | } | 28 | } |
| 35 | 29 | ||
| 36 | public OnlineParaformerModelConfig getParaformer() { | 30 | public OnlineParaformerModelConfig getParaformer() { |
| @@ -41,6 +35,10 @@ public class OnlineModelConfig { | @@ -41,6 +35,10 @@ public class OnlineModelConfig { | ||
| 41 | return transducer; | 35 | return transducer; |
| 42 | } | 36 | } |
| 43 | 37 | ||
| 38 | + public OnlineZipformer2CtcModelConfig getZipformer2Ctc() { | ||
| 39 | + return zipformer2Ctc; | ||
| 40 | + } | ||
| 41 | + | ||
| 44 | public String getTokens() { | 42 | public String getTokens() { |
| 45 | return tokens; | 43 | return tokens; |
| 46 | } | 44 | } |
| @@ -52,4 +50,67 @@ public class OnlineModelConfig { | @@ -52,4 +50,67 @@ public class OnlineModelConfig { | ||
| 52 | public boolean getDebug() { | 50 | public boolean getDebug() { |
| 53 | return debug; | 51 | return debug; |
| 54 | } | 52 | } |
| 53 | + | ||
| 54 | + public String getProvider() { | ||
| 55 | + return provider; | ||
| 56 | + } | ||
| 57 | + | ||
| 58 | + public String getModelType() { | ||
| 59 | + return modelType; | ||
| 60 | + } | ||
| 61 | + | ||
| 62 | + public static class Builder { | ||
| 63 | + private OnlineParaformerModelConfig paraformer = OnlineParaformerModelConfig.builder().build(); | ||
| 64 | + private OnlineTransducerModelConfig transducer = OnlineTransducerModelConfig.builder().build(); | ||
| 65 | + private OnlineZipformer2CtcModelConfig zipformer2Ctc = OnlineZipformer2CtcModelConfig.builder().build(); | ||
| 66 | + private String tokens = ""; | ||
| 67 | + private int numThreads = 1; | ||
| 68 | + private boolean debug = true; | ||
| 69 | + private String provider = "cpu"; | ||
| 70 | + private String modelType = ""; | ||
| 71 | + | ||
| 72 | + public OnlineModelConfig build() { | ||
| 73 | + return new OnlineModelConfig(this); | ||
| 74 | + } | ||
| 75 | + | ||
| 76 | + public Builder setTransducer(OnlineTransducerModelConfig transducer) { | ||
| 77 | + this.transducer = transducer; | ||
| 78 | + return this; | ||
| 79 | + } | ||
| 80 | + | ||
| 81 | + public Builder setParaformer(OnlineParaformerModelConfig paraformer) { | ||
| 82 | + this.paraformer = paraformer; | ||
| 83 | + return this; | ||
| 84 | + } | ||
| 85 | + | ||
| 86 | + public Builder setZipformer2Ctc(OnlineZipformer2CtcModelConfig zipformer2Ctc) { | ||
| 87 | + this.zipformer2Ctc = zipformer2Ctc; | ||
| 88 | + return this; | ||
| 89 | + } | ||
| 90 | + | ||
| 91 | + public Builder setTokens(String tokens) { | ||
| 92 | + this.tokens = tokens; | ||
| 93 | + return this; | ||
| 94 | + } | ||
| 95 | + | ||
| 96 | + public Builder setNumThreads(int numThreads) { | ||
| 97 | + this.numThreads = numThreads; | ||
| 98 | + return this; | ||
| 99 | + } | ||
| 100 | + | ||
| 101 | + public Builder setDebug(boolean debug) { | ||
| 102 | + this.debug = debug; | ||
| 103 | + return this; | ||
| 104 | + } | ||
| 105 | + | ||
| 106 | + public Builder setProvider(String provider) { | ||
| 107 | + this.provider = provider; | ||
| 108 | + return this; | ||
| 109 | + } | ||
| 110 | + | ||
| 111 | + public Builder setModelType(String modelType) { | ||
| 112 | + this.modelType = modelType; | ||
| 113 | + return this; | ||
| 114 | + } | ||
| 115 | + } | ||
| 55 | } | 116 | } |
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | 1 | +// Copyright 2022-2023 by zhaoming |
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 4 | 3 | ||
| 5 | package com.k2fsa.sherpa.onnx; | 4 | package com.k2fsa.sherpa.onnx; |
| 6 | 5 | ||
| @@ -8,9 +7,13 @@ public class OnlineParaformerModelConfig { | @@ -8,9 +7,13 @@ public class OnlineParaformerModelConfig { | ||
| 8 | private final String encoder; | 7 | private final String encoder; |
| 9 | private final String decoder; | 8 | private final String decoder; |
| 10 | 9 | ||
| 11 | - public OnlineParaformerModelConfig(String encoder, String decoder) { | ||
| 12 | - this.encoder = encoder; | ||
| 13 | - this.decoder = decoder; | 10 | + private OnlineParaformerModelConfig(Builder builder) { |
| 11 | + this.encoder = builder.encoder; | ||
| 12 | + this.decoder = builder.decoder; | ||
| 13 | + } | ||
| 14 | + | ||
| 15 | + public static Builder builder() { | ||
| 16 | + return new Builder(); | ||
| 14 | } | 17 | } |
| 15 | 18 | ||
| 16 | public String getEncoder() { | 19 | public String getEncoder() { |
| @@ -20,4 +23,23 @@ public class OnlineParaformerModelConfig { | @@ -20,4 +23,23 @@ public class OnlineParaformerModelConfig { | ||
| 20 | public String getDecoder() { | 23 | public String getDecoder() { |
| 21 | return decoder; | 24 | return decoder; |
| 22 | } | 25 | } |
| 26 | + | ||
| 27 | + public static class Builder { | ||
| 28 | + private String encoder = ""; | ||
| 29 | + private String decoder = ""; | ||
| 30 | + | ||
| 31 | + public OnlineParaformerModelConfig build() { | ||
| 32 | + return new OnlineParaformerModelConfig(this); | ||
| 33 | + } | ||
| 34 | + | ||
| 35 | + public Builder setEncoder(String encoder) { | ||
| 36 | + this.encoder = encoder; | ||
| 37 | + return this; | ||
| 38 | + } | ||
| 39 | + | ||
| 40 | + public Builder setDecoder(String decoder) { | ||
| 41 | + this.decoder = decoder; | ||
| 42 | + return this; | ||
| 43 | + } | ||
| 44 | + } | ||
| 23 | } | 45 | } |
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - * // the online recognizer for sherpa-onnx, it can load config from a file | ||
| 4 | - * // or by argument | ||
| 5 | - */ | ||
| 6 | -/* | ||
| 7 | -usage example: | ||
| 8 | - | ||
| 9 | - String cfgpath=appdir+"/modelconfig.cfg"; | ||
| 10 | - OnlineRecognizer.setSoPath(soPath); //set so lib path | ||
| 11 | - | ||
| 12 | - OnlineRecognizer rcgOjb = new OnlineRecognizer(); //create a recognizer | ||
| 13 | - rcgOjb = new OnlineRecognizer(cfgFile); //set model config file | ||
| 14 | - CreateStream streamObj=rcgOjb.CreateStream(); //create a stream for read wav data | ||
| 15 | - float[] buffer = rcgOjb.readWavFile(wavfilename); // read data from file | ||
| 16 | - streamObj.acceptWaveform(buffer); // feed stream with data | ||
| 17 | - streamObj.inputFinished(); // tell engine you done with all data | ||
| 18 | - OnlineStream ssObj[] = new OnlineStream[1]; | ||
| 19 | - while (rcgOjb.isReady(streamObj)) { // engine is ready for unprocessed data | ||
| 20 | - ssObj[0] = streamObj; | ||
| 21 | - rcgOjb.decodeStreams(ssObj); // decode for multiple stream | ||
| 22 | - // rcgOjb.DecodeStream(streamObj); // decode for single stream | ||
| 23 | - } | ||
| 24 | - | ||
| 25 | - String recText = "simple:" + rcgOjb.getResult(streamObj) + "\n"; | ||
| 26 | - byte[] utf8Data = recText.getBytes(StandardCharsets.UTF_8); | ||
| 27 | - System.out.println(new String(utf8Data)); | ||
| 28 | - rcgOjb.reSet(streamObj); | ||
| 29 | - rcgOjb.releaseStream(streamObj); // release stream | ||
| 30 | - rcgOjb.release(); // release recognizer | ||
| 31 | - | ||
| 32 | -*/ | 1 | +// Copyright 2022-2023 by zhaoming |
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 33 | package com.k2fsa.sherpa.onnx; | 3 | package com.k2fsa.sherpa.onnx; |
| 34 | 4 | ||
| 35 | -import java.io.BufferedInputStream; | ||
| 36 | -import java.io.File; | ||
| 37 | -import java.io.FileInputStream; | ||
| 38 | -import java.io.InputStream; | ||
| 39 | -import java.util.Enumeration; | ||
| 40 | -import java.util.HashMap; | ||
| 41 | -import java.util.Map; | ||
| 42 | -import java.util.Properties; | ||
| 43 | 5 | ||
| 44 | public class OnlineRecognizer { | 6 | public class OnlineRecognizer { |
| 45 | - private long ptr = 0; // this is the asr engine ptrss | ||
| 46 | - | ||
| 47 | - private int sampleRate = 16000; | ||
| 48 | - | ||
| 49 | - // load config file for OnlineRecognizer | ||
| 50 | - public OnlineRecognizer(String modelCfgPath) { | ||
| 51 | - Map<String, String> proMap = this.readProperties(modelCfgPath); | ||
| 52 | - try { | ||
| 53 | - int sampleRate = Integer.parseInt(proMap.getOrDefault("sample_rate", "16000").trim()); | ||
| 54 | - this.sampleRate = sampleRate; | ||
| 55 | - EndpointRule rule1 = | ||
| 56 | - new EndpointRule( | ||
| 57 | - false, | ||
| 58 | - Float.parseFloat(proMap.getOrDefault("rule1_min_trailing_silence", "2.4").trim()), | ||
| 59 | - 0.0F); | ||
| 60 | - EndpointRule rule2 = | ||
| 61 | - new EndpointRule( | ||
| 62 | - true, | ||
| 63 | - Float.parseFloat(proMap.getOrDefault("rule2_min_trailing_silence", "1.2").trim()), | ||
| 64 | - 0.0F); | ||
| 65 | - EndpointRule rule3 = | ||
| 66 | - new EndpointRule( | ||
| 67 | - false, | ||
| 68 | - 0.0F, | ||
| 69 | - Float.parseFloat(proMap.getOrDefault("rule3_min_utterance_length", "20").trim())); | ||
| 70 | - EndpointConfig endCfg = new EndpointConfig(rule1, rule2, rule3); | ||
| 71 | - | ||
| 72 | - OnlineParaformerModelConfig modelParaCfg = | ||
| 73 | - new OnlineParaformerModelConfig( | ||
| 74 | - proMap.getOrDefault("encoder", "").trim(), proMap.getOrDefault("decoder", "").trim()); | ||
| 75 | - OnlineTransducerModelConfig modelTranCfg = | ||
| 76 | - new OnlineTransducerModelConfig( | ||
| 77 | - proMap.getOrDefault("encoder", "").trim(), | ||
| 78 | - proMap.getOrDefault("decoder", "").trim(), | ||
| 79 | - proMap.getOrDefault("joiner", "").trim()); | ||
| 80 | - OnlineZipformer2CtcModelConfig zipformer2CtcConfig = new OnlineZipformer2CtcModelConfig(""); | ||
| 81 | - OnlineModelConfig modelCfg = | ||
| 82 | - new OnlineModelConfig( | ||
| 83 | - proMap.getOrDefault("tokens", "").trim(), | ||
| 84 | - Integer.parseInt(proMap.getOrDefault("num_threads", "4").trim()), | ||
| 85 | - false, | ||
| 86 | - proMap.getOrDefault("model_type", "zipformer").trim(), | ||
| 87 | - modelParaCfg, | ||
| 88 | - modelTranCfg, zipformer2CtcConfig); | ||
| 89 | - FeatureConfig featConfig = | ||
| 90 | - new FeatureConfig( | ||
| 91 | - sampleRate, Integer.parseInt(proMap.getOrDefault("feature_dim", "80").trim())); | ||
| 92 | - OnlineLMConfig onlineLmConfig = | ||
| 93 | - new OnlineLMConfig( | ||
| 94 | - proMap.getOrDefault("lm_model", "").trim(), | ||
| 95 | - Float.parseFloat(proMap.getOrDefault("lm_scale", "0.5").trim())); | ||
| 96 | - | ||
| 97 | - OnlineRecognizerConfig rcgCfg = | ||
| 98 | - new OnlineRecognizerConfig( | ||
| 99 | - featConfig, | ||
| 100 | - modelCfg, | ||
| 101 | - endCfg, | ||
| 102 | - onlineLmConfig, | ||
| 103 | - Boolean.parseBoolean(proMap.getOrDefault("enable_endpoint_detection", "true").trim()), | ||
| 104 | - proMap.getOrDefault("decoding_method", "modified_beam_search").trim(), | ||
| 105 | - Integer.parseInt(proMap.getOrDefault("max_active_paths", "4").trim()), | ||
| 106 | - proMap.getOrDefault("hotwords_file", "").trim(), | ||
| 107 | - Float.parseFloat(proMap.getOrDefault("hotwords_score", "1.5").trim())); | ||
| 108 | - // create a new Recognizer, first parameter kept for android asset_manager ANDROID_API__ >= 9 | ||
| 109 | - this.ptr = createOnlineRecognizer(new Object(), rcgCfg); | ||
| 110 | - | ||
| 111 | - } catch (Exception e) { | ||
| 112 | - System.err.println(e); | ||
| 113 | - } | 7 | + static { |
| 8 | + System.loadLibrary("sherpa-onnx-jni"); | ||
| 114 | } | 9 | } |
| 115 | 10 | ||
| 116 | - // use for android asset_manager ANDROID_API__ >= 9 | ||
| 117 | - public OnlineRecognizer(Object assetManager, String modelCfgPath) { | ||
| 118 | - Map<String, String> proMap = this.readProperties(modelCfgPath); | ||
| 119 | - try { | ||
| 120 | - int sampleRate = Integer.parseInt(proMap.getOrDefault("sample_rate", "16000").trim()); | ||
| 121 | - this.sampleRate = sampleRate; | ||
| 122 | - EndpointRule rule1 = | ||
| 123 | - new EndpointRule( | ||
| 124 | - false, | ||
| 125 | - Float.parseFloat(proMap.getOrDefault("rule1_min_trailing_silence", "2.4").trim()), | ||
| 126 | - 0.0F); | ||
| 127 | - EndpointRule rule2 = | ||
| 128 | - new EndpointRule( | ||
| 129 | - true, | ||
| 130 | - Float.parseFloat(proMap.getOrDefault("rule2_min_trailing_silence", "1.2").trim()), | ||
| 131 | - 0.0F); | ||
| 132 | - EndpointRule rule3 = | ||
| 133 | - new EndpointRule( | ||
| 134 | - false, | ||
| 135 | - 0.0F, | ||
| 136 | - Float.parseFloat(proMap.getOrDefault("rule3_min_utterance_length", "20").trim())); | ||
| 137 | - EndpointConfig endCfg = new EndpointConfig(rule1, rule2, rule3); | ||
| 138 | - OnlineParaformerModelConfig modelParaCfg = | ||
| 139 | - new OnlineParaformerModelConfig( | ||
| 140 | - proMap.getOrDefault("encoder", "").trim(), proMap.getOrDefault("decoder", "").trim()); | ||
| 141 | - OnlineTransducerModelConfig modelTranCfg = | ||
| 142 | - new OnlineTransducerModelConfig( | ||
| 143 | - proMap.getOrDefault("encoder", "").trim(), | ||
| 144 | - proMap.getOrDefault("decoder", "").trim(), | ||
| 145 | - proMap.getOrDefault("joiner", "").trim()); | ||
| 146 | - OnlineZipformer2CtcModelConfig zipformer2CtcConfig = new OnlineZipformer2CtcModelConfig(""); | ||
| 147 | - | ||
| 148 | - OnlineModelConfig modelCfg = | ||
| 149 | - new OnlineModelConfig( | ||
| 150 | - proMap.getOrDefault("tokens", "").trim(), | ||
| 151 | - Integer.parseInt(proMap.getOrDefault("num_threads", "4").trim()), | ||
| 152 | - false, | ||
| 153 | - proMap.getOrDefault("model_type", "zipformer").trim(), | ||
| 154 | - modelParaCfg, | ||
| 155 | - modelTranCfg, zipformer2CtcConfig); | ||
| 156 | - FeatureConfig featConfig = | ||
| 157 | - new FeatureConfig( | ||
| 158 | - sampleRate, Integer.parseInt(proMap.getOrDefault("feature_dim", "80").trim())); | ||
| 159 | - | ||
| 160 | - OnlineLMConfig onlineLmConfig = | ||
| 161 | - new OnlineLMConfig( | ||
| 162 | - proMap.getOrDefault("lm_model", "").trim(), | ||
| 163 | - Float.parseFloat(proMap.getOrDefault("lm_scale", "0.5").trim())); | ||
| 164 | - | ||
| 165 | - OnlineRecognizerConfig rcgCfg = | ||
| 166 | - new OnlineRecognizerConfig( | ||
| 167 | - featConfig, | ||
| 168 | - modelCfg, | ||
| 169 | - endCfg, | ||
| 170 | - onlineLmConfig, | ||
| 171 | - Boolean.parseBoolean(proMap.getOrDefault("enable_endpoint_detection", "true").trim()), | ||
| 172 | - proMap.getOrDefault("decoding_method", "modified_beam_search").trim(), | ||
| 173 | - Integer.parseInt(proMap.getOrDefault("max_active_paths", "4").trim()), | ||
| 174 | - proMap.getOrDefault("hotwords_file", "").trim(), | ||
| 175 | - Float.parseFloat(proMap.getOrDefault("hotwords_score", "1.5").trim())); | ||
| 176 | - // create a new Recognizer, first parameter kept for android asset_manager ANDROID_API__ >= 9 | ||
| 177 | - this.ptr = createOnlineRecognizer(assetManager, rcgCfg); | 11 | + private long ptr = 0; // this is the asr engine ptrss |
| 178 | 12 | ||
| 179 | - } catch (Exception e) { | ||
| 180 | - System.err.println(e); | ||
| 181 | - } | ||
| 182 | - } | ||
| 183 | 13 | ||
| 184 | - // set onlineRecognizer by parameter | ||
| 185 | - public OnlineRecognizer( | ||
| 186 | - String tokens, | ||
| 187 | - String encoder, | ||
| 188 | - String decoder, | ||
| 189 | - String joiner, | ||
| 190 | - int numThreads, | ||
| 191 | - int sampleRate, | ||
| 192 | - int featureDim, | ||
| 193 | - boolean enableEndpointDetection, | ||
| 194 | - float rule1MinTrailingSilence, | ||
| 195 | - float rule2MinTrailingSilence, | ||
| 196 | - float rule3MinUtteranceLength, | ||
| 197 | - String decodingMethod, | ||
| 198 | - String lm_model, | ||
| 199 | - float lm_scale, | ||
| 200 | - int maxActivePaths, | ||
| 201 | - String hotwordsFile, | ||
| 202 | - float hotwordsScore, | ||
| 203 | - String modelType) { | ||
| 204 | - this.sampleRate = sampleRate; | ||
| 205 | - EndpointRule rule1 = new EndpointRule(false, rule1MinTrailingSilence, 0.0F); | ||
| 206 | - EndpointRule rule2 = new EndpointRule(true, rule2MinTrailingSilence, 0.0F); | ||
| 207 | - EndpointRule rule3 = new EndpointRule(false, 0.0F, rule3MinUtteranceLength); | ||
| 208 | - EndpointConfig endCfg = new EndpointConfig(rule1, rule2, rule3); | ||
| 209 | - OnlineParaformerModelConfig modelParaCfg = new OnlineParaformerModelConfig(encoder, decoder); | ||
| 210 | - OnlineTransducerModelConfig modelTranCfg = | ||
| 211 | - new OnlineTransducerModelConfig(encoder, decoder, joiner); | ||
| 212 | - OnlineZipformer2CtcModelConfig zipformer2CtcConfig = new OnlineZipformer2CtcModelConfig(""); | ||
| 213 | - OnlineModelConfig modelCfg = | ||
| 214 | - new OnlineModelConfig(tokens, numThreads, false, modelType, modelParaCfg, modelTranCfg, zipformer2CtcConfig); | ||
| 215 | - FeatureConfig featConfig = new FeatureConfig(sampleRate, featureDim); | ||
| 216 | - OnlineLMConfig onlineLmConfig = new OnlineLMConfig(lm_model, lm_scale); | ||
| 217 | - OnlineRecognizerConfig rcgCfg = | ||
| 218 | - new OnlineRecognizerConfig( | ||
| 219 | - featConfig, | ||
| 220 | - modelCfg, | ||
| 221 | - endCfg, | ||
| 222 | - onlineLmConfig, | ||
| 223 | - enableEndpointDetection, | ||
| 224 | - decodingMethod, | ||
| 225 | - maxActivePaths, | ||
| 226 | - hotwordsFile, | ||
| 227 | - hotwordsScore); | ||
| 228 | - // create a new Recognizer, first parameter kept for android asset_manager ANDROID_API__ >= 9 | ||
| 229 | - this.ptr = createOnlineRecognizer(new Object(), rcgCfg); | 14 | + public OnlineRecognizer(OnlineRecognizerConfig config) { |
| 15 | + ptr = newFromFile(config); | ||
| 230 | } | 16 | } |
| 231 | 17 | ||
| 18 | + /* | ||
| 232 | public static float[] readWavFile(String fileName) { | 19 | public static float[] readWavFile(String fileName) { |
| 233 | // read data from the filename | 20 | // read data from the filename |
| 234 | Object[] wavdata = readWave(fileName); | 21 | Object[] wavdata = readWave(fileName); |
| @@ -238,139 +25,67 @@ public class OnlineRecognizer { | @@ -238,139 +25,67 @@ public class OnlineRecognizer { | ||
| 238 | 25 | ||
| 239 | return floatData; | 26 | return floatData; |
| 240 | } | 27 | } |
| 28 | + */ | ||
| 241 | 29 | ||
| 242 | - // load the libsherpa-onnx-jni.so lib | ||
| 243 | - public static void loadSoLib(String soPath) { | ||
| 244 | - // load libsherpa-onnx-jni.so lib from the path | ||
| 245 | - | ||
| 246 | - System.out.println("so lib path=" + soPath + "\n"); | ||
| 247 | - System.load(soPath.trim()); | ||
| 248 | - System.out.println("load so lib succeed\n"); | ||
| 249 | - } | ||
| 250 | - | ||
| 251 | - public static void setSoPath(String soPath) { | ||
| 252 | - OnlineRecognizer.loadSoLib(soPath); | ||
| 253 | - OnlineStream.loadSoLib(soPath); | ||
| 254 | - } | ||
| 255 | - | ||
| 256 | - private static native Object[] readWave(String fileName); // static | ||
| 257 | - | ||
| 258 | - private Map<String, String> readProperties(String modelCfgPath) { | ||
| 259 | - // read and parse config file | ||
| 260 | - Properties props = new Properties(); | ||
| 261 | - Map<String, String> proMap = new HashMap<>(); | ||
| 262 | - try { | ||
| 263 | - File file = new File(modelCfgPath); | ||
| 264 | - if (!file.exists()) { | ||
| 265 | - System.out.println("model cfg file not exists!"); | ||
| 266 | - System.exit(0); | ||
| 267 | - } | ||
| 268 | - InputStream in = new BufferedInputStream(new FileInputStream(modelCfgPath)); | ||
| 269 | - props.load(in); | ||
| 270 | - Enumeration en = props.propertyNames(); | ||
| 271 | - while (en.hasMoreElements()) { | ||
| 272 | - String key = (String) en.nextElement(); | ||
| 273 | - String Property = props.getProperty(key); | ||
| 274 | - proMap.put(key, Property); | ||
| 275 | - } | ||
| 276 | - | ||
| 277 | - } catch (Exception e) { | ||
| 278 | - e.printStackTrace(); | ||
| 279 | - } | ||
| 280 | - return proMap; | ||
| 281 | - } | ||
| 282 | - | ||
| 283 | - public void decodeStream(OnlineStream s) throws Exception { | ||
| 284 | - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr"); | ||
| 285 | - long streamPtr = s.getPtr(); | ||
| 286 | - if (streamPtr == 0) throw new Exception("null exception for stream ptr"); | ||
| 287 | - // when feeded samples to engine, call DecodeStream to let it process | ||
| 288 | - decodeStream(this.ptr, streamPtr); | ||
| 289 | - } | ||
| 290 | 30 | ||
| 291 | - public void decodeStreams(OnlineStream[] ssOjb) throws Exception { | ||
| 292 | - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr"); | ||
| 293 | - // decode for multiple streams | ||
| 294 | - long[] ss = new long[ssOjb.length]; | ||
| 295 | - for (int i = 0; i < ssOjb.length; i++) { | ||
| 296 | - ss[i] = ssOjb[i].getPtr(); | ||
| 297 | - if (ss[i] == 0) throw new Exception("null exception for stream ptr"); | ||
| 298 | - } | ||
| 299 | - decodeStreams(this.ptr, ss); | 31 | + public void decode(OnlineStream s) { |
| 32 | + decode(ptr, s.getPtr()); | ||
| 300 | } | 33 | } |
| 301 | 34 | ||
| 302 | - public boolean isReady(OnlineStream s) throws Exception { | ||
| 303 | - // whether the engine is ready for decode | ||
| 304 | - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr"); | ||
| 305 | - long streamPtr = s.getPtr(); | ||
| 306 | - if (streamPtr == 0) throw new Exception("null exception for stream ptr"); | ||
| 307 | - return isReady(this.ptr, streamPtr); | ||
| 308 | - } | ||
| 309 | 35 | ||
| 310 | - public String getResult(OnlineStream s) throws Exception { | ||
| 311 | - // get text from the engine | ||
| 312 | - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr"); | ||
| 313 | - long streamPtr = s.getPtr(); | ||
| 314 | - if (streamPtr == 0) throw new Exception("null exception for stream ptr"); | ||
| 315 | - return getResult(this.ptr, streamPtr); | 36 | + public boolean isReady(OnlineStream s) { |
| 37 | + return isReady(ptr, s.getPtr()); | ||
| 316 | } | 38 | } |
| 317 | 39 | ||
| 318 | - public boolean isEndpoint(OnlineStream s) throws Exception { | ||
| 319 | - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr"); | ||
| 320 | - long streamPtr = s.getPtr(); | ||
| 321 | - if (streamPtr == 0) throw new Exception("null exception for stream ptr"); | ||
| 322 | - return isEndpoint(this.ptr, streamPtr); | 40 | + public boolean isEndpoint(OnlineStream s) { |
| 41 | + return isEndpoint(ptr, s.getPtr()); | ||
| 323 | } | 42 | } |
| 324 | 43 | ||
| 325 | - public void reSet(OnlineStream s) throws Exception { | ||
| 326 | - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr"); | ||
| 327 | - long streamPtr = s.getPtr(); | ||
| 328 | - if (streamPtr == 0) throw new Exception("null exception for stream ptr"); | ||
| 329 | - reSet(this.ptr, streamPtr); | 44 | + public void reset(OnlineStream s) { |
| 45 | + reset(ptr, s.getPtr()); | ||
| 330 | } | 46 | } |
| 331 | 47 | ||
| 332 | - public OnlineStream createStream() throws Exception { | ||
| 333 | - // create one stream for data to feed in | ||
| 334 | - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr"); | ||
| 335 | - long streamPtr = createStream(this.ptr); | ||
| 336 | - OnlineStream stream = new OnlineStream(streamPtr, this.sampleRate); | ||
| 337 | - return stream; | 48 | + public OnlineStream createStream() { |
| 49 | + long p = createStream(ptr, ""); | ||
| 50 | + return new OnlineStream(p); | ||
| 338 | } | 51 | } |
| 339 | 52 | ||
| 53 | + @Override | ||
| 340 | protected void finalize() throws Throwable { | 54 | protected void finalize() throws Throwable { |
| 341 | release(); | 55 | release(); |
| 342 | } | 56 | } |
| 343 | 57 | ||
| 344 | // recognizer release, you'd better call it manually if not use anymore | 58 | // recognizer release, you'd better call it manually if not use anymore |
| 345 | public void release() { | 59 | public void release() { |
| 346 | - if (this.ptr == 0) return; | ||
| 347 | - deleteOnlineRecognizer(this.ptr); | 60 | + if (this.ptr == 0) { |
| 61 | + return; | ||
| 62 | + } | ||
| 63 | + delete(this.ptr); | ||
| 348 | this.ptr = 0; | 64 | this.ptr = 0; |
| 349 | } | 65 | } |
| 350 | 66 | ||
| 351 | - // JNI interface libsherpa-onnx-jni.so | ||
| 352 | - | ||
| 353 | - // stream release, you'd better call it manually if not use anymore | ||
| 354 | - public void releaseStream(OnlineStream s) { | ||
| 355 | - s.release(); | 67 | + public OnlineRecognizerResult getResult(OnlineStream s) { |
| 68 | + Object[] arr = getResult(ptr, s.getPtr()); | ||
| 69 | + String text = (String) arr[0]; | ||
| 70 | + String[] tokens = (String[]) arr[1]; | ||
| 71 | + float[] timestamps = (float[]) arr[2]; | ||
| 72 | + return new OnlineRecognizerResult(text, tokens, timestamps); | ||
| 356 | } | 73 | } |
| 357 | 74 | ||
| 358 | - private native String getResult(long ptr, long streamPtr); | ||
| 359 | 75 | ||
| 360 | - private native void decodeStream(long ptr, long streamPtr); | 76 | + private native void delete(long ptr); |
| 361 | 77 | ||
| 362 | - private native void decodeStreams(long ptr, long[] ssPtr); | 78 | + private native long newFromFile(OnlineRecognizerConfig config); |
| 363 | 79 | ||
| 364 | - private native boolean isReady(long ptr, long streamPtr); | ||
| 365 | - | ||
| 366 | - // first parameter keep for android asset_manager ANDROID_API__ >= 9 | ||
| 367 | - private native long createOnlineRecognizer(Object asset, OnlineRecognizerConfig config); | 80 | + private native long createStream(long ptr, String hotwords); |
| 368 | 81 | ||
| 369 | - private native long createStream(long ptr); | 82 | + private native void reset(long ptr, long streamPtr); |
| 370 | 83 | ||
| 371 | - private native void deleteOnlineRecognizer(long ptr); | 84 | + private native void decode(long ptr, long streamPtr); |
| 372 | 85 | ||
| 373 | private native boolean isEndpoint(long ptr, long streamPtr); | 86 | private native boolean isEndpoint(long ptr, long streamPtr); |
| 374 | 87 | ||
| 375 | - private native void reSet(long ptr, long streamPtr); | ||
| 376 | -} | 88 | + private native boolean isReady(long ptr, long streamPtr); |
| 89 | + | ||
| 90 | + private native Object[] getResult(long ptr, long streamPtr); | ||
| 91 | +} |
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | ||
| 4 | - | 1 | +// Copyright 2022-2023 by zhaoming |
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 5 | package com.k2fsa.sherpa.onnx; | 3 | package com.k2fsa.sherpa.onnx; |
| 6 | 4 | ||
| 7 | public class OnlineRecognizerConfig { | 5 | public class OnlineRecognizerConfig { |
| 8 | private final FeatureConfig featConfig; | 6 | private final FeatureConfig featConfig; |
| 9 | private final OnlineModelConfig modelConfig; | 7 | private final OnlineModelConfig modelConfig; |
| 10 | - private final EndpointConfig endpointConfig; | ||
| 11 | private final OnlineLMConfig lmConfig; | 8 | private final OnlineLMConfig lmConfig; |
| 9 | + private final EndpointConfig endpointConfig; | ||
| 12 | private final boolean enableEndpoint; | 10 | private final boolean enableEndpoint; |
| 13 | private final String decodingMethod; | 11 | private final String decodingMethod; |
| 14 | private final int maxActivePaths; | 12 | private final int maxActivePaths; |
| 15 | private final String hotwordsFile; | 13 | private final String hotwordsFile; |
| 16 | private final float hotwordsScore; | 14 | private final float hotwordsScore; |
| 17 | - | ||
| 18 | - public OnlineRecognizerConfig( | ||
| 19 | - FeatureConfig featConfig, | ||
| 20 | - OnlineModelConfig modelConfig, | ||
| 21 | - EndpointConfig endpointConfig, | ||
| 22 | - OnlineLMConfig lmConfig, | ||
| 23 | - boolean enableEndpoint, | ||
| 24 | - String decodingMethod, | ||
| 25 | - int maxActivePaths, | ||
| 26 | - String hotwordsFile, | ||
| 27 | - float hotwordsScore) { | ||
| 28 | - this.featConfig = featConfig; | ||
| 29 | - this.modelConfig = modelConfig; | ||
| 30 | - this.endpointConfig = endpointConfig; | ||
| 31 | - this.lmConfig = lmConfig; | ||
| 32 | - this.enableEndpoint = enableEndpoint; | ||
| 33 | - this.decodingMethod = decodingMethod; | ||
| 34 | - this.maxActivePaths = maxActivePaths; | ||
| 35 | - this.hotwordsFile = hotwordsFile; | ||
| 36 | - this.hotwordsScore = hotwordsScore; | 15 | + private OnlineRecognizerConfig(Builder builder) { |
| 16 | + this.featConfig = builder.featConfig; | ||
| 17 | + this.modelConfig = builder.modelConfig; | ||
| 18 | + this.lmConfig = builder.lmConfig; | ||
| 19 | + this.endpointConfig = builder.endpointConfig; | ||
| 20 | + this.enableEndpoint = builder.enableEndpoint; | ||
| 21 | + this.decodingMethod = builder.decodingMethod; | ||
| 22 | + this.maxActivePaths = builder.maxActivePaths; | ||
| 23 | + this.hotwordsFile = builder.hotwordsFile; | ||
| 24 | + this.hotwordsScore = builder.hotwordsScore; | ||
| 37 | } | 25 | } |
| 38 | 26 | ||
| 39 | - public OnlineLMConfig getLmConfig() { | ||
| 40 | - return lmConfig; | ||
| 41 | - } | ||
| 42 | - | ||
| 43 | - public FeatureConfig getFeatConfig() { | ||
| 44 | - return featConfig; | 27 | + public static Builder builder() { |
| 28 | + return new Builder(); | ||
| 45 | } | 29 | } |
| 46 | 30 | ||
| 47 | public OnlineModelConfig getModelConfig() { | 31 | public OnlineModelConfig getModelConfig() { |
| 48 | return modelConfig; | 32 | return modelConfig; |
| 49 | } | 33 | } |
| 50 | 34 | ||
| 51 | - public EndpointConfig getEndpointConfig() { | ||
| 52 | - return endpointConfig; | ||
| 53 | - } | 35 | + public static class Builder { |
| 36 | + private FeatureConfig featConfig = FeatureConfig.builder().build(); | ||
| 37 | + private OnlineModelConfig modelConfig = OnlineModelConfig.builder().build(); | ||
| 38 | + private OnlineLMConfig lmConfig = OnlineLMConfig.builder().build(); | ||
| 39 | + private EndpointConfig endpointConfig = EndpointConfig.builder().build(); | ||
| 40 | + private boolean enableEndpoint = true; | ||
| 41 | + private String decodingMethod = "greedy_search"; | ||
| 42 | + private int maxActivePaths = 4; | ||
| 43 | + private String hotwordsFile = ""; | ||
| 44 | + private float hotwordsScore = 1.5f; | ||
| 54 | 45 | ||
| 55 | - public boolean isEnableEndpoint() { | ||
| 56 | - return enableEndpoint; | ||
| 57 | - } | 46 | + public OnlineRecognizerConfig build() { |
| 47 | + return new OnlineRecognizerConfig(this); | ||
| 48 | + } | ||
| 58 | 49 | ||
| 59 | - public String getDecodingMethod() { | ||
| 60 | - return decodingMethod; | ||
| 61 | - } | 50 | + public Builder setFeatureConfig(FeatureConfig featConfig) { |
| 51 | + this.featConfig = featConfig; | ||
| 52 | + return this; | ||
| 53 | + } | ||
| 54 | + | ||
| 55 | + public Builder setOnlineModelConfig(OnlineModelConfig modelConfig) { | ||
| 56 | + this.modelConfig = modelConfig; | ||
| 57 | + return this; | ||
| 58 | + } | ||
| 59 | + | ||
| 60 | + public Builder setOnlineLMConfig(OnlineLMConfig lmConfig) { | ||
| 61 | + this.lmConfig = lmConfig; | ||
| 62 | + return this; | ||
| 63 | + } | ||
| 64 | + | ||
| 65 | + public Builder setEndpointConfig(EndpointConfig endpointConfig) { | ||
| 66 | + this.endpointConfig = endpointConfig; | ||
| 67 | + return this; | ||
| 68 | + } | ||
| 69 | + | ||
| 70 | + public Builder setEnableEndpoint(boolean enableEndpoint) { | ||
| 71 | + this.enableEndpoint = enableEndpoint; | ||
| 72 | + return this; | ||
| 73 | + } | ||
| 74 | + | ||
| 75 | + public Builder setDecodingMethod(String decodingMethod) { | ||
| 76 | + this.decodingMethod = decodingMethod; | ||
| 77 | + return this; | ||
| 78 | + } | ||
| 79 | + | ||
| 80 | + public Builder setMaxActivePaths(int maxActivePaths) { | ||
| 81 | + this.maxActivePaths = maxActivePaths; | ||
| 82 | + return this; | ||
| 83 | + } | ||
| 84 | + | ||
| 85 | + public Builder setHotwordsFile(String hotwordsFile) { | ||
| 86 | + this.hotwordsFile = hotwordsFile; | ||
| 87 | + return this; | ||
| 88 | + } | ||
| 62 | 89 | ||
| 63 | - public int getMaxActivePaths() { | ||
| 64 | - return maxActivePaths; | 90 | + public Builder setHotwordsScore(float hotwordsScore) { |
| 91 | + this.hotwordsScore = hotwordsScore; | ||
| 92 | + return this; | ||
| 93 | + } | ||
| 65 | } | 94 | } |
| 66 | } | 95 | } |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | +package com.k2fsa.sherpa.onnx; | ||
| 3 | + | ||
| 4 | +public class OnlineRecognizerResult { | ||
| 5 | + private final String text; | ||
| 6 | + private final String[] tokens; | ||
| 7 | + private final float[] timestamps; | ||
| 8 | + | ||
| 9 | + public OnlineRecognizerResult(String text, String[] tokens, float[] timestamps) { | ||
| 10 | + this.text = text; | ||
| 11 | + this.tokens = tokens; | ||
| 12 | + this.timestamps = timestamps; | ||
| 13 | + } | ||
| 14 | + | ||
| 15 | + public String getText() { | ||
| 16 | + return text; | ||
| 17 | + } | ||
| 18 | + | ||
| 19 | + public String[] getTokens() { | ||
| 20 | + return tokens; | ||
| 21 | + } | ||
| 22 | + | ||
| 23 | + public float[] getTimestamps() { | ||
| 24 | + return timestamps; | ||
| 25 | + } | ||
| 26 | +} |
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | ||
| 4 | -// Stream is used for feeding data to the asr engine | 1 | +// Copyright 2022-2023 by zhaoming |
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 5 | package com.k2fsa.sherpa.onnx; | 3 | package com.k2fsa.sherpa.onnx; |
| 6 | 4 | ||
| 7 | public class OnlineStream { | 5 | public class OnlineStream { |
| 8 | - private long ptr = 0; // this is the stream ptr | 6 | + static { |
| 7 | + System.loadLibrary("sherpa-onnx-jni"); | ||
| 8 | + } | ||
| 9 | 9 | ||
| 10 | - private int sampleRate = 16000; | 10 | + private long ptr = 0; |
| 11 | 11 | ||
| 12 | - // assign ptr to this stream in construction | ||
| 13 | - public OnlineStream(long ptr, int sampleRate) { | ||
| 14 | - this.ptr = ptr; | ||
| 15 | - this.sampleRate = sampleRate; | 12 | + public OnlineStream() { |
| 13 | + this.ptr = 0; | ||
| 16 | } | 14 | } |
| 17 | 15 | ||
| 18 | - public static void loadSoLib(String soPath) { | ||
| 19 | - // load .so lib from the path | ||
| 20 | - System.load(soPath.trim()); // ("sherpa-onnx-jni-java"); | 16 | + public OnlineStream(long ptr) { |
| 17 | + this.ptr = ptr; | ||
| 21 | } | 18 | } |
| 22 | 19 | ||
| 23 | public long getPtr() { | 20 | public long getPtr() { |
| 24 | return ptr; | 21 | return ptr; |
| 25 | } | 22 | } |
| 26 | 23 | ||
| 27 | - public void acceptWaveform(float[] samples) throws Exception { | ||
| 28 | - if (this.ptr == 0) throw new Exception("null exception for stream ptr"); | 24 | + public void setPtr(long ptr) { |
| 25 | + this.ptr = ptr; | ||
| 26 | + } | ||
| 29 | 27 | ||
| 30 | - // feed wave data to asr engine | ||
| 31 | - acceptWaveform(this.ptr, this.sampleRate, samples); | 28 | + public void acceptWaveform(float[] samples, int sampleRate) { |
| 29 | + acceptWaveform(this.ptr, samples, sampleRate); | ||
| 32 | } | 30 | } |
| 33 | 31 | ||
| 34 | public void inputFinished() { | 32 | public void inputFinished() { |
| 35 | - // add some tail padding | ||
| 36 | - int padLen = (int) (this.sampleRate * 0.3); // 0.3 seconds at 16 kHz sample rate | ||
| 37 | - float[] tailPaddings = new float[padLen]; // default value is 0 | ||
| 38 | - acceptWaveform(this.ptr, this.sampleRate, tailPaddings); | ||
| 39 | - | ||
| 40 | - // tell the engine all data are feeded | ||
| 41 | inputFinished(this.ptr); | 33 | inputFinished(this.ptr); |
| 42 | } | 34 | } |
| 43 | 35 | ||
| 44 | public void release() { | 36 | public void release() { |
| 45 | // stream object must be release after used | 37 | // stream object must be release after used |
| 46 | - if (this.ptr == 0) return; | ||
| 47 | - deleteStream(this.ptr); | 38 | + if (this.ptr == 0) { |
| 39 | + return; | ||
| 40 | + } | ||
| 41 | + delete(this.ptr); | ||
| 48 | this.ptr = 0; | 42 | this.ptr = 0; |
| 49 | } | 43 | } |
| 50 | 44 | ||
| 45 | + @Override | ||
| 51 | protected void finalize() throws Throwable { | 46 | protected void finalize() throws Throwable { |
| 52 | release(); | 47 | release(); |
| 48 | + super.finalize(); | ||
| 53 | } | 49 | } |
| 54 | 50 | ||
| 55 | - public boolean isLastFrame() throws Exception { | ||
| 56 | - if (this.ptr == 0) throw new Exception("null exception for stream ptr"); | ||
| 57 | - return isLastFrame(this.ptr); | ||
| 58 | - } | ||
| 59 | - | ||
| 60 | - public void reSet() throws Exception { | ||
| 61 | - if (this.ptr == 0) throw new Exception("null exception for stream ptr"); | ||
| 62 | - reSet(this.ptr); | ||
| 63 | - } | ||
| 64 | - | ||
| 65 | - public int featureDim() throws Exception { | ||
| 66 | - if (this.ptr == 0) throw new Exception("null exception for stream ptr"); | ||
| 67 | - return featureDim(this.ptr); | ||
| 68 | - } | ||
| 69 | - | ||
| 70 | - // JNI interface libsherpa-onnx-jni.so | ||
| 71 | - private native void acceptWaveform(long ptr, int sampleRate, float[] samples); | 51 | + private native void acceptWaveform(long ptr, float[] samples, int sampleRate); |
| 72 | 52 | ||
| 73 | private native void inputFinished(long ptr); | 53 | private native void inputFinished(long ptr); |
| 74 | 54 | ||
| 75 | - private native void deleteStream(long ptr); | ||
| 76 | - | ||
| 77 | - private native int numFramesReady(long ptr); | ||
| 78 | - | ||
| 79 | - private native boolean isLastFrame(long ptr); | ||
| 80 | - | ||
| 81 | - private native void reSet(long ptr); | ||
| 82 | - | ||
| 83 | - private native int featureDim(long ptr); | ||
| 84 | -} | 55 | + private native void delete(long ptr); |
| 56 | +} |
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | 1 | +// Copyright 2022-2023 by zhaoming |
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 4 | 3 | ||
| 5 | package com.k2fsa.sherpa.onnx; | 4 | package com.k2fsa.sherpa.onnx; |
| 6 | 5 | ||
| @@ -9,10 +8,14 @@ public class OnlineTransducerModelConfig { | @@ -9,10 +8,14 @@ public class OnlineTransducerModelConfig { | ||
| 9 | private final String decoder; | 8 | private final String decoder; |
| 10 | private final String joiner; | 9 | private final String joiner; |
| 11 | 10 | ||
| 12 | - public OnlineTransducerModelConfig(String encoder, String decoder, String joiner) { | ||
| 13 | - this.encoder = encoder; | ||
| 14 | - this.decoder = decoder; | ||
| 15 | - this.joiner = joiner; | 11 | + private OnlineTransducerModelConfig(Builder builder) { |
| 12 | + this.encoder = builder.encoder; | ||
| 13 | + this.decoder = builder.decoder; | ||
| 14 | + this.joiner = builder.joiner; | ||
| 15 | + } | ||
| 16 | + | ||
| 17 | + public static Builder builder() { | ||
| 18 | + return new Builder(); | ||
| 16 | } | 19 | } |
| 17 | 20 | ||
| 18 | public String getEncoder() { | 21 | public String getEncoder() { |
| @@ -26,4 +29,29 @@ public class OnlineTransducerModelConfig { | @@ -26,4 +29,29 @@ public class OnlineTransducerModelConfig { | ||
| 26 | public String getJoiner() { | 29 | public String getJoiner() { |
| 27 | return joiner; | 30 | return joiner; |
| 28 | } | 31 | } |
| 32 | + | ||
| 33 | + public static class Builder { | ||
| 34 | + private String encoder = ""; | ||
| 35 | + private String decoder = ""; | ||
| 36 | + private String joiner = ""; | ||
| 37 | + | ||
| 38 | + public OnlineTransducerModelConfig build() { | ||
| 39 | + return new OnlineTransducerModelConfig(this); | ||
| 40 | + } | ||
| 41 | + | ||
| 42 | + public Builder setEncoder(String encoder) { | ||
| 43 | + this.encoder = encoder; | ||
| 44 | + return this; | ||
| 45 | + } | ||
| 46 | + | ||
| 47 | + public Builder setDecoder(String decoder) { | ||
| 48 | + this.decoder = decoder; | ||
| 49 | + return this; | ||
| 50 | + } | ||
| 51 | + | ||
| 52 | + public Builder setJoiner(String joiner) { | ||
| 53 | + this.joiner = joiner; | ||
| 54 | + return this; | ||
| 55 | + } | ||
| 56 | + } | ||
| 29 | } | 57 | } |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 1 | package com.k2fsa.sherpa.onnx; | 2 | package com.k2fsa.sherpa.onnx; |
| 2 | 3 | ||
| 3 | public class OnlineZipformer2CtcModelConfig { | 4 | public class OnlineZipformer2CtcModelConfig { |
| 4 | private final String model; | 5 | private final String model; |
| 5 | 6 | ||
| 6 | - public OnlineZipformer2CtcModelConfig(String model) { | ||
| 7 | - this.model = model; | 7 | + private OnlineZipformer2CtcModelConfig(Builder builder) { |
| 8 | + this.model = builder.model; | ||
| 9 | + } | ||
| 10 | + | ||
| 11 | + public static Builder builder() { | ||
| 12 | + return new Builder(); | ||
| 8 | } | 13 | } |
| 9 | 14 | ||
| 10 | public String getModel() { | 15 | public String getModel() { |
| 11 | return model; | 16 | return model; |
| 12 | } | 17 | } |
| 13 | 18 | ||
| 19 | + public static class Builder { | ||
| 20 | + private String model = ""; | ||
| 21 | + | ||
| 22 | + public OnlineZipformer2CtcModelConfig build() { | ||
| 23 | + return new OnlineZipformer2CtcModelConfig(this); | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + public Builder setModel(String model) { | ||
| 27 | + this.model = model; | ||
| 28 | + return this; | ||
| 29 | + } | ||
| 30 | + } | ||
| 14 | } | 31 | } |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | +package com.k2fsa.sherpa.onnx; | ||
| 3 | + | ||
| 4 | +public class WaveReader { | ||
| 5 | + static { | ||
| 6 | + System.loadLibrary("sherpa-onnx-jni"); | ||
| 7 | + } | ||
| 8 | + | ||
| 9 | + private final int sampleRate; | ||
| 10 | + private final float[] samples; | ||
| 11 | + | ||
| 12 | + // It supports only single channel, 16-bit wave file. | ||
| 13 | + // It will exit the program if the given file has a wrong format | ||
| 14 | + public WaveReader(String filename) { | ||
| 15 | + Object[] arr = readWaveFromFile(filename); | ||
| 16 | + samples = (float[]) arr[0]; | ||
| 17 | + sampleRate = (int) arr[1]; | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + public int getSampleRate() { | ||
| 21 | + return sampleRate; | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + public float[] getSamples() { | ||
| 25 | + return samples; | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + private native Object[] readWaveFromFile(String filename); | ||
| 29 | +} |
| @@ -21,6 +21,7 @@ set(sources | @@ -21,6 +21,7 @@ set(sources | ||
| 21 | speaker-embedding-manager.cc | 21 | speaker-embedding-manager.cc |
| 22 | spoken-language-identification.cc | 22 | spoken-language-identification.cc |
| 23 | voice-activity-detector.cc | 23 | voice-activity-detector.cc |
| 24 | + wave-reader.cc | ||
| 24 | ) | 25 | ) |
| 25 | 26 | ||
| 26 | if(SHERPA_ONNX_ENABLE_TTS) | 27 | if(SHERPA_ONNX_ENABLE_TTS) |
| @@ -8,7 +8,6 @@ | @@ -8,7 +8,6 @@ | ||
| 8 | 8 | ||
| 9 | #include "sherpa-onnx/csrc/macros.h" | 9 | #include "sherpa-onnx/csrc/macros.h" |
| 10 | #include "sherpa-onnx/csrc/onnx-utils.h" | 10 | #include "sherpa-onnx/csrc/onnx-utils.h" |
| 11 | -#include "sherpa-onnx/csrc/wave-reader.h" | ||
| 12 | #include "sherpa-onnx/csrc/wave-writer.h" | 11 | #include "sherpa-onnx/csrc/wave-writer.h" |
| 13 | #include "sherpa-onnx/jni/common.h" | 12 | #include "sherpa-onnx/jni/common.h" |
| 14 | 13 | ||
| @@ -43,69 +42,6 @@ JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl( | @@ -43,69 +42,6 @@ JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl( | ||
| 43 | return ok; | 42 | return ok; |
| 44 | } | 43 | } |
| 45 | 44 | ||
| 46 | -static jobjectArray ReadWaveImpl(JNIEnv *env, std::istream &is, | ||
| 47 | - const char *p_filename) { | ||
| 48 | - bool is_ok = false; | ||
| 49 | - int32_t sampling_rate = -1; | ||
| 50 | - std::vector<float> samples = | ||
| 51 | - sherpa_onnx::ReadWave(is, &sampling_rate, &is_ok); | ||
| 52 | - | ||
| 53 | - if (!is_ok) { | ||
| 54 | - SHERPA_ONNX_LOGE("Failed to read %s", p_filename); | ||
| 55 | - exit(-1); | ||
| 56 | - } | ||
| 57 | - | ||
| 58 | - jfloatArray samples_arr = env->NewFloatArray(samples.size()); | ||
| 59 | - env->SetFloatArrayRegion(samples_arr, 0, samples.size(), samples.data()); | ||
| 60 | - | ||
| 61 | - jobjectArray obj_arr = (jobjectArray)env->NewObjectArray( | ||
| 62 | - 2, env->FindClass("java/lang/Object"), nullptr); | ||
| 63 | - | ||
| 64 | - env->SetObjectArrayElement(obj_arr, 0, samples_arr); | ||
| 65 | - env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, sampling_rate)); | ||
| 66 | - | ||
| 67 | - return obj_arr; | ||
| 68 | -} | ||
| 69 | - | ||
| 70 | -SHERPA_ONNX_EXTERN_C | ||
| 71 | -JNIEXPORT jobjectArray JNICALL | ||
| 72 | -Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromFile( | ||
| 73 | - JNIEnv *env, jclass /*cls*/, jstring filename) { | ||
| 74 | - const char *p_filename = env->GetStringUTFChars(filename, nullptr); | ||
| 75 | - std::ifstream is(p_filename, std::ios::binary); | ||
| 76 | - | ||
| 77 | - auto obj_arr = ReadWaveImpl(env, is, p_filename); | ||
| 78 | - | ||
| 79 | - env->ReleaseStringUTFChars(filename, p_filename); | ||
| 80 | - | ||
| 81 | - return obj_arr; | ||
| 82 | -} | ||
| 83 | - | ||
| 84 | -SHERPA_ONNX_EXTERN_C | ||
| 85 | -JNIEXPORT jobjectArray JNICALL | ||
| 86 | -Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromAsset( | ||
| 87 | - JNIEnv *env, jclass /*cls*/, jobject asset_manager, jstring filename) { | ||
| 88 | - const char *p_filename = env->GetStringUTFChars(filename, nullptr); | ||
| 89 | -#if __ANDROID_API__ >= 9 | ||
| 90 | - AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); | ||
| 91 | - if (!mgr) { | ||
| 92 | - SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); | ||
| 93 | - exit(-1); | ||
| 94 | - } | ||
| 95 | - std::vector<char> buffer = sherpa_onnx::ReadFile(mgr, p_filename); | ||
| 96 | - | ||
| 97 | - std::istrstream is(buffer.data(), buffer.size()); | ||
| 98 | -#else | ||
| 99 | - std::ifstream is(p_filename, std::ios::binary); | ||
| 100 | -#endif | ||
| 101 | - | ||
| 102 | - auto obj_arr = ReadWaveImpl(env, is, p_filename); | ||
| 103 | - | ||
| 104 | - env->ReleaseStringUTFChars(filename, p_filename); | ||
| 105 | - | ||
| 106 | - return obj_arr; | ||
| 107 | -} | ||
| 108 | - | ||
| 109 | #if 0 | 45 | #if 0 |
| 110 | SHERPA_ONNX_EXTERN_C | 46 | SHERPA_ONNX_EXTERN_C |
| 111 | JNIEXPORT void JNICALL | 47 | JNIEXPORT void JNICALL |
sherpa-onnx/jni/wave-reader.cc
0 → 100644
| 1 | +// sherpa-onnx/jni/wave-reader.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 4 | +#include "sherpa-onnx/csrc/wave-reader.h" | ||
| 5 | + | ||
| 6 | +#include <fstream> | ||
| 7 | + | ||
| 8 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 9 | +#include "sherpa-onnx/jni/common.h" | ||
| 10 | + | ||
| 11 | +static jobjectArray ReadWaveImpl(JNIEnv *env, std::istream &is, | ||
| 12 | + const char *p_filename) { | ||
| 13 | + bool is_ok = false; | ||
| 14 | + int32_t sampling_rate = -1; | ||
| 15 | + std::vector<float> samples = | ||
| 16 | + sherpa_onnx::ReadWave(is, &sampling_rate, &is_ok); | ||
| 17 | + | ||
| 18 | + if (!is_ok) { | ||
| 19 | + SHERPA_ONNX_LOGE("Failed to read '%s'", p_filename); | ||
| 20 | + exit(-1); | ||
| 21 | + } | ||
| 22 | + | ||
| 23 | + jfloatArray samples_arr = env->NewFloatArray(samples.size()); | ||
| 24 | + env->SetFloatArrayRegion(samples_arr, 0, samples.size(), samples.data()); | ||
| 25 | + | ||
| 26 | + jobjectArray obj_arr = (jobjectArray)env->NewObjectArray( | ||
| 27 | + 2, env->FindClass("java/lang/Object"), nullptr); | ||
| 28 | + | ||
| 29 | + env->SetObjectArrayElement(obj_arr, 0, samples_arr); | ||
| 30 | + env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, sampling_rate)); | ||
| 31 | + | ||
| 32 | + return obj_arr; | ||
| 33 | +} | ||
| 34 | + | ||
| 35 | +SHERPA_ONNX_EXTERN_C | ||
| 36 | +JNIEXPORT jobjectArray JNICALL | ||
| 37 | +Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromFile( | ||
| 38 | + JNIEnv *env, jclass /*cls*/, jstring filename) { | ||
| 39 | + const char *p_filename = env->GetStringUTFChars(filename, nullptr); | ||
| 40 | + std::ifstream is(p_filename, std::ios::binary); | ||
| 41 | + | ||
| 42 | + auto obj_arr = ReadWaveImpl(env, is, p_filename); | ||
| 43 | + | ||
| 44 | + env->ReleaseStringUTFChars(filename, p_filename); | ||
| 45 | + | ||
| 46 | + return obj_arr; | ||
| 47 | +} | ||
| 48 | + | ||
| 49 | +SHERPA_ONNX_EXTERN_C | ||
| 50 | +JNIEXPORT jobjectArray JNICALL | ||
| 51 | +Java_com_k2fsa_sherpa_onnx_WaveReader_readWaveFromFile(JNIEnv *env, | ||
| 52 | + jclass /*obj*/, | ||
| 53 | + jstring filename) { | ||
| 54 | + return Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromFile( | ||
| 55 | + env, nullptr, filename); | ||
| 56 | +} | ||
| 57 | + | ||
| 58 | +SHERPA_ONNX_EXTERN_C | ||
| 59 | +JNIEXPORT jobjectArray JNICALL | ||
| 60 | +Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromAsset( | ||
| 61 | + JNIEnv *env, jclass /*cls*/, jobject asset_manager, jstring filename) { | ||
| 62 | + const char *p_filename = env->GetStringUTFChars(filename, nullptr); | ||
| 63 | +#if __ANDROID_API__ >= 9 | ||
| 64 | + AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); | ||
| 65 | + if (!mgr) { | ||
| 66 | + SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); | ||
| 67 | + exit(-1); | ||
| 68 | + } | ||
| 69 | + std::vector<char> buffer = sherpa_onnx::ReadFile(mgr, p_filename); | ||
| 70 | + | ||
| 71 | + std::istrstream is(buffer.data(), buffer.size()); | ||
| 72 | +#else | ||
| 73 | + std::ifstream is(p_filename, std::ios::binary); | ||
| 74 | +#endif | ||
| 75 | + | ||
| 76 | + auto obj_arr = ReadWaveImpl(env, is, p_filename); | ||
| 77 | + | ||
| 78 | + env->ReleaseStringUTFChars(filename, p_filename); | ||
| 79 | + | ||
| 80 | + return obj_arr; | ||
| 81 | +} |
-
请 注册 或 登录 后发表评论