Fangjun Kuang
Committed by GitHub

Refactor Java API (#806)

正在显示 42 个修改的文件 包含 1004 行增加964 行删除
@@ -11,6 +11,7 @@ on: @@ -11,6 +11,7 @@ on:
11 - 'java-api-examples/**' 11 - 'java-api-examples/**'
12 - 'sherpa-onnx/csrc/*' 12 - 'sherpa-onnx/csrc/*'
13 - 'sherpa-onnx/jni/*' 13 - 'sherpa-onnx/jni/*'
  14 + - 'sherpa-onnx/java-api/**'
14 pull_request: 15 pull_request:
15 branches: 16 branches:
16 - master 17 - master
@@ -21,6 +22,7 @@ on: @@ -21,6 +22,7 @@ on:
21 - 'java-api-examples/**' 22 - 'java-api-examples/**'
22 - 'sherpa-onnx/csrc/*' 23 - 'sherpa-onnx/csrc/*'
23 - 'sherpa-onnx/jni/*' 24 - 'sherpa-onnx/jni/*'
  25 + - 'sherpa-onnx/java-api/**'
24 workflow_dispatch: 26 workflow_dispatch:
25 27
26 concurrency: 28 concurrency:
@@ -46,7 +48,7 @@ jobs: @@ -46,7 +48,7 @@ jobs:
46 - name: ccache 48 - name: ccache
47 uses: hendrikmuhs/ccache-action@v1.2 49 uses: hendrikmuhs/ccache-action@v1.2
48 with: 50 with:
49 - key: ${{ matrix.os }} 51 + key: ${{ matrix.os }}-java
50 52
51 - name: Display java version 53 - name: Display java version
52 shell: bash 54 shell: bash
@@ -54,6 +56,42 @@ jobs: @@ -54,6 +56,42 @@ jobs:
54 java -version 56 java -version
55 echo "JAVA_HOME is: ${JAVA_HOME}" 57 echo "JAVA_HOME is: ${JAVA_HOME}"
56 58
  59 + cmake --version
  60 +
  61 + - name: Build sherpa-onnx (jar)
  62 + shell: bash
  63 + run: |
  64 + cd sherpa-onnx/java-api/
  65 + make
  66 + ls -lh
  67 +
  68 + - uses: actions/upload-artifact@v4
  69 + with:
  70 + name: sherpa-onnx-jar-${{ matrix.os }}
  71 + path: sherpa-onnx/java-api/build
  72 +
  73 + - name: Build sherpa-onnx (C++)
  74 + shell: bash
  75 + run: |
  76 + export CMAKE_CXX_COMPILER_LAUNCHER=ccache
  77 + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
  78 +
  79 + mkdir build
  80 + cd build
  81 +
  82 + cmake \
  83 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  84 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  85 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  86 + -DBUILD_SHARED_LIBS=ON \
  87 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  88 + -DSHERPA_ONNX_ENABLE_BINARY=OFF \
  89 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  90 + ..
  91 +
  92 + make -j4
  93 + ls -lh lib
  94 +
57 - name: Run java test 95 - name: Run java test
58 shell: bash 96 shell: bash
59 run: | 97 run: |
@@ -62,4 +100,12 @@ jobs: @@ -62,4 +100,12 @@ jobs:
62 cmake --version 100 cmake --version
63 101
64 cd ./java-api-examples 102 cd ./java-api-examples
65 - ./runtest.sh 103 + ./run-streaming-decode-file-ctc.sh
  104 + # Delete model files to save space
  105 + rm -rf sherpa-onnx-streaming-*
  106 +
  107 + ./run-streaming-decode-file-paraformer.sh
  108 + rm -rf sherpa-onnx-streaming-*
  109 +
  110 + ./run-streaming-decode-file-transducer.sh
  111 + rm -rf sherpa-onnx-streaming-*
@@ -66,11 +66,11 @@ jobs: @@ -66,11 +66,11 @@ jobs:
66 - os: macos-14 66 - os: macos-14
67 python-version: "3.12" 67 python-version: "3.12"
68 68
69 - - os: windows-2019 69 + - os: windows-2022
70 python-version: "3.7" 70 python-version: "3.7"
71 - - os: windows-2019 71 + - os: windows-2022
72 python-version: "3.8" 72 python-version: "3.8"
73 - - os: windows-2019 73 + - os: windows-2022
74 python-version: "3.9" 74 python-version: "3.9"
75 75
76 - os: windows-2022 76 - os: windows-2022
1 lib 1 lib
2 hs_err* 2 hs_err*
  3 +!run-streaming*.sh
1 -ENTRY_POINT = ./  
2 -  
3 -LIB_SRC_DIR := ../sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx  
4 -  
5 -LIB_FILES = \  
6 - $(LIB_SRC_DIR)/EndpointRule.java \  
7 - $(LIB_SRC_DIR)/EndpointConfig.java \  
8 - $(LIB_SRC_DIR)/FeatureConfig.java \  
9 - $(LIB_SRC_DIR)/OnlineLMConfig.java \  
10 - $(LIB_SRC_DIR)/OnlineTransducerModelConfig.java \  
11 - $(LIB_SRC_DIR)/OnlineParaformerModelConfig.java \  
12 - $(LIB_SRC_DIR)/OnlineZipformer2CtcModelConfig.java \  
13 - $(LIB_SRC_DIR)/OnlineModelConfig.java \  
14 - $(LIB_SRC_DIR)/OnlineRecognizerConfig.java \  
15 - $(LIB_SRC_DIR)/OnlineStream.java \  
16 - $(LIB_SRC_DIR)/OnlineRecognizer.java  
17 -  
18 -WEBSOCKET_DIR:= ./src/websocketsrv  
19 -WEBSOCKET_FILES = \  
20 - $(WEBSOCKET_DIR)/ConnectionData.java \  
21 - $(WEBSOCKET_DIR)/DecoderThreadHandler.java \  
22 - $(WEBSOCKET_DIR)/StreamThreadHandler.java \  
23 - $(WEBSOCKET_DIR)/AsrWebsocketServer.java \  
24 - $(WEBSOCKET_DIR)/AsrWebsocketClient.java \  
25 -  
26 -  
27 -LIB_BUILD_DIR = ./lib  
28 -  
29 -  
30 -EXAMPLE_FILE = DecodeFile.java  
31 -  
32 -EXAMPLE_Mic = DecodeMic.java  
33 -  
34 -JAVAC = javac  
35 -  
36 -BUILD_DIR = build  
37 -  
38 -  
39 -RUNJFLAGS = -Dfile.encoding=utf-8  
40 -  
41 -vpath %.class $(BUILD_DIR)  
42 -vpath %.java src  
43 -  
44 -  
45 -buildfile:  
46 - $(JAVAC) -cp lib/sherpaonnx.jar -d $(BUILD_DIR) -encoding UTF-8 src/$(EXAMPLE_FILE)  
47 -  
48 -buildmic:  
49 - $(JAVAC) -cp lib/sherpaonnx.jar -d $(BUILD_DIR) -encoding UTF-8 src/$(EXAMPLE_Mic)  
50 -  
51 -rebuild: clean all  
52 -  
53 -.PHONY: clean run downjar  
54 -  
55 -downjar:  
56 - wget https://repo1.maven.org/maven2/org/slf4j/slf4j-api/1.7.25/slf4j-api-1.7.25.jar -P ./lib/  
57 - wget https://repo1.maven.org/maven2/org/slf4j/slf4j-simple/1.7.25/slf4j-simple-1.7.25.jar -P ./lib/  
58 - wget https://github.com/TooTallNate/Java-WebSocket/releases/download/v1.5.3/Java-WebSocket-1.5.3.jar -P ./lib/  
59 -  
60 -  
61 -clean:  
62 - rm -frv $(BUILD_DIR)/*  
63 - rm -frv $(LIB_BUILD_DIR)/*  
64 - mkdir -p $(BUILD_DIR)  
65 - mkdir -p ./lib  
66 -  
67 -runfile: packjar buildfile  
68 - java -cp ./lib/sherpaonnx.jar:build $(RUNJFLAGS) DecodeFile test.wav  
69 -  
70 -runhotwords:  
71 - java -cp ./lib/sherpaonnx.jar:build $(RUNJFLAGS) DecodeFile hotwords.wav  
72 -  
73 -runmic:  
74 - java -cp ./lib/sherpaonnx.jar:build $(RUNJFLAGS) DecodeMic  
75 -  
76 -runsrv:  
77 - java -cp $(BUILD_DIR):lib/Java-WebSocket-1.5.3.jar:lib/slf4j-simple-1.7.25.jar:lib/slf4j-api-1.7.25.jar:../lib/sherpaonnx.jar $(RUNJFLAGS) websocketsrv.AsrWebsocketServer $(shell pwd)/../build/lib/libsherpa-onnx-jni.so ./modeltest.cfg  
78 -  
79 -runclient:  
80 - java -cp $(BUILD_DIR):lib/Java-WebSocket-1.5.3.jar:lib/slf4j-simple-1.7.25.jar:lib/slf4j-api-1.7.25.jar:../lib/sherpaonnx.jar $(RUNJFLAGS) websocketsrv.AsrWebsocketClient $(shell pwd)/../build/lib/libsherpa-onnx-jni.so 127.0.0.1 8890 ./test.wav 32  
81 -  
82 -runclienthotwords:  
83 - java -cp $(BUILD_DIR):lib/Java-WebSocket-1.5.3.jar:lib/slf4j-simple-1.7.25.jar:lib/slf4j-api-1.7.25.jar:../lib/sherpaonnx.jar $(RUNJFLAGS) websocketsrv.AsrWebsocketClient $(shell pwd)/../build/lib/libsherpa-onnx-jni.so 127.0.0.1 8890 ./hotwords.wav 32  
84 -  
85 -buildlib: $(LIB_FILES:.java=.class)  
86 -  
87 -  
88 -%.class: %.java  
89 - $(JAVAC) -cp $(BUILD_DIR) -d $(BUILD_DIR) -encoding UTF-8 $<  
90 -  
91 -buildwebsocket: $(WEBSOCKET_FILES:.java=.class)  
92 -  
93 -  
94 -%.class: %.java  
95 -  
96 - $(JAVAC) -cp $(BUILD_DIR):lib/slf4j-simple-1.7.25.jar:lib/slf4j-api-1.7.25.jar:lib/Java-WebSocket-1.5.3.jar:../lib/sherpaonnx.jar -d $(BUILD_DIR) -encoding UTF-8 $<  
97 -  
98 -packjar: buildlib  
99 - jar cvfe lib/sherpaonnx.jar . -C $(BUILD_DIR) .  
100 -  
101 -all: clean buildlib packjar buildfile buildmic downjar buildwebsocket  
1 -0.Introduction  
2 --------------- 1 +# Introduction
3 2
4 -Java wrapper `com.k2fsa.sherpa.onnx.OnlineRecognizer` for `sherpa-onnx`. Java is a cross-platform language; you can build jni .so lib according to your system, and then use the same java api for all your platform.  
5 -now support multiple threads for websocket server 3 +This directory contains examples for the JAVA API of sherpa-onnx.
6 4
7 -```xml  
8 -Depend on:  
9 - Openjdk 1.8  
10 -```  
11 -  
12 ----  
13 -  
14 -1.Compile libsherpa-onnx-jni.so  
15 --------------------------------  
16 -  
17 -Compile sherpa-onnx/jni/jni.cc according to your system.  
18 -Example for Ubuntu 18.04 LTS, Openjdk 1.8.0_362:  
19 -  
20 -```xml  
21 - git clone https://github.com/k2-fsa/sherpa-onnx  
22 - cd sherpa-onnx  
23 - mkdir build  
24 - cd build  
25 - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DSHERPA_ONNX_ENABLE_JNI=ON ..  
26 - make -j6  
27 -```  
28 -  
29 ----  
30 -  
31 -2.Download asr model files  
32 ---------------------------  
33 -  
34 -[click here for more detail](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html)  
35 ---------------------------  
36 -  
37 -3.Config model config.cfg  
38 --------------------------  
39 -/**change model path in config.cfg according to your env**/  
40 -```xml  
41 - #model config  
42 - sample_rate=16000  
43 - feature_dim=80  
44 - rule1_min_trailing_silence=2.4  
45 - rule2_min_trailing_silence=1.2  
46 - rule3_min_utterance_length=20  
47 - encoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx  
48 - decoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx  
49 - joiner=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx  
50 - tokens=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt  
51 - num_threads=4  
52 - enable_endpoint_detection=false  
53 - decoding_method=greedy_search  
54 - max_active_paths=4  
55 -  
56 - #websocket server config  
57 - port=8890  
58 - #number of threads pool for network io  
59 - connection_thread_num=16  
60 - #number of threads pool for stream  
61 - stream_thread_num=16  
62 - #number of threads pool for decoder  
63 - decoder_thread_num=16  
64 - #size of streams for parallel decoding  
65 - parallel_decoder_num=16  
66 - #time(ms) idle for decoder thread when no job  
67 - decoder_time_idle=10  
68 - #time(ms) out for connection data  
69 - deocder_time_out=3000  
70 -``` 5 +# Usage
71 6
72 ----  
73 -  
74 -4.A simple java example  
75 ------------------------  
76 -  
77 -refer to [java_api_example](https://github.com/k2-fsa/sherpa-onnx/blob/master/java-api-examples/src/DecodeFile.java) for more detail.  
78 -  
79 -```java  
80 - import com.k2fsa.sherpa.onnx.OnlineRecognizer;  
81 - import com.k2fsa.sherpa.onnx.OnlineStream;  
82 - String cfgpath=appdir+"/modelconfig.cfg";  
83 - OnlineRecognizer.setSoPath(soPath); //set so lib path  
84 -  
85 - OnlineRecognizer rcgOjb = new OnlineRecognizer(); //create a recognizer  
86 - rcgOjb = new OnlineRecognizer(cfgFile); //set model config file  
87 - CreateStream streamObj=rcgOjb.CreateStream(); //create a stream for read wav data  
88 - float[] buffer = rcgOjb.readWavFile(wavfilename); // read data from file  
89 - streamObj.acceptWaveform(buffer); // feed stream with data  
90 - streamObj.inputFinished(); // tell engine you done with all data  
91 - OnlineStream ssObj[] = new OnlineStream[1];  
92 - while (rcgOjb.isReady(streamObj)) { // engine is ready for unprocessed data  
93 - ssObj[0] = streamObj;  
94 - rcgOjb.decodeStreams(ssObj); // decode for multiple stream  
95 - // rcgOjb.DecodeStream(streamObj); // decode for single stream  
96 - }  
97 -  
98 - String recText = "simple:" + rcgOjb.getResult(streamObj) + "\n";  
99 - byte[] utf8Data = recText.getBytes(StandardCharsets.UTF_8);  
100 - System.out.println(new String(utf8Data));  
101 - rcgOjb.reSet(streamObj);  
102 - rcgOjb.releaseStream(streamObj); // release stream  
103 - rcgOjb.release(); // release recognizer  
104 ``` 7 ```
105 -  
106 ----  
107 -  
108 -5.Makefile  
109 -----------  
110 -  
111 -OS Ubuntu 18.04 LTS  
112 -Build package path: /sherpa-onnx/java-api-examples/lib/sherpaonnx.jar  
113 -  
114 -5.1 Build  
115 -  
116 -```bash  
117 - cd sherpa-onnx/java-api-examples  
118 - make all 8 +./run-streaming-decode-file-ctc.sh
  9 +./run-streaming-decode-file-paraformer.sh
  10 +./run-streaming-decode-file-transducer.sh
119 ``` 11 ```
120 -  
121 -5.2 Run DecodeFile example  
122 -  
123 -```bash  
124 - make runfile  
125 -```  
126 -  
127 -5.3 Run DecodeMic example  
128 -  
129 -```bash  
130 - make runmic  
131 -```  
132 -  
133 ----  
134 -  
135 -6.WebSocket Server  
136 -----------  
137 -  
138 -support multiple threads for websocket server  
139 -6.0 Protocol for communication  
140 -1) client connect to server  
141 -```shell  
142 - ws client -> srv ws address  
143 - ws address example: ws://127.0.0.1:8889/  
144 -```  
145 -2) client send 16k pcm_s16le binary stream data to server  
146 -```shell  
147 - PCM sampleRate 16000  
148 - single channel  
149 - sampleSize 16bit  
150 - little endian  
151 - type short  
152 -```  
153 -3) client send "Done" text to server when all data is sent  
154 -```shell  
155 - ws_socket.send("Done")  
156 -```  
157 -4) client will receive json message from server whenever asr engine decoded new text  
158 -```shell  
159 - json example: {"text":"甚至出现交易几乎停滞的情况","eof":false"}  
160 -```  
161 -  
162 -  
163 -6.1 Build  
164 -  
165 -```bash  
166 - cd sherpa-onnx/java-api-examples  
167 - make all  
168 -```  
169 -  
170 -6.2 Run srv example  
171 -  
172 -usage: AsrWebsocketServer soPath modelCfgPath  
173 -  
174 -```bash  
175 - make runsrv /**change path in Makefile according to your env**/  
176 -```  
177 -  
178 -6.3 Run multiple threads client example  
179 -  
180 -usage: AsrWebsocketClient soPath srvIp srvPort wavPath numThreads  
181 -  
182 -json result example: {"text":"甚至出现交易几乎停滞的情况","eof":"true"}  
183 -  
184 -```bash  
185 - make runclient /**change path in Makefile according to your env**/  
186 -```  
187 -  
188 -7 runtest  
189 -this script will download model, compile codes and run test  
190 -```bash  
191 - cd sherpa-onnx/java-api-examples  
192 - runtest.sh  
193 -```  
  1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
  3 +
  4 +// This file shows how to use an online CTC model, i.e., streaming CTC model,
  5 +// to decode files.
  6 +import com.k2fsa.sherpa.onnx.*;
  7 +
  8 +public class StreamingDecodeFileCtc {
  9 + public static void main(String[] args) {
  10 + // please refer to
  11 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  12 + // to download model files
  13 + String model =
  14 + "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
  15 + String tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
  16 + String waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
  17 +
  18 + WaveReader reader = new WaveReader(waveFilename);
  19 + System.out.println(reader.getSampleRate());
  20 + System.out.println(reader.getSamples().length);
  21 +
  22 + OnlineZipformer2CtcModelConfig ctc =
  23 + OnlineZipformer2CtcModelConfig.builder().setModel(model).build();
  24 +
  25 + OnlineModelConfig modelConfig =
  26 + OnlineModelConfig.builder()
  27 + .setZipformer2Ctc(ctc)
  28 + .setTokens(tokens)
  29 + .setNumThreads(1)
  30 + .setDebug(true)
  31 + .build();
  32 +
  33 + OnlineRecognizerConfig config =
  34 + OnlineRecognizerConfig.builder()
  35 + .setOnlineModelConfig(modelConfig)
  36 + .setDecodingMethod("greedy_search")
  37 + .build();
  38 +
  39 + OnlineRecognizer recognizer = new OnlineRecognizer(config);
  40 + OnlineStream stream = recognizer.createStream();
  41 + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
  42 +
  43 + float[] tailPaddings = new float[(int) (0.3 * reader.getSampleRate())];
  44 + stream.acceptWaveform(tailPaddings, reader.getSampleRate());
  45 +
  46 + while (recognizer.isReady(stream)) {
  47 + recognizer.decode(stream);
  48 + }
  49 +
  50 + String text = recognizer.getResult(stream).getText();
  51 +
  52 + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
  53 +
  54 + stream.release();
  55 + recognizer.release();
  56 + }
  57 +}
  1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
  3 +
  4 +// This file shows how to use an online paraformer, i.e., streaming paraformer,
  5 +// to decode files.
  6 +import com.k2fsa.sherpa.onnx.*;
  7 +
  8 +public class StreamingDecodeFileParaformer {
  9 + public static void main(String[] args) {
  10 + // please refer to
  11 + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
  12 + // to download model files
  13 + String encoder = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx";
  14 + String decoder = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx";
  15 + String tokens = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt";
  16 + String waveFilename = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav";
  17 +
  18 + WaveReader reader = new WaveReader(waveFilename);
  19 + System.out.println(reader.getSampleRate());
  20 + System.out.println(reader.getSamples().length);
  21 +
  22 + OnlineParaformerModelConfig paraformer =
  23 + OnlineParaformerModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build();
  24 +
  25 + OnlineModelConfig modelConfig =
  26 + OnlineModelConfig.builder()
  27 + .setParaformer(paraformer)
  28 + .setTokens(tokens)
  29 + .setNumThreads(1)
  30 + .setDebug(true)
  31 + .build();
  32 +
  33 + OnlineRecognizerConfig config =
  34 + OnlineRecognizerConfig.builder()
  35 + .setOnlineModelConfig(modelConfig)
  36 + .setDecodingMethod("greedy_search")
  37 + .build();
  38 +
  39 + OnlineRecognizer recognizer = new OnlineRecognizer(config);
  40 + OnlineStream stream = recognizer.createStream();
  41 + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
  42 +
  43 + float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())];
  44 + stream.acceptWaveform(tailPaddings, reader.getSampleRate());
  45 +
  46 + while (recognizer.isReady(stream)) {
  47 + recognizer.decode(stream);
  48 + }
  49 +
  50 + String text = recognizer.getResult(stream).getText();
  51 +
  52 + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
  53 +
  54 + stream.release();
  55 + recognizer.release();
  56 + }
  57 +}
  1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
  3 +
  4 +// This file shows how to use an online transducer, i.e., streaming transducer,
  5 +// to decode files.
  6 +import com.k2fsa.sherpa.onnx.*;
  7 +
  8 +public class StreamingDecodeFileTransducer {
  9 + public static void main(String[] args) {
  10 + // please refer to
  11 + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
  12 + // to download model files
  13 + String encoder =
  14 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx";
  15 + String decoder =
  16 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx";
  17 + String joiner =
  18 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx";
  19 + String tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
  20 +
  21 + String waveFilename =
  22 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav";
  23 +
  24 + WaveReader reader = new WaveReader(waveFilename);
  25 + System.out.println(reader.getSampleRate());
  26 + System.out.println(reader.getSamples().length);
  27 +
  28 + OnlineTransducerModelConfig transducer =
  29 + OnlineTransducerModelConfig.builder()
  30 + .setEncoder(encoder)
  31 + .setDecoder(decoder)
  32 + .setJoiner(joiner)
  33 + .build();
  34 +
  35 + OnlineModelConfig modelConfig =
  36 + OnlineModelConfig.builder()
  37 + .setTransducer(transducer)
  38 + .setTokens(tokens)
  39 + .setNumThreads(1)
  40 + .setDebug(true)
  41 + .build();
  42 +
  43 + OnlineRecognizerConfig config =
  44 + OnlineRecognizerConfig.builder()
  45 + .setOnlineModelConfig(modelConfig)
  46 + .setDecodingMethod("greedy_search")
  47 + .build();
  48 +
  49 + OnlineRecognizer recognizer = new OnlineRecognizer(config);
  50 + OnlineStream stream = recognizer.createStream();
  51 + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
  52 +
  53 + float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())];
  54 + stream.acceptWaveform(tailPaddings, reader.getSampleRate());
  55 +
  56 + while (recognizer.isReady(stream)) {
  57 + recognizer.decode(stream);
  58 + }
  59 +
  60 + String text = recognizer.getResult(stream).getText();
  61 +
  62 + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
  63 +
  64 + stream.release();
  65 + recognizer.release();
  66 + }
  67 +}
1 -#model config  
2 -sample_rate=16000  
3 -feature_dim=80  
4 -rule1_min_trailing_silence=2.4  
5 -rule2_min_trailing_silence=1.2  
6 -rule3_min_utterance_length=20  
7 -encoder=/sherpa/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx  
8 -decoder=/sherpa/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx  
9 -joiner=/sherpa/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx  
10 -tokens=/sherpa/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt  
11 -num_threads=4  
12 -enable_endpoint_detection=true  
13 -decoding_method=modified_beam_search  
14 -max_active_paths=4  
15 -hotwords_file=  
16 -hotwords_score=1.5  
17 -lm_model=  
18 -lm_scale=0.5  
19 -model_type=zipformer  
20 -  
21 -#websocket server config  
22 -port=8890  
23 -connection_thread_num=16  
24 -stream_thread_num=16  
25 -decoder_thread_num=16  
26 -parallel_decoder_num=16  
27 -decoder_time_idle=200  
28 -deocder_time_out=30000  
  1 +#!/usr/bin/env bash
  2 +set -ex
  3 +
  4 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  5 + mkdir -p ../build
  6 + pushd ../build
  7 + cmake \
  8 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  9 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  10 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  11 + -DBUILD_SHARED_LIBS=ON \
  12 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  13 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  14 + ..
  15 +
  16 + make -j4
  17 + ls -lh lib
  18 + popd
  19 +fi
  20 +
  21 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  22 + pushd ../sherpa-onnx/java-api
  23 + make
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  29 + tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  30 + rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  31 +fi
  32 +
  33 +java \
  34 + -Djava.library.path=$PWD/../build/lib \
  35 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  36 + StreamingDecodeFileCtc.java
  1 +#!/usr/bin/env bash
  2 +set -ex
  3 +
  4 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  5 + mkdir -p ../build
  6 + pushd ../build
  7 + cmake \
  8 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  9 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  10 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  11 + -DBUILD_SHARED_LIBS=ON \
  12 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  13 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  14 + ..
  15 +
  16 + make -j4
  17 + ls -lh lib
  18 + popd
  19 +fi
  20 +
  21 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  22 + pushd ../sherpa-onnx/java-api
  23 + make
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  29 + tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  30 + rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  31 +fi
  32 +
  33 +java \
  34 + -Djava.library.path=$PWD/../build/lib \
  35 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  36 + StreamingDecodeFileParaformer.java
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  6 + mkdir -p ../build
  7 + pushd ../build
  8 + cmake \
  9 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  10 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  11 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  12 + -DBUILD_SHARED_LIBS=ON \
  13 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  14 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  15 + ..
  16 +
  17 + make -j4
  18 + ls -lh lib
  19 + popd
  20 +fi
  21 +
  22 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  23 + pushd ../sherpa-onnx/java-api
  24 + make
  25 + popd
  26 +fi
  27 +
  28 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  29 + cmake \
  30 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  31 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  32 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  33 + -DBUILD_SHARED_LIBS=ON \
  34 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  35 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  36 + ..
  37 +
  38 + make -j4
  39 + ls -lh lib
  40 +fi
  41 +
  42 +if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  43 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  44 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  45 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  46 +fi
  47 +
  48 +java \
  49 + -Djava.library.path=$PWD/../build/lib \
  50 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  51 + StreamingDecodeFileTransducer.java
1 -#!/usr/bin/env bash  
2 -#  
3 -# This scripts shows how to test java for sherpa-onnx  
4 -# Note: This scripts runs only on Linux and macOS  
5 -  
6 -set -e  
7 -  
8 -log() {  
9 - # This function is from espnet  
10 - local fname=${BASH_SOURCE[1]##*/}  
11 - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"  
12 -}  
13 -  
14 -  
15 -  
16 -  
17 -echo "PATH: $PATH"  
18 -  
19 -  
20 -  
21 -  
22 -  
23 -log "------------------------------------------------------------"  
24 -log "Run download model"  
25 -log "------------------------------------------------------------"  
26 -  
27 -repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20  
28 -log "Start testing ${repo_url}"  
29 -repo=$(basename $repo_url)  
30 -log "download dir is $(basename $repo_url)"  
31 -if [ ! -d $repo ];then  
32 - log "Download pretrained model and test-data from $repo_url"  
33 -  
34 - GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url  
35 - pushd $repo  
36 - git lfs pull --include "*.onnx"  
37 - ls -lh *.onnx  
38 - popd  
39 - ln -s $repo/test_wavs/0.wav hotwords.wav  
40 -  
41 -fi  
42 -  
43 -log $(pwd)  
44 -  
45 -sed -e 's?/sherpa/?'$(pwd)'/?g' modelconfig.cfg > modeltest.cfg  
46 -  
47 -log "display model cfg"  
48 -cat modeltest.cfg  
49 -  
50 -cd ..  
51 -  
52 -export JAVA_HOME=$(readlink -f /usr/bin/javac | sed "s:/bin/javac::")  
53 -  
54 -mkdir -p build  
55 -cd build  
56 -  
57 -cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DSHERPA_ONNX_ENABLE_JNI=ON ..  
58 -  
59 -make -j4  
60 -ls -lh lib  
61 -  
62 -export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH  
63 -  
64 -cd ../java-api-examples  
65 -  
66 -make all  
67 -  
68 -make runfile  
69 -  
70 -echo "礼 拜 二" > hotwords.txt  
71 -  
72 -sed -i 's/hotwords_file=/hotwords_file=hotwords.txt/g' modeltest.cfg  
73 -  
74 -make runhotwords  
不能预览此文件类型
@@ -6,11 +6,9 @@ @@ -6,11 +6,9 @@
6 6
7 set -ex 7 set -ex
8 8
9 -cd ..  
10 -mkdir -p build  
11 -cd build  
12 -  
13 if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then 9 if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  10 + mkdir -p ../build
  11 + pushd ../build
14 cmake \ 12 cmake \
15 -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ 13 -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
16 -DSHERPA_ONNX_ENABLE_TESTS=OFF \ 14 -DSHERPA_ONNX_ENABLE_TESTS=OFF \
@@ -22,12 +20,11 @@ if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa @@ -22,12 +20,11 @@ if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa
22 20
23 make -j4 21 make -j4
24 ls -lh lib 22 ls -lh lib
  23 + popd
25 fi 24 fi
26 25
27 export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH 26 export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH
28 27
29 -cd ../kotlin-api-examples  
30 -  
31 function testSpeakerEmbeddingExtractor() { 28 function testSpeakerEmbeddingExtractor() {
32 if [ ! -f ./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx ]; then 29 if [ ! -f ./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx ]; then
33 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx 30 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx
@@ -253,7 +253,7 @@ int32_t main(int32_t argc, char *argv[]) { @@ -253,7 +253,7 @@ int32_t main(int32_t argc, char *argv[]) {
253 sherpa_onnx::ReadWave(wave_filename, &actual_sample_rate, &is_ok); 253 sherpa_onnx::ReadWave(wave_filename, &actual_sample_rate, &is_ok);
254 254
255 if (!is_ok) { 255 if (!is_ok) {
256 - SHERPA_ONNX_LOGE("Failed to read %s", wave_filename.c_str()); 256 + SHERPA_ONNX_LOGE("Failed to read '%s'", wave_filename.c_str());
257 return -1; 257 return -1;
258 } 258 }
259 259
@@ -96,7 +96,7 @@ static std::vector<std::vector<float>> ComputeEmbeddings( @@ -96,7 +96,7 @@ static std::vector<std::vector<float>> ComputeEmbeddings(
96 sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok); 96 sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok);
97 97
98 if (!is_ok) { 98 if (!is_ok) {
99 - fprintf(stderr, "Failed to read %s\n", f.c_str()); 99 + fprintf(stderr, "Failed to read '%s'\n", f.c_str());
100 exit(-1); 100 exit(-1);
101 } 101 }
102 102
@@ -78,7 +78,7 @@ for a list of pre-trained models to download. @@ -78,7 +78,7 @@ for a list of pre-trained models to download.
78 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); 78 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
79 79
80 if (!is_ok) { 80 if (!is_ok) {
81 - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); 81 + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
82 return -1; 82 return -1;
83 } 83 }
84 84
@@ -93,7 +93,7 @@ static std::vector<std::vector<float>> ComputeEmbeddings( @@ -93,7 +93,7 @@ static std::vector<std::vector<float>> ComputeEmbeddings(
93 sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok); 93 sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok);
94 94
95 if (!is_ok) { 95 if (!is_ok) {
96 - fprintf(stderr, "Failed to read %s\n", f.c_str()); 96 + fprintf(stderr, "Failed to read '%s'\n", f.c_str());
97 exit(-1); 97 exit(-1);
98 } 98 }
99 99
@@ -58,7 +58,7 @@ for more models. @@ -58,7 +58,7 @@ for more models.
58 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); 58 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
59 59
60 if (!is_ok) { 60 if (!is_ok) {
61 - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); 61 + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
62 return -1; 62 return -1;
63 } 63 }
64 64
@@ -73,7 +73,7 @@ for a list of pre-trained models to download. @@ -73,7 +73,7 @@ for a list of pre-trained models to download.
73 const std::vector<float> samples = 73 const std::vector<float> samples =
74 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); 74 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
75 if (!is_ok) { 75 if (!is_ok) {
76 - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); 76 + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
77 return -1; 77 return -1;
78 } 78 }
79 float duration = samples.size() / static_cast<float>(sampling_rate); 79 float duration = samples.size() / static_cast<float>(sampling_rate);
@@ -69,7 +69,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths, @@ -69,7 +69,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
69 const std::vector<float> samples = 69 const std::vector<float> samples =
70 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); 70 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
71 if (!is_ok) { 71 if (!is_ok) {
72 - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); 72 + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
73 continue; 73 continue;
74 } 74 }
75 duration += samples.size() / static_cast<float>(sampling_rate); 75 duration += samples.size() / static_cast<float>(sampling_rate);
@@ -96,7 +96,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths, @@ -96,7 +96,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
96 const std::vector<float> samples = 96 const std::vector<float> samples =
97 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); 97 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
98 if (!is_ok) { 98 if (!is_ok) {
99 - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); 99 + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
100 continue; 100 continue;
101 } 101 }
102 duration += samples.size() / static_cast<float>(sampling_rate); 102 duration += samples.size() / static_cast<float>(sampling_rate);
@@ -124,7 +124,7 @@ for a list of pre-trained models to download. @@ -124,7 +124,7 @@ for a list of pre-trained models to download.
124 const std::vector<float> samples = 124 const std::vector<float> samples =
125 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); 125 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
126 if (!is_ok) { 126 if (!is_ok) {
127 - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); 127 + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
128 return -1; 128 return -1;
129 } 129 }
130 duration += samples.size() / static_cast<float>(sampling_rate); 130 duration += samples.size() / static_cast<float>(sampling_rate);
@@ -109,7 +109,7 @@ for a list of pre-trained models to download. @@ -109,7 +109,7 @@ for a list of pre-trained models to download.
109 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok); 109 sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
110 110
111 if (!is_ok) { 111 if (!is_ok) {
112 - fprintf(stderr, "Failed to read %s\n", wav_filename.c_str()); 112 + fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
113 return -1; 113 return -1;
114 } 114 }
115 115
1 .idea 1 .idea
2 java-api.iml 2 java-api.iml
  3 +out
  4 +META-INF
  5 +build
  6 +*.jar
  1 +
  2 +# all .class and .jar files are put inside out_dir
  3 +out_dir := build
  4 +out_jar := $(out_dir)/sherpa-onnx.jar
  5 +
  6 +package_dir := com/k2fsa/sherpa/onnx
  7 +
  8 +java_files := WaveReader.java
  9 +java_files += EndpointRule.java
  10 +java_files += EndpointConfig.java
  11 +java_files += FeatureConfig.java
  12 +java_files += OnlineLMConfig.java
  13 +java_files += OnlineParaformerModelConfig.java
  14 +java_files += OnlineZipformer2CtcModelConfig.java
  15 +java_files += OnlineTransducerModelConfig.java
  16 +java_files += OnlineModelConfig.java
  17 +java_files += OnlineStream.java
  18 +java_files += OnlineRecognizerConfig.java
  19 +java_files += OnlineRecognizerResult.java
  20 +java_files += OnlineRecognizer.java
  21 +
  22 +class_files := $(java_files:%.java=%.class)
  23 +
  24 +java_files := $(addprefix src/$(package_dir)/,$(java_files))
  25 +class_files := $(addprefix $(out_dir)/$(package_dir)/,$(class_files))
  26 +
  27 +$(info -- java files $(java_files))
  28 +$(info --)
  29 +$(info -- class files $(class_files))
  30 +
  31 +.phony: all clean
  32 +
  33 +all: $(out_jar)
  34 +
  35 +$(out_jar): $(class_files)
  36 + jar --create --verbose --file $(out_jar) -C $(out_dir) .
  37 +
  38 +clean:
  39 + $(RM) -rfv $(out_dir)
  40 +
  41 +$(class_files): $(out_dir)/$(package_dir)/%.class: src/$(package_dir)/%.java
  42 + javac -d $(out_dir) --class-path $(out_dir) $<
1 -/*  
2 - * // Copyright 2022-2023 by zhaoming  
3 - */ 1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
4 3
5 package com.k2fsa.sherpa.onnx; 4 package com.k2fsa.sherpa.onnx;
6 5
7 public class EndpointConfig { 6 public class EndpointConfig {
  7 +
8 private final EndpointRule rule1; 8 private final EndpointRule rule1;
9 private final EndpointRule rule2; 9 private final EndpointRule rule2;
10 private final EndpointRule rule3; 10 private final EndpointRule rule3;
11 11
12 - public EndpointConfig(EndpointRule rule1, EndpointRule rule2, EndpointRule rule3) {  
13 - this.rule1 = rule1;  
14 - this.rule2 = rule2;  
15 - this.rule3 = rule3; 12 + private EndpointConfig(Builder builder) {
  13 + this.rule1 = builder.rule1;
  14 + this.rule2 = builder.rule2;
  15 + this.rule3 = builder.rule3;
  16 + }
  17 +
  18 + public static Builder builder() {
  19 + return new Builder();
16 } 20 }
17 21
18 public EndpointRule getRule1() { 22 public EndpointRule getRule1() {
@@ -26,4 +30,42 @@ public class EndpointConfig { @@ -26,4 +30,42 @@ public class EndpointConfig {
26 public EndpointRule getRule3() { 30 public EndpointRule getRule3() {
27 return rule3; 31 return rule3;
28 } 32 }
  33 +
  34 + public static class Builder {
  35 +
  36 + private EndpointRule rule1 = EndpointRule.builder().
  37 + setMustContainNonSilence(false).
  38 + setMinTrailingSilence(2.4f).
  39 + setMinUtteranceLength(0).
  40 + build();
  41 + private EndpointRule rule2 = EndpointRule.builder().
  42 + setMustContainNonSilence(true).
  43 + setMinTrailingSilence(1.4f).
  44 + setMinUtteranceLength(0).
  45 + build();
  46 + private EndpointRule rule3 = EndpointRule.builder().
  47 + setMustContainNonSilence(false).
  48 + setMinTrailingSilence(0.0f).
  49 + setMinUtteranceLength(20.0f).
  50 + build();
  51 +
  52 + public EndpointConfig build() {
  53 + return new EndpointConfig(this);
  54 + }
  55 +
  56 + public Builder setRule1(EndpointRule rule) {
  57 + this.rule1 = rule;
  58 + return this;
  59 + }
  60 +
  61 + public Builder setRule2(EndpointRule rule) {
  62 + this.rule2 = rule;
  63 + return this;
  64 + }
  65 +
  66 + public Builder setRul3(EndpointRule rule) {
  67 + this.rule3 = rule;
  68 + return this;
  69 + }
  70 + }
29 } 71 }
1 -/*  
2 - * // Copyright 2022-2023 by zhaoming  
3 - */  
4 - 1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
5 package com.k2fsa.sherpa.onnx; 3 package com.k2fsa.sherpa.onnx;
6 4
7 public class EndpointRule { 5 public class EndpointRule {
  6 +
8 private final boolean mustContainNonSilence; 7 private final boolean mustContainNonSilence;
9 private final float minTrailingSilence; 8 private final float minTrailingSilence;
10 private final float minUtteranceLength; 9 private final float minUtteranceLength;
11 10
12 - public EndpointRule(  
13 - boolean mustContainNonSilence, float minTrailingSilence, float minUtteranceLength) {  
14 - this.mustContainNonSilence = mustContainNonSilence;  
15 - this.minTrailingSilence = minTrailingSilence;  
16 - this.minUtteranceLength = minUtteranceLength; 11 + private EndpointRule(Builder builder) {
  12 + this.mustContainNonSilence = builder.mustContainNonSilence;
  13 + this.minTrailingSilence = builder.minTrailingSilence;
  14 + this.minUtteranceLength = builder.minUtteranceLength;
  15 + }
  16 +
  17 + public static Builder builder() {
  18 + return new Builder();
17 } 19 }
18 20
19 public float getMinTrailingSilence() { 21 public float getMinTrailingSilence() {
@@ -27,4 +29,29 @@ public class EndpointRule { @@ -27,4 +29,29 @@ public class EndpointRule {
27 public boolean getMustContainNonSilence() { 29 public boolean getMustContainNonSilence() {
28 return mustContainNonSilence; 30 return mustContainNonSilence;
29 } 31 }
30 -} 32 +
  33 + public static class Builder {
  34 + private boolean mustContainNonSilence = false;
  35 + private float minTrailingSilence = 0;
  36 + private float minUtteranceLength = 0;
  37 +
  38 + public EndpointRule build() {
  39 + return new EndpointRule(this);
  40 + }
  41 +
  42 + public Builder setMustContainNonSilence(boolean mustContainNonSilence) {
  43 + this.mustContainNonSilence = mustContainNonSilence;
  44 + return this;
  45 + }
  46 +
  47 + public Builder setMinTrailingSilence(float minTrailingSilence) {
  48 + this.minTrailingSilence = minTrailingSilence;
  49 + return this;
  50 + }
  51 +
  52 + public Builder setMinUtteranceLength(float minUtteranceLength) {
  53 + this.minUtteranceLength = minUtteranceLength;
  54 + return this;
  55 + }
  56 + }
  57 +}
1 -/*  
2 - * // Copyright 2022-2023 by zhaoming  
3 - */ 1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
4 3
5 package com.k2fsa.sherpa.onnx; 4 package com.k2fsa.sherpa.onnx;
6 5
@@ -8,9 +7,13 @@ public class FeatureConfig { @@ -8,9 +7,13 @@ public class FeatureConfig {
8 private final int sampleRate; 7 private final int sampleRate;
9 private final int featureDim; 8 private final int featureDim;
10 9
11 - public FeatureConfig(int sampleRate, int featureDim) {  
12 - this.sampleRate = sampleRate;  
13 - this.featureDim = featureDim; 10 + private FeatureConfig(Builder builder) {
  11 + this.sampleRate = builder.sampleRate;
  12 + this.featureDim = builder.featureDim;
  13 + }
  14 +
  15 + public static Builder builder() {
  16 + return new Builder();
14 } 17 }
15 18
16 public int getSampleRate() { 19 public int getSampleRate() {
@@ -20,4 +23,23 @@ public class FeatureConfig { @@ -20,4 +23,23 @@ public class FeatureConfig {
20 public int getFeatureDim() { 23 public int getFeatureDim() {
21 return featureDim; 24 return featureDim;
22 } 25 }
  26 +
  27 + public static class Builder {
  28 + private int sampleRate = 16000;
  29 + private int featureDim = 80;
  30 +
  31 + public FeatureConfig build() {
  32 + return new FeatureConfig(this);
  33 + }
  34 +
  35 + public Builder setSampleRate(int sampleRate) {
  36 + this.sampleRate = sampleRate;
  37 + return this;
  38 + }
  39 +
  40 + public Builder setFeatureDim(int featureDim) {
  41 + this.featureDim = featureDim;
  42 + return this;
  43 + }
  44 + }
23 } 45 }
1 -/*  
2 - * // Copyright 2022-2023 by zhaoming  
3 - */ 1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
4 3
5 package com.k2fsa.sherpa.onnx; 4 package com.k2fsa.sherpa.onnx;
6 5
7 public class OnlineLMConfig { 6 public class OnlineLMConfig {
  7 +
8 private final String model; 8 private final String model;
9 private final float scale; 9 private final float scale;
10 10
11 - public OnlineLMConfig(String model, float scale) {  
12 - this.model = model;  
13 - this.scale = scale; 11 + private OnlineLMConfig(Builder builder) {
  12 + this.model = builder.model;
  13 + this.scale = builder.scale;
  14 + }
  15 +
  16 + public static Builder builder() {
  17 + return new Builder();
14 } 18 }
15 19
16 public String getModel() { 20 public String getModel() {
@@ -20,4 +24,23 @@ public class OnlineLMConfig { @@ -20,4 +24,23 @@ public class OnlineLMConfig {
20 public float getScale() { 24 public float getScale() {
21 return scale; 25 return scale;
22 } 26 }
23 -} 27 +
  28 + public static class Builder {
  29 + private String model = "";
  30 + private float scale = 1.0f;
  31 +
  32 + public OnlineLMConfig build() {
  33 + return new OnlineLMConfig(this);
  34 + }
  35 +
  36 + public Builder setModel(String model) {
  37 + this.model = model;
  38 + return this;
  39 + }
  40 +
  41 + public Builder setScale(float scale) {
  42 + this.scale = scale;
  43 + return this;
  44 + }
  45 + }
  46 +}
1 -/*  
2 - * // Copyright 2022-2023 by zhaoming  
3 - */ 1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
4 3
5 package com.k2fsa.sherpa.onnx; 4 package com.k2fsa.sherpa.onnx;
6 5
7 public class OnlineModelConfig { 6 public class OnlineModelConfig {
8 - private final OnlineParaformerModelConfig paraformer;  
9 private final OnlineTransducerModelConfig transducer; 7 private final OnlineTransducerModelConfig transducer;
  8 + private final OnlineParaformerModelConfig paraformer;
10 private final OnlineZipformer2CtcModelConfig zipformer2Ctc; 9 private final OnlineZipformer2CtcModelConfig zipformer2Ctc;
11 private final String tokens; 10 private final String tokens;
12 private final int numThreads; 11 private final int numThreads;
13 private final boolean debug; 12 private final boolean debug;
14 - private final String provider = "cpu";  
15 - private String modelType = "";  
16 -  
17 - public OnlineModelConfig(  
18 - String tokens,  
19 - int numThreads,  
20 - boolean debug,  
21 - String modelType,  
22 - OnlineParaformerModelConfig paraformer,  
23 - OnlineTransducerModelConfig transducer,  
24 - OnlineZipformer2CtcModelConfig zipformer2Ctc  
25 - ) {  
26 -  
27 - this.tokens = tokens;  
28 - this.numThreads = numThreads;  
29 - this.debug = debug;  
30 - this.modelType = modelType;  
31 - this.paraformer = paraformer;  
32 - this.transducer = transducer;  
33 - this.zipformer2Ctc = zipformer2Ctc; 13 + private final String provider;
  14 + private final String modelType;
  15 + private OnlineModelConfig(Builder builder) {
  16 + this.transducer = builder.transducer;
  17 + this.paraformer = builder.paraformer;
  18 + this.zipformer2Ctc = builder.zipformer2Ctc;
  19 + this.tokens = builder.tokens;
  20 + this.numThreads = builder.numThreads;
  21 + this.debug = builder.debug;
  22 + this.provider = builder.provider;
  23 + this.modelType = builder.modelType;
  24 + }
  25 +
  26 + public static Builder builder() {
  27 + return new Builder();
34 } 28 }
35 29
36 public OnlineParaformerModelConfig getParaformer() { 30 public OnlineParaformerModelConfig getParaformer() {
@@ -41,6 +35,10 @@ public class OnlineModelConfig { @@ -41,6 +35,10 @@ public class OnlineModelConfig {
41 return transducer; 35 return transducer;
42 } 36 }
43 37
  38 + public OnlineZipformer2CtcModelConfig getZipformer2Ctc() {
  39 + return zipformer2Ctc;
  40 + }
  41 +
44 public String getTokens() { 42 public String getTokens() {
45 return tokens; 43 return tokens;
46 } 44 }
@@ -52,4 +50,67 @@ public class OnlineModelConfig { @@ -52,4 +50,67 @@ public class OnlineModelConfig {
52 public boolean getDebug() { 50 public boolean getDebug() {
53 return debug; 51 return debug;
54 } 52 }
  53 +
  54 + public String getProvider() {
  55 + return provider;
  56 + }
  57 +
  58 + public String getModelType() {
  59 + return modelType;
  60 + }
  61 +
  62 + public static class Builder {
  63 + private OnlineParaformerModelConfig paraformer = OnlineParaformerModelConfig.builder().build();
  64 + private OnlineTransducerModelConfig transducer = OnlineTransducerModelConfig.builder().build();
  65 + private OnlineZipformer2CtcModelConfig zipformer2Ctc = OnlineZipformer2CtcModelConfig.builder().build();
  66 + private String tokens = "";
  67 + private int numThreads = 1;
  68 + private boolean debug = true;
  69 + private String provider = "cpu";
  70 + private String modelType = "";
  71 +
  72 + public OnlineModelConfig build() {
  73 + return new OnlineModelConfig(this);
  74 + }
  75 +
  76 + public Builder setTransducer(OnlineTransducerModelConfig transducer) {
  77 + this.transducer = transducer;
  78 + return this;
  79 + }
  80 +
  81 + public Builder setParaformer(OnlineParaformerModelConfig paraformer) {
  82 + this.paraformer = paraformer;
  83 + return this;
  84 + }
  85 +
  86 + public Builder setZipformer2Ctc(OnlineZipformer2CtcModelConfig zipformer2Ctc) {
  87 + this.zipformer2Ctc = zipformer2Ctc;
  88 + return this;
  89 + }
  90 +
  91 + public Builder setTokens(String tokens) {
  92 + this.tokens = tokens;
  93 + return this;
  94 + }
  95 +
  96 + public Builder setNumThreads(int numThreads) {
  97 + this.numThreads = numThreads;
  98 + return this;
  99 + }
  100 +
  101 + public Builder setDebug(boolean debug) {
  102 + this.debug = debug;
  103 + return this;
  104 + }
  105 +
  106 + public Builder setProvider(String provider) {
  107 + this.provider = provider;
  108 + return this;
  109 + }
  110 +
  111 + public Builder setModelType(String modelType) {
  112 + this.modelType = modelType;
  113 + return this;
  114 + }
  115 + }
55 } 116 }
1 -/*  
2 - * // Copyright 2022-2023 by zhaoming  
3 - */ 1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
4 3
5 package com.k2fsa.sherpa.onnx; 4 package com.k2fsa.sherpa.onnx;
6 5
@@ -8,9 +7,13 @@ public class OnlineParaformerModelConfig { @@ -8,9 +7,13 @@ public class OnlineParaformerModelConfig {
8 private final String encoder; 7 private final String encoder;
9 private final String decoder; 8 private final String decoder;
10 9
11 - public OnlineParaformerModelConfig(String encoder, String decoder) {  
12 - this.encoder = encoder;  
13 - this.decoder = decoder; 10 + private OnlineParaformerModelConfig(Builder builder) {
  11 + this.encoder = builder.encoder;
  12 + this.decoder = builder.decoder;
  13 + }
  14 +
  15 + public static Builder builder() {
  16 + return new Builder();
14 } 17 }
15 18
16 public String getEncoder() { 19 public String getEncoder() {
@@ -20,4 +23,23 @@ public class OnlineParaformerModelConfig { @@ -20,4 +23,23 @@ public class OnlineParaformerModelConfig {
20 public String getDecoder() { 23 public String getDecoder() {
21 return decoder; 24 return decoder;
22 } 25 }
  26 +
  27 + public static class Builder {
  28 + private String encoder = "";
  29 + private String decoder = "";
  30 +
  31 + public OnlineParaformerModelConfig build() {
  32 + return new OnlineParaformerModelConfig(this);
  33 + }
  34 +
  35 + public Builder setEncoder(String encoder) {
  36 + this.encoder = encoder;
  37 + return this;
  38 + }
  39 +
  40 + public Builder setDecoder(String decoder) {
  41 + this.decoder = decoder;
  42 + return this;
  43 + }
  44 + }
23 } 45 }
1 -/*  
2 - * // Copyright 2022-2023 by zhaoming  
3 - * // the online recognizer for sherpa-onnx, it can load config from a file  
4 - * // or by argument  
5 - */  
6 -/*  
7 -usage example:  
8 -  
9 - String cfgpath=appdir+"/modelconfig.cfg";  
10 - OnlineRecognizer.setSoPath(soPath); //set so lib path  
11 -  
12 - OnlineRecognizer rcgOjb = new OnlineRecognizer(); //create a recognizer  
13 - rcgOjb = new OnlineRecognizer(cfgFile); //set model config file  
14 - CreateStream streamObj=rcgOjb.CreateStream(); //create a stream for read wav data  
15 - float[] buffer = rcgOjb.readWavFile(wavfilename); // read data from file  
16 - streamObj.acceptWaveform(buffer); // feed stream with data  
17 - streamObj.inputFinished(); // tell engine you done with all data  
18 - OnlineStream ssObj[] = new OnlineStream[1];  
19 - while (rcgOjb.isReady(streamObj)) { // engine is ready for unprocessed data  
20 - ssObj[0] = streamObj;  
21 - rcgOjb.decodeStreams(ssObj); // decode for multiple stream  
22 - // rcgOjb.DecodeStream(streamObj); // decode for single stream  
23 - }  
24 -  
25 - String recText = "simple:" + rcgOjb.getResult(streamObj) + "\n";  
26 - byte[] utf8Data = recText.getBytes(StandardCharsets.UTF_8);  
27 - System.out.println(new String(utf8Data));  
28 - rcgOjb.reSet(streamObj);  
29 - rcgOjb.releaseStream(streamObj); // release stream  
30 - rcgOjb.release(); // release recognizer  
31 -  
32 -*/ 1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
33 package com.k2fsa.sherpa.onnx; 3 package com.k2fsa.sherpa.onnx;
34 4
35 -import java.io.BufferedInputStream;  
36 -import java.io.File;  
37 -import java.io.FileInputStream;  
38 -import java.io.InputStream;  
39 -import java.util.Enumeration;  
40 -import java.util.HashMap;  
41 -import java.util.Map;  
42 -import java.util.Properties;  
43 5
44 public class OnlineRecognizer { 6 public class OnlineRecognizer {
45 - private long ptr = 0; // this is the asr engine ptrss  
46 -  
47 - private int sampleRate = 16000;  
48 -  
49 - // load config file for OnlineRecognizer  
50 - public OnlineRecognizer(String modelCfgPath) {  
51 - Map<String, String> proMap = this.readProperties(modelCfgPath);  
52 - try {  
53 - int sampleRate = Integer.parseInt(proMap.getOrDefault("sample_rate", "16000").trim());  
54 - this.sampleRate = sampleRate;  
55 - EndpointRule rule1 =  
56 - new EndpointRule(  
57 - false,  
58 - Float.parseFloat(proMap.getOrDefault("rule1_min_trailing_silence", "2.4").trim()),  
59 - 0.0F);  
60 - EndpointRule rule2 =  
61 - new EndpointRule(  
62 - true,  
63 - Float.parseFloat(proMap.getOrDefault("rule2_min_trailing_silence", "1.2").trim()),  
64 - 0.0F);  
65 - EndpointRule rule3 =  
66 - new EndpointRule(  
67 - false,  
68 - 0.0F,  
69 - Float.parseFloat(proMap.getOrDefault("rule3_min_utterance_length", "20").trim()));  
70 - EndpointConfig endCfg = new EndpointConfig(rule1, rule2, rule3);  
71 -  
72 - OnlineParaformerModelConfig modelParaCfg =  
73 - new OnlineParaformerModelConfig(  
74 - proMap.getOrDefault("encoder", "").trim(), proMap.getOrDefault("decoder", "").trim());  
75 - OnlineTransducerModelConfig modelTranCfg =  
76 - new OnlineTransducerModelConfig(  
77 - proMap.getOrDefault("encoder", "").trim(),  
78 - proMap.getOrDefault("decoder", "").trim(),  
79 - proMap.getOrDefault("joiner", "").trim());  
80 - OnlineZipformer2CtcModelConfig zipformer2CtcConfig = new OnlineZipformer2CtcModelConfig("");  
81 - OnlineModelConfig modelCfg =  
82 - new OnlineModelConfig(  
83 - proMap.getOrDefault("tokens", "").trim(),  
84 - Integer.parseInt(proMap.getOrDefault("num_threads", "4").trim()),  
85 - false,  
86 - proMap.getOrDefault("model_type", "zipformer").trim(),  
87 - modelParaCfg,  
88 - modelTranCfg, zipformer2CtcConfig);  
89 - FeatureConfig featConfig =  
90 - new FeatureConfig(  
91 - sampleRate, Integer.parseInt(proMap.getOrDefault("feature_dim", "80").trim()));  
92 - OnlineLMConfig onlineLmConfig =  
93 - new OnlineLMConfig(  
94 - proMap.getOrDefault("lm_model", "").trim(),  
95 - Float.parseFloat(proMap.getOrDefault("lm_scale", "0.5").trim()));  
96 -  
97 - OnlineRecognizerConfig rcgCfg =  
98 - new OnlineRecognizerConfig(  
99 - featConfig,  
100 - modelCfg,  
101 - endCfg,  
102 - onlineLmConfig,  
103 - Boolean.parseBoolean(proMap.getOrDefault("enable_endpoint_detection", "true").trim()),  
104 - proMap.getOrDefault("decoding_method", "modified_beam_search").trim(),  
105 - Integer.parseInt(proMap.getOrDefault("max_active_paths", "4").trim()),  
106 - proMap.getOrDefault("hotwords_file", "").trim(),  
107 - Float.parseFloat(proMap.getOrDefault("hotwords_score", "1.5").trim()));  
108 - // create a new Recognizer, first parameter kept for android asset_manager ANDROID_API__ >= 9  
109 - this.ptr = createOnlineRecognizer(new Object(), rcgCfg);  
110 -  
111 - } catch (Exception e) {  
112 - System.err.println(e);  
113 - } 7 + static {
  8 + System.loadLibrary("sherpa-onnx-jni");
114 } 9 }
115 10
116 - // use for android asset_manager ANDROID_API__ >= 9  
117 - public OnlineRecognizer(Object assetManager, String modelCfgPath) {  
118 - Map<String, String> proMap = this.readProperties(modelCfgPath);  
119 - try {  
120 - int sampleRate = Integer.parseInt(proMap.getOrDefault("sample_rate", "16000").trim());  
121 - this.sampleRate = sampleRate;  
122 - EndpointRule rule1 =  
123 - new EndpointRule(  
124 - false,  
125 - Float.parseFloat(proMap.getOrDefault("rule1_min_trailing_silence", "2.4").trim()),  
126 - 0.0F);  
127 - EndpointRule rule2 =  
128 - new EndpointRule(  
129 - true,  
130 - Float.parseFloat(proMap.getOrDefault("rule2_min_trailing_silence", "1.2").trim()),  
131 - 0.0F);  
132 - EndpointRule rule3 =  
133 - new EndpointRule(  
134 - false,  
135 - 0.0F,  
136 - Float.parseFloat(proMap.getOrDefault("rule3_min_utterance_length", "20").trim()));  
137 - EndpointConfig endCfg = new EndpointConfig(rule1, rule2, rule3);  
138 - OnlineParaformerModelConfig modelParaCfg =  
139 - new OnlineParaformerModelConfig(  
140 - proMap.getOrDefault("encoder", "").trim(), proMap.getOrDefault("decoder", "").trim());  
141 - OnlineTransducerModelConfig modelTranCfg =  
142 - new OnlineTransducerModelConfig(  
143 - proMap.getOrDefault("encoder", "").trim(),  
144 - proMap.getOrDefault("decoder", "").trim(),  
145 - proMap.getOrDefault("joiner", "").trim());  
146 - OnlineZipformer2CtcModelConfig zipformer2CtcConfig = new OnlineZipformer2CtcModelConfig("");  
147 -  
148 - OnlineModelConfig modelCfg =  
149 - new OnlineModelConfig(  
150 - proMap.getOrDefault("tokens", "").trim(),  
151 - Integer.parseInt(proMap.getOrDefault("num_threads", "4").trim()),  
152 - false,  
153 - proMap.getOrDefault("model_type", "zipformer").trim(),  
154 - modelParaCfg,  
155 - modelTranCfg, zipformer2CtcConfig);  
156 - FeatureConfig featConfig =  
157 - new FeatureConfig(  
158 - sampleRate, Integer.parseInt(proMap.getOrDefault("feature_dim", "80").trim()));  
159 -  
160 - OnlineLMConfig onlineLmConfig =  
161 - new OnlineLMConfig(  
162 - proMap.getOrDefault("lm_model", "").trim(),  
163 - Float.parseFloat(proMap.getOrDefault("lm_scale", "0.5").trim()));  
164 -  
165 - OnlineRecognizerConfig rcgCfg =  
166 - new OnlineRecognizerConfig(  
167 - featConfig,  
168 - modelCfg,  
169 - endCfg,  
170 - onlineLmConfig,  
171 - Boolean.parseBoolean(proMap.getOrDefault("enable_endpoint_detection", "true").trim()),  
172 - proMap.getOrDefault("decoding_method", "modified_beam_search").trim(),  
173 - Integer.parseInt(proMap.getOrDefault("max_active_paths", "4").trim()),  
174 - proMap.getOrDefault("hotwords_file", "").trim(),  
175 - Float.parseFloat(proMap.getOrDefault("hotwords_score", "1.5").trim()));  
176 - // create a new Recognizer, first parameter kept for android asset_manager ANDROID_API__ >= 9  
177 - this.ptr = createOnlineRecognizer(assetManager, rcgCfg); 11 + private long ptr = 0; // this is the asr engine ptrss
178 12
179 - } catch (Exception e) {  
180 - System.err.println(e);  
181 - }  
182 - }  
183 13
184 - // set onlineRecognizer by parameter  
185 - public OnlineRecognizer(  
186 - String tokens,  
187 - String encoder,  
188 - String decoder,  
189 - String joiner,  
190 - int numThreads,  
191 - int sampleRate,  
192 - int featureDim,  
193 - boolean enableEndpointDetection,  
194 - float rule1MinTrailingSilence,  
195 - float rule2MinTrailingSilence,  
196 - float rule3MinUtteranceLength,  
197 - String decodingMethod,  
198 - String lm_model,  
199 - float lm_scale,  
200 - int maxActivePaths,  
201 - String hotwordsFile,  
202 - float hotwordsScore,  
203 - String modelType) {  
204 - this.sampleRate = sampleRate;  
205 - EndpointRule rule1 = new EndpointRule(false, rule1MinTrailingSilence, 0.0F);  
206 - EndpointRule rule2 = new EndpointRule(true, rule2MinTrailingSilence, 0.0F);  
207 - EndpointRule rule3 = new EndpointRule(false, 0.0F, rule3MinUtteranceLength);  
208 - EndpointConfig endCfg = new EndpointConfig(rule1, rule2, rule3);  
209 - OnlineParaformerModelConfig modelParaCfg = new OnlineParaformerModelConfig(encoder, decoder);  
210 - OnlineTransducerModelConfig modelTranCfg =  
211 - new OnlineTransducerModelConfig(encoder, decoder, joiner);  
212 - OnlineZipformer2CtcModelConfig zipformer2CtcConfig = new OnlineZipformer2CtcModelConfig("");  
213 - OnlineModelConfig modelCfg =  
214 - new OnlineModelConfig(tokens, numThreads, false, modelType, modelParaCfg, modelTranCfg, zipformer2CtcConfig);  
215 - FeatureConfig featConfig = new FeatureConfig(sampleRate, featureDim);  
216 - OnlineLMConfig onlineLmConfig = new OnlineLMConfig(lm_model, lm_scale);  
217 - OnlineRecognizerConfig rcgCfg =  
218 - new OnlineRecognizerConfig(  
219 - featConfig,  
220 - modelCfg,  
221 - endCfg,  
222 - onlineLmConfig,  
223 - enableEndpointDetection,  
224 - decodingMethod,  
225 - maxActivePaths,  
226 - hotwordsFile,  
227 - hotwordsScore);  
228 - // create a new Recognizer, first parameter kept for android asset_manager ANDROID_API__ >= 9  
229 - this.ptr = createOnlineRecognizer(new Object(), rcgCfg); 14 + public OnlineRecognizer(OnlineRecognizerConfig config) {
  15 + ptr = newFromFile(config);
230 } 16 }
231 17
  18 + /*
232 public static float[] readWavFile(String fileName) { 19 public static float[] readWavFile(String fileName) {
233 // read data from the filename 20 // read data from the filename
234 Object[] wavdata = readWave(fileName); 21 Object[] wavdata = readWave(fileName);
@@ -238,139 +25,67 @@ public class OnlineRecognizer { @@ -238,139 +25,67 @@ public class OnlineRecognizer {
238 25
239 return floatData; 26 return floatData;
240 } 27 }
  28 + */
241 29
242 - // load the libsherpa-onnx-jni.so lib  
243 - public static void loadSoLib(String soPath) {  
244 - // load libsherpa-onnx-jni.so lib from the path  
245 -  
246 - System.out.println("so lib path=" + soPath + "\n");  
247 - System.load(soPath.trim());  
248 - System.out.println("load so lib succeed\n");  
249 - }  
250 -  
251 - public static void setSoPath(String soPath) {  
252 - OnlineRecognizer.loadSoLib(soPath);  
253 - OnlineStream.loadSoLib(soPath);  
254 - }  
255 -  
256 - private static native Object[] readWave(String fileName); // static  
257 -  
258 - private Map<String, String> readProperties(String modelCfgPath) {  
259 - // read and parse config file  
260 - Properties props = new Properties();  
261 - Map<String, String> proMap = new HashMap<>();  
262 - try {  
263 - File file = new File(modelCfgPath);  
264 - if (!file.exists()) {  
265 - System.out.println("model cfg file not exists!");  
266 - System.exit(0);  
267 - }  
268 - InputStream in = new BufferedInputStream(new FileInputStream(modelCfgPath));  
269 - props.load(in);  
270 - Enumeration en = props.propertyNames();  
271 - while (en.hasMoreElements()) {  
272 - String key = (String) en.nextElement();  
273 - String Property = props.getProperty(key);  
274 - proMap.put(key, Property);  
275 - }  
276 -  
277 - } catch (Exception e) {  
278 - e.printStackTrace();  
279 - }  
280 - return proMap;  
281 - }  
282 -  
283 - public void decodeStream(OnlineStream s) throws Exception {  
284 - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr");  
285 - long streamPtr = s.getPtr();  
286 - if (streamPtr == 0) throw new Exception("null exception for stream ptr");  
287 - // when feeded samples to engine, call DecodeStream to let it process  
288 - decodeStream(this.ptr, streamPtr);  
289 - }  
290 30
291 - public void decodeStreams(OnlineStream[] ssOjb) throws Exception {  
292 - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr");  
293 - // decode for multiple streams  
294 - long[] ss = new long[ssOjb.length];  
295 - for (int i = 0; i < ssOjb.length; i++) {  
296 - ss[i] = ssOjb[i].getPtr();  
297 - if (ss[i] == 0) throw new Exception("null exception for stream ptr");  
298 - }  
299 - decodeStreams(this.ptr, ss); 31 + public void decode(OnlineStream s) {
  32 + decode(ptr, s.getPtr());
300 } 33 }
301 34
302 - public boolean isReady(OnlineStream s) throws Exception {  
303 - // whether the engine is ready for decode  
304 - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr");  
305 - long streamPtr = s.getPtr();  
306 - if (streamPtr == 0) throw new Exception("null exception for stream ptr");  
307 - return isReady(this.ptr, streamPtr);  
308 - }  
309 35
310 - public String getResult(OnlineStream s) throws Exception {  
311 - // get text from the engine  
312 - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr");  
313 - long streamPtr = s.getPtr();  
314 - if (streamPtr == 0) throw new Exception("null exception for stream ptr");  
315 - return getResult(this.ptr, streamPtr); 36 + public boolean isReady(OnlineStream s) {
  37 + return isReady(ptr, s.getPtr());
316 } 38 }
317 39
318 - public boolean isEndpoint(OnlineStream s) throws Exception {  
319 - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr");  
320 - long streamPtr = s.getPtr();  
321 - if (streamPtr == 0) throw new Exception("null exception for stream ptr");  
322 - return isEndpoint(this.ptr, streamPtr); 40 + public boolean isEndpoint(OnlineStream s) {
  41 + return isEndpoint(ptr, s.getPtr());
323 } 42 }
324 43
325 - public void reSet(OnlineStream s) throws Exception {  
326 - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr");  
327 - long streamPtr = s.getPtr();  
328 - if (streamPtr == 0) throw new Exception("null exception for stream ptr");  
329 - reSet(this.ptr, streamPtr); 44 + public void reset(OnlineStream s) {
  45 + reset(ptr, s.getPtr());
330 } 46 }
331 47
332 - public OnlineStream createStream() throws Exception {  
333 - // create one stream for data to feed in  
334 - if (this.ptr == 0) throw new Exception("null exception for recognizer ptr");  
335 - long streamPtr = createStream(this.ptr);  
336 - OnlineStream stream = new OnlineStream(streamPtr, this.sampleRate);  
337 - return stream; 48 + public OnlineStream createStream() {
  49 + long p = createStream(ptr, "");
  50 + return new OnlineStream(p);
338 } 51 }
339 52
  53 + @Override
340 protected void finalize() throws Throwable { 54 protected void finalize() throws Throwable {
341 release(); 55 release();
342 } 56 }
343 57
344 // recognizer release, you'd better call it manually if not use anymore 58 // recognizer release, you'd better call it manually if not use anymore
345 public void release() { 59 public void release() {
346 - if (this.ptr == 0) return;  
347 - deleteOnlineRecognizer(this.ptr); 60 + if (this.ptr == 0) {
  61 + return;
  62 + }
  63 + delete(this.ptr);
348 this.ptr = 0; 64 this.ptr = 0;
349 } 65 }
350 66
351 - // JNI interface libsherpa-onnx-jni.so  
352 -  
353 - // stream release, you'd better call it manually if not use anymore  
354 - public void releaseStream(OnlineStream s) {  
355 - s.release(); 67 + public OnlineRecognizerResult getResult(OnlineStream s) {
  68 + Object[] arr = getResult(ptr, s.getPtr());
  69 + String text = (String) arr[0];
  70 + String[] tokens = (String[]) arr[1];
  71 + float[] timestamps = (float[]) arr[2];
  72 + return new OnlineRecognizerResult(text, tokens, timestamps);
356 } 73 }
357 74
358 - private native String getResult(long ptr, long streamPtr);  
359 75
360 - private native void decodeStream(long ptr, long streamPtr); 76 + private native void delete(long ptr);
361 77
362 - private native void decodeStreams(long ptr, long[] ssPtr); 78 + private native long newFromFile(OnlineRecognizerConfig config);
363 79
364 - private native boolean isReady(long ptr, long streamPtr);  
365 -  
366 - // first parameter keep for android asset_manager ANDROID_API__ >= 9  
367 - private native long createOnlineRecognizer(Object asset, OnlineRecognizerConfig config); 80 + private native long createStream(long ptr, String hotwords);
368 81
369 - private native long createStream(long ptr); 82 + private native void reset(long ptr, long streamPtr);
370 83
371 - private native void deleteOnlineRecognizer(long ptr); 84 + private native void decode(long ptr, long streamPtr);
372 85
373 private native boolean isEndpoint(long ptr, long streamPtr); 86 private native boolean isEndpoint(long ptr, long streamPtr);
374 87
375 - private native void reSet(long ptr, long streamPtr);  
376 -} 88 + private native boolean isReady(long ptr, long streamPtr);
  89 +
  90 + private native Object[] getResult(long ptr, long streamPtr);
  91 +}
1 -/*  
2 - * // Copyright 2022-2023 by zhaoming  
3 - */  
4 - 1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
5 package com.k2fsa.sherpa.onnx; 3 package com.k2fsa.sherpa.onnx;
6 4
7 public class OnlineRecognizerConfig { 5 public class OnlineRecognizerConfig {
8 private final FeatureConfig featConfig; 6 private final FeatureConfig featConfig;
9 private final OnlineModelConfig modelConfig; 7 private final OnlineModelConfig modelConfig;
10 - private final EndpointConfig endpointConfig;  
11 private final OnlineLMConfig lmConfig; 8 private final OnlineLMConfig lmConfig;
  9 + private final EndpointConfig endpointConfig;
12 private final boolean enableEndpoint; 10 private final boolean enableEndpoint;
13 private final String decodingMethod; 11 private final String decodingMethod;
14 private final int maxActivePaths; 12 private final int maxActivePaths;
15 private final String hotwordsFile; 13 private final String hotwordsFile;
16 private final float hotwordsScore; 14 private final float hotwordsScore;
17 -  
18 - public OnlineRecognizerConfig(  
19 - FeatureConfig featConfig,  
20 - OnlineModelConfig modelConfig,  
21 - EndpointConfig endpointConfig,  
22 - OnlineLMConfig lmConfig,  
23 - boolean enableEndpoint,  
24 - String decodingMethod,  
25 - int maxActivePaths,  
26 - String hotwordsFile,  
27 - float hotwordsScore) {  
28 - this.featConfig = featConfig;  
29 - this.modelConfig = modelConfig;  
30 - this.endpointConfig = endpointConfig;  
31 - this.lmConfig = lmConfig;  
32 - this.enableEndpoint = enableEndpoint;  
33 - this.decodingMethod = decodingMethod;  
34 - this.maxActivePaths = maxActivePaths;  
35 - this.hotwordsFile = hotwordsFile;  
36 - this.hotwordsScore = hotwordsScore; 15 + private OnlineRecognizerConfig(Builder builder) {
  16 + this.featConfig = builder.featConfig;
  17 + this.modelConfig = builder.modelConfig;
  18 + this.lmConfig = builder.lmConfig;
  19 + this.endpointConfig = builder.endpointConfig;
  20 + this.enableEndpoint = builder.enableEndpoint;
  21 + this.decodingMethod = builder.decodingMethod;
  22 + this.maxActivePaths = builder.maxActivePaths;
  23 + this.hotwordsFile = builder.hotwordsFile;
  24 + this.hotwordsScore = builder.hotwordsScore;
37 } 25 }
38 26
39 - public OnlineLMConfig getLmConfig() {  
40 - return lmConfig;  
41 - }  
42 -  
43 - public FeatureConfig getFeatConfig() {  
44 - return featConfig; 27 + public static Builder builder() {
  28 + return new Builder();
45 } 29 }
46 30
47 public OnlineModelConfig getModelConfig() { 31 public OnlineModelConfig getModelConfig() {
48 return modelConfig; 32 return modelConfig;
49 } 33 }
50 34
51 - public EndpointConfig getEndpointConfig() {  
52 - return endpointConfig;  
53 - } 35 + public static class Builder {
  36 + private FeatureConfig featConfig = FeatureConfig.builder().build();
  37 + private OnlineModelConfig modelConfig = OnlineModelConfig.builder().build();
  38 + private OnlineLMConfig lmConfig = OnlineLMConfig.builder().build();
  39 + private EndpointConfig endpointConfig = EndpointConfig.builder().build();
  40 + private boolean enableEndpoint = true;
  41 + private String decodingMethod = "greedy_search";
  42 + private int maxActivePaths = 4;
  43 + private String hotwordsFile = "";
  44 + private float hotwordsScore = 1.5f;
54 45
55 - public boolean isEnableEndpoint() {  
56 - return enableEndpoint;  
57 - } 46 + public OnlineRecognizerConfig build() {
  47 + return new OnlineRecognizerConfig(this);
  48 + }
58 49
59 - public String getDecodingMethod() {  
60 - return decodingMethod;  
61 - } 50 + public Builder setFeatureConfig(FeatureConfig featConfig) {
  51 + this.featConfig = featConfig;
  52 + return this;
  53 + }
  54 +
  55 + public Builder setOnlineModelConfig(OnlineModelConfig modelConfig) {
  56 + this.modelConfig = modelConfig;
  57 + return this;
  58 + }
  59 +
  60 + public Builder setOnlineLMConfig(OnlineLMConfig lmConfig) {
  61 + this.lmConfig = lmConfig;
  62 + return this;
  63 + }
  64 +
  65 + public Builder setEndpointConfig(EndpointConfig endpointConfig) {
  66 + this.endpointConfig = endpointConfig;
  67 + return this;
  68 + }
  69 +
  70 + public Builder setEnableEndpoint(boolean enableEndpoint) {
  71 + this.enableEndpoint = enableEndpoint;
  72 + return this;
  73 + }
  74 +
  75 + public Builder setDecodingMethod(String decodingMethod) {
  76 + this.decodingMethod = decodingMethod;
  77 + return this;
  78 + }
  79 +
  80 + public Builder setMaxActivePaths(int maxActivePaths) {
  81 + this.maxActivePaths = maxActivePaths;
  82 + return this;
  83 + }
  84 +
  85 + public Builder setHotwordsFile(String hotwordsFile) {
  86 + this.hotwordsFile = hotwordsFile;
  87 + return this;
  88 + }
62 89
63 - public int getMaxActivePaths() {  
64 - return maxActivePaths; 90 + public Builder setHotwordsScore(float hotwordsScore) {
  91 + this.hotwordsScore = hotwordsScore;
  92 + return this;
  93 + }
65 } 94 }
66 } 95 }
  1 +// Copyright 2024 Xiaomi Corporation
  2 +package com.k2fsa.sherpa.onnx;
  3 +
  4 +public class OnlineRecognizerResult {
  5 + private final String text;
  6 + private final String[] tokens;
  7 + private final float[] timestamps;
  8 +
  9 + public OnlineRecognizerResult(String text, String[] tokens, float[] timestamps) {
  10 + this.text = text;
  11 + this.tokens = tokens;
  12 + this.timestamps = timestamps;
  13 + }
  14 +
  15 + public String getText() {
  16 + return text;
  17 + }
  18 +
  19 + public String[] getTokens() {
  20 + return tokens;
  21 + }
  22 +
  23 + public float[] getTimestamps() {
  24 + return timestamps;
  25 + }
  26 +}
1 -/*  
2 - * // Copyright 2022-2023 by zhaoming  
3 - */  
4 -// Stream is used for feeding data to the asr engine 1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
5 package com.k2fsa.sherpa.onnx; 3 package com.k2fsa.sherpa.onnx;
6 4
7 public class OnlineStream { 5 public class OnlineStream {
8 - private long ptr = 0; // this is the stream ptr 6 + static {
  7 + System.loadLibrary("sherpa-onnx-jni");
  8 + }
9 9
10 - private int sampleRate = 16000; 10 + private long ptr = 0;
11 11
12 - // assign ptr to this stream in construction  
13 - public OnlineStream(long ptr, int sampleRate) {  
14 - this.ptr = ptr;  
15 - this.sampleRate = sampleRate; 12 + public OnlineStream() {
  13 + this.ptr = 0;
16 } 14 }
17 15
18 - public static void loadSoLib(String soPath) {  
19 - // load .so lib from the path  
20 - System.load(soPath.trim()); // ("sherpa-onnx-jni-java"); 16 + public OnlineStream(long ptr) {
  17 + this.ptr = ptr;
21 } 18 }
22 19
23 public long getPtr() { 20 public long getPtr() {
24 return ptr; 21 return ptr;
25 } 22 }
26 23
27 - public void acceptWaveform(float[] samples) throws Exception {  
28 - if (this.ptr == 0) throw new Exception("null exception for stream ptr"); 24 + public void setPtr(long ptr) {
  25 + this.ptr = ptr;
  26 + }
29 27
30 - // feed wave data to asr engine  
31 - acceptWaveform(this.ptr, this.sampleRate, samples); 28 + public void acceptWaveform(float[] samples, int sampleRate) {
  29 + acceptWaveform(this.ptr, samples, sampleRate);
32 } 30 }
33 31
34 public void inputFinished() { 32 public void inputFinished() {
35 - // add some tail padding  
36 - int padLen = (int) (this.sampleRate * 0.3); // 0.3 seconds at 16 kHz sample rate  
37 - float[] tailPaddings = new float[padLen]; // default value is 0  
38 - acceptWaveform(this.ptr, this.sampleRate, tailPaddings);  
39 -  
40 - // tell the engine all data are feeded  
41 inputFinished(this.ptr); 33 inputFinished(this.ptr);
42 } 34 }
43 35
44 public void release() { 36 public void release() {
45 // stream object must be release after used 37 // stream object must be release after used
46 - if (this.ptr == 0) return;  
47 - deleteStream(this.ptr); 38 + if (this.ptr == 0) {
  39 + return;
  40 + }
  41 + delete(this.ptr);
48 this.ptr = 0; 42 this.ptr = 0;
49 } 43 }
50 44
  45 + @Override
51 protected void finalize() throws Throwable { 46 protected void finalize() throws Throwable {
52 release(); 47 release();
  48 + super.finalize();
53 } 49 }
54 50
55 - public boolean isLastFrame() throws Exception {  
56 - if (this.ptr == 0) throw new Exception("null exception for stream ptr");  
57 - return isLastFrame(this.ptr);  
58 - }  
59 -  
60 - public void reSet() throws Exception {  
61 - if (this.ptr == 0) throw new Exception("null exception for stream ptr");  
62 - reSet(this.ptr);  
63 - }  
64 -  
65 - public int featureDim() throws Exception {  
66 - if (this.ptr == 0) throw new Exception("null exception for stream ptr");  
67 - return featureDim(this.ptr);  
68 - }  
69 -  
70 - // JNI interface libsherpa-onnx-jni.so  
71 - private native void acceptWaveform(long ptr, int sampleRate, float[] samples); 51 + private native void acceptWaveform(long ptr, float[] samples, int sampleRate);
72 52
73 private native void inputFinished(long ptr); 53 private native void inputFinished(long ptr);
74 54
75 - private native void deleteStream(long ptr);  
76 -  
77 - private native int numFramesReady(long ptr);  
78 -  
79 - private native boolean isLastFrame(long ptr);  
80 -  
81 - private native void reSet(long ptr);  
82 -  
83 - private native int featureDim(long ptr);  
84 -} 55 + private native void delete(long ptr);
  56 +}
1 -/*  
2 - * // Copyright 2022-2023 by zhaoming  
3 - */ 1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
4 3
5 package com.k2fsa.sherpa.onnx; 4 package com.k2fsa.sherpa.onnx;
6 5
@@ -9,10 +8,14 @@ public class OnlineTransducerModelConfig { @@ -9,10 +8,14 @@ public class OnlineTransducerModelConfig {
9 private final String decoder; 8 private final String decoder;
10 private final String joiner; 9 private final String joiner;
11 10
12 - public OnlineTransducerModelConfig(String encoder, String decoder, String joiner) {  
13 - this.encoder = encoder;  
14 - this.decoder = decoder;  
15 - this.joiner = joiner; 11 + private OnlineTransducerModelConfig(Builder builder) {
  12 + this.encoder = builder.encoder;
  13 + this.decoder = builder.decoder;
  14 + this.joiner = builder.joiner;
  15 + }
  16 +
  17 + public static Builder builder() {
  18 + return new Builder();
16 } 19 }
17 20
18 public String getEncoder() { 21 public String getEncoder() {
@@ -26,4 +29,29 @@ public class OnlineTransducerModelConfig { @@ -26,4 +29,29 @@ public class OnlineTransducerModelConfig {
26 public String getJoiner() { 29 public String getJoiner() {
27 return joiner; 30 return joiner;
28 } 31 }
  32 +
  33 + public static class Builder {
  34 + private String encoder = "";
  35 + private String decoder = "";
  36 + private String joiner = "";
  37 +
  38 + public OnlineTransducerModelConfig build() {
  39 + return new OnlineTransducerModelConfig(this);
  40 + }
  41 +
  42 + public Builder setEncoder(String encoder) {
  43 + this.encoder = encoder;
  44 + return this;
  45 + }
  46 +
  47 + public Builder setDecoder(String decoder) {
  48 + this.decoder = decoder;
  49 + return this;
  50 + }
  51 +
  52 + public Builder setJoiner(String joiner) {
  53 + this.joiner = joiner;
  54 + return this;
  55 + }
  56 + }
29 } 57 }
  1 +// Copyright 2024 Xiaomi Corporation
1 package com.k2fsa.sherpa.onnx; 2 package com.k2fsa.sherpa.onnx;
2 3
3 public class OnlineZipformer2CtcModelConfig { 4 public class OnlineZipformer2CtcModelConfig {
4 private final String model; 5 private final String model;
5 6
6 - public OnlineZipformer2CtcModelConfig(String model) {  
7 - this.model = model; 7 + private OnlineZipformer2CtcModelConfig(Builder builder) {
  8 + this.model = builder.model;
  9 + }
  10 +
  11 + public static Builder builder() {
  12 + return new Builder();
8 } 13 }
9 14
10 public String getModel() { 15 public String getModel() {
11 return model; 16 return model;
12 } 17 }
13 18
  19 + public static class Builder {
  20 + private String model = "";
  21 +
  22 + public OnlineZipformer2CtcModelConfig build() {
  23 + return new OnlineZipformer2CtcModelConfig(this);
  24 + }
  25 +
  26 + public Builder setModel(String model) {
  27 + this.model = model;
  28 + return this;
  29 + }
  30 + }
14 } 31 }
  1 +// Copyright 2024 Xiaomi Corporation
  2 +package com.k2fsa.sherpa.onnx;
  3 +
  4 +public class WaveReader {
  5 + static {
  6 + System.loadLibrary("sherpa-onnx-jni");
  7 + }
  8 +
  9 + private final int sampleRate;
  10 + private final float[] samples;
  11 +
  12 + // It supports only single channel, 16-bit wave file.
  13 + // It will exit the program if the given file has a wrong format
  14 + public WaveReader(String filename) {
  15 + Object[] arr = readWaveFromFile(filename);
  16 + samples = (float[]) arr[0];
  17 + sampleRate = (int) arr[1];
  18 + }
  19 +
  20 + public int getSampleRate() {
  21 + return sampleRate;
  22 + }
  23 +
  24 + public float[] getSamples() {
  25 + return samples;
  26 + }
  27 +
  28 + private native Object[] readWaveFromFile(String filename);
  29 +}
@@ -21,6 +21,7 @@ set(sources @@ -21,6 +21,7 @@ set(sources
21 speaker-embedding-manager.cc 21 speaker-embedding-manager.cc
22 spoken-language-identification.cc 22 spoken-language-identification.cc
23 voice-activity-detector.cc 23 voice-activity-detector.cc
  24 + wave-reader.cc
24 ) 25 )
25 26
26 if(SHERPA_ONNX_ENABLE_TTS) 27 if(SHERPA_ONNX_ENABLE_TTS)
@@ -8,7 +8,6 @@ @@ -8,7 +8,6 @@
8 8
9 #include "sherpa-onnx/csrc/macros.h" 9 #include "sherpa-onnx/csrc/macros.h"
10 #include "sherpa-onnx/csrc/onnx-utils.h" 10 #include "sherpa-onnx/csrc/onnx-utils.h"
11 -#include "sherpa-onnx/csrc/wave-reader.h"  
12 #include "sherpa-onnx/csrc/wave-writer.h" 11 #include "sherpa-onnx/csrc/wave-writer.h"
13 #include "sherpa-onnx/jni/common.h" 12 #include "sherpa-onnx/jni/common.h"
14 13
@@ -43,69 +42,6 @@ JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl( @@ -43,69 +42,6 @@ JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl(
43 return ok; 42 return ok;
44 } 43 }
45 44
46 -static jobjectArray ReadWaveImpl(JNIEnv *env, std::istream &is,  
47 - const char *p_filename) {  
48 - bool is_ok = false;  
49 - int32_t sampling_rate = -1;  
50 - std::vector<float> samples =  
51 - sherpa_onnx::ReadWave(is, &sampling_rate, &is_ok);  
52 -  
53 - if (!is_ok) {  
54 - SHERPA_ONNX_LOGE("Failed to read %s", p_filename);  
55 - exit(-1);  
56 - }  
57 -  
58 - jfloatArray samples_arr = env->NewFloatArray(samples.size());  
59 - env->SetFloatArrayRegion(samples_arr, 0, samples.size(), samples.data());  
60 -  
61 - jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(  
62 - 2, env->FindClass("java/lang/Object"), nullptr);  
63 -  
64 - env->SetObjectArrayElement(obj_arr, 0, samples_arr);  
65 - env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, sampling_rate));  
66 -  
67 - return obj_arr;  
68 -}  
69 -  
70 -SHERPA_ONNX_EXTERN_C  
71 -JNIEXPORT jobjectArray JNICALL  
72 -Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromFile(  
73 - JNIEnv *env, jclass /*cls*/, jstring filename) {  
74 - const char *p_filename = env->GetStringUTFChars(filename, nullptr);  
75 - std::ifstream is(p_filename, std::ios::binary);  
76 -  
77 - auto obj_arr = ReadWaveImpl(env, is, p_filename);  
78 -  
79 - env->ReleaseStringUTFChars(filename, p_filename);  
80 -  
81 - return obj_arr;  
82 -}  
83 -  
84 -SHERPA_ONNX_EXTERN_C  
85 -JNIEXPORT jobjectArray JNICALL  
86 -Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromAsset(  
87 - JNIEnv *env, jclass /*cls*/, jobject asset_manager, jstring filename) {  
88 - const char *p_filename = env->GetStringUTFChars(filename, nullptr);  
89 -#if __ANDROID_API__ >= 9  
90 - AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);  
91 - if (!mgr) {  
92 - SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);  
93 - exit(-1);  
94 - }  
95 - std::vector<char> buffer = sherpa_onnx::ReadFile(mgr, p_filename);  
96 -  
97 - std::istrstream is(buffer.data(), buffer.size());  
98 -#else  
99 - std::ifstream is(p_filename, std::ios::binary);  
100 -#endif  
101 -  
102 - auto obj_arr = ReadWaveImpl(env, is, p_filename);  
103 -  
104 - env->ReleaseStringUTFChars(filename, p_filename);  
105 -  
106 - return obj_arr;  
107 -}  
108 -  
109 #if 0 45 #if 0
110 SHERPA_ONNX_EXTERN_C 46 SHERPA_ONNX_EXTERN_C
111 JNIEXPORT void JNICALL 47 JNIEXPORT void JNICALL
  1 +// sherpa-onnx/jni/wave-reader.cc
  2 +//
  3 +// Copyright (c) 2024 Xiaomi Corporation
  4 +#include "sherpa-onnx/csrc/wave-reader.h"
  5 +
  6 +#include <fstream>
  7 +
  8 +#include "sherpa-onnx/csrc/macros.h"
  9 +#include "sherpa-onnx/jni/common.h"
  10 +
  11 +static jobjectArray ReadWaveImpl(JNIEnv *env, std::istream &is,
  12 + const char *p_filename) {
  13 + bool is_ok = false;
  14 + int32_t sampling_rate = -1;
  15 + std::vector<float> samples =
  16 + sherpa_onnx::ReadWave(is, &sampling_rate, &is_ok);
  17 +
  18 + if (!is_ok) {
  19 + SHERPA_ONNX_LOGE("Failed to read '%s'", p_filename);
  20 + exit(-1);
  21 + }
  22 +
  23 + jfloatArray samples_arr = env->NewFloatArray(samples.size());
  24 + env->SetFloatArrayRegion(samples_arr, 0, samples.size(), samples.data());
  25 +
  26 + jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(
  27 + 2, env->FindClass("java/lang/Object"), nullptr);
  28 +
  29 + env->SetObjectArrayElement(obj_arr, 0, samples_arr);
  30 + env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, sampling_rate));
  31 +
  32 + return obj_arr;
  33 +}
  34 +
  35 +SHERPA_ONNX_EXTERN_C
  36 +JNIEXPORT jobjectArray JNICALL
  37 +Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromFile(
  38 + JNIEnv *env, jclass /*cls*/, jstring filename) {
  39 + const char *p_filename = env->GetStringUTFChars(filename, nullptr);
  40 + std::ifstream is(p_filename, std::ios::binary);
  41 +
  42 + auto obj_arr = ReadWaveImpl(env, is, p_filename);
  43 +
  44 + env->ReleaseStringUTFChars(filename, p_filename);
  45 +
  46 + return obj_arr;
  47 +}
  48 +
  49 +SHERPA_ONNX_EXTERN_C
  50 +JNIEXPORT jobjectArray JNICALL
  51 +Java_com_k2fsa_sherpa_onnx_WaveReader_readWaveFromFile(JNIEnv *env,
  52 + jclass /*obj*/,
  53 + jstring filename) {
  54 + return Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromFile(
  55 + env, nullptr, filename);
  56 +}
  57 +
  58 +SHERPA_ONNX_EXTERN_C
  59 +JNIEXPORT jobjectArray JNICALL
  60 +Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromAsset(
  61 + JNIEnv *env, jclass /*cls*/, jobject asset_manager, jstring filename) {
  62 + const char *p_filename = env->GetStringUTFChars(filename, nullptr);
  63 +#if __ANDROID_API__ >= 9
  64 + AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  65 + if (!mgr) {
  66 + SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
  67 + exit(-1);
  68 + }
  69 + std::vector<char> buffer = sherpa_onnx::ReadFile(mgr, p_filename);
  70 +
  71 + std::istrstream is(buffer.data(), buffer.size());
  72 +#else
  73 + std::ifstream is(p_filename, std::ios::binary);
  74 +#endif
  75 +
  76 + auto obj_arr = ReadWaveImpl(env, is, p_filename);
  77 +
  78 + env->ReleaseStringUTFChars(filename, p_filename);
  79 +
  80 + return obj_arr;
  81 +}