zhaomingwork
Committed by GitHub

java decode example for microphone (#122)

@@ -17,6 +17,8 @@ LIB_BUILD_DIR = ./lib @@ -17,6 +17,8 @@ LIB_BUILD_DIR = ./lib
17 17
18 EXAMPLE_FILE = DecodeFile.java 18 EXAMPLE_FILE = DecodeFile.java
19 19
  20 +EXAMPLE_Mic = DecodeMic.java
  21 +
20 JAVAC = javac 22 JAVAC = javac
21 23
22 BUILD_DIR = build 24 BUILD_DIR = build
@@ -29,8 +31,11 @@ vpath %.class $(BUILD_DIR) @@ -29,8 +31,11 @@ vpath %.class $(BUILD_DIR)
29 vpath %.java src 31 vpath %.java src
30 32
31 33
32 -buildexample: 34 +buildfile:
33 $(JAVAC) -cp lib/sherpaonnx.jar -d $(BUILD_DIR) -encoding UTF-8 src/$(EXAMPLE_FILE) 35 $(JAVAC) -cp lib/sherpaonnx.jar -d $(BUILD_DIR) -encoding UTF-8 src/$(EXAMPLE_FILE)
  36 +
  37 +buildmic:
  38 + $(JAVAC) -cp lib/sherpaonnx.jar -d $(BUILD_DIR) -encoding UTF-8 src/$(EXAMPLE_Mic)
34 39
35 rebuild: clean all 40 rebuild: clean all
36 41
@@ -43,10 +48,13 @@ clean: @@ -43,10 +48,13 @@ clean:
43 mkdir -p ./lib 48 mkdir -p ./lib
44 49
45 50
46 -run: 51 +runfile:
47 52
48 - java -cp ./lib/sherpaonnx.jar:build $(RUNJFLAGS) DecodeFile 53 + java -cp ./lib/sherpaonnx.jar:build $(RUNJFLAGS) DecodeFile
49 54
  55 +runmic:
  56 +
  57 + java -cp ./lib/sherpaonnx.jar:build $(RUNJFLAGS) DecodeMic
50 58
51 buildlib: $(LIB_FILES:.java=.class) 59 buildlib: $(LIB_FILES:.java=.class)
52 60
@@ -58,4 +66,8 @@ buildlib: $(LIB_FILES:.java=.class) @@ -58,4 +66,8 @@ buildlib: $(LIB_FILES:.java=.class)
58 packjar: 66 packjar:
59 jar cvfe lib/sherpaonnx.jar . -C $(BUILD_DIR) . 67 jar cvfe lib/sherpaonnx.jar . -C $(BUILD_DIR) .
60 68
61 -all: clean buildlib packjar buildexample run 69 +all: clean buildlib packjar buildfile buildmic
  70 +
  71 +
  72 +
  73 +
1 -  
2 0.Introduction 1 0.Introduction
3 ----  
4 -Java wrapper `com.k2fsa.sherpaonnx.OnlineRecognizer` for `sherpa-onnx`. Java is a cross-platform language; you can build jni .so lib according to your system, and then use the same java api for all your platform.  
5 -``` xml 2 +--------------
  3 +
  4 +Java wrapper `com.k2fsa.sherpa.onnx.OnlineRecognizer` for `sherpa-onnx`. Java is a cross-platform language; you can build the JNI .so library for your system and then use the same Java API on all your platforms.
  5 +
  6 +```xml
6 Depend on: 7 Depend on:
7 Openjdk 1.8 8 Openjdk 1.8
8 ``` 9 ```
  10 +
9 --- 11 ---
  12 +
10 1.Compile libsherpa-onnx-jni.so 13 1.Compile libsherpa-onnx-jni.so
11 ---- 14 +-------------------------------
  15 +
12 Compile sherpa-onnx/jni/jni.cc according to your system. 16 Compile sherpa-onnx/jni/jni.cc according to your system.
13 Example for Ubuntu 18.04 LTS, Openjdk 1.8.0_362: 17 Example for Ubuntu 18.04 LTS, Openjdk 1.8.0_362:
14 -``` xml 18 +
  19 +```xml
15 git clone https://github.com/k2-fsa/sherpa-onnx 20 git clone https://github.com/k2-fsa/sherpa-onnx
16 cd sherpa-onnx 21 cd sherpa-onnx
17 mkdir build 22 mkdir build
@@ -19,14 +24,19 @@ Example for Ubuntu 18.04 LTS, Openjdk 1.8.0_362: @@ -19,14 +24,19 @@ Example for Ubuntu 18.04 LTS, Openjdk 1.8.0_362:
19 cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DSHERPA_ONNX_ENABLE_JNI=ON .. 24 cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DSHERPA_ONNX_ENABLE_JNI=ON ..
20 make -j6 25 make -j6
21 ``` 26 ```
  27 +
22 --- 28 ---
  29 +
23 2.Download asr model files 30 2.Download asr model files
24 ---- 31 +--------------------------
  32 +
25 [click here for more detail](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html) 33 [click here for more detail](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html)
26 ---- 34 +--------------------------
  35 +
27 3.Config model config.cfg 36 3.Config model config.cfg
28 ----  
29 -``` xml 37 +-------------------------
  38 +
  39 +```xml
30 #model config 40 #model config
31 sample_rate=16000 41 sample_rate=16000
32 feature_dim=80 42 feature_dim=80
@@ -42,11 +52,15 @@ Example for Ubuntu 18.04 LTS, Openjdk 1.8.0_362: @@ -42,11 +52,15 @@ Example for Ubuntu 18.04 LTS, Openjdk 1.8.0_362:
42 decoding_method=greedy_search 52 decoding_method=greedy_search
43 max_active_paths=4 53 max_active_paths=4
44 ``` 54 ```
  55 +
45 --- 56 ---
  57 +
46 4.A simple java example 58 4.A simple java example
47 ---- 59 +-----------------------
  60 +
48 refer to [java_api_example](https://github.com/k2-fsa/sherpa-onnx/blob/master/java-api-examples/src/DecodeFile.java) for more detail. 61 refer to [java_api_example](https://github.com/k2-fsa/sherpa-onnx/blob/master/java-api-examples/src/DecodeFile.java) for more detail.
49 -``` java 62 +
  63 +```java
50 import com.k2fsa.sherpa.onnx.OnlineRecognizer; 64 import com.k2fsa.sherpa.onnx.OnlineRecognizer;
51 import com.k2fsa.sherpa.onnx.OnlineStream; 65 import com.k2fsa.sherpa.onnx.OnlineStream;
52 String cfgpath=appdir+"/modelconfig.cfg"; 66 String cfgpath=appdir+"/modelconfig.cfg";
@@ -71,18 +85,34 @@ refer to [java_api_example](https://github.com/k2-fsa/sherpa-onnx/blob/master/ja @@ -71,18 +85,34 @@ refer to [java_api_example](https://github.com/k2-fsa/sherpa-onnx/blob/master/ja
71 rcgOjb.reSet(streamObj); 85 rcgOjb.reSet(streamObj);
72 rcgOjb.releaseStream(streamObj); // release stream 86 rcgOjb.releaseStream(streamObj); // release stream
73 rcgOjb.release(); // release recognizer 87 rcgOjb.release(); // release recognizer
74 -  
75 -  
76 ``` 88 ```
  89 +
77 --- 90 ---
78 -5.Makefile  
79 ----  
80 -package jar and run app example  
81 -package path: /sherpa-onnx/java-api-examples/lib/sherpaonnx.jar  
82 -``` bash 91 +
  92 +5.Makefile
  93 +----------
  94 +
  95 +OS Ubuntu 18.04 LTS
  96 +Build package path: /sherpa-onnx/java-api-examples/lib/sherpaonnx.jar
  97 +
  98 +5.1 Build
  99 +
  100 +```bash
83 cd sherpa-onnx/java-api-examples 101 cd sherpa-onnx/java-api-examples
84 - make all  
85 -  
86 - ``` 102 + make all
  103 +```
  104 +
  105 +5.2 Run DecodeFile example
  106 +
  107 +```bash
  108 + make runfile
  109 +```
  110 +
  111 +5.3 Run DecodeMic example
  112 +
  113 +```bash
  114 + make runmic
  115 +```
  116 +
87 117
88 118
  1 +/*
  2 + * // Copyright 2022-2023 by zhaoming
  3 + */
  4 +/*
  5 +Real-time speech recognition from a microphone with com.k2fsa.sherpa.onnx Java API
  6 +
  7 +example for cfgFile modelconfig.cfg
  8 + sample_rate=16000
  9 + feature_dim=80
  10 + rule1_min_trailing_silence=2.4
  11 + rule2_min_trailing_silence=1.2
  12 + rule3_min_utterance_length=20
  13 + encoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx
  14 + decoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx
  15 + joiner=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx
  16 + tokens=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt
  17 + num_threads=4
  18 + enable_endpoint_detection=true
  19 + decoding_method=greedy_search
  20 + max_active_paths=4
  21 +
  22 +*/
  23 +import com.k2fsa.sherpa.onnx.OnlineRecognizer;
  24 +import com.k2fsa.sherpa.onnx.OnlineStream;
  25 +import java.io.*;
  26 +import java.nio.ByteBuffer;
  27 +import java.nio.ByteOrder;
  28 +import java.nio.ShortBuffer;
  29 +import java.nio.charset.StandardCharsets;
  30 +import javax.sound.sampled.AudioFormat;
  31 +import javax.sound.sampled.AudioSystem;
  32 +import javax.sound.sampled.DataLine;
  33 +import javax.sound.sampled.TargetDataLine;
  34 +
  35 +/** Microphone Example */
  36 +public class DecodeMic {
  37 + MicRcgThread micRcgThread = null; // thread handle
  38 +
  39 + OnlineRecognizer rcgOjb; // the recognizer
  40 +
  41 + OnlineStream streamObj; // the stream
  42 +
  43 + public DecodeMic() {
  44 +
  45 + micRcgThread = new MicRcgThread(); // create a new instance for MicRcgThread
  46 + }
  47 +
  48 + public void open() {
  49 + micRcgThread.start(); // start to capture microphone data
  50 + }
  51 +
  52 + public void close() {
  53 + micRcgThread.stop(); // close capture
  54 + }
  55 +
  56 + /** init asr engine with config file */
  57 + public void initModelWithCfg(String cfgFile) {
  58 + try {
  59 +
  60 + // set setSoPath() before running this
  61 + rcgOjb = new OnlineRecognizer(cfgFile);
  62 +
  63 + streamObj = rcgOjb.createStream(); // create a stream for asr engine to feed data
  64 + } catch (Exception e) {
  65 + System.err.println(e);
  66 + e.printStackTrace();
  67 + }
  68 + }
  69 +
  70 + /** read data from mic and feed to asr engine */
  71 + class MicRcgThread implements Runnable {
  72 +
  73 + TargetDataLine capline; // line for capture mic data
  74 +
  75 + Thread thread; // this thread
  76 + int segmentId = 0; // record the segment id when detect endpoint
  77 + String preText = ""; // decoded text
  78 +
  79 + public MicRcgThread() {}
  80 +
  81 + public void start() {
  82 +
  83 + thread = new Thread(this);
  84 +
  85 + thread.start(); // start thread
  86 + }
  87 +
  88 + public void stop() {
  89 + capline.stop();
  90 + capline.close();
  91 + capline = null;
  92 + thread = null;
  93 + }
  94 +
  95 + /** feed captured microphone data to asr */
  96 + public void decodeSample(byte[] samplebytes) {
  97 + try {
  98 + ByteBuffer byteBuf = ByteBuffer.wrap(samplebytes); // create a bytebuf for samples
  99 + byteBuf.order(ByteOrder.LITTLE_ENDIAN); // set bytebuf to little endian
  100 + ShortBuffer shortBuf = byteBuf.asShortBuffer(); // covert to short type
  101 + short[] arrShort = new short[shortBuf.capacity()]; // array for copy short data
  102 + float[] arrFloat = new float[shortBuf.capacity()]; // array for copy float data
  103 + shortBuf.get(arrShort); // put date to arrShort
  104 +
  105 + for (int i = 0; i < arrShort.length; i++) {
  106 + arrFloat[i] = arrShort[i] / 32768f; // loop to covert short data to float -1 to 1
  107 + }
  108 + streamObj.acceptWaveform(arrFloat); // feed asr engine with float data
  109 + while (rcgOjb.isReady(streamObj)) { // if engine is ready for unprocessed data
  110 +
  111 + rcgOjb.decodeStream(streamObj); // decode for this stream
  112 + }
  113 + boolean isEndpoint =
  114 + rcgOjb.isEndpoint(
  115 + streamObj); // endpoint check, make sure enable_endpoint_detection=true in config
  116 + // file
  117 + String nowText = rcgOjb.getResult(streamObj); // get asr result
  118 + String recText = "";
  119 + byte[] utf8Data; // for covert text to utf8
  120 + if (isEndpoint && nowText.length() > 0) {
  121 + rcgOjb.reSet(streamObj); // reSet stream when detect endpoint
  122 + segmentId++;
  123 + preText = nowText;
  124 + recText = "text(seg_" + String.valueOf(segmentId) + "):" + nowText + "\n";
  125 + utf8Data = recText.getBytes(StandardCharsets.UTF_8);
  126 + System.out.println(new String(utf8Data));
  127 + }
  128 +
  129 + if (!nowText.equals(preText)) { // if preText not equal nowtext
  130 + preText = nowText;
  131 + recText = nowText + "\n";
  132 + utf8Data = recText.getBytes(StandardCharsets.UTF_8);
  133 + System.out.println(new String(utf8Data));
  134 + }
  135 + } catch (Exception e) {
  136 + System.err.println(e);
  137 + e.printStackTrace();
  138 + }
  139 + }
  140 +
  141 + /** run mic capture thread */
  142 + public void run() {
  143 + System.out.println("Started! Please speak...");
  144 +
  145 + AudioFormat.Encoding encoding = AudioFormat.Encoding.PCM_SIGNED; // the pcm format
  146 + float rate = 16000.0f; // using 16 kHz
  147 + int channels = 1; // single channel
  148 + int sampleSize = 16; // sampleSize 16bit
  149 + boolean isBigEndian = false; // using little endian
  150 +
  151 + AudioFormat format =
  152 + new AudioFormat(
  153 + encoding, rate, sampleSize, channels, (sampleSize / 8) * channels, rate, isBigEndian);
  154 +
  155 + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
  156 +
  157 + // check system support such data format
  158 + if (!AudioSystem.isLineSupported(info)) {
  159 + System.out.println(info + " not supported.");
  160 + return;
  161 + }
  162 +
  163 + // open a line for capture.
  164 +
  165 + try {
  166 + capline = (TargetDataLine) AudioSystem.getLine(info);
  167 + capline.open(format, capline.getBufferSize());
  168 + } catch (Exception ex) {
  169 + System.out.println(ex);
  170 + return;
  171 + }
  172 +
  173 + // the buf size for mic captured each time
  174 + int bufferLengthInBytes = capline.getBufferSize() / 8 * format.getFrameSize();
  175 + byte[] micData = new byte[bufferLengthInBytes];
  176 + int numBytesRead;
  177 +
  178 + capline.start(); // start to capture mic data
  179 +
  180 + while (thread != null) {
  181 + // read data from line
  182 + if ((numBytesRead = capline.read(micData, 0, bufferLengthInBytes)) == -1) {
  183 + break;
  184 + }
  185 +
  186 + decodeSample(micData); // decode mic data
  187 + }
  188 +
  189 + // stop and close
  190 +
  191 + try {
  192 + if (capline != null) {
  193 + capline.stop();
  194 + capline.close();
  195 + capline = null;
  196 + }
  197 +
  198 + } catch (Exception ex) {
  199 + System.err.println(ex);
  200 + }
  201 + }
  202 + } // End class DecodeMic
  203 +
  204 + public static void main(String s[]) {
  205 + try {
  206 + String appDir = System.getProperty("user.dir");
  207 + System.out.println("appdir=" + appDir);
  208 + String cfgPath = appDir + "/modelconfig.cfg";
  209 + String soPath = appDir + "/../build/lib/libsherpa-onnx-jni.so";
  210 + OnlineRecognizer.setSoPath(soPath); // set so. lib for OnlineRecognizer
  211 +
  212 + DecodeMic decodeEx = new DecodeMic();
  213 + decodeEx.initModelWithCfg(cfgPath); // init asr engine
  214 + decodeEx.open(); // open thread for mic
  215 + System.out.print("Press Enter to EXIT!\n");
  216 + char i = (char) System.in.read();
  217 + decodeEx.close();
  218 + } catch (Exception e) {
  219 + System.err.println(e);
  220 + e.printStackTrace();
  221 + }
  222 + }
  223 +}