Committed by
GitHub
Add VAD + Non-streaming ASR + microphone examples for Java API (#1046)
Showing 7 changed files with 389 additions and 406 deletions
| @@ -63,6 +63,18 @@ The punctuation model supports both English and Chinese. | @@ -63,6 +63,18 @@ The punctuation model supports both English and Chinese. | ||
| 63 | ./run-vad-from-mic.sh | 63 | ./run-vad-from-mic.sh |
| 64 | ``` | 64 | ``` |
| 65 | 65 | ||
| 66 | +## VAD with a microphone + Non-streaming Paraformer for speech recognition | ||
| 67 | + | ||
| 68 | +```bash | ||
| 69 | +./run-vad-from-mic-non-streaming-paraformer.sh | ||
| 70 | +``` | ||
| 71 | + | ||
| 72 | +## VAD with a microphone + Non-streaming Whisper tiny.en for speech recognition | ||
| 73 | + | ||
| 74 | +```bash | ||
| 75 | +./run-vad-from-mic-non-streaming-whisper.sh | ||
| 76 | +``` | ||
| 77 | + | ||
| 66 | ## VAD (Remove silence) | 78 | ## VAD (Remove silence) |
| 67 | 79 | ||
| 68 | ```bash | 80 | ```bash |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a silero_vad model with a non-streaming Paraformer | ||
| 4 | +// for speech recognition. | ||
| 5 | + | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | +import javax.sound.sampled.*; | ||
| 8 | + | ||
| 9 | +public class VadFromMicWithNonStreamingParaformer { | ||
| 10 | + private static final int sampleRate = 16000; | ||
| 11 | + private static final int windowSize = 512; | ||
| 12 | + | ||
| 13 | + public static Vad createVad() { | ||
| 14 | + // please download ./silero_vad.onnx from | ||
| 15 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 16 | + String model = "./silero_vad.onnx"; | ||
| 17 | + SileroVadModelConfig sileroVad = | ||
| 18 | + SileroVadModelConfig.builder() | ||
| 19 | + .setModel(model) | ||
| 20 | + .setThreshold(0.5f) | ||
| 21 | + .setMinSilenceDuration(0.25f) | ||
| 22 | + .setMinSpeechDuration(0.5f) | ||
| 23 | + .setWindowSize(windowSize) | ||
| 24 | + .build(); | ||
| 25 | + | ||
| 26 | + VadModelConfig config = | ||
| 27 | + VadModelConfig.builder() | ||
| 28 | + .setSileroVadModelConfig(sileroVad) | ||
| 29 | + .setSampleRate(sampleRate) | ||
| 30 | + .setNumThreads(1) | ||
| 31 | + .setDebug(true) | ||
| 32 | + .setProvider("cpu") | ||
| 33 | + .build(); | ||
| 34 | + | ||
| 35 | + return new Vad(config); | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + public static OfflineRecognizer createOfflineRecognizer() { | ||
| 39 | + // please refer to | ||
| 40 | + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese-english | ||
| 41 | + // to download model files | ||
| 42 | + String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx"; | ||
| 43 | + String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt"; | ||
| 44 | + | ||
| 45 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | ||
| 46 | + String ruleFsts = "./itn_zh_number.fst"; | ||
| 47 | + | ||
| 48 | + OfflineParaformerModelConfig paraformer = | ||
| 49 | + OfflineParaformerModelConfig.builder().setModel(model).build(); | ||
| 50 | + | ||
| 51 | + OfflineModelConfig modelConfig = | ||
| 52 | + OfflineModelConfig.builder() | ||
| 53 | + .setParaformer(paraformer) | ||
| 54 | + .setTokens(tokens) | ||
| 55 | + .setNumThreads(1) | ||
| 56 | + .setDebug(true) | ||
| 57 | + .build(); | ||
| 58 | + | ||
| 59 | + OfflineRecognizerConfig config = | ||
| 60 | + OfflineRecognizerConfig.builder() | ||
| 61 | + .setOfflineModelConfig(modelConfig) | ||
| 62 | + .setDecodingMethod("greedy_search") | ||
| 63 | + .setRuleFsts(ruleFsts) | ||
| 64 | + .build(); | ||
| 65 | + | ||
| 66 | + return new OfflineRecognizer(config); | ||
| 67 | + } | ||
| 68 | + | ||
| 69 | + public static void main(String[] args) { | ||
| 70 | + Vad vad = createVad(); | ||
| 71 | + OfflineRecognizer recognizer = createOfflineRecognizer(); | ||
| 72 | + | ||
| 73 | + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html | ||
| 74 | + // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian | ||
| 75 | + AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false); | ||
| 76 | + | ||
| 77 | + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int- | ||
| 78 | + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); | ||
| 79 | + TargetDataLine targetDataLine; | ||
| 80 | + try { | ||
| 81 | + targetDataLine = (TargetDataLine) AudioSystem.getLine(info); | ||
| 82 | + targetDataLine.open(format); | ||
| 83 | + targetDataLine.start(); | ||
| 84 | + } catch (LineUnavailableException e) { | ||
| 85 | + System.out.println("Failed to open target data line: " + e.getMessage()); | ||
| 86 | + vad.release(); | ||
| 87 | + recognizer.release(); | ||
| 88 | + return; | ||
| 89 | + } | ||
| 90 | + | ||
| 91 | + boolean printed = false; | ||
| 92 | + byte[] buffer = new byte[windowSize * 2]; | ||
| 93 | + float[] samples = new float[windowSize]; | ||
| 94 | + | ||
| 95 | + System.out.println("Started. Please speak"); | ||
| 96 | + boolean running = true; | ||
| 97 | + while (targetDataLine.isOpen() && running) { | ||
| 98 | + int n = targetDataLine.read(buffer, 0, buffer.length); | ||
| 99 | + if (n <= 0) { | ||
| 100 | + System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length); | ||
| 101 | + continue; | ||
| 102 | + } | ||
| 103 | + for (int i = 0; i != windowSize; ++i) { | ||
| 104 | + short low = buffer[2 * i]; | ||
| 105 | + short high = buffer[2 * i + 1]; | ||
| 106 | + int s = (high << 8) + low; | ||
| 107 | + samples[i] = (float) s / 32768; | ||
| 108 | + } | ||
| 109 | + | ||
| 110 | + vad.acceptWaveform(samples); | ||
| 111 | + if (vad.isSpeechDetected() && !printed) { | ||
| 112 | + System.out.println("Detected speech"); | ||
| 113 | + printed = true; | ||
| 114 | + } | ||
| 115 | + | ||
| 116 | + if (!vad.isSpeechDetected()) { | ||
| 117 | + printed = false; | ||
| 118 | + } | ||
| 119 | + | ||
| 120 | + while (!vad.empty()) { | ||
| 121 | + SpeechSegment segment = vad.front(); | ||
| 122 | + float startTime = segment.getStart() / (float) sampleRate; | ||
| 123 | + float duration = segment.getSamples().length / (float) sampleRate; | ||
| 124 | + | ||
| 125 | + OfflineStream stream = recognizer.createStream(); | ||
| 126 | + stream.acceptWaveform(segment.getSamples(), sampleRate); | ||
| 127 | + recognizer.decode(stream); | ||
| 128 | + String text = recognizer.getResult(stream).getText(); | ||
| 129 | + stream.release(); | ||
| 130 | + | ||
| 131 | + if (!text.isEmpty()) { | ||
| 132 | + System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text); | ||
| 133 | + } | ||
| 134 | + | ||
| 135 | + if (text.contains("退出程序")) { | ||
| 136 | + running = false; | ||
| 137 | + } | ||
| 138 | + | ||
| 139 | + vad.pop(); | ||
| 140 | + } | ||
| 141 | + } | ||
| 142 | + | ||
| 143 | + vad.release(); | ||
| 144 | + recognizer.release(); | ||
| 145 | + } | ||
| 146 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a silero_vad model with a non-streaming Whisper tiny.en | ||
| 4 | +// for speech recognition. | ||
| 5 | + | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | +import javax.sound.sampled.*; | ||
| 8 | + | ||
| 9 | +public class VadFromMicNonStreamingWhisper { | ||
| 10 | + private static final int sampleRate = 16000; | ||
| 11 | + private static final int windowSize = 512; | ||
| 12 | + | ||
| 13 | + public static Vad createVad() { | ||
| 14 | + // please download ./silero_vad.onnx from | ||
| 15 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 16 | + String model = "./silero_vad.onnx"; | ||
| 17 | + SileroVadModelConfig sileroVad = | ||
| 18 | + SileroVadModelConfig.builder() | ||
| 19 | + .setModel(model) | ||
| 20 | + .setThreshold(0.5f) | ||
| 21 | + .setMinSilenceDuration(0.25f) | ||
| 22 | + .setMinSpeechDuration(0.5f) | ||
| 23 | + .setWindowSize(windowSize) | ||
| 24 | + .build(); | ||
| 25 | + | ||
| 26 | + VadModelConfig config = | ||
| 27 | + VadModelConfig.builder() | ||
| 28 | + .setSileroVadModelConfig(sileroVad) | ||
| 29 | + .setSampleRate(sampleRate) | ||
| 30 | + .setNumThreads(1) | ||
| 31 | + .setDebug(true) | ||
| 32 | + .setProvider("cpu") | ||
| 33 | + .build(); | ||
| 34 | + | ||
| 35 | + return new Vad(config); | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + public static OfflineRecognizer createOfflineRecognizer() { | ||
| 39 | + // please refer to | ||
| 40 | + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html | ||
| 41 | + // to download model files | ||
| 42 | + String encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"; | ||
| 43 | + String decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx"; | ||
| 44 | + String tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"; | ||
| 45 | + | ||
| 46 | + OfflineWhisperModelConfig whisper = | ||
| 47 | + OfflineWhisperModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build(); | ||
| 48 | + | ||
| 49 | + OfflineModelConfig modelConfig = | ||
| 50 | + OfflineModelConfig.builder() | ||
| 51 | + .setWhisper(whisper) | ||
| 52 | + .setTokens(tokens) | ||
| 53 | + .setNumThreads(1) | ||
| 54 | + .setDebug(true) | ||
| 55 | + .build(); | ||
| 56 | + | ||
| 57 | + OfflineRecognizerConfig config = | ||
| 58 | + OfflineRecognizerConfig.builder() | ||
| 59 | + .setOfflineModelConfig(modelConfig) | ||
| 60 | + .setDecodingMethod("greedy_search") | ||
| 61 | + .build(); | ||
| 62 | + | ||
| 63 | + return new OfflineRecognizer(config); | ||
| 64 | + } | ||
| 65 | + | ||
| 66 | + public static void main(String[] args) { | ||
| 67 | + Vad vad = createVad(); | ||
| 68 | + OfflineRecognizer recognizer = createOfflineRecognizer(); | ||
| 69 | + | ||
| 70 | + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html | ||
| 71 | + // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian | ||
| 72 | + AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false); | ||
| 73 | + | ||
| 74 | + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int- | ||
| 75 | + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); | ||
| 76 | + TargetDataLine targetDataLine; | ||
| 77 | + try { | ||
| 78 | + targetDataLine = (TargetDataLine) AudioSystem.getLine(info); | ||
| 79 | + targetDataLine.open(format); | ||
| 80 | + targetDataLine.start(); | ||
| 81 | + } catch (LineUnavailableException e) { | ||
| 82 | + System.out.println("Failed to open target data line: " + e.getMessage()); | ||
| 83 | + vad.release(); | ||
| 84 | + recognizer.release(); | ||
| 85 | + return; | ||
| 86 | + } | ||
| 87 | + | ||
| 88 | + boolean printed = false; | ||
| 89 | + byte[] buffer = new byte[windowSize * 2]; | ||
| 90 | + float[] samples = new float[windowSize]; | ||
| 91 | + | ||
| 92 | + System.out.println("Started. Please speak"); | ||
| 93 | + boolean running = true; | ||
| 94 | + while (targetDataLine.isOpen() && running) { | ||
| 95 | + int n = targetDataLine.read(buffer, 0, buffer.length); | ||
| 96 | + if (n <= 0) { | ||
| 97 | + System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length); | ||
| 98 | + continue; | ||
| 99 | + } | ||
| 100 | + for (int i = 0; i != windowSize; ++i) { | ||
| 101 | + short low = buffer[2 * i]; | ||
| 102 | + short high = buffer[2 * i + 1]; | ||
| 103 | + int s = (high << 8) + low; | ||
| 104 | + samples[i] = (float) s / 32768; | ||
| 105 | + } | ||
| 106 | + | ||
| 107 | + vad.acceptWaveform(samples); | ||
| 108 | + if (vad.isSpeechDetected() && !printed) { | ||
| 109 | + System.out.println("Detected speech"); | ||
| 110 | + printed = true; | ||
| 111 | + } | ||
| 112 | + | ||
| 113 | + if (!vad.isSpeechDetected()) { | ||
| 114 | + printed = false; | ||
| 115 | + } | ||
| 116 | + | ||
| 117 | + while (!vad.empty()) { | ||
| 118 | + SpeechSegment segment = vad.front(); | ||
| 119 | + float startTime = segment.getStart() / (float) sampleRate; | ||
| 120 | + float duration = segment.getSamples().length / (float) sampleRate; | ||
| 121 | + | ||
| 122 | + OfflineStream stream = recognizer.createStream(); | ||
| 123 | + stream.acceptWaveform(segment.getSamples(), sampleRate); | ||
| 124 | + recognizer.decode(stream); | ||
| 125 | + String text = recognizer.getResult(stream).getText(); | ||
| 126 | + stream.release(); | ||
| 127 | + | ||
| 128 | + if (!text.isEmpty()) { | ||
| 129 | + System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text); | ||
| 130 | + } | ||
| 131 | + | ||
| 132 | + if (text.contains("exit the program")) { | ||
| 133 | + running = false; | ||
| 134 | + } | ||
| 135 | + | ||
| 136 | + vad.pop(); | ||
| 137 | + } | ||
| 138 | + } | ||
| 139 | + | ||
| 140 | + vad.release(); | ||
| 141 | + recognizer.release(); | ||
| 142 | + } | ||
| 143 | +} |
#!/usr/bin/env bash
# Builds (if needed) the sherpa-onnx JNI library and Java API jar, downloads the
# required models, and runs the VAD + non-streaming Paraformer microphone example.

set -ex

# Build the JNI shared library (.dylib on macOS, .so on Linux) if it is missing.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download the silero VAD model if it is missing.
if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# Download and unpack the Paraformer ASR model if it is missing.
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi

# Download the inverse-text-normalization rules for Chinese numbers if missing.
if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

# Run the example with the java single-file source launcher (requires JDK 11+).
java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMicWithNonStreamingParaformer.java
#!/usr/bin/env bash
# Builds (if needed) the sherpa-onnx JNI library and Java API jar, downloads the
# required models, and runs the VAD + non-streaming Whisper microphone example.

set -ex

# Build the JNI shared library (.dylib on macOS, .so on Linux) if it is missing.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download the silero VAD model if it is missing.
if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# Download and unpack the Whisper tiny.en model if it is missing.
if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2

  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi

# Run the example with the java single-file source launcher (requires JDK 11+).
# BUG FIX: the script previously launched ./VadFromMicWithNonStreamingWhisper.java,
# but the example's public class is VadFromMicNonStreamingWhisper; the single-file
# launcher requires the file name to match the public class name.
java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMicNonStreamingWhisper.java
java-api-examples/src/DecodeFile.java
已删除
100644 → 0
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | ||
| 4 | -/* | ||
| 5 | -Config modelconfig.cfg | ||
| 6 | - sample_rate=16000 | ||
| 7 | - feature_dim=80 | ||
| 8 | - rule1_min_trailing_silence=2.4 | ||
| 9 | - rule2_min_trailing_silence=1.2 | ||
| 10 | - rule3_min_utterance_length=20 | ||
| 11 | - encoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx | ||
| 12 | - decoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx | ||
| 13 | - joiner=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx | ||
| 14 | - tokens=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt | ||
| 15 | - num_threads=4 | ||
| 16 | - enable_endpoint_detection=false | ||
| 17 | - decoding_method=greedy_search | ||
| 18 | - max_active_paths=4 | ||
| 19 | -*/ | ||
| 20 | - | ||
| 21 | -import com.k2fsa.sherpa.onnx.OnlineRecognizer; | ||
| 22 | -import com.k2fsa.sherpa.onnx.OnlineStream; | ||
| 23 | -import java.io.*; | ||
| 24 | -import java.nio.charset.StandardCharsets; | ||
| 25 | - | ||
| 26 | -public class DecodeFile { | ||
| 27 | - OnlineRecognizer rcgOjb; | ||
| 28 | - OnlineStream streamObj; | ||
| 29 | - String wavfilename; | ||
| 30 | - | ||
| 31 | - public DecodeFile(String fileName) { | ||
| 32 | - wavfilename = fileName; | ||
| 33 | - } | ||
| 34 | - | ||
| 35 | - public void initModelWithPara() { | ||
| 36 | - try { | ||
| 37 | - String modelDir = | ||
| 38 | - "/sherpa-onnx/build_old/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"; | ||
| 39 | - String encoder = modelDir + "/encoder-epoch-99-avg-1.onnx"; | ||
| 40 | - String decoder = modelDir + "/decoder-epoch-99-avg-1.onnx"; | ||
| 41 | - String joiner = modelDir + "/joiner-epoch-99-avg-1.onnx"; | ||
| 42 | - String tokens = modelDir + "/tokens.txt"; | ||
| 43 | - int numThreads = 4; | ||
| 44 | - int sampleRate = 16000; | ||
| 45 | - int featureDim = 80; | ||
| 46 | - boolean enableEndpointDetection = false; | ||
| 47 | - float rule1MinTrailingSilence = 2.4F; | ||
| 48 | - float rule2MinTrailingSilence = 1.2F; | ||
| 49 | - float rule3MinUtteranceLength = 20F; | ||
| 50 | - String decodingMethod = "greedy_search"; | ||
| 51 | - int maxActivePaths = 4; | ||
| 52 | - String hotwordsFile = ""; | ||
| 53 | - float hotwordsScore = 1.5F; | ||
| 54 | - String lm_model = ""; | ||
| 55 | - float lm_scale = 0.5F; | ||
| 56 | - String modelType = "zipformer"; | ||
| 57 | - rcgOjb = | ||
| 58 | - new OnlineRecognizer( | ||
| 59 | - tokens, | ||
| 60 | - encoder, | ||
| 61 | - decoder, | ||
| 62 | - joiner, | ||
| 63 | - numThreads, | ||
| 64 | - sampleRate, | ||
| 65 | - featureDim, | ||
| 66 | - enableEndpointDetection, | ||
| 67 | - rule1MinTrailingSilence, | ||
| 68 | - rule2MinTrailingSilence, | ||
| 69 | - rule3MinUtteranceLength, | ||
| 70 | - decodingMethod, | ||
| 71 | - lm_model, | ||
| 72 | - lm_scale, | ||
| 73 | - maxActivePaths, | ||
| 74 | - hotwordsFile, | ||
| 75 | - hotwordsScore, | ||
| 76 | - modelType); | ||
| 77 | - streamObj = rcgOjb.createStream(); | ||
| 78 | - } catch (Exception e) { | ||
| 79 | - System.err.println(e); | ||
| 80 | - e.printStackTrace(); | ||
| 81 | - } | ||
| 82 | - } | ||
| 83 | - | ||
| 84 | - public void initModelWithCfg(String cfgFile) { | ||
| 85 | - try { | ||
| 86 | - // you should set setCfgPath() before running this | ||
| 87 | - rcgOjb = new OnlineRecognizer(cfgFile); | ||
| 88 | - streamObj = rcgOjb.createStream(); | ||
| 89 | - } catch (Exception e) { | ||
| 90 | - System.err.println(e); | ||
| 91 | - e.printStackTrace(); | ||
| 92 | - } | ||
| 93 | - } | ||
| 94 | - | ||
| 95 | - public void simpleExample() { | ||
| 96 | - try { | ||
| 97 | - float[] buffer = rcgOjb.readWavFile(wavfilename); // read data from file | ||
| 98 | - streamObj.acceptWaveform(buffer); // feed stream with data | ||
| 99 | - streamObj.inputFinished(); // tell engine you done with all data | ||
| 100 | - OnlineStream ssObj[] = new OnlineStream[1]; | ||
| 101 | - while (rcgOjb.isReady(streamObj)) { // engine is ready for unprocessed data | ||
| 102 | - ssObj[0] = streamObj; | ||
| 103 | - rcgOjb.decodeStreams(ssObj); // decode for multiple stream | ||
| 104 | - // rcgOjb.DecodeStream(streamObj); // decode for single stream | ||
| 105 | - } | ||
| 106 | - | ||
| 107 | - String recText = "simple:" + rcgOjb.getResult(streamObj) + "\n"; | ||
| 108 | - byte[] utf8Data = recText.getBytes(StandardCharsets.UTF_8); | ||
| 109 | - System.out.println(new String(utf8Data)); | ||
| 110 | - rcgOjb.reSet(streamObj); | ||
| 111 | - rcgOjb.releaseStream(streamObj); // release stream | ||
| 112 | - rcgOjb.release(); // release recognizer | ||
| 113 | - | ||
| 114 | - } catch (Exception e) { | ||
| 115 | - System.err.println(e); | ||
| 116 | - e.printStackTrace(); | ||
| 117 | - } | ||
| 118 | - } | ||
| 119 | - | ||
| 120 | - public void streamExample() { | ||
| 121 | - try { | ||
| 122 | - float[] buffer = rcgOjb.readWavFile(wavfilename); // read data from file | ||
| 123 | - float[] chunk = new float[1600]; // //each time read 1600(0.1s) data | ||
| 124 | - int chunkIndex = 0; | ||
| 125 | - for (int i = 0; i < buffer.length; i++) // total wav length loop | ||
| 126 | - { | ||
| 127 | - chunk[chunkIndex] = buffer[i]; | ||
| 128 | - chunkIndex++; | ||
| 129 | - if (chunkIndex >= 1600 || i == (buffer.length - 1)) { | ||
| 130 | - chunkIndex = 0; | ||
| 131 | - streamObj.acceptWaveform(chunk); // feed chunk | ||
| 132 | - if (rcgOjb.isReady(streamObj)) { | ||
| 133 | - rcgOjb.decodeStream(streamObj); | ||
| 134 | - } | ||
| 135 | - String testDate = rcgOjb.getResult(streamObj); | ||
| 136 | - byte[] utf8Data = testDate.getBytes(StandardCharsets.UTF_8); | ||
| 137 | - | ||
| 138 | - if (utf8Data.length > 0) { | ||
| 139 | - System.out.println(Float.valueOf((float) i / 16000) + ":" + new String(utf8Data)); | ||
| 140 | - } | ||
| 141 | - } | ||
| 142 | - } | ||
| 143 | - streamObj.inputFinished(); | ||
| 144 | - while (rcgOjb.isReady(streamObj)) { | ||
| 145 | - rcgOjb.decodeStream(streamObj); | ||
| 146 | - } | ||
| 147 | - | ||
| 148 | - String recText = "stream:" + rcgOjb.getResult(streamObj) + "\n"; | ||
| 149 | - byte[] utf8Data = recText.getBytes(StandardCharsets.UTF_8); | ||
| 150 | - System.out.println(new String(utf8Data)); | ||
| 151 | - rcgOjb.reSet(streamObj); | ||
| 152 | - rcgOjb.releaseStream(streamObj); // release stream | ||
| 153 | - rcgOjb.release(); // release recognizer | ||
| 154 | - | ||
| 155 | - } catch (Exception e) { | ||
| 156 | - System.err.println(e); | ||
| 157 | - e.printStackTrace(); | ||
| 158 | - } | ||
| 159 | - } | ||
| 160 | - | ||
| 161 | - public static void main(String[] args) { | ||
| 162 | - try { | ||
| 163 | - String appDir = System.getProperty("user.dir"); | ||
| 164 | - System.out.println("appdir=" + appDir); | ||
| 165 | - String fileName = appDir + "/" + args[0]; | ||
| 166 | - String cfgPath = appDir + "/modeltest.cfg"; | ||
| 167 | - String soPath = appDir + "/../build/lib/libsherpa-onnx-jni.so"; | ||
| 168 | - OnlineRecognizer.setSoPath(soPath); | ||
| 169 | - DecodeFile rcgDemo = new DecodeFile(fileName); | ||
| 170 | - | ||
| 171 | - // ***************** */ | ||
| 172 | - rcgDemo.initModelWithCfg(cfgPath); | ||
| 173 | - rcgDemo.streamExample(); | ||
| 174 | - // **************** */ | ||
| 175 | - rcgDemo.initModelWithCfg(cfgPath); | ||
| 176 | - rcgDemo.simpleExample(); | ||
| 177 | - | ||
| 178 | - } catch (Exception e) { | ||
| 179 | - System.err.println(e); | ||
| 180 | - e.printStackTrace(); | ||
| 181 | - } | ||
| 182 | - } | ||
| 183 | -} |
java-api-examples/src/DecodeMic.java
已删除
100755 → 0
| 1 | -/* | ||
| 2 | - * // Copyright 2022-2023 by zhaoming | ||
| 3 | - */ | ||
| 4 | -/* | ||
| 5 | -Real-time speech recognition from a microphone with com.k2fsa.sherpa.onnx Java API | ||
| 6 | - | ||
| 7 | -example for cfgFile modelconfig.cfg | ||
| 8 | - sample_rate=16000 | ||
| 9 | - feature_dim=80 | ||
| 10 | - rule1_min_trailing_silence=2.4 | ||
| 11 | - rule2_min_trailing_silence=1.2 | ||
| 12 | - rule3_min_utterance_length=20 | ||
| 13 | - encoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx | ||
| 14 | - decoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx | ||
| 15 | - joiner=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx | ||
| 16 | - tokens=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt | ||
| 17 | - num_threads=4 | ||
| 18 | - enable_endpoint_detection=true | ||
| 19 | - decoding_method=greedy_search | ||
| 20 | - max_active_paths=4 | ||
| 21 | - | ||
| 22 | -*/ | ||
| 23 | -import com.k2fsa.sherpa.onnx.OnlineRecognizer; | ||
| 24 | -import com.k2fsa.sherpa.onnx.OnlineStream; | ||
| 25 | -import java.io.*; | ||
| 26 | -import java.nio.ByteBuffer; | ||
| 27 | -import java.nio.ByteOrder; | ||
| 28 | -import java.nio.ShortBuffer; | ||
| 29 | -import java.nio.charset.StandardCharsets; | ||
| 30 | -import javax.sound.sampled.AudioFormat; | ||
| 31 | -import javax.sound.sampled.AudioSystem; | ||
| 32 | -import javax.sound.sampled.DataLine; | ||
| 33 | -import javax.sound.sampled.TargetDataLine; | ||
| 34 | - | ||
| 35 | -/** Microphone Example */ | ||
| 36 | -public class DecodeMic { | ||
| 37 | - MicRcgThread micRcgThread = null; // thread handle | ||
| 38 | - | ||
| 39 | - OnlineRecognizer rcgOjb; // the recognizer | ||
| 40 | - | ||
| 41 | - OnlineStream streamObj; // the stream | ||
| 42 | - | ||
| 43 | - public DecodeMic() { | ||
| 44 | - | ||
| 45 | - micRcgThread = new MicRcgThread(); // create a new instance for MicRcgThread | ||
| 46 | - } | ||
| 47 | - | ||
| 48 | - public void open() { | ||
| 49 | - micRcgThread.start(); // start to capture microphone data | ||
| 50 | - } | ||
| 51 | - | ||
| 52 | - public void close() { | ||
| 53 | - micRcgThread.stop(); // close capture | ||
| 54 | - } | ||
| 55 | - | ||
| 56 | - /** init asr engine with config file */ | ||
| 57 | - public void initModelWithCfg(String cfgFile) { | ||
| 58 | - try { | ||
| 59 | - | ||
| 60 | - // set setSoPath() before running this | ||
| 61 | - rcgOjb = new OnlineRecognizer(cfgFile); | ||
| 62 | - | ||
| 63 | - streamObj = rcgOjb.createStream(); // create a stream for asr engine to feed data | ||
| 64 | - } catch (Exception e) { | ||
| 65 | - System.err.println(e); | ||
| 66 | - e.printStackTrace(); | ||
| 67 | - } | ||
| 68 | - } | ||
| 69 | - | ||
| 70 | - /** read data from mic and feed to asr engine */ | ||
| 71 | - class MicRcgThread implements Runnable { | ||
| 72 | - | ||
| 73 | - TargetDataLine capline; // line for capture mic data | ||
| 74 | - | ||
| 75 | - Thread thread; // this thread | ||
| 76 | - int segmentId = 0; // record the segment id when detect endpoint | ||
| 77 | - String preText = ""; // decoded text | ||
| 78 | - | ||
| 79 | - public MicRcgThread() {} | ||
| 80 | - | ||
| 81 | - public void start() { | ||
| 82 | - | ||
| 83 | - thread = new Thread(this); | ||
| 84 | - | ||
| 85 | - thread.start(); // start thread | ||
| 86 | - } | ||
| 87 | - | ||
| 88 | - public void stop() { | ||
| 89 | - capline.stop(); | ||
| 90 | - capline.close(); | ||
| 91 | - capline = null; | ||
| 92 | - thread = null; | ||
| 93 | - } | ||
| 94 | - | ||
| 95 | - /** feed captured microphone data to asr */ | ||
| 96 | - public void decodeSample(byte[] samplebytes) { | ||
| 97 | - try { | ||
| 98 | - ByteBuffer byteBuf = ByteBuffer.wrap(samplebytes); // create a bytebuf for samples | ||
| 99 | - byteBuf.order(ByteOrder.LITTLE_ENDIAN); // set bytebuf to little endian | ||
| 100 | - ShortBuffer shortBuf = byteBuf.asShortBuffer(); // covert to short type | ||
| 101 | - short[] arrShort = new short[shortBuf.capacity()]; // array for copy short data | ||
| 102 | - float[] arrFloat = new float[shortBuf.capacity()]; // array for copy float data | ||
| 103 | - shortBuf.get(arrShort); // put date to arrShort | ||
| 104 | - | ||
| 105 | - for (int i = 0; i < arrShort.length; i++) { | ||
| 106 | - arrFloat[i] = arrShort[i] / 32768f; // loop to covert short data to float -1 to 1 | ||
| 107 | - } | ||
| 108 | - streamObj.acceptWaveform(arrFloat); // feed asr engine with float data | ||
| 109 | - while (rcgOjb.isReady(streamObj)) { // if engine is ready for unprocessed data | ||
| 110 | - | ||
| 111 | - rcgOjb.decodeStream(streamObj); // decode for this stream | ||
| 112 | - } | ||
| 113 | - boolean isEndpoint = | ||
| 114 | - rcgOjb.isEndpoint( | ||
| 115 | - streamObj); // endpoint check, make sure enable_endpoint_detection=true in config | ||
| 116 | - // file | ||
| 117 | - String nowText = rcgOjb.getResult(streamObj); // get asr result | ||
| 118 | - String recText = ""; | ||
| 119 | - byte[] utf8Data; // for covert text to utf8 | ||
| 120 | - if (isEndpoint && nowText.length() > 0) { | ||
| 121 | - rcgOjb.reSet(streamObj); // reSet stream when detect endpoint | ||
| 122 | - segmentId++; | ||
| 123 | - preText = nowText; | ||
| 124 | - recText = "text(seg_" + String.valueOf(segmentId) + "):" + nowText + "\n"; | ||
| 125 | - utf8Data = recText.getBytes(StandardCharsets.UTF_8); | ||
| 126 | - System.out.println(new String(utf8Data)); | ||
| 127 | - } | ||
| 128 | - | ||
| 129 | - if (!nowText.equals(preText)) { // if preText not equal nowtext | ||
| 130 | - preText = nowText; | ||
| 131 | - recText = nowText + "\n"; | ||
| 132 | - utf8Data = recText.getBytes(StandardCharsets.UTF_8); | ||
| 133 | - System.out.println(new String(utf8Data)); | ||
| 134 | - } | ||
| 135 | - } catch (Exception e) { | ||
| 136 | - System.err.println(e); | ||
| 137 | - e.printStackTrace(); | ||
| 138 | - } | ||
| 139 | - } | ||
| 140 | - | ||
| 141 | - /** run mic capture thread */ | ||
| 142 | - public void run() { | ||
| 143 | - System.out.println("Started! Please speak..."); | ||
| 144 | - | ||
| 145 | - AudioFormat.Encoding encoding = AudioFormat.Encoding.PCM_SIGNED; // the pcm format | ||
| 146 | - float rate = 16000.0f; // using 16 kHz | ||
| 147 | - int channels = 1; // single channel | ||
| 148 | - int sampleSize = 16; // sampleSize 16bit | ||
| 149 | - boolean isBigEndian = false; // using little endian | ||
| 150 | - | ||
| 151 | - AudioFormat format = | ||
| 152 | - new AudioFormat( | ||
| 153 | - encoding, rate, sampleSize, channels, (sampleSize / 8) * channels, rate, isBigEndian); | ||
| 154 | - | ||
| 155 | - DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); | ||
| 156 | - | ||
| 157 | - // check system support such data format | ||
| 158 | - if (!AudioSystem.isLineSupported(info)) { | ||
| 159 | - System.out.println(info + " not supported."); | ||
| 160 | - return; | ||
| 161 | - } | ||
| 162 | - | ||
| 163 | - // open a line for capture. | ||
| 164 | - | ||
| 165 | - try { | ||
| 166 | - capline = (TargetDataLine) AudioSystem.getLine(info); | ||
| 167 | - capline.open(format, capline.getBufferSize()); | ||
| 168 | - } catch (Exception ex) { | ||
| 169 | - System.out.println(ex); | ||
| 170 | - return; | ||
| 171 | - } | ||
| 172 | - | ||
| 173 | - // the buf size for mic captured each time | ||
| 174 | - int bufferLengthInBytes = capline.getBufferSize() / 8 * format.getFrameSize(); | ||
| 175 | - byte[] micData = new byte[bufferLengthInBytes]; | ||
| 176 | - int numBytesRead; | ||
| 177 | - | ||
| 178 | - capline.start(); // start to capture mic data | ||
| 179 | - | ||
| 180 | - while (thread != null) { | ||
| 181 | - // read data from line | ||
| 182 | - if ((numBytesRead = capline.read(micData, 0, bufferLengthInBytes)) == -1) { | ||
| 183 | - break; | ||
| 184 | - } | ||
| 185 | - | ||
| 186 | - decodeSample(micData); // decode mic data | ||
| 187 | - } | ||
| 188 | - | ||
| 189 | - // stop and close | ||
| 190 | - | ||
| 191 | - try { | ||
| 192 | - if (capline != null) { | ||
| 193 | - capline.stop(); | ||
| 194 | - capline.close(); | ||
| 195 | - capline = null; | ||
| 196 | - } | ||
| 197 | - | ||
| 198 | - } catch (Exception ex) { | ||
| 199 | - System.err.println(ex); | ||
| 200 | - } | ||
| 201 | - } | ||
| 202 | - } // End class DecodeMic | ||
| 203 | - | ||
| 204 | - public static void main(String s[]) { | ||
| 205 | - try { | ||
| 206 | - String appDir = System.getProperty("user.dir"); | ||
| 207 | - System.out.println("appdir=" + appDir); | ||
| 208 | - String cfgPath = appDir + "/modelconfig.cfg"; | ||
| 209 | - String soPath = appDir + "/../build/lib/libsherpa-onnx-jni.so"; | ||
| 210 | - OnlineRecognizer.setSoPath(soPath); // set so. lib for OnlineRecognizer | ||
| 211 | - | ||
| 212 | - DecodeMic decodeEx = new DecodeMic(); | ||
| 213 | - decodeEx.initModelWithCfg(cfgPath); // init asr engine | ||
| 214 | - decodeEx.open(); // open thread for mic | ||
| 215 | - System.out.print("Press Enter to EXIT!\n"); | ||
| 216 | - char i = (char) System.in.read(); | ||
| 217 | - decodeEx.close(); | ||
| 218 | - } catch (Exception e) { | ||
| 219 | - System.err.println(e); | ||
| 220 | - e.printStackTrace(); | ||
| 221 | - } | ||
| 222 | - } | ||
| 223 | -} |
Please register or sign in to comment.