Fangjun Kuang
Committed by GitHub

Add streaming ASR example from a microphone for Java API (#1047)

@@ -7,6 +7,7 @@ This directory contains examples for the JAVA API of sherpa-onnx. @@ -7,6 +7,7 @@ This directory contains examples for the JAVA API of sherpa-onnx.
7 ## Streaming Speech recognition 7 ## Streaming Speech recognition
8 8
9 ``` 9 ```
  10 +./run-streaming-asr-from-mic-transducer.sh
10 ./run-streaming-decode-file-ctc.sh 11 ./run-streaming-decode-file-ctc.sh
11 ./run-streaming-decode-file-ctc-hlg.sh 12 ./run-streaming-decode-file-ctc-hlg.sh
12 ./run-streaming-decode-file-paraformer.sh 13 ./run-streaming-decode-file-paraformer.sh
  1 +// Copyright 2022-2023 by zhaoming
  2 +// Copyright 2024 Xiaomi Corporation
  3 +
  4 +// This file shows how to use an online transducer, i.e., streaming transducer,
  5 +// for real-time speech recognition with a microphone.
  6 +import com.k2fsa.sherpa.onnx.*;
  7 +import javax.sound.sampled.*;
  8 +
  9 +public class StreamingAsrFromMicTransducer {
  10 + public static void main(String[] args) {
  11 + // please refer to
  12 + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
  13 + // to download model files
  14 + String encoder =
  15 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx";
  16 + String decoder =
  17 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx";
  18 + String joiner =
  19 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx";
  20 + String tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
  21 +
  22 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  23 + String ruleFsts = "./itn_zh_number.fst";
  24 +
  25 + int sampleRate = 16000;
  26 +
  27 + OnlineTransducerModelConfig transducer =
  28 + OnlineTransducerModelConfig.builder()
  29 + .setEncoder(encoder)
  30 + .setDecoder(decoder)
  31 + .setJoiner(joiner)
  32 + .build();
  33 +
  34 + OnlineModelConfig modelConfig =
  35 + OnlineModelConfig.builder()
  36 + .setTransducer(transducer)
  37 + .setTokens(tokens)
  38 + .setNumThreads(1)
  39 + .setDebug(true)
  40 + .build();
  41 +
  42 + OnlineRecognizerConfig config =
  43 + OnlineRecognizerConfig.builder()
  44 + .setOnlineModelConfig(modelConfig)
  45 + .setDecodingMethod("greedy_search")
  46 + .setRuleFsts(ruleFsts)
  47 + .build();
  48 +
  49 + OnlineRecognizer recognizer = new OnlineRecognizer(config);
  50 + OnlineStream stream = recognizer.createStream();
  51 +
  52 + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
  53 + // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
  54 + AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
  55 +
  56 + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
  57 + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
  58 + TargetDataLine targetDataLine;
  59 + try {
  60 + targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
  61 + targetDataLine.open(format);
  62 + targetDataLine.start();
  63 + } catch (LineUnavailableException e) {
  64 + System.out.println("Failed to open target data line: " + e.getMessage());
  65 + recognizer.release();
  66 + stream.release();
  67 + return;
  68 + }
  69 +
  70 + String lastText = "";
  71 + int segmentIndex = 0;
  72 +
  73 + // You can choose an arbitrary number
  74 + int bufferSize = 1600; // 0.1 seconds for 16000Hz
  75 + byte[] buffer = new byte[bufferSize * 2]; // a short has 2 bytes
  76 + float[] samples = new float[bufferSize];
  77 +
  78 + System.out.println("Started! Please speak");
  79 + while (targetDataLine.isOpen()) {
  80 + int n = targetDataLine.read(buffer, 0, buffer.length);
  81 + if (n <= 0) {
  82 + System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
  83 + continue;
  84 + }
  85 + for (int i = 0; i != bufferSize; ++i) {
  86 + short low = buffer[2 * i];
  87 + short high = buffer[2 * i + 1];
  88 + int s = (high << 8) + low;
  89 + samples[i] = (float) s / 32768;
  90 + }
  91 + stream.acceptWaveform(samples, sampleRate);
  92 +
  93 + while (recognizer.isReady(stream)) {
  94 + recognizer.decode(stream);
  95 + }
  96 +
  97 + String text = recognizer.getResult(stream).getText();
  98 + boolean isEndpoint = recognizer.isEndpoint(stream);
  99 + if (!text.isEmpty() && text != " " && lastText != text) {
  100 + lastText = text;
  101 + System.out.printf("%d: %s\r", segmentIndex, text);
  102 + }
  103 +
  104 + if (isEndpoint) {
  105 + if (!text.isEmpty()) {
  106 + System.out.println();
  107 + segmentIndex += 1;
  108 + }
  109 +
  110 + recognizer.reset(stream);
  111 + }
  112 + } // while (targetDataLine.isOpen())
  113 +
  114 + stream.release();
  115 + recognizer.release();
  116 + }
  117 +}
#!/usr/bin/env bash
#
# Builds whatever is missing (the JNI shared library and the Java API jar),
# downloads the streaming zipformer model and the inverse-text-normalization
# FST if they are not present, and then runs
# StreamingAsrFromMicTransducer.java against the default microphone.
#
# All paths are relative, so run this script from the directory that contains it.

set -ex

# Build the JNI shared library (.dylib on macOS, .so on Linux) if neither exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it does not exist yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the pre-trained streaming model.
if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

# Download the rule FST that the Java example passes to setRuleFsts().
if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

# Quote the library path so a working directory containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./StreamingAsrFromMicTransducer.java