Committed by
GitHub
Add streaming ASR example from a microphone for Java API (#1047)
正在显示 3 个修改的文件,包含 173 行增加和 0 行删除
| @@ -7,6 +7,7 @@ This directory contains examples for the JAVA API of sherpa-onnx. | @@ -7,6 +7,7 @@ This directory contains examples for the JAVA API of sherpa-onnx. | ||
| 7 | ## Streaming Speech recognition | 7 | ## Streaming Speech recognition |
| 8 | 8 | ||
| 9 | ``` | 9 | ``` |
| 10 | +./run-streaming-asr-from-mic-transducer.sh | ||
| 10 | ./run-streaming-decode-file-ctc.sh | 11 | ./run-streaming-decode-file-ctc.sh |
| 11 | ./run-streaming-decode-file-ctc-hlg.sh | 12 | ./run-streaming-decode-file-ctc-hlg.sh |
| 12 | ./run-streaming-decode-file-paraformer.sh | 13 | ./run-streaming-decode-file-paraformer.sh |
| 1 | +// Copyright 2022-2023 by zhaoming | ||
| 2 | +// Copyright 2024 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +// This file shows how to use an online transducer, i.e., streaming transducer, | ||
| 5 | +// for real-time speech recognition with a microphone. | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | +import javax.sound.sampled.*; | ||
| 8 | + | ||
| 9 | +public class StreamingAsrFromMicTransducer { | ||
| 10 | + public static void main(String[] args) { | ||
| 11 | + // please refer to | ||
| 12 | + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english | ||
| 13 | + // to download model files | ||
| 14 | + String encoder = | ||
| 15 | + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx"; | ||
| 16 | + String decoder = | ||
| 17 | + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"; | ||
| 18 | + String joiner = | ||
| 19 | + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"; | ||
| 20 | + String tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"; | ||
| 21 | + | ||
| 22 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | ||
| 23 | + String ruleFsts = "./itn_zh_number.fst"; | ||
| 24 | + | ||
| 25 | + int sampleRate = 16000; | ||
| 26 | + | ||
| 27 | + OnlineTransducerModelConfig transducer = | ||
| 28 | + OnlineTransducerModelConfig.builder() | ||
| 29 | + .setEncoder(encoder) | ||
| 30 | + .setDecoder(decoder) | ||
| 31 | + .setJoiner(joiner) | ||
| 32 | + .build(); | ||
| 33 | + | ||
| 34 | + OnlineModelConfig modelConfig = | ||
| 35 | + OnlineModelConfig.builder() | ||
| 36 | + .setTransducer(transducer) | ||
| 37 | + .setTokens(tokens) | ||
| 38 | + .setNumThreads(1) | ||
| 39 | + .setDebug(true) | ||
| 40 | + .build(); | ||
| 41 | + | ||
| 42 | + OnlineRecognizerConfig config = | ||
| 43 | + OnlineRecognizerConfig.builder() | ||
| 44 | + .setOnlineModelConfig(modelConfig) | ||
| 45 | + .setDecodingMethod("greedy_search") | ||
| 46 | + .setRuleFsts(ruleFsts) | ||
| 47 | + .build(); | ||
| 48 | + | ||
| 49 | + OnlineRecognizer recognizer = new OnlineRecognizer(config); | ||
| 50 | + OnlineStream stream = recognizer.createStream(); | ||
| 51 | + | ||
| 52 | + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html | ||
| 53 | + // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian | ||
| 54 | + AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false); | ||
| 55 | + | ||
| 56 | + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int- | ||
| 57 | + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); | ||
| 58 | + TargetDataLine targetDataLine; | ||
| 59 | + try { | ||
| 60 | + targetDataLine = (TargetDataLine) AudioSystem.getLine(info); | ||
| 61 | + targetDataLine.open(format); | ||
| 62 | + targetDataLine.start(); | ||
| 63 | + } catch (LineUnavailableException e) { | ||
| 64 | + System.out.println("Failed to open target data line: " + e.getMessage()); | ||
| 65 | + recognizer.release(); | ||
| 66 | + stream.release(); | ||
| 67 | + return; | ||
| 68 | + } | ||
| 69 | + | ||
| 70 | + String lastText = ""; | ||
| 71 | + int segmentIndex = 0; | ||
| 72 | + | ||
| 73 | + // You can choose an arbitrary number | ||
| 74 | + int bufferSize = 1600; // 0.1 seconds for 16000Hz | ||
| 75 | + byte[] buffer = new byte[bufferSize * 2]; // a short has 2 bytes | ||
| 76 | + float[] samples = new float[bufferSize]; | ||
| 77 | + | ||
| 78 | + System.out.println("Started! Please speak"); | ||
| 79 | + while (targetDataLine.isOpen()) { | ||
| 80 | + int n = targetDataLine.read(buffer, 0, buffer.length); | ||
| 81 | + if (n <= 0) { | ||
| 82 | + System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length); | ||
| 83 | + continue; | ||
| 84 | + } | ||
| 85 | + for (int i = 0; i != bufferSize; ++i) { | ||
| 86 | + short low = buffer[2 * i]; | ||
| 87 | + short high = buffer[2 * i + 1]; | ||
| 88 | + int s = (high << 8) + low; | ||
| 89 | + samples[i] = (float) s / 32768; | ||
| 90 | + } | ||
| 91 | + stream.acceptWaveform(samples, sampleRate); | ||
| 92 | + | ||
| 93 | + while (recognizer.isReady(stream)) { | ||
| 94 | + recognizer.decode(stream); | ||
| 95 | + } | ||
| 96 | + | ||
| 97 | + String text = recognizer.getResult(stream).getText(); | ||
| 98 | + boolean isEndpoint = recognizer.isEndpoint(stream); | ||
| 99 | + if (!text.isEmpty() && text != " " && lastText != text) { | ||
| 100 | + lastText = text; | ||
| 101 | + System.out.printf("%d: %s\r", segmentIndex, text); | ||
| 102 | + } | ||
| 103 | + | ||
| 104 | + if (isEndpoint) { | ||
| 105 | + if (!text.isEmpty()) { | ||
| 106 | + System.out.println(); | ||
| 107 | + segmentIndex += 1; | ||
| 108 | + } | ||
| 109 | + | ||
| 110 | + recognizer.reset(stream); | ||
| 111 | + } | ||
| 112 | + } // while (targetDataLine.isOpen()) | ||
| 113 | + | ||
| 114 | + stream.release(); | ||
| 115 | + recognizer.release(); | ||
| 116 | + } | ||
| 117 | +} |
#!/usr/bin/env bash
#
# Builds the sherpa-onnx JNI library and the Java API jar if they are missing,
# downloads the streaming zipformer model and the ITN rule FST if they are
# missing, and then runs StreamingAsrFromMicTransducer.java.
#
# Run this script from the directory that contains it: all paths are relative.

set -ex

# Build the JNI shared library only when neither the .dylib nor the .so
# variant is present in ../build/lib.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the model; tokens.txt is used as the "already present" marker.
if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

# Download the rule FST referenced by the Java example.
if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

# Quote the library path so a working directory containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./StreamingAsrFromMicTransducer.java
-
请 注册 或 登录 后发表评论