Committed by
GitHub
Add VAD + microphone example for Java API. (#1045)
正在显示
3 个修改的文件
包含
139 行增加
和
0 行删除
| @@ -57,6 +57,12 @@ The punctuation model supports both English and Chinese. | @@ -57,6 +57,12 @@ The punctuation model supports both English and Chinese. | ||
| 57 | ./run-speaker-identification.sh | 57 | ./run-speaker-identification.sh |
| 58 | ``` | 58 | ``` |
| 59 | 59 | ||
| 60 | +## VAD with a microphone | ||
| 61 | + | ||
| 62 | +```bash | ||
| 63 | +./run-vad-from-mic.sh | ||
| 64 | +``` | ||
| 65 | + | ||
| 60 | ## VAD (Remove silence) | 66 | ## VAD (Remove silence) |
| 61 | 67 | ||
| 62 | ```bash | 68 | ```bash |
java-api-examples/VadFromMic.java
0 → 100644
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a silero_vad model to detect speech captured | ||
| 4 | +// from a microphone and save each detected speech segment into a wave file. | ||
| 5 | + | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | +import javax.sound.sampled.*; | ||
| 8 | + | ||
| 9 | +public class VadFromMic { | ||
| 10 | + public static void main(String[] args) { | ||
| 11 | + int sampleRate = 16000; | ||
| 12 | + int windowSize = 512; | ||
| 13 | + // please download ./silero_vad.onnx from | ||
| 14 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 15 | + String model = "./silero_vad.onnx"; | ||
| 16 | + SileroVadModelConfig sileroVad = | ||
| 17 | + SileroVadModelConfig.builder() | ||
| 18 | + .setModel(model) | ||
| 19 | + .setThreshold(0.5f) | ||
| 20 | + .setMinSilenceDuration(0.25f) | ||
| 21 | + .setMinSpeechDuration(0.5f) | ||
| 22 | + .setWindowSize(windowSize) | ||
| 23 | + .build(); | ||
| 24 | + | ||
| 25 | + VadModelConfig config = | ||
| 26 | + VadModelConfig.builder() | ||
| 27 | + .setSileroVadModelConfig(sileroVad) | ||
| 28 | + .setSampleRate(sampleRate) | ||
| 29 | + .setNumThreads(1) | ||
| 30 | + .setDebug(true) | ||
| 31 | + .setProvider("cpu") | ||
| 32 | + .build(); | ||
| 33 | + | ||
| 34 | + Vad vad = new Vad(config); | ||
| 35 | + | ||
| 36 | + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html | ||
| 37 | + // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian | ||
| 38 | + AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false); | ||
| 39 | + | ||
| 40 | + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int- | ||
| 41 | + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); | ||
| 42 | + TargetDataLine targetDataLine; | ||
| 43 | + try { | ||
| 44 | + targetDataLine = (TargetDataLine) AudioSystem.getLine(info); | ||
| 45 | + targetDataLine.open(format); | ||
| 46 | + targetDataLine.start(); | ||
| 47 | + } catch (LineUnavailableException e) { | ||
| 48 | + System.out.println("Failed to open target data line: " + e.getMessage()); | ||
| 49 | + vad.release(); | ||
| 50 | + return; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + boolean printed = false; | ||
| 54 | + int index = 0; | ||
| 55 | + | ||
| 56 | + byte[] buffer = new byte[windowSize * 2]; | ||
| 57 | + float[] samples = new float[windowSize]; | ||
| 58 | + | ||
| 59 | + while (targetDataLine.isOpen()) { | ||
| 60 | + int n = targetDataLine.read(buffer, 0, buffer.length); | ||
| 61 | + if (n <= 0) { | ||
| 62 | + System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length); | ||
| 63 | + continue; | ||
| 64 | + } | ||
| 65 | + for (int i = 0; i != windowSize; ++i) { | ||
| 66 | + short low = buffer[2 * i]; | ||
| 67 | + short high = buffer[2 * i + 1]; | ||
| 68 | + int s = (high << 8) + low; | ||
| 69 | + samples[i] = (float) s / 32768; | ||
| 70 | + } | ||
| 71 | + | ||
| 72 | + vad.acceptWaveform(samples); | ||
| 73 | + if (vad.isSpeechDetected() && !printed) { | ||
| 74 | + System.out.println("Detected speech"); | ||
| 75 | + printed = true; | ||
| 76 | + } | ||
| 77 | + | ||
| 78 | + if (!vad.isSpeechDetected()) { | ||
| 79 | + printed = false; | ||
| 80 | + } | ||
| 81 | + | ||
| 82 | + while (!vad.empty()) { | ||
| 83 | + float[] segment = vad.front().getSamples(); | ||
| 84 | + float duration = segment.length / (float) sampleRate; | ||
| 85 | + System.out.printf("Duration: %.3f seconds\n", duration); | ||
| 86 | + | ||
| 87 | + String filename = String.format("seg-%d-%.3fs.wav", index, duration); | ||
| 88 | + index += 1; | ||
| 89 | + WaveWriter.write(filename, segment, sampleRate); | ||
| 90 | + System.out.printf("Saved to %s\n", filename); | ||
| 91 | + System.out.println("----------"); | ||
| 92 | + vad.pop(); | ||
| 93 | + } | ||
| 94 | + } | ||
| 95 | + | ||
| 96 | + vad.release(); | ||
| 97 | + } | ||
| 98 | +} |
java-api-examples/run-vad-from-mic.sh
0 → 100755
#!/usr/bin/env bash
#
# Builds the sherpa-onnx JNI library and the Java API jar if they are missing,
# downloads the silero VAD model if it is missing, and then runs
# VadFromMic.java.
#
# All paths are relative, so run this script from the directory that
# contains it.

set -ex

# Build the native JNI library only when neither the macOS (.dylib) nor the
# Linux (.so) artifact is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar when it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # error page as silero_vad.onnx, which would otherwise pass the check above
  # on every later run.
  curl --fail -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# $PWD is quoted so that a checkout path containing spaces is passed to the
# JVM as a single argument.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMic.java
-
请 注册 或 登录 后发表评论