Fangjun Kuang
Committed by GitHub

Add VAD + microphone example for Java API. (#1045)

@@ -57,6 +57,12 @@ The punctuation model supports both English and Chinese. @@ -57,6 +57,12 @@ The punctuation model supports both English and Chinese.
57 ./run-speaker-identification.sh 57 ./run-speaker-identification.sh
58 ``` 58 ```
59 59
  60 +## VAD with a microphone
  61 +
  62 +```bash
  63 +./run-vad-from-mic.sh
  64 +```
  65 +
60 ## VAD (Remove silence) 66 ## VAD (Remove silence)
61 67
62 ```bash 68 ```bash
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +// This file shows how to use a silero_vad model to detect speech captured
  4 +// from a microphone and save each detected speech segment into its own wave file.
  5 +
  6 +import com.k2fsa.sherpa.onnx.*;
  7 +import javax.sound.sampled.*;
  8 +
  9 +public class VadFromMic {
  10 + public static void main(String[] args) {
  11 + int sampleRate = 16000;
  12 + int windowSize = 512;
  13 + // please download ./silero_vad.onnx from
  14 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  15 + String model = "./silero_vad.onnx";
  16 + SileroVadModelConfig sileroVad =
  17 + SileroVadModelConfig.builder()
  18 + .setModel(model)
  19 + .setThreshold(0.5f)
  20 + .setMinSilenceDuration(0.25f)
  21 + .setMinSpeechDuration(0.5f)
  22 + .setWindowSize(windowSize)
  23 + .build();
  24 +
  25 + VadModelConfig config =
  26 + VadModelConfig.builder()
  27 + .setSileroVadModelConfig(sileroVad)
  28 + .setSampleRate(sampleRate)
  29 + .setNumThreads(1)
  30 + .setDebug(true)
  31 + .setProvider("cpu")
  32 + .build();
  33 +
  34 + Vad vad = new Vad(config);
  35 +
  36 + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
  37 + // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
  38 + AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
  39 +
  40 + // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
  41 + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
  42 + TargetDataLine targetDataLine;
  43 + try {
  44 + targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
  45 + targetDataLine.open(format);
  46 + targetDataLine.start();
  47 + } catch (LineUnavailableException e) {
  48 + System.out.println("Failed to open target data line: " + e.getMessage());
  49 + vad.release();
  50 + return;
  51 + }
  52 +
  53 + boolean printed = false;
  54 + int index = 0;
  55 +
  56 + byte[] buffer = new byte[windowSize * 2];
  57 + float[] samples = new float[windowSize];
  58 +
  59 + while (targetDataLine.isOpen()) {
  60 + int n = targetDataLine.read(buffer, 0, buffer.length);
  61 + if (n <= 0) {
  62 + System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
  63 + continue;
  64 + }
  65 + for (int i = 0; i != windowSize; ++i) {
  66 + short low = buffer[2 * i];
  67 + short high = buffer[2 * i + 1];
  68 + int s = (high << 8) + low;
  69 + samples[i] = (float) s / 32768;
  70 + }
  71 +
  72 + vad.acceptWaveform(samples);
  73 + if (vad.isSpeechDetected() && !printed) {
  74 + System.out.println("Detected speech");
  75 + printed = true;
  76 + }
  77 +
  78 + if (!vad.isSpeechDetected()) {
  79 + printed = false;
  80 + }
  81 +
  82 + while (!vad.empty()) {
  83 + float[] segment = vad.front().getSamples();
  84 + float duration = segment.length / (float) sampleRate;
  85 + System.out.printf("Duration: %.3f seconds\n", duration);
  86 +
  87 + String filename = String.format("seg-%d-%.3fs.wav", index, duration);
  88 + index += 1;
  89 + WaveWriter.write(filename, segment, sampleRate);
  90 + System.out.printf("Saved to %s\n", filename);
  91 + System.out.println("----------");
  92 + vad.pop();
  93 + }
  94 + }
  95 +
  96 + vad.release();
  97 + }
  98 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  6 + mkdir -p ../build
  7 + pushd ../build
  8 + cmake \
  9 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  10 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  11 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  12 + -DBUILD_SHARED_LIBS=ON \
  13 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  14 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  15 + ..
  16 +
  17 + make -j4
  18 + ls -lh lib
  19 + popd
  20 +fi
  21 +
  22 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  23 + pushd ../sherpa-onnx/java-api
  24 + make
  25 + popd
  26 +fi
  27 +
  28 +if [ ! -f ./silero_vad.onnx ]; then
  29 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  30 +fi
  31 +
  32 +java \
  33 + -Djava.library.path=$PWD/../build/lib \
  34 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  35 + ./VadFromMic.java