正在显示
14 个修改的文件
包含
604 行增加
和
0 行删除
| @@ -100,12 +100,32 @@ jobs: | @@ -100,12 +100,32 @@ jobs: | ||
| 100 | -DBUILD_SHARED_LIBS=ON \ | 100 | -DBUILD_SHARED_LIBS=ON \ |
| 101 | -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | 101 | -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ |
| 102 | -DSHERPA_ONNX_ENABLE_BINARY=OFF \ | 102 | -DSHERPA_ONNX_ENABLE_BINARY=OFF \ |
| 103 | + -DBUILD_ESPEAK_NG_EXE=OFF \ | ||
| 103 | -DSHERPA_ONNX_ENABLE_JNI=ON \ | 104 | -DSHERPA_ONNX_ENABLE_JNI=ON \ |
| 104 | .. | 105 | .. |
| 105 | 106 | ||
| 106 | make -j4 | 107 | make -j4 |
| 107 | ls -lh lib | 108 | ls -lh lib |
| 108 | 109 | ||
| 110 | + - name: Run java test (VAD + Non-streaming Paraformer) | ||
| 111 | + shell: bash | ||
| 112 | + run: | | ||
| 113 | + cd ./java-api-examples | ||
| 114 | + ./run-vad-non-streaming-paraformer.sh | ||
| 115 | + rm *.onnx | ||
| 116 | + ls -lh *.wav | ||
| 117 | + rm *.wav | ||
| 118 | + rm -rf sherpa-onnx-* | ||
| 119 | + | ||
| 120 | + - name: Run java test (VAD remove silence) | ||
| 121 | + shell: bash | ||
| 122 | + run: | | ||
| 123 | + cd ./java-api-examples | ||
| 124 | + ./run-vad-remove-slience.sh | ||
| 125 | + rm *.onnx | ||
| 126 | + ls -lh *.wav | ||
| 127 | + rm *.wav | ||
| 128 | + | ||
| 109 | - name: Run java test (speaker identification) | 129 | - name: Run java test (speaker identification) |
| 110 | shell: bash | 130 | shell: bash |
| 111 | run: | | 131 | run: | |
| @@ -56,3 +56,15 @@ The punctuation model supports both English and Chinese. | @@ -56,3 +56,15 @@ The punctuation model supports both English and Chinese. | ||
| 56 | ```bash | 56 | ```bash |
| 57 | ./run-speaker-identification.sh | 57 | ./run-speaker-identification.sh |
| 58 | ``` | 58 | ``` |
| 59 | + | ||
| 60 | +## VAD (Remove silence) | ||
| 61 | + | ||
| 62 | +```bash | ||
| 63 | +./run-vad-remove-slience.sh | ||
| 64 | +``` | ||
| 65 | + | ||
| 66 | +## VAD + Non-streaming Paraformer for speech recognition | ||
| 67 | + | ||
| 68 | +```bash | ||
| 69 | +./run-vad-non-streaming-paraformer.sh | ||
| 70 | +``` |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a silero_vad model with a non-streaming Paraformer | ||
| 4 | +// for speech recognition. | ||
| 5 | + | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | +import java.util.Arrays; | ||
| 8 | + | ||
| 9 | +public class VadNonStreamingParaformer { | ||
| 10 | + public static Vad createVad() { | ||
| 11 | + // please download ./silero_vad.onnx from | ||
| 12 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 13 | + String model = "./silero_vad.onnx"; | ||
| 14 | + SileroVadModelConfig sileroVad = | ||
| 15 | + SileroVadModelConfig.builder() | ||
| 16 | + .setModel(model) | ||
| 17 | + .setThreshold(0.5f) | ||
| 18 | + .setMinSilenceDuration(0.25f) | ||
| 19 | + .setMinSpeechDuration(0.5f) | ||
| 20 | + .setWindowSize(512) | ||
| 21 | + .build(); | ||
| 22 | + | ||
| 23 | + VadModelConfig config = | ||
| 24 | + VadModelConfig.builder() | ||
| 25 | + .setSileroVadModelConfig(sileroVad) | ||
| 26 | + .setSampleRate(16000) | ||
| 27 | + .setNumThreads(1) | ||
| 28 | + .setDebug(true) | ||
| 29 | + .setProvider("cpu") | ||
| 30 | + .build(); | ||
| 31 | + | ||
| 32 | + return new Vad(config); | ||
| 33 | + } | ||
| 34 | + | ||
| 35 | + public static OfflineRecognizer createOfflineRecognizer() { | ||
| 36 | + // please refer to | ||
| 37 | + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese-english | ||
| 38 | + // to download model files | ||
| 39 | + String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx"; | ||
| 40 | + String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt"; | ||
| 41 | + | ||
| 42 | + String waveFilename = "./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/3-sichuan.wav"; | ||
| 43 | + | ||
| 44 | + WaveReader reader = new WaveReader(waveFilename); | ||
| 45 | + | ||
| 46 | + OfflineParaformerModelConfig paraformer = | ||
| 47 | + OfflineParaformerModelConfig.builder().setModel(model).build(); | ||
| 48 | + | ||
| 49 | + OfflineModelConfig modelConfig = | ||
| 50 | + OfflineModelConfig.builder() | ||
| 51 | + .setParaformer(paraformer) | ||
| 52 | + .setTokens(tokens) | ||
| 53 | + .setNumThreads(1) | ||
| 54 | + .setDebug(true) | ||
| 55 | + .build(); | ||
| 56 | + | ||
| 57 | + OfflineRecognizerConfig config = | ||
| 58 | + OfflineRecognizerConfig.builder() | ||
| 59 | + .setOfflineModelConfig(modelConfig) | ||
| 60 | + .setDecodingMethod("greedy_search") | ||
| 61 | + .build(); | ||
| 62 | + | ||
| 63 | + return new OfflineRecognizer(config); | ||
| 64 | + } | ||
| 65 | + | ||
| 66 | + public static void main(String[] args) { | ||
| 67 | + | ||
| 68 | + Vad vad = createVad(); | ||
| 69 | + OfflineRecognizer recognizer = createOfflineRecognizer(); | ||
| 70 | + | ||
| 71 | + // You can download the test file from | ||
| 72 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 73 | + String testWaveFilename = "./lei-jun-test.wav"; | ||
| 74 | + WaveReader reader = new WaveReader(testWaveFilename); | ||
| 75 | + | ||
| 76 | + int numSamples = reader.getSamples().length; | ||
| 77 | + int numIter = numSamples / 512; | ||
| 78 | + | ||
| 79 | + for (int i = 0; i != numIter; ++i) { | ||
| 80 | + int start = i * 512; | ||
| 81 | + int end = start + 512; | ||
| 82 | + float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end); | ||
| 83 | + vad.acceptWaveform(samples); | ||
| 84 | + if (vad.isSpeechDetected()) { | ||
| 85 | + while (!vad.empty()) { | ||
| 86 | + SpeechSegment segment = vad.front(); | ||
| 87 | + float startTime = segment.getStart() / 16000.0f; | ||
| 88 | + float duration = segment.getSamples().length / 16000.0f; | ||
| 89 | + | ||
| 90 | + OfflineStream stream = recognizer.createStream(); | ||
| 91 | + stream.acceptWaveform(segment.getSamples(), 16000); | ||
| 92 | + recognizer.decode(stream); | ||
| 93 | + String text = recognizer.getResult(stream).getText(); | ||
| 94 | + | ||
| 95 | + if (!text.isEmpty()) { | ||
| 96 | + System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text); | ||
| 97 | + } | ||
| 98 | + | ||
| 99 | + vad.pop(); | ||
| 100 | + } | ||
| 101 | + } | ||
| 102 | + } | ||
| 103 | + } | ||
| 104 | +} |
java-api-examples/VadRemoveSilence.java
0 → 100644
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a silero_vad model to remove silences from | ||
| 4 | +// a wave file. | ||
| 5 | + | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | +import java.util.ArrayList; | ||
| 8 | +import java.util.Arrays; | ||
| 9 | + | ||
| 10 | +public class VadRemoveSilence { | ||
| 11 | + public static void main(String[] args) { | ||
| 12 | + // please download ./silero_vad.onnx from | ||
| 13 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 14 | + String model = "./silero_vad.onnx"; | ||
| 15 | + SileroVadModelConfig sileroVad = | ||
| 16 | + SileroVadModelConfig.builder() | ||
| 17 | + .setModel(model) | ||
| 18 | + .setThreshold(0.5f) | ||
| 19 | + .setMinSilenceDuration(0.25f) | ||
| 20 | + .setMinSpeechDuration(0.5f) | ||
| 21 | + .setWindowSize(512) | ||
| 22 | + .build(); | ||
| 23 | + | ||
| 24 | + VadModelConfig config = | ||
| 25 | + VadModelConfig.builder() | ||
| 26 | + .setSileroVadModelConfig(sileroVad) | ||
| 27 | + .setSampleRate(16000) | ||
| 28 | + .setNumThreads(1) | ||
| 29 | + .setDebug(true) | ||
| 30 | + .setProvider("cpu") | ||
| 31 | + .build(); | ||
| 32 | + | ||
| 33 | + Vad vad = new Vad(config); | ||
| 34 | + | ||
| 35 | + // You can download the test file from | ||
| 36 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 37 | + String testWaveFilename = "./lei-jun-test.wav"; | ||
| 38 | + WaveReader reader = new WaveReader(testWaveFilename); | ||
| 39 | + | ||
| 40 | + int numSamples = reader.getSamples().length; | ||
| 41 | + int numIter = numSamples / 512; | ||
| 42 | + | ||
| 43 | + ArrayList<float[]> segments = new ArrayList<float[]>(); | ||
| 44 | + | ||
| 45 | + for (int i = 0; i != numIter; ++i) { | ||
| 46 | + int start = i * 512; | ||
| 47 | + int end = start + 512; | ||
| 48 | + float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end); | ||
| 49 | + vad.acceptWaveform(samples); | ||
| 50 | + if (vad.isSpeechDetected()) { | ||
| 51 | + while (!vad.empty()) { | ||
| 52 | + | ||
| 53 | + // if you want to get the starting time of this segment, you can use | ||
| 54 | + /* float startTime = vad.front().getStart() / 16000.0f; */ | ||
| 55 | + | ||
| 56 | + segments.add(vad.front().getSamples()); | ||
| 57 | + vad.pop(); | ||
| 58 | + } | ||
| 59 | + } | ||
| 60 | + } | ||
| 61 | + | ||
| 62 | + // get total number of samples | ||
| 63 | + int n = 0; | ||
| 64 | + for (float[] s : segments) { | ||
| 65 | + n += s.length; | ||
| 66 | + } | ||
| 67 | + | ||
| 68 | + float[] allSamples = new float[n]; | ||
| 69 | + int i = 0; | ||
| 70 | + for (float[] s : segments) { | ||
| 71 | + System.arraycopy(s, 0, allSamples, i, s.length); | ||
| 72 | + i += s.length; | ||
| 73 | + } | ||
| 74 | + | ||
| 75 | + String outFilename = "lei-jun-test-no-silence.wav"; | ||
| 76 | + WaveWriter.write(outFilename, allSamples, 16000); | ||
| 77 | + System.out.printf("Saved to %s\n", outFilename); | ||
| 78 | + } | ||
| 79 | +} |
#!/usr/bin/env bash
#
# Builds whatever is missing (JNI library, Java API jar), downloads the models
# and the test wave, and then runs VadNonStreamingParaformer.java.
#
# All paths are relative, so run this script from the java-api-examples
# directory.

# -e: stop at the first failing command; -x: echo every command.
set -ex

# Build the JNI shared library unless a macOS (.dylib) or Linux (.so) build
# already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download the VAD model.
if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# Download the test wave file.
if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

# Download and unpack the Paraformer model; the archive is removed afterwards.
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi

# Quote the library path so that a checkout directory containing whitespace
# does not split the -D option into several arguments.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadNonStreamingParaformer.java
java-api-examples/run-vad-remove-slience.sh
0 → 100755
#!/usr/bin/env bash
#
# Builds whatever is missing (JNI library, Java API jar), downloads the VAD
# model and the test wave, and then runs VadRemoveSilence.java.
#
# All paths are relative, so run this script from the java-api-examples
# directory.

# -e: stop at the first failing command; -x: echo every command.
set -ex

# Build the JNI shared library unless a macOS (.dylib) or Linux (.so) build
# already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download the VAD model.
if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# Download the test wave file.
if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

# Quote the library path so that a checkout directory containing whitespace
# does not split the -D option into several arguments.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadRemoveSilence.java
| @@ -7,6 +7,7 @@ out_jar := $(out_dir)/sherpa-onnx.jar | @@ -7,6 +7,7 @@ out_jar := $(out_dir)/sherpa-onnx.jar | ||
| 7 | package_dir := com/k2fsa/sherpa/onnx | 7 | package_dir := com/k2fsa/sherpa/onnx |
| 8 | 8 | ||
| 9 | java_files := WaveReader.java | 9 | java_files := WaveReader.java |
| 10 | +java_files += WaveWriter.java | ||
| 10 | java_files += EndpointRule.java | 11 | java_files += EndpointRule.java |
| 11 | java_files += EndpointConfig.java | 12 | java_files += EndpointConfig.java |
| 12 | java_files += FeatureConfig.java | 13 | java_files += FeatureConfig.java |
| @@ -56,6 +57,11 @@ java_files += SpeakerEmbeddingExtractorConfig.java | @@ -56,6 +57,11 @@ java_files += SpeakerEmbeddingExtractorConfig.java | ||
| 56 | java_files += SpeakerEmbeddingExtractor.java | 57 | java_files += SpeakerEmbeddingExtractor.java |
| 57 | java_files += SpeakerEmbeddingManager.java | 58 | java_files += SpeakerEmbeddingManager.java |
| 58 | 59 | ||
| 60 | +java_files += SileroVadModelConfig.java | ||
| 61 | +java_files += VadModelConfig.java | ||
| 62 | +java_files += SpeechSegment.java | ||
| 63 | +java_files += Vad.java | ||
| 64 | + | ||
| 59 | class_files := $(java_files:%.java=%.class) | 65 | class_files := $(java_files:%.java=%.class) |
| 60 | 66 | ||
| 61 | java_files := $(addprefix src/$(package_dir)/,$(java_files)) | 67 | java_files := $(addprefix src/$(package_dir)/,$(java_files)) |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
/**
 * Immutable settings of a silero VAD model. Instances are obtained from a {@link Builder},
 * either via {@link #builder()} or by instantiating the builder directly.
 */
public class SileroVadModelConfig {
  private final String model;
  private final float threshold;
  private final float minSilenceDuration;
  private final float minSpeechDuration;
  private final int windowSize;

  /** Copies the current values of {@code source} into the new, immutable config. */
  private SileroVadModelConfig(Builder source) {
    model = source.model;
    threshold = source.threshold;
    minSilenceDuration = source.minSilenceDuration;
    minSpeechDuration = source.minSpeechDuration;
    windowSize = source.windowSize;
  }

  /** Returns a fresh builder pre-populated with the default values. */
  public static Builder builder() {
    return new Builder();
  }

  /** Returns the model path given to the builder (empty string by default). */
  public String getModel() {
    return model;
  }

  /** Returns the configured threshold (0.5 by default). */
  public float getThreshold() {
    return threshold;
  }

  /** Returns the minimum silence duration (0.25 by default; presumably seconds - confirm). */
  public float getMinSilenceDuration() {
    return minSilenceDuration;
  }

  /** Returns the minimum speech duration (0.5 by default; presumably seconds - confirm). */
  public float getMinSpeechDuration() {
    return minSpeechDuration;
  }

  /** Returns the window size (512 by default; presumably a sample count - confirm). */
  public int getWindowSize() {
    return windowSize;
  }

  /** Fluent builder for {@link SileroVadModelConfig}; every setter returns the builder itself. */
  public static class Builder {
    private String model = "";
    private float threshold = 0.5f;
    private float minSilenceDuration = 0.25f;
    private float minSpeechDuration = 0.5f;
    private int windowSize = 512;

    /** Sets the model path. */
    public Builder setModel(String model) {
      this.model = model;
      return this;
    }

    /** Sets the threshold. */
    public Builder setThreshold(float threshold) {
      this.threshold = threshold;
      return this;
    }

    /** Sets the minimum silence duration. */
    public Builder setMinSilenceDuration(float minSilenceDuration) {
      this.minSilenceDuration = minSilenceDuration;
      return this;
    }

    /** Sets the minimum speech duration. */
    public Builder setMinSpeechDuration(float minSpeechDuration) {
      this.minSpeechDuration = minSpeechDuration;
      return this;
    }

    /** Sets the window size. */
    public Builder setWindowSize(int windowSize) {
      this.windowSize = windowSize;
      return this;
    }

    /** Creates a config holding a snapshot of the values currently set on this builder. */
    public SileroVadModelConfig build() {
      return new SileroVadModelConfig(this);
    }
  }
}
| 1 | +package com.k2fsa.sherpa.onnx; | ||
| 2 | + | ||
/**
 * A speech segment: a start position together with the samples of the segment.
 *
 * <p>The sample array is stored and returned by reference; it is never copied.
 */
public class SpeechSegment {

  private final int start;
  private final float[] samples;

  /**
   * @param start start position of the segment (presumably a sample index - confirm against the
   *     code producing the segment)
   * @param samples samples of the segment; the array is kept as is, not copied
   */
  public SpeechSegment(int start, float[] samples) {
    this.samples = samples;
    this.start = start;
  }

  /** Returns the array passed to the constructor (same reference). */
  public float[] getSamples() {
    return samples;
  }

  /** Returns the start position passed to the constructor. */
  public int getStart() {
    return start;
  }
}
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class Vad { | ||
| 6 | + static { | ||
| 7 | + System.loadLibrary("sherpa-onnx-jni"); | ||
| 8 | + } | ||
| 9 | + | ||
| 10 | + private long ptr = 0; | ||
| 11 | + | ||
| 12 | + public Vad(VadModelConfig config) { | ||
| 13 | + ptr = newFromFile(config); | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + @Override | ||
| 17 | + protected void finalize() throws Throwable { | ||
| 18 | + release(); | ||
| 19 | + } | ||
| 20 | + | ||
| 21 | + public void release() { | ||
| 22 | + if (this.ptr == 0) { | ||
| 23 | + return; | ||
| 24 | + } | ||
| 25 | + delete(this.ptr); | ||
| 26 | + this.ptr = 0; | ||
| 27 | + } | ||
| 28 | + | ||
| 29 | + public void acceptWaveform(float[] samples) { | ||
| 30 | + acceptWaveform(this.ptr, samples); | ||
| 31 | + } | ||
| 32 | + | ||
| 33 | + public boolean empty() { | ||
| 34 | + return empty(this.ptr); | ||
| 35 | + } | ||
| 36 | + | ||
| 37 | + public void pop() { | ||
| 38 | + pop(this.ptr); | ||
| 39 | + } | ||
| 40 | + | ||
| 41 | + public void clear() { | ||
| 42 | + clear(this.ptr); | ||
| 43 | + } | ||
| 44 | + | ||
| 45 | + public void reset() { | ||
| 46 | + reset(this.ptr); | ||
| 47 | + } | ||
| 48 | + | ||
| 49 | + public SpeechSegment front() { | ||
| 50 | + Object[] arr = front(this.ptr); | ||
| 51 | + int start = (int) arr[0]; | ||
| 52 | + float[] samples = (float[]) arr[1]; | ||
| 53 | + | ||
| 54 | + return new SpeechSegment(start, samples); | ||
| 55 | + } | ||
| 56 | + | ||
| 57 | + public boolean isSpeechDetected() { | ||
| 58 | + return isSpeechDetected(this.ptr); | ||
| 59 | + } | ||
| 60 | + | ||
| 61 | + private native void delete(long ptr); | ||
| 62 | + | ||
| 63 | + private native long newFromFile(VadModelConfig config); | ||
| 64 | + | ||
| 65 | + private native void acceptWaveform(long ptr, float[] samples); | ||
| 66 | + | ||
| 67 | + private native boolean empty(long ptr); | ||
| 68 | + | ||
| 69 | + private native void pop(long ptr); | ||
| 70 | + | ||
| 71 | + private native void clear(long ptr); | ||
| 72 | + | ||
| 73 | + private native Object[] front(long ptr); | ||
| 74 | + | ||
| 75 | + private native boolean isSpeechDetected(long ptr); | ||
| 76 | + | ||
| 77 | + private native void reset(long ptr); | ||
| 78 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class VadModelConfig { | ||
| 6 | + private final SileroVadModelConfig sileroVadModelConfig; | ||
| 7 | + private final int sampleRate; | ||
| 8 | + private final int numThreads; | ||
| 9 | + private final boolean debug; | ||
| 10 | + private final String provider; | ||
| 11 | + | ||
| 12 | + private VadModelConfig(Builder builder) { | ||
| 13 | + this.sileroVadModelConfig = builder.sileroVadModelConfig; | ||
| 14 | + this.sampleRate = builder.sampleRate; | ||
| 15 | + this.numThreads = builder.numThreads; | ||
| 16 | + this.debug = builder.debug; | ||
| 17 | + this.provider = builder.provider; | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + public static Builder builder() { | ||
| 21 | + return new Builder(); | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + public SileroVadModelConfig getSileroVadModelConfig() { | ||
| 25 | + return sileroVadModelConfig; | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + public int getSampleRate() { | ||
| 29 | + return sampleRate; | ||
| 30 | + } | ||
| 31 | + | ||
| 32 | + public int getNumThreads() { | ||
| 33 | + return numThreads; | ||
| 34 | + } | ||
| 35 | + | ||
| 36 | + public String getProvider() { | ||
| 37 | + return provider; | ||
| 38 | + } | ||
| 39 | + | ||
| 40 | + public boolean getDebug() { | ||
| 41 | + return debug; | ||
| 42 | + } | ||
| 43 | + | ||
| 44 | + public static class Builder { | ||
| 45 | + private SileroVadModelConfig sileroVadModelConfig = new SileroVadModelConfig.Builder().build(); | ||
| 46 | + private int sampleRate = 16000; | ||
| 47 | + private int numThreads = 1; | ||
| 48 | + private boolean debug = true; | ||
| 49 | + private String provider = "cpu"; | ||
| 50 | + | ||
| 51 | + public VadModelConfig build() { | ||
| 52 | + return new VadModelConfig(this); | ||
| 53 | + } | ||
| 54 | + | ||
| 55 | + public Builder setSileroVadModelConfig(SileroVadModelConfig sileroVadModelConfig) { | ||
| 56 | + this.sileroVadModelConfig = sileroVadModelConfig; | ||
| 57 | + return this; | ||
| 58 | + } | ||
| 59 | + | ||
| 60 | + public Builder setSampleRate(int sampleRate) { | ||
| 61 | + this.sampleRate = sampleRate; | ||
| 62 | + return this; | ||
| 63 | + } | ||
| 64 | + | ||
| 65 | + public Builder setNumThreads(int numThreads) { | ||
| 66 | + this.numThreads = numThreads; | ||
| 67 | + return this; | ||
| 68 | + } | ||
| 69 | + | ||
| 70 | + public Builder setDebug(boolean debug) { | ||
| 71 | + this.debug = debug; | ||
| 72 | + return this; | ||
| 73 | + } | ||
| 74 | + | ||
| 75 | + public Builder setProvider(String provider) { | ||
| 76 | + this.provider = provider; | ||
| 77 | + return this; | ||
| 78 | + } | ||
| 79 | + } | ||
| 80 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
/**
 * Writes float samples to a wave file through the native {@code sherpa-onnx-jni} library.
 */
public class WaveWriter {
  static {
    // Load the JNI library here so that write() also works when no other class of this
    // package has loaded it yet; otherwise the native call fails with UnsatisfiedLinkError.
    System.loadLibrary("sherpa-onnx-jni");
  }

  public WaveWriter() {
  }

  /**
   * Writes {@code samples} to {@code filename}.
   *
   * @param filename path of the wave file to create
   * @param samples samples to write
   * @param sampleRate sample rate passed on to the native writer
   * @return the boolean reported by the native writer; the examples treat {@code true} as success
   */
  public static boolean write(String filename, float[] samples, int sampleRate) {
    WaveWriter w = new WaveWriter();
    return w.writeWaveToFile(filename, samples, sampleRate);
  }

  // Implemented in sherpa-onnx/jni/wave-writer.cc.
  private native boolean writeWaveToFile(String filename, float[] samples, int sampleRate);
}
| @@ -24,6 +24,7 @@ set(sources | @@ -24,6 +24,7 @@ set(sources | ||
| 24 | spoken-language-identification.cc | 24 | spoken-language-identification.cc |
| 25 | voice-activity-detector.cc | 25 | voice-activity-detector.cc |
| 26 | wave-reader.cc | 26 | wave-reader.cc |
| 27 | + wave-writer.cc | ||
| 27 | ) | 28 | ) |
| 28 | 29 | ||
| 29 | if(SHERPA_ONNX_ENABLE_TTS) | 30 | if(SHERPA_ONNX_ENABLE_TTS) |
sherpa-onnx/jni/wave-writer.cc
0 → 100644
| 1 | +// sherpa-onnx/jni/wave-writer.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 4 | +#include "sherpa-onnx/csrc/wave-writer.h" | ||
| 5 | + | ||
| 6 | +#include "sherpa-onnx/jni/common.h" | ||
| 7 | + | ||
| 8 | +SHERPA_ONNX_EXTERN_C | ||
| 9 | +JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_WaveWriter_writeWaveToFile( | ||
| 10 | + JNIEnv *env, jclass /*obj*/, jstring filename, jfloatArray samples, | ||
| 11 | + jint sample_rate) { | ||
| 12 | + jfloat *p = env->GetFloatArrayElements(samples, nullptr); | ||
| 13 | + jsize n = env->GetArrayLength(samples); | ||
| 14 | + | ||
| 15 | + const char *p_filename = env->GetStringUTFChars(filename, nullptr); | ||
| 16 | + | ||
| 17 | + bool ok = sherpa_onnx::WriteWave(p_filename, sample_rate, p, n); | ||
| 18 | + | ||
| 19 | + env->ReleaseFloatArrayElements(samples, p, JNI_ABORT); | ||
| 20 | + env->ReleaseStringUTFChars(filename, p_filename); | ||
| 21 | + | ||
| 22 | + return ok; | ||
| 23 | +} |
-
请 注册 或 登录 后发表评论