Committed by
GitHub
Add Java API for speech enhancement GTCRN models (#2009)
正在显示
10 个修改的文件
包含
294 行增加
和
0 行删除
| @@ -105,6 +105,15 @@ jobs: | @@ -105,6 +105,15 @@ jobs: | ||
| 105 | make -j4 | 105 | make -j4 |
| 106 | ls -lh lib | 106 | ls -lh lib |
| 107 | 107 | ||
| 108 | + - name: Run speech enhancement (GTCRN) | ||
| 109 | + shell: bash | ||
| 110 | + run: | | ||
| 111 | + cd ./java-api-examples | ||
| 112 | + ./run-non-streaming-speech-enhancement-gtcrn.sh | ||
| 113 | + ls -lh *.wav | ||
| 114 | + | ||
| 115 | + rm -fv gtcrn_simple.onnx *.wav | ||
| 116 | + | ||
| 108 | - name: Run java test (Online add punctuations) | 117 | - name: Run java test (Online add punctuations) |
| 109 | shell: bash | 118 | shell: bash |
| 110 | run: | | 119 | run: | |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use speech enhancement models in sherpa-onnx | ||
| 4 | +// | ||
| 5 | +// Please download the files used in this script from | ||
| 6 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models | ||
| 7 | + | ||
| 8 | +import com.k2fsa.sherpa.onnx.*; | ||
| 9 | + | ||
| 10 | +public class NonStreamingSpeechEnhancementGtcrn { | ||
| 11 | + public static void main(String[] args) { | ||
| 12 | + String model = "./gtcrn_simple.onnx"; | ||
| 13 | + OfflineSpeechDenoiserGtcrnModelConfig gtcrn = | ||
| 14 | + OfflineSpeechDenoiserGtcrnModelConfig.builder().setModel(model).build(); | ||
| 15 | + | ||
| 16 | + OfflineSpeechDenoiserModelConfig modelConfig = | ||
| 17 | + OfflineSpeechDenoiserModelConfig.builder() | ||
| 18 | + .setGtcrn(gtcrn) | ||
| 19 | + .setNumThreads(1) | ||
| 20 | + .setDebug(true) | ||
| 21 | + .setProvider("cpu") | ||
| 22 | + .build(); | ||
| 23 | + OfflineSpeechDenoiserConfig config = | ||
| 24 | + OfflineSpeechDenoiserConfig.builder().setModel(modelConfig).build(); | ||
| 25 | + | ||
| 26 | + OfflineSpeechDenoiser speech_denoiser = new OfflineSpeechDenoiser(config); | ||
| 27 | + | ||
| 28 | + String testWaveFilename = "./inp_16k.wav"; | ||
| 29 | + WaveReader reader = new WaveReader(testWaveFilename); | ||
| 30 | + | ||
| 31 | + DenoisedAudio denoised = speech_denoiser.run(reader.getSamples(), reader.getSampleRate()); | ||
| 32 | + String outFilename = "enhanced-16k.wav"; | ||
| 33 | + WaveWriter.write(outFilename, denoised.getSamples(), denoised.getSampleRate()); | ||
| 34 | + System.out.printf("Saved to %s\n", outFilename); | ||
| 35 | + | ||
| 36 | + speech_denoiser.release(); | ||
| 37 | + } | ||
| 38 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 6 | + mkdir -p ../build | ||
| 7 | + pushd ../build | ||
| 8 | + cmake \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 11 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 12 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 14 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 15 | + .. | ||
| 16 | + | ||
| 17 | + make -j4 | ||
| 18 | + ls -lh lib | ||
| 19 | + popd | ||
| 20 | +fi | ||
| 21 | + | ||
| 22 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 23 | + pushd ../sherpa-onnx/java-api | ||
| 24 | + make | ||
| 25 | + popd | ||
| 26 | +fi | ||
| 27 | + | ||
| 28 | +if [ ! -f ./gtcrn_simple.onnx ]; then | ||
| 29 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx | ||
| 30 | +fi | ||
| 31 | + | ||
| 32 | +if [ ! -f ./inp_16k.wav ]; then | ||
| 33 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav | ||
| 34 | +fi | ||
| 35 | + | ||
| 36 | +java \ | ||
| 37 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 38 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 39 | + NonStreamingSpeechEnhancementGtcrn.java |
| @@ -9,6 +9,15 @@ | @@ -9,6 +9,15 @@ | ||
| 9 | #include <utility> | 9 | #include <utility> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | +#if __ANDROID_API__ >= 9 | ||
| 13 | +#include "android/asset_manager.h" | ||
| 14 | +#include "android/asset_manager_jni.h" | ||
| 15 | +#endif | ||
| 16 | + | ||
| 17 | +#if __OHOS__ | ||
| 18 | +#include "rawfile/raw_file_manager.h" | ||
| 19 | +#endif | ||
| 20 | + | ||
| 12 | #include "sherpa-onnx/csrc/file-utils.h" | 21 | #include "sherpa-onnx/csrc/file-utils.h" |
| 13 | #include "sherpa-onnx/csrc/onnx-utils.h" | 22 | #include "sherpa-onnx/csrc/onnx-utils.h" |
| 14 | #include "sherpa-onnx/csrc/session.h" | 23 | #include "sherpa-onnx/csrc/session.h" |
| @@ -193,4 +202,14 @@ OfflineSpeechDenoiserGtcrnModel::GetMetaData() const { | @@ -193,4 +202,14 @@ OfflineSpeechDenoiserGtcrnModel::GetMetaData() const { | ||
| 193 | return impl_->GetMetaData(); | 202 | return impl_->GetMetaData(); |
| 194 | } | 203 | } |
| 195 | 204 | ||
| 205 | +#if __ANDROID_API__ >= 9 | ||
| 206 | +template OfflineSpeechDenoiserGtcrnModel::OfflineSpeechDenoiserGtcrnModel( | ||
| 207 | + AAssetManager *mgr, const OfflineSpeechDenoiserModelConfig &config); | ||
| 208 | +#endif | ||
| 209 | + | ||
| 210 | +#if __OHOS__ | ||
| 211 | +template OfflineSpeechDenoiserGtcrnModel::OfflineSpeechDenoiserGtcrnModel( | ||
| 212 | + NativeResourceManager *mgr, const OfflineSpeechDenoiserModelConfig &config); | ||
| 213 | +#endif | ||
| 214 | + | ||
| 196 | } // namespace sherpa_onnx | 215 | } // namespace sherpa_onnx |
| @@ -84,6 +84,11 @@ java_files += OfflineSpeakerDiarizationSegment.java | @@ -84,6 +84,11 @@ java_files += OfflineSpeakerDiarizationSegment.java | ||
| 84 | java_files += OfflineSpeakerDiarizationCallback.java | 84 | java_files += OfflineSpeakerDiarizationCallback.java |
| 85 | java_files += OfflineSpeakerDiarization.java | 85 | java_files += OfflineSpeakerDiarization.java |
| 86 | 86 | ||
| 87 | +java_files += OfflineSpeechDenoiserGtcrnModelConfig.java | ||
| 88 | +java_files += OfflineSpeechDenoiserModelConfig.java | ||
| 89 | +java_files += OfflineSpeechDenoiserConfig.java | ||
| 90 | +java_files += DenoisedAudio.java | ||
| 91 | +java_files += OfflineSpeechDenoiser.java | ||
| 87 | 92 | ||
| 88 | class_files := $(java_files:%.java=%.class) | 93 | class_files := $(java_files:%.java=%.class) |
| 89 | 94 |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class DenoisedAudio { | ||
| 6 | + static { | ||
| 7 | + System.loadLibrary("sherpa-onnx-jni"); | ||
| 8 | + } | ||
| 9 | + | ||
| 10 | + private final float[] samples; | ||
| 11 | + private final int sampleRate; | ||
| 12 | + | ||
| 13 | + public DenoisedAudio(float[] samples, int sampleRate) { | ||
| 14 | + this.samples = samples; | ||
| 15 | + this.sampleRate = sampleRate; | ||
| 16 | + } | ||
| 17 | + | ||
| 18 | + public int getSampleRate() { | ||
| 19 | + return sampleRate; | ||
| 20 | + } | ||
| 21 | + | ||
| 22 | + public float[] getSamples() { | ||
| 23 | + return samples; | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + // Returns true if the samples were saved to the given file successfully. | ||
| 27 | + public boolean save(String filename) { | ||
| 28 | + return saveImpl(filename, samples, sampleRate); | ||
| 29 | + } | ||
| 30 | + | ||
| 31 | + private native boolean saveImpl(String filename, float[] samples, int sampleRate); | ||
| 32 | +} |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class OfflineSpeechDenoiser { | ||
| 6 | + static { | ||
| 7 | + System.loadLibrary("sherpa-onnx-jni"); | ||
| 8 | + } | ||
| 9 | + | ||
| 10 | + private long ptr = 0; | ||
| 11 | + | ||
| 12 | + public OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config) { | ||
| 13 | + ptr = newFromFile(config); | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + public int getSampleRate() { | ||
| 17 | + return getSampleRate(ptr); | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + public DenoisedAudio run(float[] samples, int sampleRate) { | ||
| 21 | + return run(ptr, samples, sampleRate); | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + protected void finalize() throws Throwable { | ||
| 25 | + release(); | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + public void release() { | ||
| 29 | + if (this.ptr == 0) { | ||
| 30 | + return; | ||
| 31 | + } | ||
| 32 | + delete(this.ptr); | ||
| 33 | + this.ptr = 0; | ||
| 34 | + } | ||
| 35 | + | ||
| 36 | + private native void delete(long ptr); | ||
| 37 | + | ||
| 38 | + private native int getSampleRate(long ptr); | ||
| 39 | + | ||
| 40 | + private native DenoisedAudio run(long ptr, float[] samples, int sampleRate); | ||
| 41 | + | ||
| 42 | + private native long newFromFile(OfflineSpeechDenoiserConfig config); | ||
| 43 | +} |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | +package com.k2fsa.sherpa.onnx; | ||
| 3 | + | ||
| 4 | +public class OfflineSpeechDenoiserConfig { | ||
| 5 | + private final OfflineSpeechDenoiserModelConfig model; | ||
| 6 | + | ||
| 7 | + private OfflineSpeechDenoiserConfig(OfflineSpeechDenoiserConfig.Builder builder) { | ||
| 8 | + this.model = builder.model; | ||
| 9 | + } | ||
| 10 | + | ||
| 11 | + public static Builder builder() { | ||
| 12 | + return new Builder(); | ||
| 13 | + } | ||
| 14 | + | ||
| 15 | + public static class Builder { | ||
| 16 | + private OfflineSpeechDenoiserModelConfig model = OfflineSpeechDenoiserModelConfig.builder().build(); | ||
| 17 | + | ||
| 18 | + public OfflineSpeechDenoiserConfig build() { | ||
| 19 | + return new OfflineSpeechDenoiserConfig(this); | ||
| 20 | + } | ||
| 21 | + | ||
| 22 | + public Builder setModel(OfflineSpeechDenoiserModelConfig model) { | ||
| 23 | + this.model = model; | ||
| 24 | + return this; | ||
| 25 | + } | ||
| 26 | + } | ||
| 27 | +} |
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeechDenoiserGtcrnModelConfig.java
0 → 100644
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | +package com.k2fsa.sherpa.onnx; | ||
| 3 | + | ||
| 4 | +public class OfflineSpeechDenoiserGtcrnModelConfig { | ||
| 5 | + private final String model; | ||
| 6 | + | ||
| 7 | + private OfflineSpeechDenoiserGtcrnModelConfig(Builder builder) { | ||
| 8 | + this.model = builder.model; | ||
| 9 | + } | ||
| 10 | + | ||
| 11 | + public static Builder builder() { | ||
| 12 | + return new Builder(); | ||
| 13 | + } | ||
| 14 | + | ||
| 15 | + public String getModel() { | ||
| 16 | + return model; | ||
| 17 | + } | ||
| 18 | + | ||
| 19 | + public static class Builder { | ||
| 20 | + private String model = ""; | ||
| 21 | + | ||
| 22 | + public OfflineSpeechDenoiserGtcrnModelConfig build() { | ||
| 23 | + return new OfflineSpeechDenoiserGtcrnModelConfig(this); | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + public Builder setModel(String model) { | ||
| 27 | + this.model = model; | ||
| 28 | + return this; | ||
| 29 | + } | ||
| 30 | + } | ||
| 31 | +} |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | +package com.k2fsa.sherpa.onnx; | ||
| 3 | + | ||
| 4 | +public class OfflineSpeechDenoiserModelConfig { | ||
| 5 | + private final OfflineSpeechDenoiserGtcrnModelConfig gtcrn; | ||
| 6 | + private final int numThreads; | ||
| 7 | + private final boolean debug; | ||
| 8 | + private final String provider; | ||
| 9 | + | ||
| 10 | + private OfflineSpeechDenoiserModelConfig(Builder builder) { | ||
| 11 | + this.gtcrn = builder.gtcrn; | ||
| 12 | + this.numThreads = builder.numThreads; | ||
| 13 | + this.debug = builder.debug; | ||
| 14 | + this.provider = builder.provider; | ||
| 15 | + } | ||
| 16 | + | ||
| 17 | + public static Builder builder() { | ||
| 18 | + return new Builder(); | ||
| 19 | + } | ||
| 20 | + | ||
| 21 | + public static class Builder { | ||
| 22 | + private OfflineSpeechDenoiserGtcrnModelConfig gtcrn = OfflineSpeechDenoiserGtcrnModelConfig.builder().build(); | ||
| 23 | + private int numThreads = 1; | ||
| 24 | + private boolean debug = true; | ||
| 25 | + private String provider = "cpu"; | ||
| 26 | + | ||
| 27 | + public OfflineSpeechDenoiserModelConfig build() { | ||
| 28 | + return new OfflineSpeechDenoiserModelConfig(this); | ||
| 29 | + } | ||
| 30 | + | ||
| 31 | + public Builder setGtcrn(OfflineSpeechDenoiserGtcrnModelConfig gtcrn) { | ||
| 32 | + this.gtcrn = gtcrn; | ||
| 33 | + return this; | ||
| 34 | + } | ||
| 35 | + | ||
| 36 | + public Builder setNumThreads(int numThreads) { | ||
| 37 | + this.numThreads = numThreads; | ||
| 38 | + return this; | ||
| 39 | + } | ||
| 40 | + | ||
| 41 | + public Builder setDebug(boolean debug) { | ||
| 42 | + this.debug = debug; | ||
| 43 | + return this; | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + public Builder setProvider(String provider) { | ||
| 47 | + this.provider = provider; | ||
| 48 | + return this; | ||
| 49 | + } | ||
| 50 | + } | ||
| 51 | +} |
-
请 注册 或 登录 后发表评论