Fangjun Kuang
Committed by GitHub

Add C API for speech enhancement GTCRN models (#1984)

@@ -79,6 +79,40 @@ jobs: @@ -79,6 +79,40 @@ jobs:
79 otool -L ./install/lib/libsherpa-onnx-c-api.dylib 79 otool -L ./install/lib/libsherpa-onnx-c-api.dylib
80 fi 80 fi
81 81
  82 + - name: Test speech enhancement (GTCRN)
  83 + shell: bash
  84 + run: |
  85 + name=speech-enhancement-gtcrn-c-api
  86 + gcc -o $name ./c-api-examples/$name.c \
  87 + -I ./build/install/include \
  88 + -L ./build/install/lib/ \
  89 + -l sherpa-onnx-c-api \
  90 + -l onnxruntime
  91 +
  92 + ls -lh $name
  93 +
  94 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  95 + ldd ./$name
  96 + echo "----"
  97 + readelf -d ./$name
  98 + fi
  99 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
  100 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
  101 +
  102 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  103 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  104 +
  105 + ./$name
  106 + rm -fv *.onnx
  107 + mkdir denoised-wavs
  108 + cp -v inp_16k.wav denoised-wavs
  109 + cp -v enhanced_16k.wav denoised-wavs
  110 +
  111 + - uses: actions/upload-artifact@v4
  112 + with:
  113 + name: denoised-wavs-${{ matrix.os }}
  114 + path: ./denoised-wavs/*.wav
  115 +
82 - name: Test FireRedAsr 116 - name: Test FireRedAsr
83 shell: bash 117 shell: bash
84 run: | 118 run: |
@@ -7,6 +7,9 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) @@ -7,6 +7,9 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
7 add_executable(kws-c-api kws-c-api.c) 7 add_executable(kws-c-api kws-c-api.c)
8 target_link_libraries(kws-c-api sherpa-onnx-c-api) 8 target_link_libraries(kws-c-api sherpa-onnx-c-api)
9 9
  10 +add_executable(speech-enhancement-gtcrn-c-api speech-enhancement-gtcrn-c-api.c)
  11 +target_link_libraries(speech-enhancement-gtcrn-c-api sherpa-onnx-c-api)
  12 +
10 if(SHERPA_ONNX_ENABLE_TTS) 13 if(SHERPA_ONNX_ENABLE_TTS)
11 add_executable(offline-tts-c-api offline-tts-c-api.c) 14 add_executable(offline-tts-c-api offline-tts-c-api.c)
12 target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) 15 target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
  1 +// c-api-examples/speech-enhancement-gtcrn-c-api.c
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +//
  5 +// We assume you have pre-downloaded model
  6 +// from
  7 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  8 +//
  9 +//
  10 +// An example command to download
  11 +// clang-format off
  12 +/*
  13 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
  14 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
  15 +*/
  16 +// clang-format on
  17 +#include <stdio.h>
  18 +#include <string.h>
  19 +
  20 +#include "sherpa-onnx/c-api/c-api.h"
  21 +
  22 +int32_t main() {
  23 + SherpaOnnxOfflineSpeechDenoiserConfig config;
  24 + const char *wav_filename = "./inp_16k.wav";
  25 + const char *out_wave_filename = "./enhanced_16k.wav";
  26 +
  27 + memset(&config, 0, sizeof(config));
  28 + config.model.gtcrn.model = "./gtcrn_simple.onnx";
  29 +
  30 + const SherpaOnnxOfflineSpeechDenoiser *sd =
  31 + SherpaOnnxCreateOfflineSpeechDenoiser(&config);
  32 + if (!sd) {
  33 + fprintf(stderr, "Please check your config");
  34 + return -1;
  35 + }
  36 +
  37 + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  38 + if (wave == NULL) {
  39 + SherpaOnnxDestroyOfflineSpeechDenoiser(sd);
  40 + fprintf(stderr, "Failed to read %s\n", wav_filename);
  41 + return -1;
  42 + }
  43 +
  44 + const SherpaOnnxDenoisedAudio *denoised = SherpaOnnxOfflineSpeechDenoiserRun(
  45 + sd, wave->samples, wave->num_samples, wave->sample_rate);
  46 +
  47 + SherpaOnnxWriteWave(denoised->samples, denoised->n, denoised->sample_rate,
  48 + out_wave_filename);
  49 +
  50 + SherpaOnnxDestroyDenoisedAudio(denoised);
  51 + SherpaOnnxFreeWave(wave);
  52 + SherpaOnnxDestroyOfflineSpeechDenoiser(sd);
  53 +
  54 + fprintf(stdout, "Saved to %s\n", out_wave_filename);
  55 +}
@@ -24,6 +24,7 @@ @@ -24,6 +24,7 @@
24 #include "sherpa-onnx/csrc/macros.h" 24 #include "sherpa-onnx/csrc/macros.h"
25 #include "sherpa-onnx/csrc/offline-punctuation.h" 25 #include "sherpa-onnx/csrc/offline-punctuation.h"
26 #include "sherpa-onnx/csrc/offline-recognizer.h" 26 #include "sherpa-onnx/csrc/offline-recognizer.h"
  27 +#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
27 #include "sherpa-onnx/csrc/online-punctuation.h" 28 #include "sherpa-onnx/csrc/online-punctuation.h"
28 #include "sherpa-onnx/csrc/online-recognizer.h" 29 #include "sherpa-onnx/csrc/online-recognizer.h"
29 #include "sherpa-onnx/csrc/resample.h" 30 #include "sherpa-onnx/csrc/resample.h"
@@ -1967,6 +1968,77 @@ int32_t SherpaOnnxFileExists(const char *filename) { @@ -1967,6 +1968,77 @@ int32_t SherpaOnnxFileExists(const char *filename) {
1967 return sherpa_onnx::FileExists(filename); 1968 return sherpa_onnx::FileExists(filename);
1968 } 1969 }
1969 1970
  // Definition of the handle type that the C API exposes as
  // SherpaOnnxOfflineSpeechDenoiser. It owns the C++ denoiser, so deleting the
  // handle also destroys the implementation.
  struct SherpaOnnxOfflineSpeechDenoiser {
    std::unique_ptr<sherpa_onnx::OfflineSpeechDenoiser> impl;
  };
  1974 +
  1975 +static sherpa_onnx::OfflineSpeechDenoiserConfig GetOfflineSpeechDenoiserConfig(
  1976 + const SherpaOnnxOfflineSpeechDenoiserConfig *config) {
  1977 + sherpa_onnx::OfflineSpeechDenoiserConfig c;
  1978 + c.model.gtcrn.model = SHERPA_ONNX_OR(config->model.gtcrn.model, "");
  1979 + c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  1980 + c.model.debug = config->model.debug;
  1981 + c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  1982 +
  1983 + if (c.model.debug) {
  1984 +#if __OHOS__
  1985 + SHERPA_ONNX_LOGE("%{public}s\n", c.ToString().c_str());
  1986 +#else
  1987 + SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
  1988 +#endif
  1989 + }
  1990 +
  1991 + return c;
  1992 +}
  1993 +
  1994 +const SherpaOnnxOfflineSpeechDenoiser *SherpaOnnxCreateOfflineSpeechDenoiser(
  1995 + const SherpaOnnxOfflineSpeechDenoiserConfig *config) {
  1996 + auto sd_config = GetOfflineSpeechDenoiserConfig(config);
  1997 +
  1998 + if (!sd_config.Validate()) {
  1999 + SHERPA_ONNX_LOGE("Errors in config");
  2000 + return nullptr;
  2001 + }
  2002 +
  2003 + SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser;
  2004 +
  2005 + sd->impl = std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(sd_config);
  2006 +
  2007 + return sd;
  2008 +}
  2009 +
  // Frees a denoiser handle together with the implementation object it owns.
  //
  // @param sd Handle to free. Passing a null pointer is harmless because
  //           deleting a null pointer is a no-op.
  void SherpaOnnxDestroyOfflineSpeechDenoiser(
      const SherpaOnnxOfflineSpeechDenoiser *sd) {
    delete sd;
  }
  2014 +
  // Returns the sample rate reported by the underlying denoiser implementation.
  //
  // @param sd Denoiser handle; it must not be null because it is dereferenced
  //           unconditionally.
  int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(
      const SherpaOnnxOfflineSpeechDenoiser *sd) {
    return sd->impl->GetSampleRate();
  }
  2019 +
  2020 +const SherpaOnnxDenoisedAudio *SherpaOnnxOfflineSpeechDenoiserRun(
  2021 + const SherpaOnnxOfflineSpeechDenoiser *sd, const float *samples, int32_t n,
  2022 + int32_t sample_rate) {
  2023 + auto audio = sd->impl->Run(samples, n, sample_rate);
  2024 +
  2025 + auto ans = new SherpaOnnxDenoisedAudio;
  2026 +
  2027 + float *denoised_samples = new float[audio.samples.size()];
  2028 + std::copy(audio.samples.begin(), audio.samples.end(), denoised_samples);
  2029 +
  2030 + ans->samples = denoised_samples;
  2031 + ans->n = audio.samples.size();
  2032 + ans->sample_rate = audio.sample_rate;
  2033 +
  2034 + return ans;
  2035 +}
  2036 +
  2037 +void SherpaOnnxDestroyDenoisedAudio(const SherpaOnnxDenoisedAudio *p) {
  2038 + delete[] p->samples;
  2039 + delete p;
  2040 +}
  2041 +
1970 #if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1 2042 #if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
1971 2043
1972 struct SherpaOnnxOfflineSpeakerDiarization { 2044 struct SherpaOnnxOfflineSpeakerDiarization {
@@ -2244,6 +2316,19 @@ void SherpaOnnxOfflineSpeakerDiarizationDestroyResult( @@ -2244,6 +2316,19 @@ void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
2244 2316
2245 #ifdef __OHOS__ 2317 #ifdef __OHOS__
2246 2318
  2319 +const SherpaOnnxOfflineSpeechDenoiser *
  2320 +SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
  2321 + const SherpaOnnxOfflineSpeechDenoiserConfig *config,
  2322 + NativeResourceManager *mgr) {
  2323 + auto sd_config = GetOfflineSpeechDenoiserConfia(config);
  2324 +
  2325 + SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser;
  2326 +
  2327 + sd->impl = std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(sd_config);
  2328 +
  2329 + return sd;
  2330 +}
  2331 +
2247 const SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizerOHOS( 2332 const SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizerOHOS(
2248 const SherpaOnnxOnlineRecognizerConfig *config, 2333 const SherpaOnnxOnlineRecognizerConfig *config,
2249 NativeResourceManager *mgr) { 2334 NativeResourceManager *mgr) {
@@ -1639,11 +1639,72 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg( @@ -1639,11 +1639,72 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
1639 SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult( 1639 SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
1640 const SherpaOnnxOfflineSpeakerDiarizationResult *r); 1640 const SherpaOnnxOfflineSpeakerDiarizationResult *r);
1641 1641
  1642 +// =========================================================================
  1643 +// For offline speech enhancement
  1644 +// =========================================================================
  // Settings specific to a GTCRN speech enhancement model.
  SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig {
    // Location of the GTCRN model; the C example in this change sets it to
    // "./gtcrn_simple.onnx".
    const char *model;
  } SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig;
  1648 +
  // Model-level settings of the offline speech denoiser.
  SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserModelConfig {
    SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig gtcrn;
    // The C++ wrapper converts this with SHERPA_ONNX_OR(num_threads, 1) -
    // presumably 1 is used when the field is left at 0; confirm against the
    // macro.
    int32_t num_threads;
    int32_t debug; // true to print debug information of the model
    // The C++ wrapper converts this with SHERPA_ONNX_OR(provider, "cpu") -
    // presumably "cpu" is used when the field is left NULL; confirm against
    // the macro.
    const char *provider;
  } SherpaOnnxOfflineSpeechDenoiserModelConfig;
  1655 +
  // Top-level configuration passed to SherpaOnnxCreateOfflineSpeechDenoiser().
  SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserConfig {
    SherpaOnnxOfflineSpeechDenoiserModelConfig model;
  } SherpaOnnxOfflineSpeechDenoiserConfig;
  1659 +
  1660 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiser
  1661 + SherpaOnnxOfflineSpeechDenoiser;
  1662 +
  1663 +// The user has to invoke SherpaOnnxDestroyOfflineSpeechDenoiser()
  1664 +// to free the returned pointer to avoid memory leak
  1665 +SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser *
  1666 +SherpaOnnxCreateOfflineSpeechDenoiser(
  1667 + const SherpaOnnxOfflineSpeechDenoiserConfig *config);
  1668 +
  1669 +// Free the pointer returned by SherpaOnnxCreateOfflineSpeechDenoiser()
  1670 +SHERPA_ONNX_API void SherpaOnnxDestroyOfflineSpeechDenoiser(
  1671 + const SherpaOnnxOfflineSpeechDenoiser *sd);
  1672 +
  1673 +SHERPA_ONNX_API int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(
  1674 + const SherpaOnnxOfflineSpeechDenoiser *sd);
  1675 +
  // Result of SherpaOnnxOfflineSpeechDenoiserRun(). The sample buffer belongs
  // to this struct and is released by SherpaOnnxDestroyDenoisedAudio().
  SHERPA_ONNX_API typedef struct SherpaOnnxDenoisedAudio {
    const float *samples; // in the range [-1, 1]
    int32_t n; // number of samples
    int32_t sample_rate;
  } SherpaOnnxDenoisedAudio;
  1681 +
  1682 +// Run speech denoising on input samples
  1683 +// @param samples A 1-D array containing the input audio samples. Each sample
  1684 +// should be in the range [-1, 1].
  1685 +// @param n Number of samples
  1686 +// @param sample_rate Sample rate of the input samples
  1687 +//
  1688 +// The user MUST use SherpaOnnxDestroyDenoisedAudio() to free the returned
  1689 +// pointer to avoid memory leak.
  1690 +SHERPA_ONNX_API const SherpaOnnxDenoisedAudio *
  1691 +SherpaOnnxOfflineSpeechDenoiserRun(const SherpaOnnxOfflineSpeechDenoiser *sd,
  1692 + const float *samples, int32_t n,
  1693 + int32_t sample_rate);
  1694 +
  1695 +SHERPA_ONNX_API void SherpaOnnxDestroyDenoisedAudio(
  1696 + const SherpaOnnxDenoisedAudio *p);
  1697 +
1642 #ifdef __OHOS__ 1698 #ifdef __OHOS__
1643 1699
1644 // It is for HarmonyOS 1700 // It is for HarmonyOS
1645 typedef struct NativeResourceManager NativeResourceManager; 1701 typedef struct NativeResourceManager NativeResourceManager;
1646 1702
  1703 +SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser *
  1704 +SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
  1705 + const SherpaOnnxOfflineSpeechDenoiserConfig *config,
  1706 + NativeResourceManager *mgr);
  1707 +
1647 /// @param config Config for the recognizer. 1708 /// @param config Config for the recognizer.
1648 /// @return Return a pointer to the recognizer. The user has to invoke 1709 /// @return Return a pointer to the recognizer. The user has to invoke
1649 // SherpaOnnxDestroyOnlineRecognizer() to free it to avoid memory leak. 1710 // SherpaOnnxDestroyOnlineRecognizer() to free it to avoid memory leak.
@@ -33,7 +33,6 @@ class OfflineSpeechDenoiserGtcrnImpl : public OfflineSpeechDenoiserImpl { @@ -33,7 +33,6 @@ class OfflineSpeechDenoiserGtcrnImpl : public OfflineSpeechDenoiserImpl {
33 33
34 DenoisedAudio Run(const float *samples, int32_t n, 34 DenoisedAudio Run(const float *samples, int32_t n,
35 int32_t sample_rate) const override { 35 int32_t sample_rate) const override {
36 - SHERPA_ONNX_LOGE("n: %d, sample_rate: %d", n, sample_rate);  
37 const auto &meta = model_.GetMetaData(); 36 const auto &meta = model_.GetMetaData();
38 37
39 std::vector<float> tmp; 38 std::vector<float> tmp;