Committed by
GitHub
Add C API for speech enhancement GTCRN models (#1984)
正在显示
6 个修改的文件
包含
238 行增加
和
1 行删除
| @@ -79,6 +79,40 @@ jobs: | @@ -79,6 +79,40 @@ jobs: | ||
| 79 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib | 79 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib |
| 80 | fi | 80 | fi |
| 81 | 81 | ||
| 82 | + - name: Test speech enhancement (GTCRN) | ||
| 83 | + shell: bash | ||
| 84 | + run: | | ||
| 85 | + name=speech-enhancement-gtcrn-c-api | ||
| 86 | + gcc -o $name ./c-api-examples/$name.c \ | ||
| 87 | + -I ./build/install/include \ | ||
| 88 | + -L ./build/install/lib/ \ | ||
| 89 | + -l sherpa-onnx-c-api \ | ||
| 90 | + -l onnxruntime | ||
| 91 | + | ||
| 92 | + ls -lh $name | ||
| 93 | + | ||
| 94 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 95 | + ldd ./$name | ||
| 96 | + echo "----" | ||
| 97 | + readelf -d ./$name | ||
| 98 | + fi | ||
| 99 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx | ||
| 100 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav | ||
| 101 | + | ||
| 102 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 103 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 104 | + | ||
| 105 | + ./$name | ||
| 106 | + rm -fv *.onnx | ||
| 107 | + mkdir denoised-wavs | ||
| 108 | + cp -v inp_16k.wav denoised-wavs | ||
| 109 | + cp -v enhanced_16k.wav denoised-wavs | ||
| 110 | + | ||
| 111 | + - uses: actions/upload-artifact@v4 | ||
| 112 | + with: | ||
| 113 | + name: denoised-wavs-${{ matrix.os }} | ||
| 114 | + path: ./denoised-wavs/*.wav | ||
| 115 | + | ||
| 82 | - name: Test FireRedAsr | 116 | - name: Test FireRedAsr |
| 83 | shell: bash | 117 | shell: bash |
| 84 | run: | | 118 | run: | |
| @@ -7,6 +7,9 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) | @@ -7,6 +7,9 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) | ||
| 7 | add_executable(kws-c-api kws-c-api.c) | 7 | add_executable(kws-c-api kws-c-api.c) |
| 8 | target_link_libraries(kws-c-api sherpa-onnx-c-api) | 8 | target_link_libraries(kws-c-api sherpa-onnx-c-api) |
| 9 | 9 | ||
| 10 | +add_executable(speech-enhancement-gtcrn-c-api speech-enhancement-gtcrn-c-api.c) | ||
| 11 | +target_link_libraries(speech-enhancement-gtcrn-c-api sherpa-onnx-c-api) | ||
| 12 | + | ||
| 10 | if(SHERPA_ONNX_ENABLE_TTS) | 13 | if(SHERPA_ONNX_ENABLE_TTS) |
| 11 | add_executable(offline-tts-c-api offline-tts-c-api.c) | 14 | add_executable(offline-tts-c-api offline-tts-c-api.c) |
| 12 | target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) | 15 | target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) |
| 1 | +// c-api-examples/speech-enhancement-gtcrn-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | +// | ||
| 5 | +// We assume you have pre-downloaded the model | ||
| 6 | +// from | ||
| 7 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models | ||
| 8 | +// | ||
| 9 | +// | ||
| 10 | +// An example command to download | ||
| 11 | +// clang-format off | ||
| 12 | +/* | ||
| 13 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx | ||
| 14 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav | ||
| 15 | +*/ | ||
| 16 | +// clang-format on | ||
| 17 | +#include <stdio.h> | ||
| 18 | +#include <string.h> | ||
| 19 | + | ||
| 20 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 21 | + | ||
| 22 | +int32_t main() { | ||
| 23 | + SherpaOnnxOfflineSpeechDenoiserConfig config; | ||
| 24 | + const char *wav_filename = "./inp_16k.wav"; | ||
| 25 | + const char *out_wave_filename = "./enhanced_16k.wav"; | ||
| 26 | + | ||
| 27 | + memset(&config, 0, sizeof(config)); | ||
| 28 | + config.model.gtcrn.model = "./gtcrn_simple.onnx"; | ||
| 29 | + | ||
| 30 | + const SherpaOnnxOfflineSpeechDenoiser *sd = | ||
| 31 | + SherpaOnnxCreateOfflineSpeechDenoiser(&config); | ||
| 32 | + if (!sd) { | ||
| 33 | + fprintf(stderr, "Please check your config"); | ||
| 34 | + return -1; | ||
| 35 | + } | ||
| 36 | + | ||
| 37 | + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | ||
| 38 | + if (wave == NULL) { | ||
| 39 | + SherpaOnnxDestroyOfflineSpeechDenoiser(sd); | ||
| 40 | + fprintf(stderr, "Failed to read %s\n", wav_filename); | ||
| 41 | + return -1; | ||
| 42 | + } | ||
| 43 | + | ||
| 44 | + const SherpaOnnxDenoisedAudio *denoised = SherpaOnnxOfflineSpeechDenoiserRun( | ||
| 45 | + sd, wave->samples, wave->num_samples, wave->sample_rate); | ||
| 46 | + | ||
| 47 | + SherpaOnnxWriteWave(denoised->samples, denoised->n, denoised->sample_rate, | ||
| 48 | + out_wave_filename); | ||
| 49 | + | ||
| 50 | + SherpaOnnxDestroyDenoisedAudio(denoised); | ||
| 51 | + SherpaOnnxFreeWave(wave); | ||
| 52 | + SherpaOnnxDestroyOfflineSpeechDenoiser(sd); | ||
| 53 | + | ||
| 54 | + fprintf(stdout, "Saved to %s\n", out_wave_filename); | ||
| 55 | +} |
| @@ -24,6 +24,7 @@ | @@ -24,6 +24,7 @@ | ||
| 24 | #include "sherpa-onnx/csrc/macros.h" | 24 | #include "sherpa-onnx/csrc/macros.h" |
| 25 | #include "sherpa-onnx/csrc/offline-punctuation.h" | 25 | #include "sherpa-onnx/csrc/offline-punctuation.h" |
| 26 | #include "sherpa-onnx/csrc/offline-recognizer.h" | 26 | #include "sherpa-onnx/csrc/offline-recognizer.h" |
| 27 | +#include "sherpa-onnx/csrc/offline-speech-denoiser.h" | ||
| 27 | #include "sherpa-onnx/csrc/online-punctuation.h" | 28 | #include "sherpa-onnx/csrc/online-punctuation.h" |
| 28 | #include "sherpa-onnx/csrc/online-recognizer.h" | 29 | #include "sherpa-onnx/csrc/online-recognizer.h" |
| 29 | #include "sherpa-onnx/csrc/resample.h" | 30 | #include "sherpa-onnx/csrc/resample.h" |
| @@ -1967,6 +1968,77 @@ int32_t SherpaOnnxFileExists(const char *filename) { | @@ -1967,6 +1968,77 @@ int32_t SherpaOnnxFileExists(const char *filename) { | ||
| 1967 | return sherpa_onnx::FileExists(filename); | 1968 | return sherpa_onnx::FileExists(filename); |
| 1968 | } | 1969 | } |
| 1969 | 1970 | ||
| 1971 | +struct SherpaOnnxOfflineSpeechDenoiser { | ||
| 1972 | + std::unique_ptr<sherpa_onnx::OfflineSpeechDenoiser> impl; | ||
| 1973 | +}; | ||
| 1974 | + | ||
| 1975 | +static sherpa_onnx::OfflineSpeechDenoiserConfig GetOfflineSpeechDenoiserConfig( | ||
| 1976 | + const SherpaOnnxOfflineSpeechDenoiserConfig *config) { | ||
| 1977 | + sherpa_onnx::OfflineSpeechDenoiserConfig c; | ||
| 1978 | + c.model.gtcrn.model = SHERPA_ONNX_OR(config->model.gtcrn.model, ""); | ||
| 1979 | + c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); | ||
| 1980 | + c.model.debug = config->model.debug; | ||
| 1981 | + c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); | ||
| 1982 | + | ||
| 1983 | + if (c.model.debug) { | ||
| 1984 | +#if __OHOS__ | ||
| 1985 | + SHERPA_ONNX_LOGE("%{public}s\n", c.ToString().c_str()); | ||
| 1986 | +#else | ||
| 1987 | + SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); | ||
| 1988 | +#endif | ||
| 1989 | + } | ||
| 1990 | + | ||
| 1991 | + return c; | ||
| 1992 | +} | ||
| 1993 | + | ||
| 1994 | +const SherpaOnnxOfflineSpeechDenoiser *SherpaOnnxCreateOfflineSpeechDenoiser( | ||
| 1995 | + const SherpaOnnxOfflineSpeechDenoiserConfig *config) { | ||
| 1996 | + auto sd_config = GetOfflineSpeechDenoiserConfig(config); | ||
| 1997 | + | ||
| 1998 | + if (!sd_config.Validate()) { | ||
| 1999 | + SHERPA_ONNX_LOGE("Errors in config"); | ||
| 2000 | + return nullptr; | ||
| 2001 | + } | ||
| 2002 | + | ||
| 2003 | + SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser; | ||
| 2004 | + | ||
| 2005 | + sd->impl = std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(sd_config); | ||
| 2006 | + | ||
| 2007 | + return sd; | ||
| 2008 | +} | ||
| 2009 | + | ||
| 2010 | +void SherpaOnnxDestroyOfflineSpeechDenoiser( | ||
| 2011 | + const SherpaOnnxOfflineSpeechDenoiser *sd) { | ||
| 2012 | + delete sd; | ||
| 2013 | +} | ||
| 2014 | + | ||
| 2015 | +int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate( | ||
| 2016 | + const SherpaOnnxOfflineSpeechDenoiser *sd) { | ||
| 2017 | + return sd->impl->GetSampleRate(); | ||
| 2018 | +} | ||
| 2019 | + | ||
| 2020 | +const SherpaOnnxDenoisedAudio *SherpaOnnxOfflineSpeechDenoiserRun( | ||
| 2021 | + const SherpaOnnxOfflineSpeechDenoiser *sd, const float *samples, int32_t n, | ||
| 2022 | + int32_t sample_rate) { | ||
| 2023 | + auto audio = sd->impl->Run(samples, n, sample_rate); | ||
| 2024 | + | ||
| 2025 | + auto ans = new SherpaOnnxDenoisedAudio; | ||
| 2026 | + | ||
| 2027 | + float *denoised_samples = new float[audio.samples.size()]; | ||
| 2028 | + std::copy(audio.samples.begin(), audio.samples.end(), denoised_samples); | ||
| 2029 | + | ||
| 2030 | + ans->samples = denoised_samples; | ||
| 2031 | + ans->n = audio.samples.size(); | ||
| 2032 | + ans->sample_rate = audio.sample_rate; | ||
| 2033 | + | ||
| 2034 | + return ans; | ||
| 2035 | +} | ||
| 2036 | + | ||
| 2037 | +void SherpaOnnxDestroyDenoisedAudio(const SherpaOnnxDenoisedAudio *p) { | ||
| 2038 | + delete[] p->samples; | ||
| 2039 | + delete p; | ||
| 2040 | +} | ||
| 2041 | + | ||
| 1970 | #if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1 | 2042 | #if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1 |
| 1971 | 2043 | ||
| 1972 | struct SherpaOnnxOfflineSpeakerDiarization { | 2044 | struct SherpaOnnxOfflineSpeakerDiarization { |
| @@ -2244,6 +2316,19 @@ void SherpaOnnxOfflineSpeakerDiarizationDestroyResult( | @@ -2244,6 +2316,19 @@ void SherpaOnnxOfflineSpeakerDiarizationDestroyResult( | ||
| 2244 | 2316 | ||
| 2245 | #ifdef __OHOS__ | 2317 | #ifdef __OHOS__ |
| 2246 | 2318 | ||
| 2319 | +const SherpaOnnxOfflineSpeechDenoiser * | ||
| 2320 | +SherpaOnnxCreateOfflineSpeechDenoiserOHOS( | ||
| 2321 | + const SherpaOnnxOfflineSpeechDenoiserConfig *config, | ||
| 2322 | + NativeResourceManager *mgr) { | ||
| 2323 | + auto sd_config = GetOfflineSpeechDenoiserConfig(config); | ||
| 2324 | + | ||
| 2325 | + SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser; | ||
| 2326 | + | ||
| 2327 | + sd->impl = std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(sd_config); | ||
| 2328 | + | ||
| 2329 | + return sd; | ||
| 2330 | +} | ||
| 2331 | + | ||
| 2247 | const SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizerOHOS( | 2332 | const SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizerOHOS( |
| 2248 | const SherpaOnnxOnlineRecognizerConfig *config, | 2333 | const SherpaOnnxOnlineRecognizerConfig *config, |
| 2249 | NativeResourceManager *mgr) { | 2334 | NativeResourceManager *mgr) { |
| @@ -1639,11 +1639,72 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg( | @@ -1639,11 +1639,72 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg( | ||
| 1639 | SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult( | 1639 | SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult( |
| 1640 | const SherpaOnnxOfflineSpeakerDiarizationResult *r); | 1640 | const SherpaOnnxOfflineSpeakerDiarizationResult *r); |
| 1641 | 1641 | ||
| 1642 | +// ========================================================================= | ||
| 1643 | +// For offline speech enhancement | ||
| 1644 | +// ========================================================================= | ||
| 1645 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig { | ||
| 1646 | + const char *model; | ||
| 1647 | +} SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig; | ||
| 1648 | + | ||
| 1649 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserModelConfig { | ||
| 1650 | + SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig gtcrn; | ||
| 1651 | + int32_t num_threads; | ||
| 1652 | + int32_t debug; // true to print debug information of the model | ||
| 1653 | + const char *provider; | ||
| 1654 | +} SherpaOnnxOfflineSpeechDenoiserModelConfig; | ||
| 1655 | + | ||
| 1656 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserConfig { | ||
| 1657 | + SherpaOnnxOfflineSpeechDenoiserModelConfig model; | ||
| 1658 | +} SherpaOnnxOfflineSpeechDenoiserConfig; | ||
| 1659 | + | ||
| 1660 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiser | ||
| 1661 | + SherpaOnnxOfflineSpeechDenoiser; | ||
| 1662 | + | ||
| 1663 | +// The user has to invoke SherpaOnnxDestroyOfflineSpeechDenoiser() | ||
| 1664 | +// to free the returned pointer to avoid memory leak | ||
| 1665 | +SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser * | ||
| 1666 | +SherpaOnnxCreateOfflineSpeechDenoiser( | ||
| 1667 | + const SherpaOnnxOfflineSpeechDenoiserConfig *config); | ||
| 1668 | + | ||
| 1669 | +// Free the pointer returned by SherpaOnnxCreateOfflineSpeechDenoiser() | ||
| 1670 | +SHERPA_ONNX_API void SherpaOnnxDestroyOfflineSpeechDenoiser( | ||
| 1671 | + const SherpaOnnxOfflineSpeechDenoiser *sd); | ||
| 1672 | + | ||
| 1673 | +SHERPA_ONNX_API int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate( | ||
| 1674 | + const SherpaOnnxOfflineSpeechDenoiser *sd); | ||
| 1675 | + | ||
| 1676 | +SHERPA_ONNX_API typedef struct SherpaOnnxDenoisedAudio { | ||
| 1677 | + const float *samples; // in the range [-1, 1] | ||
| 1678 | + int32_t n; // number of samples | ||
| 1679 | + int32_t sample_rate; | ||
| 1680 | +} SherpaOnnxDenoisedAudio; | ||
| 1681 | + | ||
| 1682 | +// Run speech denoising on input samples | ||
| 1683 | +// @param samples A 1-D array containing the input audio samples. Each sample | ||
| 1684 | +// should be in the range [-1, 1]. | ||
| 1685 | +// @param n Number of samples | ||
| 1686 | +// @param sample_rate Sample rate of the input samples | ||
| 1687 | +// | ||
| 1688 | +// The user MUST use SherpaOnnxDestroyDenoisedAudio() to free the returned | ||
| 1689 | +// pointer to avoid memory leak. | ||
| 1690 | +SHERPA_ONNX_API const SherpaOnnxDenoisedAudio * | ||
| 1691 | +SherpaOnnxOfflineSpeechDenoiserRun(const SherpaOnnxOfflineSpeechDenoiser *sd, | ||
| 1692 | + const float *samples, int32_t n, | ||
| 1693 | + int32_t sample_rate); | ||
| 1694 | + | ||
| 1695 | +SHERPA_ONNX_API void SherpaOnnxDestroyDenoisedAudio( | ||
| 1696 | + const SherpaOnnxDenoisedAudio *p); | ||
| 1697 | + | ||
| 1642 | #ifdef __OHOS__ | 1698 | #ifdef __OHOS__ |
| 1643 | 1699 | ||
| 1644 | // It is for HarmonyOS | 1700 | // It is for HarmonyOS |
| 1645 | typedef struct NativeResourceManager NativeResourceManager; | 1701 | typedef struct NativeResourceManager NativeResourceManager; |
| 1646 | 1702 | ||
| 1703 | +SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser * | ||
| 1704 | +SherpaOnnxCreateOfflineSpeechDenoiserOHOS( | ||
| 1705 | + const SherpaOnnxOfflineSpeechDenoiserConfig *config, | ||
| 1706 | + NativeResourceManager *mgr); | ||
| 1707 | + | ||
| 1647 | /// @param config Config for the recognizer. | 1708 | /// @param config Config for the recognizer. |
| 1648 | /// @return Return a pointer to the recognizer. The user has to invoke | 1709 | /// @return Return a pointer to the recognizer. The user has to invoke |
| 1649 | // SherpaOnnxDestroyOnlineRecognizer() to free it to avoid memory leak. | 1710 | // SherpaOnnxDestroyOnlineRecognizer() to free it to avoid memory leak. |
| @@ -33,7 +33,6 @@ class OfflineSpeechDenoiserGtcrnImpl : public OfflineSpeechDenoiserImpl { | @@ -33,7 +33,6 @@ class OfflineSpeechDenoiserGtcrnImpl : public OfflineSpeechDenoiserImpl { | ||
| 33 | 33 | ||
| 34 | DenoisedAudio Run(const float *samples, int32_t n, | 34 | DenoisedAudio Run(const float *samples, int32_t n, |
| 35 | int32_t sample_rate) const override { | 35 | int32_t sample_rate) const override { |
| 36 | - SHERPA_ONNX_LOGE("n: %d, sample_rate: %d", n, sample_rate); | ||
| 37 | const auto &meta = model_.GetMetaData(); | 36 | const auto &meta = model_.GetMetaData(); |
| 38 | 37 | ||
| 39 | std::vector<float> tmp; | 38 | std::vector<float> tmp; |
-
请 注册 或 登录 后发表评论