正在显示
35 个修改的文件
包含
961 行增加
和
95 行删除
#!/usr/bin/env bash
#
# CI smoke test for the sherpa-onnx-offline-source-separation binary.
#
# It downloads the spleeter (2 stems, fp16) and UVR models plus one test wave
# file, runs the binary once per model, and finally collects the input and all
# generated wave files into ./source-separation-wavs.
#
# Environment:
#   EXE  Executable to run. Defaults to
#        ./build/bin/sherpa-onnx-offline-source-separation when unset or empty.

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# The expansion is quoted so that the test always receives exactly one operand,
# whatever EXE contains.
if [ -z "${EXE:-}" ]; then
  EXE=./build/bin/sherpa-onnx-offline-source-separation
fi

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Run spleeter"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/sherpa-onnx-spleeter-2stems-fp16.tar.bz2
tar xvf sherpa-onnx-spleeter-2stems-fp16.tar.bz2
rm sherpa-onnx-spleeter-2stems-fp16.tar.bz2

# The same input file is used by both the spleeter and the UVR run below.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav

$EXE \
  --spleeter-vocals=sherpa-onnx-spleeter-2stems-fp16/vocals.fp16.onnx \
  --spleeter-accompaniment=sherpa-onnx-spleeter-2stems-fp16/accompaniment.fp16.onnx \
  --num-threads=2 \
  --debug=1 \
  --input-wav=./qi-feng-le-zh.wav \
  --output-vocals-wav=spleeter_output_vocals.wav \
  --output-accompaniment-wav=spleeter_output_accompaniment.wav

rm -rf sherpa-onnx-spleeter-2stems-fp16

log "------------------------------------------------------------"
log "Run UVR"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR-MDX-NET-Voc_FT.onnx

$EXE \
  --debug=1 \
  --num-threads=2 \
  --uvr-model=./UVR-MDX-NET-Voc_FT.onnx \
  --input-wav=./qi-feng-le-zh.wav \
  --output-vocals-wav=uvr_output_vocals.wav \
  --output-accompaniment-wav=uvr_output_non_vocals.wav

# No trailing line continuation here: a dangling backslash would turn the next
# non-empty line into extra arguments of rm.
rm ./UVR-MDX-NET-Voc_FT.onnx

# -p keeps a re-run in the same directory from aborting under `set -e`.
mkdir -p source-separation-wavs
mv qi-feng-le-zh.wav source-separation-wavs
mv spleeter_*.wav ./source-separation-wavs
mv uvr_*.wav ./source-separation-wavs
| @@ -11,6 +11,7 @@ on: | @@ -11,6 +11,7 @@ on: | ||
| 11 | - '.github/scripts/test-kws.sh' | 11 | - '.github/scripts/test-kws.sh' |
| 12 | - '.github/scripts/test-online-transducer.sh' | 12 | - '.github/scripts/test-online-transducer.sh' |
| 13 | - '.github/scripts/test-offline-speech-denoiser.sh' | 13 | - '.github/scripts/test-offline-speech-denoiser.sh' |
| 14 | + - '.github/scripts/test-offline-source-separation.sh' | ||
| 14 | - '.github/scripts/test-online-paraformer.sh' | 15 | - '.github/scripts/test-online-paraformer.sh' |
| 15 | - '.github/scripts/test-offline-transducer.sh' | 16 | - '.github/scripts/test-offline-transducer.sh' |
| 16 | - '.github/scripts/test-offline-ctc.sh' | 17 | - '.github/scripts/test-offline-ctc.sh' |
| @@ -33,6 +34,7 @@ on: | @@ -33,6 +34,7 @@ on: | ||
| 33 | - '.github/workflows/linux.yaml' | 34 | - '.github/workflows/linux.yaml' |
| 34 | - '.github/scripts/test-kws.sh' | 35 | - '.github/scripts/test-kws.sh' |
| 35 | - '.github/scripts/test-offline-speech-denoiser.sh' | 36 | - '.github/scripts/test-offline-speech-denoiser.sh' |
| 37 | + - '.github/scripts/test-offline-source-separation.sh' | ||
| 36 | - '.github/scripts/test-online-transducer.sh' | 38 | - '.github/scripts/test-online-transducer.sh' |
| 37 | - '.github/scripts/test-online-paraformer.sh' | 39 | - '.github/scripts/test-online-paraformer.sh' |
| 38 | - '.github/scripts/test-offline-transducer.sh' | 40 | - '.github/scripts/test-offline-transducer.sh' |
| @@ -205,6 +207,20 @@ jobs: | @@ -205,6 +207,20 @@ jobs: | ||
| 205 | overwrite: true | 207 | overwrite: true |
| 206 | file: sherpa-onnx-*.tar.bz2 | 208 | file: sherpa-onnx-*.tar.bz2 |
| 207 | 209 | ||
| 210 | + - name: Test offline source separation | ||
| 211 | + shell: bash | ||
| 212 | + run: | | ||
| 213 | + du -h -d1 . | ||
| 214 | + export PATH=$PWD/build/bin:$PATH | ||
| 215 | + export EXE=sherpa-onnx-offline-source-separation | ||
| 216 | + | ||
| 217 | + .github/scripts/test-offline-source-separation.sh | ||
| 218 | + | ||
| 219 | + - uses: actions/upload-artifact@v4 | ||
| 220 | + with: | ||
| 221 | + name: source-separation-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} | ||
| 222 | + path: ./source-separation-wavs/*.wav | ||
| 223 | + | ||
| 208 | - name: Test offline CTC | 224 | - name: Test offline CTC |
| 209 | shell: bash | 225 | shell: bash |
| 210 | run: | | 226 | run: | |
| 1 | ### Supported functions | 1 | ### Supported functions |
| 2 | 2 | ||
| 3 | -|Speech recognition| Speech synthesis | | ||
| 4 | -|------------------|------------------| | ||
| 5 | -| ✔️ | ✔️ | | 3 | +|Speech recognition| Speech synthesis | Source separation | |
| 4 | +|------------------|------------------|-------------------| | ||
| 5 | +| ✔️ | ✔️ | ✔️ | | ||
| 6 | 6 | ||
| 7 | |Speaker identification| Speaker diarization | Speaker verification | | 7 | |Speaker identification| Speaker diarization | Speaker verification | |
| 8 | |----------------------|-------------------- |------------------------| | 8 | |----------------------|-------------------- |------------------------| |
| @@ -16,6 +16,7 @@ | @@ -16,6 +16,7 @@ | ||
| 16 | |------------------|-----------------|--------------------| | 16 | |------------------|-----------------|--------------------| |
| 17 | | ✔️ | ✔️ | ✔️ | | 17 | | ✔️ | ✔️ | ✔️ | |
| 18 | 18 | ||
| 19 | + | ||
| 19 | ### Supported platforms | 20 | ### Supported platforms |
| 20 | 21 | ||
| 21 | |Architecture| Android | iOS | Windows | macOS | linux | HarmonyOS | | 22 | |Architecture| Android | iOS | Windows | macOS | linux | HarmonyOS | |
| @@ -56,7 +57,9 @@ This repository supports running the following functions **locally** | @@ -56,7 +57,9 @@ This repository supports running the following functions **locally** | ||
| 56 | - Spoken language identification | 57 | - Spoken language identification |
| 57 | - Audio tagging | 58 | - Audio tagging |
| 58 | - VAD (e.g., [silero-vad][silero-vad]) | 59 | - VAD (e.g., [silero-vad][silero-vad]) |
| 60 | + - Speech enhancement (e.g., [gtcrn][gtcrn]) | ||
| 59 | - Keyword spotting | 61 | - Keyword spotting |
| 62 | + - Source separation (e.g., [spleeter][spleeter], [UVR][UVR]) | ||
| 60 | 63 | ||
| 61 | on the following platforms and operating systems: | 64 | on the following platforms and operating systems: |
| 62 | 65 | ||
| @@ -75,6 +78,7 @@ on the following platforms and operating systems: | @@ -75,6 +78,7 @@ on the following platforms and operating systems: | ||
| 75 | - [VisionFive 2][VisionFive 2] | 78 | - [VisionFive 2][VisionFive 2] |
| 76 | - [旭日X3派][旭日X3派] | 79 | - [旭日X3派][旭日X3派] |
| 77 | - [爱芯派][爱芯派] | 80 | - [爱芯派][爱芯派] |
| 81 | + - [RK3588][RK3588] | ||
| 78 | - etc | 82 | - etc |
| 79 | 83 | ||
| 80 | with the following APIs | 84 | with the following APIs |
| @@ -200,6 +204,7 @@ We also have spaces built using WebAssembly. They are listed below: | @@ -200,6 +204,7 @@ We also have spaces built using WebAssembly. They are listed below: | ||
| 200 | | Punctuation | [Address][punct-models] | | 204 | | Punctuation | [Address][punct-models] | |
| 201 | | Speaker segmentation | [Address][speaker-segmentation-models] | | 205 | | Speaker segmentation | [Address][speaker-segmentation-models] | |
| 202 | | Speech enhancement | [Address][speech-enhancement-models] | | 206 | | Speech enhancement | [Address][speech-enhancement-models] | |
| 207 | +| Source separation | [Address][source-separation-models] | | ||
| 203 | 208 | ||
| 204 | </details> | 209 | </details> |
| 205 | 210 | ||
| @@ -481,3 +486,8 @@ It uses sherpa-onnx for speech-to-text and text-to-speech. | @@ -481,3 +486,8 @@ It uses sherpa-onnx for speech-to-text and text-to-speech. | ||
| 481 | [NVIDIA Jetson Orin NX]: https://developer.download.nvidia.com/assets/embedded/secure/jetson/orin_nx/docs/Jetson_Orin_NX_DS-10712-001_v0.5.pdf?RCPGu9Q6OVAOv7a7vgtwc9-BLScXRIWq6cSLuditMALECJ_dOj27DgnqAPGVnT2VpiNpQan9SyFy-9zRykR58CokzbXwjSA7Gj819e91AXPrWkGZR3oS1VLxiDEpJa_Y0lr7UT-N4GnXtb8NlUkP4GkCkkF_FQivGPrAucCUywL481GH_WpP_p7ziHU1Wg==&t=eyJscyI6ImdzZW8iLCJsc2QiOiJodHRwczovL3d3dy5nb29nbGUuY29tLmhrLyJ9 | 486 | [NVIDIA Jetson Orin NX]: https://developer.download.nvidia.com/assets/embedded/secure/jetson/orin_nx/docs/Jetson_Orin_NX_DS-10712-001_v0.5.pdf?RCPGu9Q6OVAOv7a7vgtwc9-BLScXRIWq6cSLuditMALECJ_dOj27DgnqAPGVnT2VpiNpQan9SyFy-9zRykR58CokzbXwjSA7Gj819e91AXPrWkGZR3oS1VLxiDEpJa_Y0lr7UT-N4GnXtb8NlUkP4GkCkkF_FQivGPrAucCUywL481GH_WpP_p7ziHU1Wg==&t=eyJscyI6ImdzZW8iLCJsc2QiOiJodHRwczovL3d3dy5nb29nbGUuY29tLmhrLyJ9 |
| 482 | [NVIDIA Jetson Nano B01]: https://www.seeedstudio.com/blog/2020/01/16/new-revision-of-jetson-nano-dev-kit-now-supports-new-jetson-nano-module/ | 487 | [NVIDIA Jetson Nano B01]: https://www.seeedstudio.com/blog/2020/01/16/new-revision-of-jetson-nano-dev-kit-now-supports-new-jetson-nano-module/ |
| 483 | [speech-enhancement-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models | 488 | [speech-enhancement-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models |
| 489 | +[source-separation-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/source-separation-models | ||
| 490 | +[RK3588]: https://www.rock-chips.com/uploads/pdf/2022.8.26/192/RK3588%20Brief%20Datasheet.pdf | ||
| 491 | +[spleeter]: https://github.com/deezer/spleeter | ||
| 492 | +[UVR]: https://github.com/Anjok07/ultimatevocalremovergui | ||
| 493 | +[gtcrn]: https://github.com/Xiaobin-Rong/gtcrn |
| @@ -136,7 +136,7 @@ int32_t main() { | @@ -136,7 +136,7 @@ int32_t main() { | ||
| 136 | fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate); | 136 | fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate); |
| 137 | mic_sample_rate = atof(sample_rate_str); | 137 | mic_sample_rate = atof(sample_rate_str); |
| 138 | } | 138 | } |
| 139 | - if(!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, | 139 | + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, |
| 140 | nullptr) == false) { | 140 | nullptr) == false) { |
| 141 | std::cerr << "Failed to open microphone device\n"; | 141 | std::cerr << "Failed to open microphone device\n"; |
| 142 | return -1; | 142 | return -1; |
| @@ -24,7 +24,7 @@ | @@ -24,7 +24,7 @@ | ||
| 24 | #include <iostream> | 24 | #include <iostream> |
| 25 | #include <mutex> // NOLINT | 25 | #include <mutex> // NOLINT |
| 26 | #include <queue> | 26 | #include <queue> |
| 27 | -#include <thread> | 27 | +#include <thread> // NOLINT |
| 28 | #include <vector> | 28 | #include <vector> |
| 29 | 29 | ||
| 30 | #include "portaudio.h" // NOLINT | 30 | #include "portaudio.h" // NOLINT |
| 1 | +// cxx-api-examples/sherpa-display.cc | ||
| 2 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 1 | #pragma once | 3 | #pragma once |
| 2 | 4 | ||
| 3 | #include <stdlib.h> | 5 | #include <stdlib.h> |
| @@ -6,6 +8,8 @@ | @@ -6,6 +8,8 @@ | ||
| 6 | #include <iomanip> | 8 | #include <iomanip> |
| 7 | #include <sstream> | 9 | #include <sstream> |
| 8 | #include <string> | 10 | #include <string> |
| 11 | +#include <utility> | ||
| 12 | +#include <vector> | ||
| 9 | 13 | ||
| 10 | namespace sherpa_onnx::cxx { | 14 | namespace sherpa_onnx::cxx { |
| 11 | 15 |
| @@ -159,14 +159,15 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( | @@ -159,14 +159,15 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( | ||
| 159 | recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, ""); | 159 | recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, ""); |
| 160 | 160 | ||
| 161 | if (config->model_config.debug) { | 161 | if (config->model_config.debug) { |
| 162 | +#if __OHOS__ | ||
| 162 | auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128); | 163 | auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128); |
| 163 | for (const auto &s : str_vec) { | 164 | for (const auto &s : str_vec) { |
| 164 | -#if __OHOS__ | ||
| 165 | SHERPA_ONNX_LOGE("%{public}s\n", s.c_str()); | 165 | SHERPA_ONNX_LOGE("%{public}s\n", s.c_str()); |
| 166 | -#else | ||
| 167 | SHERPA_ONNX_LOGE("%s\n", s.c_str()); | 166 | SHERPA_ONNX_LOGE("%s\n", s.c_str()); |
| 168 | -#endif | ||
| 169 | } | 167 | } |
| 168 | +#else | ||
| 169 | + SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str()); | ||
| 170 | +#endif | ||
| 170 | } | 171 | } |
| 171 | 172 | ||
| 172 | return recognizer_config; | 173 | return recognizer_config; |
| @@ -507,14 +508,15 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( | @@ -507,14 +508,15 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( | ||
| 507 | recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, ""); | 508 | recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, ""); |
| 508 | 509 | ||
| 509 | if (config->model_config.debug) { | 510 | if (config->model_config.debug) { |
| 511 | +#if __OHOS__ | ||
| 510 | auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128); | 512 | auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128); |
| 511 | for (const auto &s : str_vec) { | 513 | for (const auto &s : str_vec) { |
| 512 | -#if __OHOS__ | ||
| 513 | SHERPA_ONNX_LOGE("%{public}s\n", s.c_str()); | 514 | SHERPA_ONNX_LOGE("%{public}s\n", s.c_str()); |
| 514 | -#else | ||
| 515 | SHERPA_ONNX_LOGE("%s\n", s.c_str()); | 515 | SHERPA_ONNX_LOGE("%s\n", s.c_str()); |
| 516 | -#endif | ||
| 517 | } | 516 | } |
| 517 | +#else | ||
| 518 | + SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str()); | ||
| 519 | +#endif | ||
| 518 | } | 520 | } |
| 519 | 521 | ||
| 520 | return recognizer_config; | 522 | return recognizer_config; |
| @@ -55,6 +55,8 @@ set(sources | @@ -55,6 +55,8 @@ set(sources | ||
| 55 | offline-source-separation-model-config.cc | 55 | offline-source-separation-model-config.cc |
| 56 | offline-source-separation-spleeter-model-config.cc | 56 | offline-source-separation-spleeter-model-config.cc |
| 57 | offline-source-separation-spleeter-model.cc | 57 | offline-source-separation-spleeter-model.cc |
| 58 | + offline-source-separation-uvr-model-config.cc | ||
| 59 | + offline-source-separation-uvr-model.cc | ||
| 58 | offline-source-separation.cc | 60 | offline-source-separation.cc |
| 59 | 61 | ||
| 60 | offline-stream.cc | 62 | offline-stream.cc |
| @@ -25,9 +25,7 @@ Microphone::~Microphone() { | @@ -25,9 +25,7 @@ Microphone::~Microphone() { | ||
| 25 | } | 25 | } |
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | -int Microphone::GetDeviceCount() const { | ||
| 29 | - return Pa_GetDeviceCount(); | ||
| 30 | -} | 28 | +int Microphone::GetDeviceCount() const { return Pa_GetDeviceCount(); } |
| 31 | 29 | ||
| 32 | int Microphone::GetDefaultInputDevice() const { | 30 | int Microphone::GetDefaultInputDevice() const { |
| 33 | return Pa_GetDefaultInputDevice(); | 31 | return Pa_GetDefaultInputDevice(); |
| @@ -43,7 +41,8 @@ void Microphone::PrintDevices(int device_index) const { | @@ -43,7 +41,8 @@ void Microphone::PrintDevices(int device_index) const { | ||
| 43 | } | 41 | } |
| 44 | } | 42 | } |
| 45 | 43 | ||
| 46 | -bool Microphone::OpenDevice(int index, int sample_rate, int channel, PaStreamCallback cb, void* userdata) { | 44 | +bool Microphone::OpenDevice(int index, int sample_rate, int channel, |
| 45 | + PaStreamCallback cb, void *userdata) { | ||
| 47 | if (index < 0 || index >= Pa_GetDeviceCount()) { | 46 | if (index < 0 || index >= Pa_GetDeviceCount()) { |
| 48 | fprintf(stderr, "Invalid device index: %d\n", index); | 47 | fprintf(stderr, "Invalid device index: %d\n", index); |
| 49 | return false; | 48 | return false; |
| @@ -68,7 +67,8 @@ bool Microphone::OpenDevice(int index, int sample_rate, int channel, PaStreamCal | @@ -68,7 +67,8 @@ bool Microphone::OpenDevice(int index, int sample_rate, int channel, PaStreamCal | ||
| 68 | param.suggestedLatency = info->defaultLowInputLatency; | 67 | param.suggestedLatency = info->defaultLowInputLatency; |
| 69 | param.hostApiSpecificStreamInfo = nullptr; | 68 | param.hostApiSpecificStreamInfo = nullptr; |
| 70 | 69 | ||
| 71 | - PaError err = Pa_OpenStream(&stream, ¶m, nullptr, /* &outputParameters, */ | 70 | + PaError err = |
| 71 | + Pa_OpenStream(&stream, ¶m, nullptr, /* &outputParameters, */ | ||
| 72 | sample_rate, | 72 | sample_rate, |
| 73 | 0, // frames per buffer | 73 | 0, // frames per buffer |
| 74 | paClipOff, // we won't output out of range samples | 74 | paClipOff, // we won't output out of range samples |
| @@ -4,22 +4,27 @@ | @@ -4,22 +4,27 @@ | ||
| 4 | 4 | ||
| 5 | #ifndef SHERPA_ONNX_CSRC_MICROPHONE_H_ | 5 | #ifndef SHERPA_ONNX_CSRC_MICROPHONE_H_ |
| 6 | #define SHERPA_ONNX_CSRC_MICROPHONE_H_ | 6 | #define SHERPA_ONNX_CSRC_MICROPHONE_H_ |
| 7 | -#include "portaudio.h" // NOLINT | 7 | +#include <cstdint> |
| 8 | 8 | ||
| 9 | +#include "portaudio.h" // NOLINT | ||
| 9 | namespace sherpa_onnx { | 10 | namespace sherpa_onnx { |
| 10 | 11 | ||
| 11 | class Microphone { | 12 | class Microphone { |
| 12 | - PaStream *stream = nullptr; | ||
| 13 | public: | 13 | public: |
| 14 | Microphone(); | 14 | Microphone(); |
| 15 | ~Microphone(); | 15 | ~Microphone(); |
| 16 | 16 | ||
| 17 | - int GetDeviceCount() const; | ||
| 18 | - int GetDefaultInputDevice() const; | ||
| 19 | - void PrintDevices(int sel) const; | 17 | + int32_t GetDeviceCount() const; |
| 18 | + int32_t GetDefaultInputDevice() const; | ||
| 19 | + void PrintDevices(int32_t sel) const; | ||
| 20 | + | ||
| 21 | + bool OpenDevice(int32_t index, int32_t sample_rate, int32_t channel, | ||
| 22 | + PaStreamCallback cb, void *userdata); | ||
| 20 | 23 | ||
| 21 | - bool OpenDevice(int index, int sample_rate, int channel, PaStreamCallback cb, void* userdata); | ||
| 22 | void CloseDevice(); | 24 | void CloseDevice(); |
| 25 | + | ||
| 26 | + private: | ||
| 27 | + PaStream *stream = nullptr; | ||
| 23 | }; | 28 | }; |
| 24 | 29 | ||
| 25 | } // namespace sherpa_onnx | 30 | } // namespace sherpa_onnx |
| @@ -4,7 +4,9 @@ | @@ -4,7 +4,9 @@ | ||
| 4 | 4 | ||
| 5 | #include "sherpa-onnx/csrc/offline-source-separation-impl.h" | 5 | #include "sherpa-onnx/csrc/offline-source-separation-impl.h" |
| 6 | 6 | ||
| 7 | +#include <algorithm> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 9 | +#include <utility> | ||
| 8 | 10 | ||
| 9 | #if __ANDROID_API__ >= 9 | 11 | #if __ANDROID_API__ >= 9 |
| 10 | #include "android/asset_manager.h" | 12 | #include "android/asset_manager.h" |
| @@ -16,22 +18,93 @@ | @@ -16,22 +18,93 @@ | ||
| 16 | #endif | 18 | #endif |
| 17 | 19 | ||
| 18 | #include "sherpa-onnx/csrc/offline-source-separation-spleeter-impl.h" | 20 | #include "sherpa-onnx/csrc/offline-source-separation-spleeter-impl.h" |
| 21 | +#include "sherpa-onnx/csrc/offline-source-separation-uvr-impl.h" | ||
| 22 | +#include "sherpa-onnx/csrc/resample.h" | ||
| 19 | 23 | ||
| 20 | namespace sherpa_onnx { | 24 | namespace sherpa_onnx { |
| 21 | 25 | ||
| 22 | std::unique_ptr<OfflineSourceSeparationImpl> | 26 | std::unique_ptr<OfflineSourceSeparationImpl> |
| 23 | OfflineSourceSeparationImpl::Create( | 27 | OfflineSourceSeparationImpl::Create( |
| 24 | const OfflineSourceSeparationConfig &config) { | 28 | const OfflineSourceSeparationConfig &config) { |
| 25 | - // TODO(fangjun): Support other models | 29 | + if (!config.model.spleeter.vocals.empty()) { |
| 26 | return std::make_unique<OfflineSourceSeparationSpleeterImpl>(config); | 30 | return std::make_unique<OfflineSourceSeparationSpleeterImpl>(config); |
| 31 | + } | ||
| 32 | + | ||
| 33 | + if (!config.model.uvr.model.empty()) { | ||
| 34 | + return std::make_unique<OfflineSourceSeparationUvrImpl>(config); | ||
| 35 | + } | ||
| 36 | + | ||
| 37 | + SHERPA_ONNX_LOGE("Please provide a separation model!"); | ||
| 38 | + | ||
| 39 | + return nullptr; | ||
| 27 | } | 40 | } |
| 28 | 41 | ||
| 29 | template <typename Manager> | 42 | template <typename Manager> |
| 30 | std::unique_ptr<OfflineSourceSeparationImpl> | 43 | std::unique_ptr<OfflineSourceSeparationImpl> |
| 31 | OfflineSourceSeparationImpl::Create( | 44 | OfflineSourceSeparationImpl::Create( |
| 32 | Manager *mgr, const OfflineSourceSeparationConfig &config) { | 45 | Manager *mgr, const OfflineSourceSeparationConfig &config) { |
| 33 | - // TODO(fangjun): Support other models | 46 | + if (!config.model.spleeter.vocals.empty()) { |
| 34 | return std::make_unique<OfflineSourceSeparationSpleeterImpl>(mgr, config); | 47 | return std::make_unique<OfflineSourceSeparationSpleeterImpl>(mgr, config); |
| 48 | + } | ||
| 49 | + | ||
| 50 | + if (!config.model.uvr.model.empty()) { | ||
| 51 | + return std::make_unique<OfflineSourceSeparationUvrImpl>(mgr, config); | ||
| 52 | + } | ||
| 53 | + | ||
| 54 | + SHERPA_ONNX_LOGE("Please provide a separation model!"); | ||
| 55 | + | ||
| 56 | + return nullptr; | ||
| 57 | +} | ||
| 58 | + | ||
| 59 | +OfflineSourceSeparationInput OfflineSourceSeparationImpl::Resample( | ||
| 60 | + const OfflineSourceSeparationInput &input, bool debug /*= false*/) const { | ||
| 61 | + const OfflineSourceSeparationInput *p_input = &input; | ||
| 62 | + OfflineSourceSeparationInput tmp_input; | ||
| 63 | + | ||
| 64 | + int32_t output_sample_rate = GetOutputSampleRate(); | ||
| 65 | + | ||
| 66 | + if (input.sample_rate != output_sample_rate) { | ||
| 67 | + SHERPA_ONNX_LOGE( | ||
| 68 | + "Creating a resampler:\n" | ||
| 69 | + " in_sample_rate: %d\n" | ||
| 70 | + " output_sample_rate: %d\n", | ||
| 71 | + input.sample_rate, output_sample_rate); | ||
| 72 | + | ||
| 73 | + float min_freq = std::min<int32_t>(input.sample_rate, output_sample_rate); | ||
| 74 | + float lowpass_cutoff = 0.99 * 0.5 * min_freq; | ||
| 75 | + | ||
| 76 | + int32_t lowpass_filter_width = 6; | ||
| 77 | + auto resampler = | ||
| 78 | + std::make_unique<LinearResample>(input.sample_rate, output_sample_rate, | ||
| 79 | + lowpass_cutoff, lowpass_filter_width); | ||
| 80 | + | ||
| 81 | + std::vector<float> s; | ||
| 82 | + for (const auto &samples : input.samples.data) { | ||
| 83 | + resampler->Reset(); | ||
| 84 | + resampler->Resample(samples.data(), samples.size(), true, &s); | ||
| 85 | + tmp_input.samples.data.push_back(std::move(s)); | ||
| 86 | + } | ||
| 87 | + | ||
| 88 | + tmp_input.sample_rate = output_sample_rate; | ||
| 89 | + p_input = &tmp_input; | ||
| 90 | + } | ||
| 91 | + | ||
| 92 | + if (p_input->samples.data.size() > 1) { | ||
| 93 | + if (debug) { | ||
| 94 | + SHERPA_ONNX_LOGE("input ch1 samples size: %d", | ||
| 95 | + static_cast<int32_t>(p_input->samples.data[1].size())); | ||
| 96 | + } | ||
| 97 | + | ||
| 98 | + if (p_input->samples.data[0].size() != p_input->samples.data[1].size()) { | ||
| 99 | + SHERPA_ONNX_LOGE("ch0 samples size %d vs ch1 samples size %d", | ||
| 100 | + static_cast<int32_t>(p_input->samples.data[0].size()), | ||
| 101 | + static_cast<int32_t>(p_input->samples.data[1].size())); | ||
| 102 | + | ||
| 103 | + SHERPA_ONNX_EXIT(-1); | ||
| 104 | + } | ||
| 105 | + } | ||
| 106 | + | ||
| 107 | + return *p_input; | ||
| 35 | } | 108 | } |
| 36 | 109 | ||
| 37 | #if __ANDROID_API__ >= 9 | 110 | #if __ANDROID_API__ >= 9 |
| @@ -5,6 +5,7 @@ | @@ -5,6 +5,7 @@ | ||
| 5 | #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_ | 5 | #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_ |
| 6 | #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_ | 6 | #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_ |
| 7 | 7 | ||
| 8 | +#include <memory> | ||
| 8 | #include <vector> | 9 | #include <vector> |
| 9 | 10 | ||
| 10 | #include "sherpa-onnx/csrc/offline-source-separation.h" | 11 | #include "sherpa-onnx/csrc/offline-source-separation.h" |
| @@ -28,6 +29,9 @@ class OfflineSourceSeparationImpl { | @@ -28,6 +29,9 @@ class OfflineSourceSeparationImpl { | ||
| 28 | virtual int32_t GetOutputSampleRate() const = 0; | 29 | virtual int32_t GetOutputSampleRate() const = 0; |
| 29 | 30 | ||
| 30 | virtual int32_t GetNumberOfStems() const = 0; | 31 | virtual int32_t GetNumberOfStems() const = 0; |
| 32 | + | ||
| 33 | + OfflineSourceSeparationInput Resample( | ||
| 34 | + const OfflineSourceSeparationInput &input, bool debug = false) const; | ||
| 31 | }; | 35 | }; |
| 32 | 36 | ||
| 33 | } // namespace sherpa_onnx | 37 | } // namespace sherpa_onnx |
| @@ -4,10 +4,13 @@ | @@ -4,10 +4,13 @@ | ||
| 4 | 4 | ||
| 5 | #include "sherpa-onnx/csrc/offline-source-separation-model-config.h" | 5 | #include "sherpa-onnx/csrc/offline-source-separation-model-config.h" |
| 6 | 6 | ||
| 7 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 8 | + | ||
| 7 | namespace sherpa_onnx { | 9 | namespace sherpa_onnx { |
| 8 | 10 | ||
| 9 | void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) { | 11 | void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) { |
| 10 | spleeter.Register(po); | 12 | spleeter.Register(po); |
| 13 | + uvr.Register(po); | ||
| 11 | 14 | ||
| 12 | po->Register("num-threads", &num_threads, | 15 | po->Register("num-threads", &num_threads, |
| 13 | "Number of threads to run the neural network"); | 16 | "Number of threads to run the neural network"); |
| @@ -20,7 +23,17 @@ void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) { | @@ -20,7 +23,17 @@ void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) { | ||
| 20 | } | 23 | } |
| 21 | 24 | ||
| 22 | bool OfflineSourceSeparationModelConfig::Validate() const { | 25 | bool OfflineSourceSeparationModelConfig::Validate() const { |
| 26 | + if (!spleeter.vocals.empty()) { | ||
| 23 | return spleeter.Validate(); | 27 | return spleeter.Validate(); |
| 28 | + } | ||
| 29 | + | ||
| 30 | + if (!uvr.model.empty()) { | ||
| 31 | + return uvr.Validate(); | ||
| 32 | + } | ||
| 33 | + | ||
| 34 | + SHERPA_ONNX_LOGE("Please specify a source separation model"); | ||
| 35 | + | ||
| 36 | + return false; | ||
| 24 | } | 37 | } |
| 25 | 38 | ||
| 26 | std::string OfflineSourceSeparationModelConfig::ToString() const { | 39 | std::string OfflineSourceSeparationModelConfig::ToString() const { |
| @@ -28,6 +41,7 @@ std::string OfflineSourceSeparationModelConfig::ToString() const { | @@ -28,6 +41,7 @@ std::string OfflineSourceSeparationModelConfig::ToString() const { | ||
| 28 | 41 | ||
| 29 | os << "OfflineSourceSeparationModelConfig("; | 42 | os << "OfflineSourceSeparationModelConfig("; |
| 30 | os << "spleeter=" << spleeter.ToString() << ", "; | 43 | os << "spleeter=" << spleeter.ToString() << ", "; |
| 44 | + os << "uvr=" << uvr.ToString() << ", "; | ||
| 31 | os << "num_threads=" << num_threads << ", "; | 45 | os << "num_threads=" << num_threads << ", "; |
| 32 | os << "debug=" << (debug ? "True" : "False") << ", "; | 46 | os << "debug=" << (debug ? "True" : "False") << ", "; |
| 33 | os << "provider=\"" << provider << "\")"; | 47 | os << "provider=\"" << provider << "\")"; |
| @@ -8,12 +8,14 @@ | @@ -8,12 +8,14 @@ | ||
| 8 | #include <string> | 8 | #include <string> |
| 9 | 9 | ||
| 10 | #include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h" | 10 | #include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h" |
| 11 | +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h" | ||
| 11 | #include "sherpa-onnx/csrc/parse-options.h" | 12 | #include "sherpa-onnx/csrc/parse-options.h" |
| 12 | 13 | ||
| 13 | namespace sherpa_onnx { | 14 | namespace sherpa_onnx { |
| 14 | 15 | ||
| 15 | struct OfflineSourceSeparationModelConfig { | 16 | struct OfflineSourceSeparationModelConfig { |
| 16 | OfflineSourceSeparationSpleeterModelConfig spleeter; | 17 | OfflineSourceSeparationSpleeterModelConfig spleeter; |
| 18 | + OfflineSourceSeparationUvrModelConfig uvr; | ||
| 17 | 19 | ||
| 18 | int32_t num_threads = 1; | 20 | int32_t num_threads = 1; |
| 19 | bool debug = false; | 21 | bool debug = false; |
| @@ -23,8 +25,10 @@ struct OfflineSourceSeparationModelConfig { | @@ -23,8 +25,10 @@ struct OfflineSourceSeparationModelConfig { | ||
| 23 | 25 | ||
| 24 | OfflineSourceSeparationModelConfig( | 26 | OfflineSourceSeparationModelConfig( |
| 25 | const OfflineSourceSeparationSpleeterModelConfig &spleeter, | 27 | const OfflineSourceSeparationSpleeterModelConfig &spleeter, |
| 26 | - int32_t num_threads, bool debug, const std::string &provider) | 28 | + const OfflineSourceSeparationUvrModelConfig &uvr, int32_t num_threads, |
| 29 | + bool debug, const std::string &provider) | ||
| 27 | : spleeter(spleeter), | 30 | : spleeter(spleeter), |
| 31 | + uvr(uvr), | ||
| 28 | num_threads(num_threads), | 32 | num_threads(num_threads), |
| 29 | debug(debug), | 33 | debug(debug), |
| 30 | provider(provider) {} | 34 | provider(provider) {} |
| @@ -5,6 +5,10 @@ | @@ -5,6 +5,10 @@ | ||
| 5 | #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_ | 5 | #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_ |
| 6 | #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_ | 6 | #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_ |
| 7 | 7 | ||
| 8 | +#include <algorithm> | ||
| 9 | +#include <utility> | ||
| 10 | +#include <vector> | ||
| 11 | + | ||
| 8 | #include "Eigen/Dense" | 12 | #include "Eigen/Dense" |
| 9 | #include "kaldi-native-fbank/csrc/istft.h" | 13 | #include "kaldi-native-fbank/csrc/istft.h" |
| 10 | #include "kaldi-native-fbank/csrc/stft.h" | 14 | #include "kaldi-native-fbank/csrc/stft.h" |
| @@ -12,13 +16,12 @@ | @@ -12,13 +16,12 @@ | ||
| 12 | #include "sherpa-onnx/csrc/offline-source-separation-spleeter-model.h" | 16 | #include "sherpa-onnx/csrc/offline-source-separation-spleeter-model.h" |
| 13 | #include "sherpa-onnx/csrc/offline-source-separation.h" | 17 | #include "sherpa-onnx/csrc/offline-source-separation.h" |
| 14 | #include "sherpa-onnx/csrc/onnx-utils.h" | 18 | #include "sherpa-onnx/csrc/onnx-utils.h" |
| 15 | -#include "sherpa-onnx/csrc/resample.h" | ||
| 16 | 19 | ||
| 17 | namespace sherpa_onnx { | 20 | namespace sherpa_onnx { |
| 18 | 21 | ||
| 19 | class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl { | 22 | class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl { |
| 20 | public: | 23 | public: |
| 21 | - OfflineSourceSeparationSpleeterImpl( | 24 | + explicit OfflineSourceSeparationSpleeterImpl( |
| 22 | const OfflineSourceSeparationConfig &config) | 25 | const OfflineSourceSeparationConfig &config) |
| 23 | : config_(config), model_(config_.model) {} | 26 | : config_(config), model_(config_.model) {} |
| 24 | 27 | ||
| @@ -28,56 +31,12 @@ class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl { | @@ -28,56 +31,12 @@ class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl { | ||
| 28 | : config_(config), model_(mgr, config_.model) {} | 31 | : config_(config), model_(mgr, config_.model) {} |
| 29 | 32 | ||
| 30 | OfflineSourceSeparationOutput Process( | 33 | OfflineSourceSeparationOutput Process( |
| 31 | - const OfflineSourceSeparationInput &input) const override { | ||
| 32 | - const OfflineSourceSeparationInput *p_input = &input; | ||
| 33 | - OfflineSourceSeparationInput tmp_input; | ||
| 34 | - | ||
| 35 | - int32_t output_sample_rate = GetOutputSampleRate(); | ||
| 36 | - | ||
| 37 | - if (input.sample_rate != output_sample_rate) { | ||
| 38 | - SHERPA_ONNX_LOGE( | ||
| 39 | - "Creating a resampler:\n" | ||
| 40 | - " in_sample_rate: %d\n" | ||
| 41 | - " output_sample_rate: %d\n", | ||
| 42 | - input.sample_rate, output_sample_rate); | ||
| 43 | - | ||
| 44 | - float min_freq = std::min<int32_t>(input.sample_rate, output_sample_rate); | ||
| 45 | - float lowpass_cutoff = 0.99 * 0.5 * min_freq; | ||
| 46 | - | ||
| 47 | - int32_t lowpass_filter_width = 6; | ||
| 48 | - auto resampler = std::make_unique<LinearResample>( | ||
| 49 | - input.sample_rate, output_sample_rate, lowpass_cutoff, | ||
| 50 | - lowpass_filter_width); | ||
| 51 | - | ||
| 52 | - std::vector<float> s; | ||
| 53 | - for (const auto &samples : input.samples.data) { | ||
| 54 | - resampler->Reset(); | ||
| 55 | - resampler->Resample(samples.data(), samples.size(), true, &s); | ||
| 56 | - tmp_input.samples.data.push_back(std::move(s)); | ||
| 57 | - } | ||
| 58 | - | ||
| 59 | - tmp_input.sample_rate = output_sample_rate; | ||
| 60 | - p_input = &tmp_input; | ||
| 61 | - } | ||
| 62 | - | ||
| 63 | - if (p_input->samples.data.size() > 1) { | ||
| 64 | - if (config_.model.debug) { | ||
| 65 | - SHERPA_ONNX_LOGE("input ch1 samples size: %d", | ||
| 66 | - static_cast<int32_t>(p_input->samples.data[1].size())); | ||
| 67 | - } | ||
| 68 | - | ||
| 69 | - if (p_input->samples.data[0].size() != p_input->samples.data[1].size()) { | ||
| 70 | - SHERPA_ONNX_LOGE("ch0 samples size %d vs ch1 samples size %d", | ||
| 71 | - static_cast<int32_t>(p_input->samples.data[0].size()), | ||
| 72 | - static_cast<int32_t>(p_input->samples.data[1].size())); | ||
| 73 | - | ||
| 74 | - SHERPA_ONNX_EXIT(-1); | ||
| 75 | - } | ||
| 76 | - } | 34 | + const OfflineSourceSeparationInput &_input) const override { |
| 35 | + auto input = Resample(_input, config_.model.debug); | ||
| 77 | 36 | ||
| 78 | - auto stft_ch0 = ComputeStft(*p_input, 0); | 37 | + auto stft_ch0 = ComputeStft(input, 0); |
| 79 | 38 | ||
| 80 | - auto stft_ch1 = ComputeStft(*p_input, 1); | 39 | + auto stft_ch1 = ComputeStft(input, 1); |
| 81 | knf::StftResult *p_stft_ch1 = stft_ch1.real.empty() ? &stft_ch0 : &stft_ch1; | 40 | knf::StftResult *p_stft_ch1 = stft_ch1.real.empty() ? &stft_ch0 : &stft_ch1; |
| 82 | 41 | ||
| 83 | int32_t num_frames = stft_ch0.num_frames; | 42 | int32_t num_frames = stft_ch0.num_frames; |
| @@ -261,7 +220,6 @@ class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl { | @@ -261,7 +220,6 @@ class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl { | ||
| 261 | stft_config.win_length = meta.window_length; | 220 | stft_config.win_length = meta.window_length; |
| 262 | stft_config.window_type = meta.window_type; | 221 | stft_config.window_type = meta.window_type; |
| 263 | stft_config.center = meta.center; | 222 | stft_config.center = meta.center; |
| 264 | - stft_config.center = false; | ||
| 265 | 223 | ||
| 266 | return stft_config; | 224 | return stft_config; |
| 267 | } | 225 | } |
| 1 | // sherpa-onnx/csrc/offline-source-separation-spleeter-model-meta-data.h | 1 | // sherpa-onnx/csrc/offline-source-separation-spleeter-model-meta-data.h |
| 2 | // | 2 | // |
| 3 | -// Copyright (c) 2024 Xiaomi Corporation | 3 | +// Copyright (c) 2025 Xiaomi Corporation |
| 4 | #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_ | 4 | #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_ |
| 5 | #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_ | 5 | #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_ |
| 6 | 6 |
| 1 | +// sherpa-onnx/csrc/offline-source-separation-uvr-impl.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_ | ||
| 6 | +#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_ | ||
| 7 | + | ||
| 8 | +#include <algorithm> | ||
| 9 | +#include <utility> | ||
| 10 | +#include <vector> | ||
| 11 | + | ||
| 12 | +#include "Eigen/Dense" | ||
| 13 | +#include "kaldi-native-fbank/csrc/istft.h" | ||
| 14 | +#include "kaldi-native-fbank/csrc/stft.h" | ||
| 15 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 16 | +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model.h" | ||
| 17 | +#include "sherpa-onnx/csrc/offline-source-separation.h" | ||
| 18 | +#include "sherpa-onnx/csrc/onnx-utils.h" | ||
| 19 | +#include "sherpa-onnx/csrc/resample.h" | ||
| 20 | + | ||
| 21 | +namespace sherpa_onnx { | ||
| 22 | + | ||
| 23 | +class OfflineSourceSeparationUvrImpl : public OfflineSourceSeparationImpl { | ||
| 24 | + public: | ||
| 25 | + explicit OfflineSourceSeparationUvrImpl( | ||
| 26 | + const OfflineSourceSeparationConfig &config) | ||
| 27 | + : config_(config), model_(config_.model) {} | ||
| 28 | + | ||
| 29 | + template <typename Manager> | ||
| 30 | + OfflineSourceSeparationUvrImpl(Manager *mgr, | ||
| 31 | + const OfflineSourceSeparationConfig &config) | ||
| 32 | + : config_(config), model_(mgr, config_.model) {} | ||
| 33 | + | ||
| 34 | + OfflineSourceSeparationOutput Process( | ||
| 35 | + const OfflineSourceSeparationInput &_input) const override { | ||
| 36 | + auto input = Resample(_input, config_.model.debug); | ||
| 37 | + | ||
| 38 | + auto chunks_ch0 = SplitIntoChunks(input.samples.data[0]); | ||
| 39 | + | ||
| 40 | + std::vector<std::vector<float>> chunks_ch1; | ||
| 41 | + if (input.samples.data.size() > 1) { | ||
| 42 | + chunks_ch1 = SplitIntoChunks(input.samples.data[1]); | ||
| 43 | + } | ||
| 44 | + | ||
| 45 | + std::vector<float> samples_ch0; | ||
| 46 | + std::vector<float> samples_ch1; | ||
| 47 | + | ||
| 48 | + for (int32_t i = 0; i != static_cast<int32_t>(chunks_ch0.size()); ++i) { | ||
| 49 | + bool is_first_chunk = (i == 0); | ||
| 50 | + bool is_last_chunk = (i == static_cast<int32_t>(chunks_ch0.size()) - 1); | ||
| 51 | + | ||
| 52 | + auto s = ProcessChunk( | ||
| 53 | + chunks_ch0[i], | ||
| 54 | + chunks_ch1.empty() ? std::vector<float>{} : chunks_ch1[i], | ||
| 55 | + is_first_chunk, is_last_chunk); | ||
| 56 | + | ||
| 57 | + samples_ch0.insert(samples_ch0.end(), s.first.begin(), s.first.end()); | ||
| 58 | + samples_ch1.insert(samples_ch1.end(), s.second.begin(), s.second.end()); | ||
| 59 | + } | ||
| 60 | + | ||
| 61 | + auto &vocals_ch0 = samples_ch0; | ||
| 62 | + auto &vocals_ch1 = samples_ch1; | ||
| 63 | + | ||
| 64 | + std::vector<float> non_vocals_ch0(vocals_ch0.size()); | ||
| 65 | + std::vector<float> non_vocals_ch1(vocals_ch1.size()); | ||
| 66 | + | ||
| 67 | + Eigen::Map<Eigen::VectorXf>(non_vocals_ch0.data(), non_vocals_ch0.size()) = | ||
| 68 | + Eigen::Map<Eigen::VectorXf>(input.samples.data[0].data(), | ||
| 69 | + input.samples.data[0].size()) | ||
| 70 | + .array() - | ||
| 71 | + Eigen::Map<Eigen::VectorXf>(vocals_ch0.data(), vocals_ch0.size()) | ||
| 72 | + .array(); | ||
| 73 | + | ||
| 74 | + if (input.samples.data.size() > 1) { | ||
| 75 | + Eigen::Map<Eigen::VectorXf>(non_vocals_ch1.data(), | ||
| 76 | + non_vocals_ch1.size()) = | ||
| 77 | + Eigen::Map<Eigen::VectorXf>(input.samples.data[1].data(), | ||
| 78 | + input.samples.data[1].size()) | ||
| 79 | + .array() - | ||
| 80 | + Eigen::Map<Eigen::VectorXf>(vocals_ch1.data(), vocals_ch1.size()) | ||
| 81 | + .array(); | ||
| 82 | + } else { | ||
| 83 | + Eigen::Map<Eigen::VectorXf>(non_vocals_ch1.data(), | ||
| 84 | + non_vocals_ch1.size()) = | ||
| 85 | + Eigen::Map<Eigen::VectorXf>(input.samples.data[0].data(), | ||
| 86 | + input.samples.data[0].size()) | ||
| 87 | + .array() - | ||
| 88 | + Eigen::Map<Eigen::VectorXf>(vocals_ch1.data(), vocals_ch1.size()) | ||
| 89 | + .array(); | ||
| 90 | + } | ||
| 91 | + | ||
| 92 | + OfflineSourceSeparationOutput ans; | ||
| 93 | + ans.sample_rate = GetOutputSampleRate(); | ||
| 94 | + | ||
| 95 | + ans.stems.resize(2); | ||
| 96 | + ans.stems[0].data.reserve(2); | ||
| 97 | + ans.stems[1].data.reserve(2); | ||
| 98 | + | ||
| 99 | + ans.stems[0].data.push_back(std::move(vocals_ch0)); | ||
| 100 | + ans.stems[0].data.push_back(std::move(vocals_ch1)); | ||
| 101 | + | ||
| 102 | + ans.stems[1].data.push_back(std::move(non_vocals_ch0)); | ||
| 103 | + ans.stems[1].data.push_back(std::move(non_vocals_ch1)); | ||
| 104 | + | ||
| 105 | + return ans; | ||
| 106 | + } | ||
| 107 | + | ||
| 108 | + int32_t GetOutputSampleRate() const override { | ||
| 109 | + return model_.GetMetaData().sample_rate; | ||
| 110 | + } | ||
| 111 | + | ||
| 112 | + int32_t GetNumberOfStems() const override { | ||
| 113 | + return model_.GetMetaData().num_stems; | ||
| 114 | + } | ||
| 115 | + | ||
| 116 | + private: | ||
| 117 | + std::pair<std::vector<float>, std::vector<float>> ProcessChunk( | ||
| 118 | + const std::vector<float> &chunk_ch0, const std::vector<float> &chunk_ch1, | ||
| 119 | + bool is_first_chunk, bool is_last_chunk) const { | ||
| 120 | + int32_t pad0 = 0; | ||
| 121 | + | ||
| 122 | + auto stft_results_ch0 = ComputeStft(chunk_ch0, &pad0); | ||
| 123 | + | ||
| 124 | + int32_t pad1 = pad0; | ||
| 125 | + std::vector<knf::StftResult> stft_results_ch1; | ||
| 126 | + | ||
| 127 | + if (!chunk_ch1.empty()) { | ||
| 128 | + stft_results_ch1 = ComputeStft(chunk_ch1, &pad1); | ||
| 129 | + } else { | ||
| 130 | + stft_results_ch1 = stft_results_ch0; | ||
| 131 | + } | ||
| 132 | + | ||
| 133 | + const auto &meta_ = model_.GetMetaData(); | ||
| 134 | + | ||
| 135 | + int32_t num_frames = stft_results_ch0[0].num_frames; | ||
| 136 | + int32_t dim_f = meta_.dim_f; | ||
| 137 | + int32_t dim_t = meta_.dim_t; | ||
| 138 | + int32_t n_fft_bin = meta_.n_fft / 2 + 1; | ||
| 139 | + if (num_frames != dim_t) { | ||
| 140 | + SHERPA_ONNX_LOGE("num_frames(%d) != dim_t(%d)", num_frames, dim_t); | ||
| 141 | + SHERPA_ONNX_EXIT(-1); | ||
| 142 | + } | ||
| 143 | + | ||
| 144 | + // the first 2: number of channels | ||
| 145 | + // the second 2: real and image | ||
| 146 | + std::vector<float> x(stft_results_ch0.size() * 2 * 2 * dim_f * dim_t); | ||
| 147 | + float *px = x.data(); | ||
| 148 | + | ||
| 149 | + for (int32_t i = 0; i != static_cast<int32_t>(stft_results_ch0.size()); | ||
| 150 | + ++i) { | ||
| 151 | + const auto &ch0 = stft_results_ch0[i]; | ||
| 152 | + const auto &ch1 = stft_results_ch1[i]; | ||
| 153 | + | ||
| 154 | + const float *p_real_ch0 = ch0.real.data(); | ||
| 155 | + const float *p_imag_ch0 = ch0.imag.data(); | ||
| 156 | + | ||
| 157 | + const float *p_real_ch1 = ch1.real.data(); | ||
| 158 | + const float *p_imag_ch1 = ch1.imag.data(); | ||
| 159 | + | ||
| 160 | + for (int32_t j = 0; j != dim_f; ++j) { | ||
| 161 | + for (int32_t k = 0; k != num_frames; ++k) { | ||
| 162 | + *px = p_real_ch0[k * n_fft_bin + j]; | ||
| 163 | + ++px; | ||
| 164 | + } | ||
| 165 | + } | ||
| 166 | + | ||
| 167 | + for (int32_t j = 0; j != dim_f; ++j) { | ||
| 168 | + for (int32_t k = 0; k != num_frames; ++k) { | ||
| 169 | + *px = p_imag_ch0[k * n_fft_bin + j]; | ||
| 170 | + ++px; | ||
| 171 | + } | ||
| 172 | + } | ||
| 173 | + | ||
| 174 | + for (int32_t j = 0; j != dim_f; ++j) { | ||
| 175 | + for (int32_t k = 0; k != num_frames; ++k) { | ||
| 176 | + *px = p_real_ch1[k * n_fft_bin + j]; | ||
| 177 | + ++px; | ||
| 178 | + } | ||
| 179 | + } | ||
| 180 | + | ||
| 181 | + for (int32_t j = 0; j != dim_f; ++j) { | ||
| 182 | + for (int32_t k = 0; k != num_frames; ++k) { | ||
| 183 | + *px = p_imag_ch1[k * n_fft_bin + j]; | ||
| 184 | + ++px; | ||
| 185 | + } | ||
| 186 | + } | ||
| 187 | + } // for (int32_t i = 0; i != | ||
| 188 | + | ||
| 189 | + auto memory_info = | ||
| 190 | + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault); | ||
| 191 | + | ||
| 192 | + std::array<int64_t, 4> x_shape{ | ||
| 193 | + static_cast<int32_t>(stft_results_ch0.size()) * 4 / meta_.dim_c, | ||
| 194 | + meta_.dim_c, dim_f, dim_t}; | ||
| 195 | + | ||
| 196 | + Ort::Value x_tensor = Ort::Value::CreateTensor( | ||
| 197 | + memory_info, x.data(), x.size(), x_shape.data(), x_shape.size()); | ||
| 198 | + | ||
| 199 | + Ort::Value spec = model_.Run(std::move(x_tensor)); | ||
| 200 | + | ||
| 201 | + const float *p_spec = spec.GetTensorData<float>(); | ||
| 202 | + | ||
| 203 | + for (int32_t i = 0; i != static_cast<int32_t>(stft_results_ch0.size()); | ||
| 204 | + ++i) { | ||
| 205 | + auto &ch0 = stft_results_ch0[i]; | ||
| 206 | + auto &ch1 = stft_results_ch1[i]; | ||
| 207 | + | ||
| 208 | + float *p_real_ch0 = ch0.real.data(); | ||
| 209 | + float *p_imag_ch0 = ch0.imag.data(); | ||
| 210 | + | ||
| 211 | + float *p_real_ch1 = ch1.real.data(); | ||
| 212 | + float *p_imag_ch1 = ch1.imag.data(); | ||
| 213 | + | ||
| 214 | + for (int32_t j = 0; j != dim_f; ++j) { | ||
| 215 | + for (int32_t k = 0; k != num_frames; ++k) { | ||
| 216 | + p_real_ch0[k * n_fft_bin + j] = *p_spec; | ||
| 217 | + ++p_spec; | ||
| 218 | + } | ||
| 219 | + } | ||
| 220 | + | ||
| 221 | + for (int32_t j = 0; j != dim_f; ++j) { | ||
| 222 | + for (int32_t k = 0; k != num_frames; ++k) { | ||
| 223 | + p_imag_ch0[k * n_fft_bin + j] = *p_spec; | ||
| 224 | + ++p_spec; | ||
| 225 | + } | ||
| 226 | + } | ||
| 227 | + | ||
| 228 | + for (int32_t j = 0; j != dim_f; ++j) { | ||
| 229 | + for (int32_t k = 0; k != num_frames; ++k) { | ||
| 230 | + p_real_ch1[k * n_fft_bin + j] = *p_spec; | ||
| 231 | + ++p_spec; | ||
| 232 | + } | ||
| 233 | + } | ||
| 234 | + | ||
| 235 | + for (int32_t j = 0; j != dim_f; ++j) { | ||
| 236 | + for (int32_t k = 0; k != num_frames; ++k) { | ||
| 237 | + p_imag_ch1[k * n_fft_bin + j] = *p_spec; | ||
| 238 | + ++p_spec; | ||
| 239 | + } | ||
| 240 | + } | ||
| 241 | + | ||
| 242 | + for (int32_t k = 0; k != num_frames; ++k) { | ||
| 243 | + for (int32_t j = dim_f; j != n_fft_bin; ++j) { | ||
| 244 | + p_real_ch0[k * n_fft_bin + j] = 0; | ||
| 245 | + p_real_ch1[k * n_fft_bin + j] = 0; | ||
| 246 | + | ||
| 247 | + p_imag_ch0[k * n_fft_bin + j] = 0; | ||
| 248 | + p_imag_ch1[k * n_fft_bin + j] = 0; | ||
| 249 | + } | ||
| 250 | + } | ||
| 251 | + } | ||
| 252 | + | ||
| 253 | + auto samples_ch0 = ComputeInverseStft(stft_results_ch0, pad0, | ||
| 254 | + is_first_chunk, is_last_chunk); | ||
| 255 | + | ||
| 256 | + auto samples_ch1 = ComputeInverseStft(stft_results_ch1, pad1, | ||
| 257 | + is_first_chunk, is_last_chunk); | ||
| 258 | + | ||
| 259 | + return {std::move(samples_ch0), std::move(samples_ch1)}; | ||
| 260 | + } | ||
| 261 | + | ||
| 262 | + std::vector<float> ComputeInverseStft( | ||
| 263 | + const std::vector<knf::StftResult> &stft_result, int32_t pad, | ||
| 264 | + bool is_first_chunk, bool is_last_chunk) const { | ||
| 265 | + const auto &meta_ = model_.GetMetaData(); | ||
| 266 | + int32_t trim = meta_.n_fft / 2; | ||
| 267 | + | ||
| 268 | + int32_t margin = meta_.margin; | ||
| 269 | + | ||
| 270 | + int32_t chunk_size = meta_.num_chunks * meta_.sample_rate; | ||
| 271 | + | ||
| 272 | + if (margin > chunk_size) { | ||
| 273 | + margin = chunk_size; | ||
| 274 | + } | ||
| 275 | + | ||
| 276 | + auto stft_config = GetStftConfig(); | ||
| 277 | + knf::IStft istft(stft_config); | ||
| 278 | + | ||
| 279 | + std::vector<float> ans; | ||
| 280 | + | ||
| 281 | + for (int32_t i = 0; i != static_cast<int32_t>(stft_result.size()); ++i) { | ||
| 282 | + auto samples = istft.Compute(stft_result[i]); | ||
| 283 | + int32_t num_samples = static_cast<int32_t>(samples.size()); | ||
| 284 | + | ||
| 285 | + ans.insert(ans.end(), samples.begin() + trim, | ||
| 286 | + samples.begin() + (num_samples - trim)); | ||
| 287 | + } | ||
| 288 | + | ||
| 289 | + int32_t start = is_first_chunk ? 0 : margin; | ||
| 290 | + int32_t end = | ||
| 291 | + is_last_chunk ? (ans.size() - pad) : (ans.size() - pad - margin); | ||
| 292 | + | ||
| 293 | + return {ans.begin() + start, ans.begin() + end}; | ||
| 294 | + } | ||
| 295 | + | ||
| 296 | + std::vector<knf::StftResult> ComputeStft(const std::vector<float> &chunk, | ||
| 297 | + int32_t *pad) const { | ||
| 298 | + const auto &meta_ = model_.GetMetaData(); | ||
| 299 | + | ||
| 300 | + int32_t num_samples = static_cast<int32_t>(chunk.size()); | ||
| 301 | + int32_t trim = meta_.n_fft / 2; | ||
| 302 | + int32_t chunk_size = meta_.hop_length * (meta_.dim_t - 1); | ||
| 303 | + int32_t gen_size = chunk_size - 2 * trim; | ||
| 304 | + *pad = gen_size - num_samples % gen_size; | ||
| 305 | + | ||
| 306 | + std::vector<float> samples(trim + chunk.size() + *pad + trim); | ||
| 307 | + std::copy(chunk.begin(), chunk.end(), samples.begin() + trim); | ||
| 308 | + | ||
| 309 | + auto stft_config = GetStftConfig(); | ||
| 310 | + knf::Stft stft(stft_config); | ||
| 311 | + | ||
| 312 | + std::vector<knf::StftResult> stft_results; | ||
| 313 | + // split the chunk into short segments | ||
| 314 | + for (int32_t i = 0; i < num_samples + *pad; i += gen_size) { | ||
| 315 | + auto r = stft.Compute(samples.data() + i, chunk_size); | ||
| 316 | + stft_results.push_back(std::move(r)); | ||
| 317 | + } | ||
| 318 | + | ||
| 319 | + return stft_results; | ||
| 320 | + } | ||
| 321 | + | ||
| 322 | + std::vector<std::vector<float>> SplitIntoChunks( | ||
| 323 | + const std::vector<float> &samples) const { | ||
| 324 | + std::vector<std::vector<float>> ans; | ||
| 325 | + | ||
| 326 | + if (samples.empty()) { | ||
| 327 | + return ans; | ||
| 328 | + } | ||
| 329 | + | ||
| 330 | + const auto &meta_ = model_.GetMetaData(); | ||
| 331 | + int32_t margin = meta_.margin; | ||
| 332 | + | ||
| 333 | + int32_t chunk_size = meta_.num_chunks * meta_.sample_rate; | ||
| 334 | + | ||
| 335 | + if (static_cast<int32_t>(samples.size()) < chunk_size) { | ||
| 336 | + chunk_size = samples.size(); | ||
| 337 | + } | ||
| 338 | + | ||
| 339 | + if (margin > chunk_size) { | ||
| 340 | + margin = chunk_size; | ||
| 341 | + } | ||
| 342 | + | ||
| 343 | + for (int32_t i = 0; i < static_cast<int32_t>(samples.size()); | ||
| 344 | + i += chunk_size) { | ||
| 345 | + int32_t start = std::max<int32_t>(0, i - margin); | ||
| 346 | + int32_t end = std::min<int32_t>(i + chunk_size + margin, | ||
| 347 | + static_cast<int32_t>(samples.size())); | ||
| 348 | + if (start >= end) { | ||
| 349 | + break; | ||
| 350 | + } | ||
| 351 | + | ||
| 352 | + ans.emplace_back(samples.begin() + start, samples.begin() + end); | ||
| 353 | + | ||
| 354 | + if (end == static_cast<int32_t>(samples.size())) { | ||
| 355 | + break; | ||
| 356 | + } | ||
| 357 | + } | ||
| 358 | + | ||
| 359 | + return ans; | ||
| 360 | + } | ||
| 361 | + | ||
| 362 | + knf::StftConfig GetStftConfig() const { | ||
| 363 | + const auto &meta = model_.GetMetaData(); | ||
| 364 | + | ||
| 365 | + knf::StftConfig stft_config; | ||
| 366 | + stft_config.n_fft = meta.n_fft; | ||
| 367 | + stft_config.hop_length = meta.hop_length; | ||
| 368 | + stft_config.win_length = meta.window_length; | ||
| 369 | + stft_config.window_type = meta.window_type; | ||
| 370 | + stft_config.center = meta.center; | ||
| 371 | + | ||
| 372 | + return stft_config; | ||
| 373 | + } | ||
| 374 | + | ||
| 375 | + private: | ||
| 376 | + OfflineSourceSeparationConfig config_; | ||
| 377 | + OfflineSourceSeparationUvrModel model_; | ||
| 378 | +}; | ||
| 379 | + | ||
| 380 | +} // namespace sherpa_onnx | ||
| 381 | + | ||
| 382 | +#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_ |
| 1 | +// sherpa-onnx/csrc/offline-source-separation-uvr-model-config.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h" | ||
| 6 | + | ||
| 7 | +#include "sherpa-onnx/csrc/file-utils.h" | ||
| 8 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 9 | + | ||
| 10 | +namespace sherpa_onnx { | ||
| 11 | + | ||
| 12 | +void OfflineSourceSeparationUvrModelConfig::Register(ParseOptions *po) { | ||
| 13 | + po->Register("uvr-model", &model, "Path to the UVR model"); | ||
| 14 | +} | ||
| 15 | + | ||
| 16 | +bool OfflineSourceSeparationUvrModelConfig::Validate() const { | ||
| 17 | + if (model.empty()) { | ||
| 18 | + SHERPA_ONNX_LOGE("Please provide --uvr-model"); | ||
| 19 | + return false; | ||
| 20 | + } | ||
| 21 | + | ||
| 22 | + if (!FileExists(model)) { | ||
| 23 | + SHERPA_ONNX_LOGE("UVR model '%s' does not exist. ", model.c_str()); | ||
| 24 | + return false; | ||
| 25 | + } | ||
| 26 | + | ||
| 27 | + return true; | ||
| 28 | +} | ||
| 29 | + | ||
| 30 | +std::string OfflineSourceSeparationUvrModelConfig::ToString() const { | ||
| 31 | + std::ostringstream os; | ||
| 32 | + | ||
| 33 | + os << "OfflineSourceSeparationUvrModelConfig("; | ||
| 34 | + os << "model=\"" << model << "\")"; | ||
| 35 | + | ||
| 36 | + return os.str(); | ||
| 37 | +} | ||
| 38 | + | ||
| 39 | +} // namespace sherpa_onnx |
| 1 | +// sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_ | ||
| 6 | +#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_ | ||
| 7 | + | ||
| 8 | +#include <string> | ||
| 9 | + | ||
| 10 | +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h" | ||
| 11 | +#include "sherpa-onnx/csrc/parse-options.h" | ||
| 12 | + | ||
| 13 | +namespace sherpa_onnx { | ||
| 14 | + | ||
| 15 | +struct OfflineSourceSeparationUvrModelConfig { | ||
| 16 | + std::string model; | ||
| 17 | + | ||
| 18 | + OfflineSourceSeparationUvrModelConfig() = default; | ||
| 19 | + | ||
| 20 | + explicit OfflineSourceSeparationUvrModelConfig(const std::string &model) | ||
| 21 | + : model(model) {} | ||
| 22 | + | ||
| 23 | + void Register(ParseOptions *po); | ||
| 24 | + | ||
| 25 | + bool Validate() const; | ||
| 26 | + | ||
| 27 | + std::string ToString() const; | ||
| 28 | +}; | ||
| 29 | + | ||
| 30 | +} // namespace sherpa_onnx | ||
| 31 | + | ||
| 32 | +#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_ |
| 1 | +// sherpa-onnx/csrc/offline-source-separation-uvr-model-meta-data.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | +#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_ | ||
| 5 | +#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_ | ||
| 6 | + | ||
| 7 | +#include <string> | ||
| 8 | +#include <unordered_map> | ||
| 9 | +#include <vector> | ||
| 10 | + | ||
| 11 | +namespace sherpa_onnx { | ||
| 12 | + | ||
| 13 | +// See also | ||
| 14 | +// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/test.py | ||
| 15 | +// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/add_meta_data_and_quantize.py | ||
| 16 | +struct OfflineSourceSeparationUvrModelMetaData { | ||
| 17 | + int32_t sample_rate = 44100; | ||
| 18 | + int32_t num_stems = 2; | ||
| 19 | + int32_t dim_c = -1; | ||
| 20 | + int32_t dim_f = -1; | ||
| 21 | + int32_t dim_t = -1; | ||
| 22 | + | ||
| 23 | + int32_t n_fft = -1; | ||
| 24 | + int32_t hop_length = 1024; | ||
| 25 | + | ||
| 26 | + int32_t window_length = -1; | ||
| 27 | + int32_t center = 1; | ||
| 28 | + std::string window_type = "hann"; | ||
| 29 | + | ||
| 30 | + // the following fields are preconfigured. Please see | ||
| 31 | + // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/test.py | ||
| 32 | + int32_t margin = 0; // changed in ./offline-source-separation-uvr-model.cc | ||
| 33 | + const int32_t num_chunks = 15; | ||
| 34 | +}; | ||
| 35 | + | ||
| 36 | +} // namespace sherpa_onnx | ||
| 37 | + | ||
| 38 | +#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_ |
| 1 | +// sherpa-onnx/csrc/offline-source-separation-uvr-model.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model.h" | ||
| 6 | + | ||
| 7 | +#include <memory> | ||
| 8 | +#include <string> | ||
| 9 | +#include <utility> | ||
| 10 | +#include <vector> | ||
| 11 | + | ||
| 12 | +#if __ANDROID_API__ >= 9 | ||
| 13 | +#include "android/asset_manager.h" | ||
| 14 | +#include "android/asset_manager_jni.h" | ||
| 15 | +#endif | ||
| 16 | + | ||
| 17 | +#if __OHOS__ | ||
| 18 | +#include "rawfile/raw_file_manager.h" | ||
| 19 | +#endif | ||
| 20 | + | ||
| 21 | +#include "sherpa-onnx/csrc/file-utils.h" | ||
| 22 | +#include "sherpa-onnx/csrc/onnx-utils.h" | ||
| 23 | +#include "sherpa-onnx/csrc/session.h" | ||
| 24 | +#include "sherpa-onnx/csrc/text-utils.h" | ||
| 25 | + | ||
| 26 | +namespace sherpa_onnx { | ||
| 27 | + | ||
| 28 | +class OfflineSourceSeparationUvrModel::Impl { | ||
| 29 | + public: | ||
| 30 | + explicit Impl(const OfflineSourceSeparationModelConfig &config) | ||
| 31 | + : config_(config), | ||
| 32 | + env_(ORT_LOGGING_LEVEL_ERROR), | ||
| 33 | + sess_opts_(GetSessionOptions(config)), | ||
| 34 | + allocator_{} { | ||
| 35 | + auto buf = ReadFile(config.uvr.model); | ||
| 36 | + Init(buf.data(), buf.size()); | ||
| 37 | + } | ||
| 38 | + | ||
| 39 | + template <typename Manager> | ||
| 40 | + Impl(Manager *mgr, const OfflineSourceSeparationModelConfig &config) | ||
| 41 | + : config_(config), | ||
| 42 | + env_(ORT_LOGGING_LEVEL_ERROR), | ||
| 43 | + sess_opts_(GetSessionOptions(config)), | ||
| 44 | + allocator_{} { | ||
| 45 | + auto buf = ReadFile(mgr, config.uvr.model); | ||
| 46 | + Init(buf.data(), buf.size()); | ||
| 47 | + } | ||
| 48 | + | ||
| 49 | + const OfflineSourceSeparationUvrModelMetaData &GetMetaData() const { | ||
| 50 | + return meta_; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + Ort::Value Run(Ort::Value x) const { | ||
| 54 | + auto out = sess_->Run({}, input_names_ptr_.data(), &x, 1, | ||
| 55 | + output_names_ptr_.data(), output_names_ptr_.size()); | ||
| 56 | + return std::move(out[0]); | ||
| 57 | + } | ||
| 58 | + | ||
| 59 | + private: | ||
| 60 | + void Init(void *model_data, size_t model_data_length) { | ||
| 61 | + sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length, | ||
| 62 | + sess_opts_); | ||
| 63 | + | ||
| 64 | + GetInputNames(sess_.get(), &input_names_, &input_names_ptr_); | ||
| 65 | + | ||
| 66 | + GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_); | ||
| 67 | + | ||
| 68 | + Ort::ModelMetadata meta_data = sess_->GetModelMetadata(); | ||
| 69 | + if (config_.debug) { | ||
| 70 | + std::ostringstream os; | ||
| 71 | + os << "---UVR model---\n"; | ||
| 72 | + PrintModelMetadata(os, meta_data); | ||
| 73 | + | ||
| 74 | + os << "----------input names----------\n"; | ||
| 75 | + int32_t i = 0; | ||
| 76 | + for (const auto &s : input_names_) { | ||
| 77 | + os << i << " " << s << "\n"; | ||
| 78 | + ++i; | ||
| 79 | + } | ||
| 80 | + os << "----------output names----------\n"; | ||
| 81 | + i = 0; | ||
| 82 | + for (const auto &s : output_names_) { | ||
| 83 | + os << i << " " << s << "\n"; | ||
| 84 | + ++i; | ||
| 85 | + } | ||
| 86 | + | ||
| 87 | +#if __OHOS__ | ||
| 88 | + SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str()); | ||
| 89 | +#else | ||
| 90 | + SHERPA_ONNX_LOGE("%s\n", os.str().c_str()); | ||
| 91 | +#endif | ||
| 92 | + } | ||
| 93 | + | ||
| 94 | + Ort::AllocatorWithDefaultOptions allocator; // used in the macro below | ||
| 95 | + | ||
| 96 | + std::string model_type; | ||
| 97 | + SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type"); | ||
| 98 | + if (model_type != "UVR") { | ||
| 99 | + SHERPA_ONNX_LOGE("Expect model type 'UVR'. Given: '%s'", | ||
| 100 | + model_type.c_str()); | ||
| 101 | + SHERPA_ONNX_EXIT(-1); | ||
| 102 | + } | ||
| 103 | + | ||
| 104 | + SHERPA_ONNX_READ_META_DATA(meta_.num_stems, "stems"); | ||
| 105 | + if (meta_.num_stems != 2) { | ||
| 106 | + SHERPA_ONNX_LOGE("Only 2stems is supported. Given %d stems", | ||
| 107 | + meta_.num_stems); | ||
| 108 | + SHERPA_ONNX_EXIT(-1); | ||
| 109 | + } | ||
| 110 | + | ||
| 111 | + SHERPA_ONNX_READ_META_DATA(meta_.sample_rate, "sample_rate"); | ||
| 112 | + SHERPA_ONNX_READ_META_DATA(meta_.n_fft, "n_fft"); | ||
| 113 | + SHERPA_ONNX_READ_META_DATA(meta_.center, "center"); | ||
| 114 | + SHERPA_ONNX_READ_META_DATA(meta_.window_length, "win_length"); | ||
| 115 | + SHERPA_ONNX_READ_META_DATA(meta_.hop_length, "hop_length"); | ||
| 116 | + SHERPA_ONNX_READ_META_DATA(meta_.dim_t, "dim_t"); | ||
| 117 | + SHERPA_ONNX_READ_META_DATA(meta_.dim_f, "dim_f"); | ||
| 118 | + SHERPA_ONNX_READ_META_DATA(meta_.dim_c, "dim_c"); | ||
| 119 | + SHERPA_ONNX_READ_META_DATA_STR(meta_.window_type, "window_type"); | ||
| 120 | + | ||
| 121 | + meta_.margin = meta_.sample_rate; | ||
| 122 | + } | ||
| 123 | + | ||
| 124 | + private: | ||
| 125 | + OfflineSourceSeparationModelConfig config_; | ||
| 126 | + OfflineSourceSeparationUvrModelMetaData meta_; | ||
| 127 | + | ||
| 128 | + Ort::Env env_; | ||
| 129 | + Ort::SessionOptions sess_opts_; | ||
| 130 | + Ort::AllocatorWithDefaultOptions allocator_; | ||
| 131 | + | ||
| 132 | + std::unique_ptr<Ort::Session> sess_; | ||
| 133 | + | ||
| 134 | + std::vector<std::string> input_names_; | ||
| 135 | + std::vector<const char *> input_names_ptr_; | ||
| 136 | + | ||
| 137 | + std::vector<std::string> output_names_; | ||
| 138 | + std::vector<const char *> output_names_ptr_; | ||
| 139 | +}; | ||
| 140 | + | ||
| 141 | +OfflineSourceSeparationUvrModel::~OfflineSourceSeparationUvrModel() = default; | ||
| 142 | + | ||
| 143 | +OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel( | ||
| 144 | + const OfflineSourceSeparationModelConfig &config) | ||
| 145 | + : impl_(std::make_unique<Impl>(config)) {} | ||
| 146 | + | ||
| 147 | +template <typename Manager> | ||
| 148 | +OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel( | ||
| 149 | + Manager *mgr, const OfflineSourceSeparationModelConfig &config) | ||
| 150 | + : impl_(std::make_unique<Impl>(mgr, config)) {} | ||
| 151 | + | ||
| 152 | +Ort::Value OfflineSourceSeparationUvrModel::Run(Ort::Value x) const { | ||
| 153 | + return impl_->Run(std::move(x)); | ||
| 154 | +} | ||
| 155 | + | ||
| 156 | +const OfflineSourceSeparationUvrModelMetaData & | ||
| 157 | +OfflineSourceSeparationUvrModel::GetMetaData() const { | ||
| 158 | + return impl_->GetMetaData(); | ||
| 159 | +} | ||
| 160 | + | ||
| 161 | +#if __ANDROID_API__ >= 9 | ||
| 162 | +template OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel( | ||
| 163 | + AAssetManager *mgr, const OfflineSourceSeparationModelConfig &config); | ||
| 164 | +#endif | ||
| 165 | + | ||
| 166 | +#if __OHOS__ | ||
| 167 | +template OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel( | ||
| 168 | + NativeResourceManager *mgr, | ||
| 169 | + const OfflineSourceSeparationModelConfig &config); | ||
| 170 | +#endif | ||
| 171 | + | ||
| 172 | +} // namespace sherpa_onnx |
| 1 | +// sherpa-onnx/csrc/offline-source-separation-uvr-model.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | +#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_ | ||
| 5 | +#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_ | ||
| 6 | +#include <memory> | ||
| 7 | + | ||
| 8 | +#include "onnxruntime_cxx_api.h" // NOLINT | ||
| 9 | +#include "sherpa-onnx/csrc/offline-source-separation-model-config.h" | ||
| 10 | +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-meta-data.h" | ||
| 11 | + | ||
| 12 | +namespace sherpa_onnx { | ||
| 13 | + | ||
| 14 | +class OfflineSourceSeparationUvrModel { | ||
| 15 | + public: | ||
| 16 | + ~OfflineSourceSeparationUvrModel(); | ||
| 17 | + | ||
| 18 | + explicit OfflineSourceSeparationUvrModel( | ||
| 19 | + const OfflineSourceSeparationModelConfig &config); | ||
| 20 | + | ||
| 21 | + template <typename Manager> | ||
| 22 | + OfflineSourceSeparationUvrModel( | ||
| 23 | + Manager *mgr, const OfflineSourceSeparationModelConfig &config); | ||
| 24 | + | ||
| 25 | + Ort::Value Run(Ort::Value x) const; | ||
| 26 | + | ||
| 27 | + const OfflineSourceSeparationUvrModelMetaData &GetMetaData() const; | ||
| 28 | + | ||
| 29 | + private: | ||
| 30 | + class Impl; | ||
| 31 | + std::unique_ptr<Impl> impl_; | ||
| 32 | +}; | ||
| 33 | + | ||
| 34 | +} // namespace sherpa_onnx | ||
| 35 | + | ||
| 36 | +#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_ |
| @@ -19,7 +19,8 @@ struct OfflineSourceSeparationConfig { | @@ -19,7 +19,8 @@ struct OfflineSourceSeparationConfig { | ||
| 19 | 19 | ||
| 20 | OfflineSourceSeparationConfig() = default; | 20 | OfflineSourceSeparationConfig() = default; |
| 21 | 21 | ||
| 22 | - OfflineSourceSeparationConfig(const OfflineSourceSeparationModelConfig &model) | 22 | + explicit OfflineSourceSeparationConfig( |
| 23 | + const OfflineSourceSeparationModelConfig &model) | ||
| 23 | : model(model) {} | 24 | : model(model) {} |
| 24 | 25 | ||
| 25 | void Register(ParseOptions *po); | 26 | void Register(ParseOptions *po); |
| @@ -54,7 +55,7 @@ class OfflineSourceSeparation { | @@ -54,7 +55,7 @@ class OfflineSourceSeparation { | ||
| 54 | public: | 55 | public: |
| 55 | ~OfflineSourceSeparation(); | 56 | ~OfflineSourceSeparation(); |
| 56 | 57 | ||
| 57 | - OfflineSourceSeparation(const OfflineSourceSeparationConfig &config); | 58 | + explicit OfflineSourceSeparation(const OfflineSourceSeparationConfig &config); |
| 58 | 59 | ||
| 59 | template <typename Manager> | 60 | template <typename Manager> |
| 60 | OfflineSourceSeparation(Manager *mgr, | 61 | OfflineSourceSeparation(Manager *mgr, |
| @@ -101,8 +101,8 @@ for a list of pre-trained models to download. | @@ -101,8 +101,8 @@ for a list of pre-trained models to download. | ||
| 101 | mic_sample_rate = atof(pSampleRateStr); | 101 | mic_sample_rate = atof(pSampleRateStr); |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | - if(!mic.OpenDevice(device_index, mic_sample_rate, 1, | ||
| 105 | - RecordCallback, s.get())) { | 104 | + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, |
| 105 | + s.get())) { | ||
| 106 | fprintf(stderr, "portaudio error: %d\n", device_index); | 106 | fprintf(stderr, "portaudio error: %d\n", device_index); |
| 107 | exit(EXIT_FAILURE); | 107 | exit(EXIT_FAILURE); |
| 108 | } | 108 | } |
| @@ -142,8 +142,8 @@ for more models. | @@ -142,8 +142,8 @@ for more models. | ||
| 142 | mic_sample_rate = atof(pSampleRateStr); | 142 | mic_sample_rate = atof(pSampleRateStr); |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | - if (!mic.OpenDevice(device_index, mic_sample_rate, 1, | ||
| 146 | - RecordCallback, nullptr /* user_data */)){ | 145 | + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, |
| 146 | + nullptr /* user_data */)) { | ||
| 147 | fprintf(stderr, "portaudio error: %d\n", device_index); | 147 | fprintf(stderr, "portaudio error: %d\n", device_index); |
| 148 | exit(EXIT_FAILURE); | 148 | exit(EXIT_FAILURE); |
| 149 | } | 149 | } |
| @@ -244,8 +244,8 @@ Note that `zh` means Chinese, while `en` means English. | @@ -244,8 +244,8 @@ Note that `zh` means Chinese, while `en` means English. | ||
| 244 | mic_sample_rate = atof(pSampleRateStr); | 244 | mic_sample_rate = atof(pSampleRateStr); |
| 245 | } | 245 | } |
| 246 | 246 | ||
| 247 | - if (!mic.OpenDevice(device_index, mic_sample_rate, 1, | ||
| 248 | - RecordCallback, nullptr /* user_data */)){ | 247 | + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, |
| 248 | + nullptr /* user_data */)) { | ||
| 249 | fprintf(stderr, "portaudio error: %d\n", device_index); | 249 | fprintf(stderr, "portaudio error: %d\n", device_index); |
| 250 | exit(EXIT_FAILURE); | 250 | exit(EXIT_FAILURE); |
| 251 | } | 251 | } |
| @@ -159,8 +159,8 @@ for a list of pre-trained models to download. | @@ -159,8 +159,8 @@ for a list of pre-trained models to download. | ||
| 159 | mic_sample_rate = atof(pSampleRateStr); | 159 | mic_sample_rate = atof(pSampleRateStr); |
| 160 | } | 160 | } |
| 161 | 161 | ||
| 162 | - if (!mic.OpenDevice(device_index, mic_sample_rate, 1, | ||
| 163 | - RecordCallback, nullptr /* user_data */)){ | 162 | + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, |
| 163 | + nullptr /* user_data */)) { | ||
| 164 | fprintf(stderr, "portaudio error: %d\n", device_index); | 164 | fprintf(stderr, "portaudio error: %d\n", device_index); |
| 165 | exit(EXIT_FAILURE); | 165 | exit(EXIT_FAILURE); |
| 166 | } | 166 | } |
| @@ -129,8 +129,8 @@ for a list of pre-trained models to download. | @@ -129,8 +129,8 @@ for a list of pre-trained models to download. | ||
| 129 | mic_sample_rate = atof(pSampleRateStr); | 129 | mic_sample_rate = atof(pSampleRateStr); |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | - if (!mic.OpenDevice(device_index, mic_sample_rate, 1, | ||
| 133 | - RecordCallback, nullptr /* user_data */)){ | 132 | + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, |
| 133 | + nullptr /* user_data */)) { | ||
| 134 | fprintf(stderr, "portaudio error: %d\n", device_index); | 134 | fprintf(stderr, "portaudio error: %d\n", device_index); |
| 135 | exit(EXIT_FAILURE); | 135 | exit(EXIT_FAILURE); |
| 136 | } | 136 | } |
| @@ -33,6 +33,17 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-m | @@ -33,6 +33,17 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-m | ||
| 33 | --input-wav=audio_example.wav \ | 33 | --input-wav=audio_example.wav \ |
| 34 | --output-vocals-wav=output_vocals.wav \ | 34 | --output-vocals-wav=output_vocals.wav \ |
| 35 | --output-accompaniment-wav=output_accompaniment.wav | 35 | --output-accompaniment-wav=output_accompaniment.wav |
| 36 | + | ||
| 37 | +(2) Use UVR models | ||
| 38 | + | ||
| 39 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR_MDXNET_1_9703.onnx | ||
| 40 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/audio_example.wav | ||
| 41 | + | ||
| 42 | +./bin/sherpa-onnx-offline-source-separation \ | ||
| 43 | + --uvr-model=./UVR_MDXNET_1_9703.onnx \ | ||
| 44 | + --input-wav=audio_example.wav \ | ||
| 45 | + --output-vocals-wav=output_vocals.wav \ | ||
| 46 | + --output-accompaniment-wav=output_accompaniment.wav | ||
| 36 | )usage"; | 47 | )usage"; |
| 37 | 48 | ||
| 38 | sherpa_onnx::ParseOptions po(kUsageMessage); | 49 | sherpa_onnx::ParseOptions po(kUsageMessage); |
| @@ -136,7 +136,8 @@ to download models for offline ASR. | @@ -136,7 +136,8 @@ to download models for offline ASR. | ||
| 136 | mic_sample_rate = atof(pSampleRateStr); | 136 | mic_sample_rate = atof(pSampleRateStr); |
| 137 | } | 137 | } |
| 138 | 138 | ||
| 139 | - if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, nullptr)) { | 139 | + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, |
| 140 | + nullptr)) { | ||
| 140 | fprintf(stderr, "Failed to open device %d\n", device_index); | 141 | fprintf(stderr, "Failed to open device %d\n", device_index); |
| 141 | exit(EXIT_FAILURE); | 142 | exit(EXIT_FAILURE); |
| 142 | } | 143 | } |
| @@ -74,7 +74,6 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler | @@ -74,7 +74,6 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler | ||
| 74 | 74 | ||
| 75 | sherpa_onnx::Microphone mic; | 75 | sherpa_onnx::Microphone mic; |
| 76 | 76 | ||
| 77 | - | ||
| 78 | int32_t device_index = Pa_GetDefaultInputDevice(); | 77 | int32_t device_index = Pa_GetDefaultInputDevice(); |
| 79 | if (device_index == paNoDevice) { | 78 | if (device_index == paNoDevice) { |
| 80 | fprintf(stderr, "No default input device found\n"); | 79 | fprintf(stderr, "No default input device found\n"); |
| @@ -96,7 +95,8 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler | @@ -96,7 +95,8 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler | ||
| 96 | fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate); | 95 | fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate); |
| 97 | mic_sample_rate = atof(pSampleRateStr); | 96 | mic_sample_rate = atof(pSampleRateStr); |
| 98 | } | 97 | } |
| 99 | - if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, nullptr)) { | 98 | + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, |
| 99 | + nullptr)) { | ||
| 100 | fprintf(stderr, "Failed to open microphone device %d\n", device_index); | 100 | fprintf(stderr, "Failed to open microphone device %d\n", device_index); |
| 101 | exit(EXIT_FAILURE); | 101 | exit(EXIT_FAILURE); |
| 102 | } | 102 | } |
| @@ -5,6 +5,7 @@ | @@ -5,6 +5,7 @@ | ||
| 5 | #include "sherpa-onnx/csrc/offline-tts.h" | 5 | #include "sherpa-onnx/csrc/offline-tts.h" |
| 6 | 6 | ||
| 7 | #include "sherpa-onnx/csrc/macros.h" | 7 | #include "sherpa-onnx/csrc/macros.h" |
| 8 | +#include "sherpa-onnx/csrc/text-utils.h" | ||
| 8 | #include "sherpa-onnx/csrc/wave-writer.h" | 9 | #include "sherpa-onnx/csrc/wave-writer.h" |
| 9 | #include "sherpa-onnx/jni/common.h" | 10 | #include "sherpa-onnx/jni/common.h" |
| 10 | 11 | ||
| @@ -207,7 +208,10 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromAsset( | @@ -207,7 +208,10 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromAsset( | ||
| 207 | } | 208 | } |
| 208 | #endif | 209 | #endif |
| 209 | auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); | 210 | auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); |
| 210 | - SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | 211 | + auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128); |
| 212 | + for (const auto &s : str_vec) { | ||
| 213 | + SHERPA_ONNX_LOGE("%s", s.c_str()); | ||
| 214 | + } | ||
| 211 | 215 | ||
| 212 | auto tts = new sherpa_onnx::OfflineTts( | 216 | auto tts = new sherpa_onnx::OfflineTts( |
| 213 | #if __ANDROID_API__ >= 9 | 217 | #if __ANDROID_API__ >= 9 |
-
请 注册 或 登录 后发表评论