Fangjun Kuang
Committed by GitHub

Add C++ support for UVR models (#2269)

正在显示 35 个修改的文件，包含 967 行增加和 101 行删除
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +log() {
  6 + # This function is from espnet
  7 + local fname=${BASH_SOURCE[1]##*/}
  8 + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
  9 +}
  10 +
  11 +if [ -z "$EXE" ]; then
  12 + EXE=./build/bin/sherpa-onnx-offline-source-separation
  13 +fi
  14 +
  15 +echo "EXE is $EXE"
  16 +echo "PATH: $PATH"
  17 +
  18 +which $EXE
  19 +
  20 +log "------------------------------------------------------------"
  21 +log "Run spleeter"
  22 +log "------------------------------------------------------------"
  23 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/sherpa-onnx-spleeter-2stems-fp16.tar.bz2
  24 +tar xvf sherpa-onnx-spleeter-2stems-fp16.tar.bz2
  25 +rm sherpa-onnx-spleeter-2stems-fp16.tar.bz2
  26 +
  27 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav
  28 +
  29 +$EXE \
  30 + --spleeter-vocals=sherpa-onnx-spleeter-2stems-fp16/vocals.fp16.onnx \
  31 + --spleeter-accompaniment=sherpa-onnx-spleeter-2stems-fp16/accompaniment.fp16.onnx \
  32 + --num-threads=2 \
  33 + --debug=1 \
  34 + --input-wav=./qi-feng-le-zh.wav \
  35 + --output-vocals-wav=spleeter_output_vocals.wav \
  36 + --output-accompaniment-wav=spleeter_output_accompaniment.wav
  37 +
  38 +rm -rf sherpa-onnx-spleeter-2stems-fp16
  39 +
  40 +log "------------------------------------------------------------"
  41 +log "Run UVR"
  42 +log "------------------------------------------------------------"
  43 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR-MDX-NET-Voc_FT.onnx
  44 +
  45 +$EXE \
  46 + --debug=1 \
  47 + --num-threads=2 \
  48 + --uvr-model=./UVR-MDX-NET-Voc_FT.onnx \
  49 + --input-wav=./qi-feng-le-zh.wav \
  50 + --output-vocals-wav=uvr_output_vocals.wav \
  51 + --output-accompaniment-wav=uvr_output_non_vocals.wav
  52 +
  53 +rm ./UVR-MDX-NET-Voc_FT.onnx
  54 +
  55 +mkdir source-separation-wavs
  56 +mv qi-feng-le-zh.wav source-separation-wavs
  57 +mv spleeter_*.wav ./source-separation-wavs
  58 +mv uvr_*.wav ./source-separation-wavs
@@ -11,6 +11,7 @@ on: @@ -11,6 +11,7 @@ on:
11 - '.github/scripts/test-kws.sh' 11 - '.github/scripts/test-kws.sh'
12 - '.github/scripts/test-online-transducer.sh' 12 - '.github/scripts/test-online-transducer.sh'
13 - '.github/scripts/test-offline-speech-denoiser.sh' 13 - '.github/scripts/test-offline-speech-denoiser.sh'
  14 + - '.github/scripts/test-offline-source-separation.sh'
14 - '.github/scripts/test-online-paraformer.sh' 15 - '.github/scripts/test-online-paraformer.sh'
15 - '.github/scripts/test-offline-transducer.sh' 16 - '.github/scripts/test-offline-transducer.sh'
16 - '.github/scripts/test-offline-ctc.sh' 17 - '.github/scripts/test-offline-ctc.sh'
@@ -33,6 +34,7 @@ on: @@ -33,6 +34,7 @@ on:
33 - '.github/workflows/linux.yaml' 34 - '.github/workflows/linux.yaml'
34 - '.github/scripts/test-kws.sh' 35 - '.github/scripts/test-kws.sh'
35 - '.github/scripts/test-offline-speech-denoiser.sh' 36 - '.github/scripts/test-offline-speech-denoiser.sh'
  37 + - '.github/scripts/test-offline-source-separation.sh'
36 - '.github/scripts/test-online-transducer.sh' 38 - '.github/scripts/test-online-transducer.sh'
37 - '.github/scripts/test-online-paraformer.sh' 39 - '.github/scripts/test-online-paraformer.sh'
38 - '.github/scripts/test-offline-transducer.sh' 40 - '.github/scripts/test-offline-transducer.sh'
@@ -205,6 +207,20 @@ jobs: @@ -205,6 +207,20 @@ jobs:
205 overwrite: true 207 overwrite: true
206 file: sherpa-onnx-*.tar.bz2 208 file: sherpa-onnx-*.tar.bz2
207 209
  210 + - name: Test offline source separation
  211 + shell: bash
  212 + run: |
  213 + du -h -d1 .
  214 + export PATH=$PWD/build/bin:$PATH
  215 + export EXE=sherpa-onnx-offline-source-separation
  216 +
  217 + .github/scripts/test-offline-source-separation.sh
  218 +
  219 + - uses: actions/upload-artifact@v4
  220 + with:
  221 + name: source-separation-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
  222 + path: ./source-separation-wavs/*.wav
  223 +
208 - name: Test offline CTC 224 - name: Test offline CTC
209 shell: bash 225 shell: bash
210 run: | 226 run: |
1 ### Supported functions 1 ### Supported functions
2 2
3 -|Speech recognition| Speech synthesis |  
4 -|------------------|------------------|  
5 -| ✔️ | ✔️ | 3 +|Speech recognition| Speech synthesis | Source separation |
  4 +|------------------|------------------|-------------------|
  5 +| ✔️ | ✔️ | ✔️ |
6 6
7 |Speaker identification| Speaker diarization | Speaker verification | 7 |Speaker identification| Speaker diarization | Speaker verification |
8 |----------------------|-------------------- |------------------------| 8 |----------------------|-------------------- |------------------------|
@@ -16,6 +16,7 @@ @@ -16,6 +16,7 @@
16 |------------------|-----------------|--------------------| 16 |------------------|-----------------|--------------------|
17 | ✔️ | ✔️ | ✔️ | 17 | ✔️ | ✔️ | ✔️ |
18 18
  19 +
19 ### Supported platforms 20 ### Supported platforms
20 21
21 |Architecture| Android | iOS | Windows | macOS | linux | HarmonyOS | 22 |Architecture| Android | iOS | Windows | macOS | linux | HarmonyOS |
@@ -56,7 +57,9 @@ This repository supports running the following functions **locally** @@ -56,7 +57,9 @@ This repository supports running the following functions **locally**
56 - Spoken language identification 57 - Spoken language identification
57 - Audio tagging 58 - Audio tagging
58 - VAD (e.g., [silero-vad][silero-vad]) 59 - VAD (e.g., [silero-vad][silero-vad])
  60 + - Speech enhancement (e.g., [gtcrn][gtcrn])
59 - Keyword spotting 61 - Keyword spotting
  62 + - Source separation (e.g., [spleeter][spleeter], [UVR][UVR])
60 63
61 on the following platforms and operating systems: 64 on the following platforms and operating systems:
62 65
@@ -75,6 +78,7 @@ on the following platforms and operating systems: @@ -75,6 +78,7 @@ on the following platforms and operating systems:
75 - [VisionFive 2][VisionFive 2] 78 - [VisionFive 2][VisionFive 2]
76 - [旭日X3派][旭日X3派] 79 - [旭日X3派][旭日X3派]
77 - [爱芯派][爱芯派] 80 - [爱芯派][爱芯派]
  81 + - [RK3588][RK3588]
78 - etc 82 - etc
79 83
80 with the following APIs 84 with the following APIs
@@ -200,6 +204,7 @@ We also have spaces built using WebAssembly. They are listed below: @@ -200,6 +204,7 @@ We also have spaces built using WebAssembly. They are listed below:
200 | Punctuation | [Address][punct-models] | 204 | Punctuation | [Address][punct-models] |
201 | Speaker segmentation | [Address][speaker-segmentation-models] | 205 | Speaker segmentation | [Address][speaker-segmentation-models] |
202 | Speech enhancement | [Address][speech-enhancement-models] | 206 | Speech enhancement | [Address][speech-enhancement-models] |
  207 +| Source separation | [Address][source-separation-models] |
203 208
204 </details> 209 </details>
205 210
@@ -481,3 +486,8 @@ It uses sherpa-onnx for speech-to-text and text-to-speech. @@ -481,3 +486,8 @@ It uses sherpa-onnx for speech-to-text and text-to-speech.
481 [NVIDIA Jetson Orin NX]: https://developer.download.nvidia.com/assets/embedded/secure/jetson/orin_nx/docs/Jetson_Orin_NX_DS-10712-001_v0.5.pdf?RCPGu9Q6OVAOv7a7vgtwc9-BLScXRIWq6cSLuditMALECJ_dOj27DgnqAPGVnT2VpiNpQan9SyFy-9zRykR58CokzbXwjSA7Gj819e91AXPrWkGZR3oS1VLxiDEpJa_Y0lr7UT-N4GnXtb8NlUkP4GkCkkF_FQivGPrAucCUywL481GH_WpP_p7ziHU1Wg==&t=eyJscyI6ImdzZW8iLCJsc2QiOiJodHRwczovL3d3dy5nb29nbGUuY29tLmhrLyJ9 486 [NVIDIA Jetson Orin NX]: https://developer.download.nvidia.com/assets/embedded/secure/jetson/orin_nx/docs/Jetson_Orin_NX_DS-10712-001_v0.5.pdf?RCPGu9Q6OVAOv7a7vgtwc9-BLScXRIWq6cSLuditMALECJ_dOj27DgnqAPGVnT2VpiNpQan9SyFy-9zRykR58CokzbXwjSA7Gj819e91AXPrWkGZR3oS1VLxiDEpJa_Y0lr7UT-N4GnXtb8NlUkP4GkCkkF_FQivGPrAucCUywL481GH_WpP_p7ziHU1Wg==&t=eyJscyI6ImdzZW8iLCJsc2QiOiJodHRwczovL3d3dy5nb29nbGUuY29tLmhrLyJ9
482 [NVIDIA Jetson Nano B01]: https://www.seeedstudio.com/blog/2020/01/16/new-revision-of-jetson-nano-dev-kit-now-supports-new-jetson-nano-module/ 487 [NVIDIA Jetson Nano B01]: https://www.seeedstudio.com/blog/2020/01/16/new-revision-of-jetson-nano-dev-kit-now-supports-new-jetson-nano-module/
483 [speech-enhancement-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models 488 [speech-enhancement-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  489 +[source-separation-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/source-separation-models
  490 +[RK3588]: https://www.rock-chips.com/uploads/pdf/2022.8.26/192/RK3588%20Brief%20Datasheet.pdf
  491 +[spleeter]: https://github.com/deezer/spleeter
  492 +[UVR]: https://github.com/Anjok07/ultimatevocalremovergui
  493 +[gtcrn]: https://github.com/Xiaobin-Rong/gtcrn
@@ -136,8 +136,8 @@ int32_t main() { @@ -136,8 +136,8 @@ int32_t main() {
136 fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate); 136 fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
137 mic_sample_rate = atof(sample_rate_str); 137 mic_sample_rate = atof(sample_rate_str);
138 } 138 }
139 - if(!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,  
140 - nullptr) == false) { 139 + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
  140 + nullptr)) {
141 std::cerr << "Failed to open microphone device\n"; 141 std::cerr << "Failed to open microphone device\n";
142 return -1; 142 return -1;
143 } 143 }
@@ -24,7 +24,7 @@ @@ -24,7 +24,7 @@
24 #include <iostream> 24 #include <iostream>
25 #include <mutex> // NOLINT 25 #include <mutex> // NOLINT
26 #include <queue> 26 #include <queue>
27 -#include <thread> 27 +#include <thread> // NOLINT
28 #include <vector> 28 #include <vector>
29 29
30 #include "portaudio.h" // NOLINT 30 #include "portaudio.h" // NOLINT
@@ -143,7 +143,7 @@ int32_t main() { @@ -143,7 +143,7 @@ int32_t main() {
143 lowpass_cutoff, lowpass_filter_width); 143 lowpass_cutoff, lowpass_filter_width);
144 } 144 }
145 if (mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, 145 if (mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
146 - nullptr) == false) { 146 + nullptr) == false) {
147 std::cerr << "Failed to open microphone device\n"; 147 std::cerr << "Failed to open microphone device\n";
148 return -1; 148 return -1;
149 } 149 }
  1 +// cxx-api-examples/sherpa-display.cc
  2 +// Copyright (c) 2025 Xiaomi Corporation
1 #pragma once 3 #pragma once
2 4
3 #include <stdlib.h> 5 #include <stdlib.h>
@@ -6,6 +8,8 @@ @@ -6,6 +8,8 @@
6 #include <iomanip> 8 #include <iomanip>
7 #include <sstream> 9 #include <sstream>
8 #include <string> 10 #include <string>
  11 +#include <utility>
  12 +#include <vector>
9 13
10 namespace sherpa_onnx::cxx { 14 namespace sherpa_onnx::cxx {
11 15
@@ -159,14 +159,15 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( @@ -159,14 +159,15 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig(
159 recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, ""); 159 recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, "");
160 160
161 if (config->model_config.debug) { 161 if (config->model_config.debug) {
  162 +#if __OHOS__
162 auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128); 163 auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128);
163 for (const auto &s : str_vec) { 164 for (const auto &s : str_vec) {
164 -#if __OHOS__  
165 SHERPA_ONNX_LOGE("%{public}s\n", s.c_str()); 165 SHERPA_ONNX_LOGE("%{public}s\n", s.c_str());
166 -#else  
167 SHERPA_ONNX_LOGE("%s\n", s.c_str()); 166 SHERPA_ONNX_LOGE("%s\n", s.c_str());
168 -#endif  
169 } 167 }
  168 +#else
  169 + SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
  170 +#endif
170 } 171 }
171 172
172 return recognizer_config; 173 return recognizer_config;
@@ -507,14 +508,15 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( @@ -507,14 +508,15 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
507 recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, ""); 508 recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, "");
508 509
509 if (config->model_config.debug) { 510 if (config->model_config.debug) {
  511 +#if __OHOS__
510 auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128); 512 auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128);
511 for (const auto &s : str_vec) { 513 for (const auto &s : str_vec) {
512 -#if __OHOS__  
513 SHERPA_ONNX_LOGE("%{public}s\n", s.c_str()); 514 SHERPA_ONNX_LOGE("%{public}s\n", s.c_str());
514 -#else  
515 SHERPA_ONNX_LOGE("%s\n", s.c_str()); 515 SHERPA_ONNX_LOGE("%s\n", s.c_str());
516 -#endif  
517 } 516 }
  517 +#else
  518 + SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
  519 +#endif
518 } 520 }
519 521
520 return recognizer_config; 522 return recognizer_config;
@@ -55,6 +55,8 @@ set(sources @@ -55,6 +55,8 @@ set(sources
55 offline-source-separation-model-config.cc 55 offline-source-separation-model-config.cc
56 offline-source-separation-spleeter-model-config.cc 56 offline-source-separation-spleeter-model-config.cc
57 offline-source-separation-spleeter-model.cc 57 offline-source-separation-spleeter-model.cc
  58 + offline-source-separation-uvr-model-config.cc
  59 + offline-source-separation-uvr-model.cc
58 offline-source-separation.cc 60 offline-source-separation.cc
59 61
60 offline-stream.cc 62 offline-stream.cc
@@ -25,9 +25,7 @@ Microphone::~Microphone() { @@ -25,9 +25,7 @@ Microphone::~Microphone() {
25 } 25 }
26 } 26 }
27 27
28 -int Microphone::GetDeviceCount() const {  
29 - return Pa_GetDeviceCount();  
30 -} 28 +int Microphone::GetDeviceCount() const { return Pa_GetDeviceCount(); }
31 29
32 int Microphone::GetDefaultInputDevice() const { 30 int Microphone::GetDefaultInputDevice() const {
33 return Pa_GetDefaultInputDevice(); 31 return Pa_GetDefaultInputDevice();
@@ -43,7 +41,8 @@ void Microphone::PrintDevices(int device_index) const { @@ -43,7 +41,8 @@ void Microphone::PrintDevices(int device_index) const {
43 } 41 }
44 } 42 }
45 43
46 -bool Microphone::OpenDevice(int index, int sample_rate, int channel, PaStreamCallback cb, void* userdata) { 44 +bool Microphone::OpenDevice(int index, int sample_rate, int channel,
  45 + PaStreamCallback cb, void *userdata) {
47 if (index < 0 || index >= Pa_GetDeviceCount()) { 46 if (index < 0 || index >= Pa_GetDeviceCount()) {
48 fprintf(stderr, "Invalid device index: %d\n", index); 47 fprintf(stderr, "Invalid device index: %d\n", index);
49 return false; 48 return false;
@@ -68,7 +67,8 @@ bool Microphone::OpenDevice(int index, int sample_rate, int channel, PaStreamCal @@ -68,7 +67,8 @@ bool Microphone::OpenDevice(int index, int sample_rate, int channel, PaStreamCal
68 param.suggestedLatency = info->defaultLowInputLatency; 67 param.suggestedLatency = info->defaultLowInputLatency;
69 param.hostApiSpecificStreamInfo = nullptr; 68 param.hostApiSpecificStreamInfo = nullptr;
70 69
71 - PaError err = Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */ 70 + PaError err =
  71 + Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
72 sample_rate, 72 sample_rate,
73 0, // frames per buffer 73 0, // frames per buffer
74 paClipOff, // we won't output out of range samples 74 paClipOff, // we won't output out of range samples
@@ -4,22 +4,27 @@ @@ -4,22 +4,27 @@
4 4
5 #ifndef SHERPA_ONNX_CSRC_MICROPHONE_H_ 5 #ifndef SHERPA_ONNX_CSRC_MICROPHONE_H_
6 #define SHERPA_ONNX_CSRC_MICROPHONE_H_ 6 #define SHERPA_ONNX_CSRC_MICROPHONE_H_
7 -#include "portaudio.h" // NOLINT 7 +#include <cstdint>
8 8
  9 +#include "portaudio.h" // NOLINT
9 namespace sherpa_onnx { 10 namespace sherpa_onnx {
10 11
11 class Microphone { 12 class Microphone {
12 - PaStream *stream = nullptr;  
13 public: 13 public:
14 Microphone(); 14 Microphone();
15 ~Microphone(); 15 ~Microphone();
16 16
17 - int GetDeviceCount() const;  
18 - int GetDefaultInputDevice() const;  
19 - void PrintDevices(int sel) const;  
20 -  
21 - bool OpenDevice(int index, int sample_rate, int channel, PaStreamCallback cb, void* userdata); 17 + int32_t GetDeviceCount() const;
  18 + int32_t GetDefaultInputDevice() const;
  19 + void PrintDevices(int32_t sel) const;
  20 +
  21 + bool OpenDevice(int32_t index, int32_t sample_rate, int32_t channel,
  22 + PaStreamCallback cb, void *userdata);
  23 +
22 void CloseDevice(); 24 void CloseDevice();
  25 +
  26 + private:
  27 + PaStream *stream = nullptr;
23 }; 28 };
24 29
25 } // namespace sherpa_onnx 30 } // namespace sherpa_onnx
@@ -4,7 +4,9 @@ @@ -4,7 +4,9 @@
4 4
5 #include "sherpa-onnx/csrc/offline-source-separation-impl.h" 5 #include "sherpa-onnx/csrc/offline-source-separation-impl.h"
6 6
  7 +#include <algorithm>
7 #include <memory> 8 #include <memory>
  9 +#include <utility>
8 10
9 #if __ANDROID_API__ >= 9 11 #if __ANDROID_API__ >= 9
10 #include "android/asset_manager.h" 12 #include "android/asset_manager.h"
@@ -16,22 +18,93 @@ @@ -16,22 +18,93 @@
16 #endif 18 #endif
17 19
18 #include "sherpa-onnx/csrc/offline-source-separation-spleeter-impl.h" 20 #include "sherpa-onnx/csrc/offline-source-separation-spleeter-impl.h"
  21 +#include "sherpa-onnx/csrc/offline-source-separation-uvr-impl.h"
  22 +#include "sherpa-onnx/csrc/resample.h"
19 23
20 namespace sherpa_onnx { 24 namespace sherpa_onnx {
21 25
22 std::unique_ptr<OfflineSourceSeparationImpl> 26 std::unique_ptr<OfflineSourceSeparationImpl>
23 OfflineSourceSeparationImpl::Create( 27 OfflineSourceSeparationImpl::Create(
24 const OfflineSourceSeparationConfig &config) { 28 const OfflineSourceSeparationConfig &config) {
25 - // TODO(fangjun): Support other models  
26 - return std::make_unique<OfflineSourceSeparationSpleeterImpl>(config); 29 + if (!config.model.spleeter.vocals.empty()) {
  30 + return std::make_unique<OfflineSourceSeparationSpleeterImpl>(config);
  31 + }
  32 +
  33 + if (!config.model.uvr.model.empty()) {
  34 + return std::make_unique<OfflineSourceSeparationUvrImpl>(config);
  35 + }
  36 +
  37 + SHERPA_ONNX_LOGE("Please provide a separation model!");
  38 +
  39 + return nullptr;
27 } 40 }
28 41
29 template <typename Manager> 42 template <typename Manager>
30 std::unique_ptr<OfflineSourceSeparationImpl> 43 std::unique_ptr<OfflineSourceSeparationImpl>
31 OfflineSourceSeparationImpl::Create( 44 OfflineSourceSeparationImpl::Create(
32 Manager *mgr, const OfflineSourceSeparationConfig &config) { 45 Manager *mgr, const OfflineSourceSeparationConfig &config) {
33 - // TODO(fangjun): Support other models  
34 - return std::make_unique<OfflineSourceSeparationSpleeterImpl>(mgr, config); 46 + if (!config.model.spleeter.vocals.empty()) {
  47 + return std::make_unique<OfflineSourceSeparationSpleeterImpl>(mgr, config);
  48 + }
  49 +
  50 + if (!config.model.uvr.model.empty()) {
  51 + return std::make_unique<OfflineSourceSeparationUvrImpl>(mgr, config);
  52 + }
  53 +
  54 + SHERPA_ONNX_LOGE("Please provide a separation model!");
  55 +
  56 + return nullptr;
  57 +}
  58 +
  59 +OfflineSourceSeparationInput OfflineSourceSeparationImpl::Resample(
  60 + const OfflineSourceSeparationInput &input, bool debug /*= false*/) const {
  61 + const OfflineSourceSeparationInput *p_input = &input;
  62 + OfflineSourceSeparationInput tmp_input;
  63 +
  64 + int32_t output_sample_rate = GetOutputSampleRate();
  65 +
  66 + if (input.sample_rate != output_sample_rate) {
  67 + SHERPA_ONNX_LOGE(
  68 + "Creating a resampler:\n"
  69 + " in_sample_rate: %d\n"
  70 + " output_sample_rate: %d\n",
  71 + input.sample_rate, output_sample_rate);
  72 +
  73 + float min_freq = std::min<int32_t>(input.sample_rate, output_sample_rate);
  74 + float lowpass_cutoff = 0.99 * 0.5 * min_freq;
  75 +
  76 + int32_t lowpass_filter_width = 6;
  77 + auto resampler =
  78 + std::make_unique<LinearResample>(input.sample_rate, output_sample_rate,
  79 + lowpass_cutoff, lowpass_filter_width);
  80 +
  81 + std::vector<float> s;
  82 + for (const auto &samples : input.samples.data) {
  83 + resampler->Reset();
  84 + resampler->Resample(samples.data(), samples.size(), true, &s);
  85 + tmp_input.samples.data.push_back(std::move(s));
  86 + }
  87 +
  88 + tmp_input.sample_rate = output_sample_rate;
  89 + p_input = &tmp_input;
  90 + }
  91 +
  92 + if (p_input->samples.data.size() > 1) {
  93 + if (debug) {
  94 + SHERPA_ONNX_LOGE("input ch1 samples size: %d",
  95 + static_cast<int32_t>(p_input->samples.data[1].size()));
  96 + }
  97 +
  98 + if (p_input->samples.data[0].size() != p_input->samples.data[1].size()) {
  99 + SHERPA_ONNX_LOGE("ch0 samples size %d vs ch1 samples size %d",
  100 + static_cast<int32_t>(p_input->samples.data[0].size()),
  101 + static_cast<int32_t>(p_input->samples.data[1].size()));
  102 +
  103 + SHERPA_ONNX_EXIT(-1);
  104 + }
  105 + }
  106 +
  107 + return *p_input;
35 } 108 }
36 109
37 #if __ANDROID_API__ >= 9 110 #if __ANDROID_API__ >= 9
@@ -5,6 +5,7 @@ @@ -5,6 +5,7 @@
5 #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_ 5 #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_
6 #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_ 6 #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_
7 7
  8 +#include <memory>
8 #include <vector> 9 #include <vector>
9 10
10 #include "sherpa-onnx/csrc/offline-source-separation.h" 11 #include "sherpa-onnx/csrc/offline-source-separation.h"
@@ -28,6 +29,9 @@ class OfflineSourceSeparationImpl { @@ -28,6 +29,9 @@ class OfflineSourceSeparationImpl {
28 virtual int32_t GetOutputSampleRate() const = 0; 29 virtual int32_t GetOutputSampleRate() const = 0;
29 30
30 virtual int32_t GetNumberOfStems() const = 0; 31 virtual int32_t GetNumberOfStems() const = 0;
  32 +
  33 + OfflineSourceSeparationInput Resample(
  34 + const OfflineSourceSeparationInput &input, bool debug = false) const;
31 }; 35 };
32 36
33 } // namespace sherpa_onnx 37 } // namespace sherpa_onnx
@@ -4,10 +4,13 @@ @@ -4,10 +4,13 @@
4 4
5 #include "sherpa-onnx/csrc/offline-source-separation-model-config.h" 5 #include "sherpa-onnx/csrc/offline-source-separation-model-config.h"
6 6
  7 +#include "sherpa-onnx/csrc/macros.h"
  8 +
7 namespace sherpa_onnx { 9 namespace sherpa_onnx {
8 10
9 void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) { 11 void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) {
10 spleeter.Register(po); 12 spleeter.Register(po);
  13 + uvr.Register(po);
11 14
12 po->Register("num-threads", &num_threads, 15 po->Register("num-threads", &num_threads,
13 "Number of threads to run the neural network"); 16 "Number of threads to run the neural network");
@@ -20,7 +23,17 @@ void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) { @@ -20,7 +23,17 @@ void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) {
20 } 23 }
21 24
22 bool OfflineSourceSeparationModelConfig::Validate() const { 25 bool OfflineSourceSeparationModelConfig::Validate() const {
23 - return spleeter.Validate(); 26 + if (!spleeter.vocals.empty()) {
  27 + return spleeter.Validate();
  28 + }
  29 +
  30 + if (!uvr.model.empty()) {
  31 + return uvr.Validate();
  32 + }
  33 +
  34 + SHERPA_ONNX_LOGE("Please specify a source separation model");
  35 +
  36 + return false;
24 } 37 }
25 38
26 std::string OfflineSourceSeparationModelConfig::ToString() const { 39 std::string OfflineSourceSeparationModelConfig::ToString() const {
@@ -28,6 +41,7 @@ std::string OfflineSourceSeparationModelConfig::ToString() const { @@ -28,6 +41,7 @@ std::string OfflineSourceSeparationModelConfig::ToString() const {
28 41
29 os << "OfflineSourceSeparationModelConfig("; 42 os << "OfflineSourceSeparationModelConfig(";
30 os << "spleeter=" << spleeter.ToString() << ", "; 43 os << "spleeter=" << spleeter.ToString() << ", ";
  44 + os << "uvr=" << uvr.ToString() << ", ";
31 os << "num_threads=" << num_threads << ", "; 45 os << "num_threads=" << num_threads << ", ";
32 os << "debug=" << (debug ? "True" : "False") << ", "; 46 os << "debug=" << (debug ? "True" : "False") << ", ";
33 os << "provider=\"" << provider << "\")"; 47 os << "provider=\"" << provider << "\")";
@@ -8,12 +8,14 @@ @@ -8,12 +8,14 @@
8 #include <string> 8 #include <string>
9 9
10 #include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h" 10 #include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h"
  11 +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"
11 #include "sherpa-onnx/csrc/parse-options.h" 12 #include "sherpa-onnx/csrc/parse-options.h"
12 13
13 namespace sherpa_onnx { 14 namespace sherpa_onnx {
14 15
15 struct OfflineSourceSeparationModelConfig { 16 struct OfflineSourceSeparationModelConfig {
16 OfflineSourceSeparationSpleeterModelConfig spleeter; 17 OfflineSourceSeparationSpleeterModelConfig spleeter;
  18 + OfflineSourceSeparationUvrModelConfig uvr;
17 19
18 int32_t num_threads = 1; 20 int32_t num_threads = 1;
19 bool debug = false; 21 bool debug = false;
@@ -23,8 +25,10 @@ struct OfflineSourceSeparationModelConfig { @@ -23,8 +25,10 @@ struct OfflineSourceSeparationModelConfig {
23 25
24 OfflineSourceSeparationModelConfig( 26 OfflineSourceSeparationModelConfig(
25 const OfflineSourceSeparationSpleeterModelConfig &spleeter, 27 const OfflineSourceSeparationSpleeterModelConfig &spleeter,
26 - int32_t num_threads, bool debug, const std::string &provider) 28 + const OfflineSourceSeparationUvrModelConfig &uvr, int32_t num_threads,
  29 + bool debug, const std::string &provider)
27 : spleeter(spleeter), 30 : spleeter(spleeter),
  31 + uvr(uvr),
28 num_threads(num_threads), 32 num_threads(num_threads),
29 debug(debug), 33 debug(debug),
30 provider(provider) {} 34 provider(provider) {}
@@ -5,6 +5,10 @@ @@ -5,6 +5,10 @@
5 #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_ 5 #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_
6 #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_ 6 #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_
7 7
  8 +#include <algorithm>
  9 +#include <utility>
  10 +#include <vector>
  11 +
8 #include "Eigen/Dense" 12 #include "Eigen/Dense"
9 #include "kaldi-native-fbank/csrc/istft.h" 13 #include "kaldi-native-fbank/csrc/istft.h"
10 #include "kaldi-native-fbank/csrc/stft.h" 14 #include "kaldi-native-fbank/csrc/stft.h"
@@ -12,13 +16,12 @@ @@ -12,13 +16,12 @@
12 #include "sherpa-onnx/csrc/offline-source-separation-spleeter-model.h" 16 #include "sherpa-onnx/csrc/offline-source-separation-spleeter-model.h"
13 #include "sherpa-onnx/csrc/offline-source-separation.h" 17 #include "sherpa-onnx/csrc/offline-source-separation.h"
14 #include "sherpa-onnx/csrc/onnx-utils.h" 18 #include "sherpa-onnx/csrc/onnx-utils.h"
15 -#include "sherpa-onnx/csrc/resample.h"  
16 19
17 namespace sherpa_onnx { 20 namespace sherpa_onnx {
18 21
19 class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl { 22 class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl {
20 public: 23 public:
21 - OfflineSourceSeparationSpleeterImpl( 24 + explicit OfflineSourceSeparationSpleeterImpl(
22 const OfflineSourceSeparationConfig &config) 25 const OfflineSourceSeparationConfig &config)
23 : config_(config), model_(config_.model) {} 26 : config_(config), model_(config_.model) {}
24 27
@@ -28,56 +31,12 @@ class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl { @@ -28,56 +31,12 @@ class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl {
28 : config_(config), model_(mgr, config_.model) {} 31 : config_(config), model_(mgr, config_.model) {}
29 32
30 OfflineSourceSeparationOutput Process( 33 OfflineSourceSeparationOutput Process(
31 - const OfflineSourceSeparationInput &input) const override {  
32 - const OfflineSourceSeparationInput *p_input = &input;  
33 - OfflineSourceSeparationInput tmp_input;  
34 -  
35 - int32_t output_sample_rate = GetOutputSampleRate();  
36 -  
37 - if (input.sample_rate != output_sample_rate) {  
38 - SHERPA_ONNX_LOGE(  
39 - "Creating a resampler:\n"  
40 - " in_sample_rate: %d\n"  
41 - " output_sample_rate: %d\n",  
42 - input.sample_rate, output_sample_rate);  
43 -  
44 - float min_freq = std::min<int32_t>(input.sample_rate, output_sample_rate);  
45 - float lowpass_cutoff = 0.99 * 0.5 * min_freq;  
46 -  
47 - int32_t lowpass_filter_width = 6;  
48 - auto resampler = std::make_unique<LinearResample>(  
49 - input.sample_rate, output_sample_rate, lowpass_cutoff,  
50 - lowpass_filter_width);  
51 -  
52 - std::vector<float> s;  
53 - for (const auto &samples : input.samples.data) {  
54 - resampler->Reset();  
55 - resampler->Resample(samples.data(), samples.size(), true, &s);  
56 - tmp_input.samples.data.push_back(std::move(s));  
57 - }  
58 -  
59 - tmp_input.sample_rate = output_sample_rate;  
60 - p_input = &tmp_input;  
61 - }  
62 -  
63 - if (p_input->samples.data.size() > 1) {  
64 - if (config_.model.debug) {  
65 - SHERPA_ONNX_LOGE("input ch1 samples size: %d",  
66 - static_cast<int32_t>(p_input->samples.data[1].size()));  
67 - }  
68 -  
69 - if (p_input->samples.data[0].size() != p_input->samples.data[1].size()) {  
70 - SHERPA_ONNX_LOGE("ch0 samples size %d vs ch1 samples size %d",  
71 - static_cast<int32_t>(p_input->samples.data[0].size()),  
72 - static_cast<int32_t>(p_input->samples.data[1].size()));  
73 -  
74 - SHERPA_ONNX_EXIT(-1);  
75 - }  
76 - } 34 + const OfflineSourceSeparationInput &_input) const override {
  35 + auto input = Resample(_input, config_.model.debug);
77 36
78 - auto stft_ch0 = ComputeStft(*p_input, 0); 37 + auto stft_ch0 = ComputeStft(input, 0);
79 38
80 - auto stft_ch1 = ComputeStft(*p_input, 1); 39 + auto stft_ch1 = ComputeStft(input, 1);
81 knf::StftResult *p_stft_ch1 = stft_ch1.real.empty() ? &stft_ch0 : &stft_ch1; 40 knf::StftResult *p_stft_ch1 = stft_ch1.real.empty() ? &stft_ch0 : &stft_ch1;
82 41
83 int32_t num_frames = stft_ch0.num_frames; 42 int32_t num_frames = stft_ch0.num_frames;
@@ -261,7 +220,6 @@ class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl { @@ -261,7 +220,6 @@ class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl {
261 stft_config.win_length = meta.window_length; 220 stft_config.win_length = meta.window_length;
262 stft_config.window_type = meta.window_type; 221 stft_config.window_type = meta.window_type;
263 stft_config.center = meta.center; 222 stft_config.center = meta.center;
264 - stft_config.center = false;  
265 223
266 return stft_config; 224 return stft_config;
267 } 225 }
1 -// sherpa-onnx/csrc/offline-source-separation-spleeter_model-config.cc 1 +// sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.cc
2 // 2 //
3 // Copyright (c) 2025 Xiaomi Corporation 3 // Copyright (c) 2025 Xiaomi Corporation
4 4
1 -// sherpa-onnx/csrc/offline-source-separation-spleeter_model-config.h 1 +// sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h
2 // 2 //
3 // Copyright (c) 2025 Xiaomi Corporation 3 // Copyright (c) 2025 Xiaomi Corporation
4 4
1 // sherpa-onnx/csrc/offline-source-separation-spleeter-model-meta-data.h 1 // sherpa-onnx/csrc/offline-source-separation-spleeter-model-meta-data.h
2 // 2 //
3 -// Copyright (c) 2024 Xiaomi Corporation 3 +// Copyright (c) 2025 Xiaomi Corporation
4 #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_ 4 #ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_
5 #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_ 5 #define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_
6 6
  1 +// sherpa-onnx/csrc/offline-source-separation-uvr-impl.h
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_
  6 +#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_
  7 +
  8 +#include <algorithm>
  9 +#include <utility>
  10 +#include <vector>
  11 +
  12 +#include "Eigen/Dense"
  13 +#include "kaldi-native-fbank/csrc/istft.h"
  14 +#include "kaldi-native-fbank/csrc/stft.h"
  15 +#include "sherpa-onnx/csrc/macros.h"
  16 +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model.h"
  17 +#include "sherpa-onnx/csrc/offline-source-separation.h"
  18 +#include "sherpa-onnx/csrc/onnx-utils.h"
  19 +#include "sherpa-onnx/csrc/resample.h"
  20 +
  21 +namespace sherpa_onnx {
  22 +
  23 +class OfflineSourceSeparationUvrImpl : public OfflineSourceSeparationImpl {
  24 + public:
  25 + explicit OfflineSourceSeparationUvrImpl(
  26 + const OfflineSourceSeparationConfig &config)
  27 + : config_(config), model_(config_.model) {}
  28 +
  29 + template <typename Manager>
  30 + OfflineSourceSeparationUvrImpl(Manager *mgr,
  31 + const OfflineSourceSeparationConfig &config)
  32 + : config_(config), model_(mgr, config_.model) {}
  33 +
  34 + OfflineSourceSeparationOutput Process(
  35 + const OfflineSourceSeparationInput &_input) const override {
  36 + auto input = Resample(_input, config_.model.debug);
  37 +
  38 + auto chunks_ch0 = SplitIntoChunks(input.samples.data[0]);
  39 +
  40 + std::vector<std::vector<float>> chunks_ch1;
  41 + if (input.samples.data.size() > 1) {
  42 + chunks_ch1 = SplitIntoChunks(input.samples.data[1]);
  43 + }
  44 +
  45 + std::vector<float> samples_ch0;
  46 + std::vector<float> samples_ch1;
  47 +
  48 + for (int32_t i = 0; i != static_cast<int32_t>(chunks_ch0.size()); ++i) {
  49 + bool is_first_chunk = (i == 0);
  50 + bool is_last_chunk = (i == static_cast<int32_t>(chunks_ch0.size()) - 1);
  51 +
  52 + auto s = ProcessChunk(
  53 + chunks_ch0[i],
  54 + chunks_ch1.empty() ? std::vector<float>{} : chunks_ch1[i],
  55 + is_first_chunk, is_last_chunk);
  56 +
  57 + samples_ch0.insert(samples_ch0.end(), s.first.begin(), s.first.end());
  58 + samples_ch1.insert(samples_ch1.end(), s.second.begin(), s.second.end());
  59 + }
  60 +
  61 + auto &vocals_ch0 = samples_ch0;
  62 + auto &vocals_ch1 = samples_ch1;
  63 +
  64 + std::vector<float> non_vocals_ch0(vocals_ch0.size());
  65 + std::vector<float> non_vocals_ch1(vocals_ch1.size());
  66 +
  67 + Eigen::Map<Eigen::VectorXf>(non_vocals_ch0.data(), non_vocals_ch0.size()) =
  68 + Eigen::Map<Eigen::VectorXf>(input.samples.data[0].data(),
  69 + input.samples.data[0].size())
  70 + .array() -
  71 + Eigen::Map<Eigen::VectorXf>(vocals_ch0.data(), vocals_ch0.size())
  72 + .array();
  73 +
  74 + if (input.samples.data.size() > 1) {
  75 + Eigen::Map<Eigen::VectorXf>(non_vocals_ch1.data(),
  76 + non_vocals_ch1.size()) =
  77 + Eigen::Map<Eigen::VectorXf>(input.samples.data[1].data(),
  78 + input.samples.data[1].size())
  79 + .array() -
  80 + Eigen::Map<Eigen::VectorXf>(vocals_ch1.data(), vocals_ch1.size())
  81 + .array();
  82 + } else {
  83 + Eigen::Map<Eigen::VectorXf>(non_vocals_ch1.data(),
  84 + non_vocals_ch1.size()) =
  85 + Eigen::Map<Eigen::VectorXf>(input.samples.data[0].data(),
  86 + input.samples.data[0].size())
  87 + .array() -
  88 + Eigen::Map<Eigen::VectorXf>(vocals_ch1.data(), vocals_ch1.size())
  89 + .array();
  90 + }
  91 +
  92 + OfflineSourceSeparationOutput ans;
  93 + ans.sample_rate = GetOutputSampleRate();
  94 +
  95 + ans.stems.resize(2);
  96 + ans.stems[0].data.reserve(2);
  97 + ans.stems[1].data.reserve(2);
  98 +
  99 + ans.stems[0].data.push_back(std::move(vocals_ch0));
  100 + ans.stems[0].data.push_back(std::move(vocals_ch1));
  101 +
  102 + ans.stems[1].data.push_back(std::move(non_vocals_ch0));
  103 + ans.stems[1].data.push_back(std::move(non_vocals_ch1));
  104 +
  105 + return ans;
  106 + }
  107 +
  108 + int32_t GetOutputSampleRate() const override {
  109 + return model_.GetMetaData().sample_rate;
  110 + }
  111 +
  112 + int32_t GetNumberOfStems() const override {
  113 + return model_.GetMetaData().num_stems;
  114 + }
  115 +
  116 + private:
  117 + std::pair<std::vector<float>, std::vector<float>> ProcessChunk(
  118 + const std::vector<float> &chunk_ch0, const std::vector<float> &chunk_ch1,
  119 + bool is_first_chunk, bool is_last_chunk) const {
  120 + int32_t pad0 = 0;
  121 +
  122 + auto stft_results_ch0 = ComputeStft(chunk_ch0, &pad0);
  123 +
  124 + int32_t pad1 = pad0;
  125 + std::vector<knf::StftResult> stft_results_ch1;
  126 +
  127 + if (!chunk_ch1.empty()) {
  128 + stft_results_ch1 = ComputeStft(chunk_ch1, &pad1);
  129 + } else {
  130 + stft_results_ch1 = stft_results_ch0;
  131 + }
  132 +
  133 + const auto &meta_ = model_.GetMetaData();
  134 +
  135 + int32_t num_frames = stft_results_ch0[0].num_frames;
  136 + int32_t dim_f = meta_.dim_f;
  137 + int32_t dim_t = meta_.dim_t;
  138 + int32_t n_fft_bin = meta_.n_fft / 2 + 1;
  139 + if (num_frames != dim_t) {
  140 + SHERPA_ONNX_LOGE("num_frames(%d) != dim_t(%d)", num_frames, dim_t);
  141 + SHERPA_ONNX_EXIT(-1);
  142 + }
  143 +
  144 + // the first 2: number of channels
  145 + // the second 2: real and image
  146 + std::vector<float> x(stft_results_ch0.size() * 2 * 2 * dim_f * dim_t);
  147 + float *px = x.data();
  148 +
  149 + for (int32_t i = 0; i != static_cast<int32_t>(stft_results_ch0.size());
  150 + ++i) {
  151 + const auto &ch0 = stft_results_ch0[i];
  152 + const auto &ch1 = stft_results_ch1[i];
  153 +
  154 + const float *p_real_ch0 = ch0.real.data();
  155 + const float *p_imag_ch0 = ch0.imag.data();
  156 +
  157 + const float *p_real_ch1 = ch1.real.data();
  158 + const float *p_imag_ch1 = ch1.imag.data();
  159 +
  160 + for (int32_t j = 0; j != dim_f; ++j) {
  161 + for (int32_t k = 0; k != num_frames; ++k) {
  162 + *px = p_real_ch0[k * n_fft_bin + j];
  163 + ++px;
  164 + }
  165 + }
  166 +
  167 + for (int32_t j = 0; j != dim_f; ++j) {
  168 + for (int32_t k = 0; k != num_frames; ++k) {
  169 + *px = p_imag_ch0[k * n_fft_bin + j];
  170 + ++px;
  171 + }
  172 + }
  173 +
  174 + for (int32_t j = 0; j != dim_f; ++j) {
  175 + for (int32_t k = 0; k != num_frames; ++k) {
  176 + *px = p_real_ch1[k * n_fft_bin + j];
  177 + ++px;
  178 + }
  179 + }
  180 +
  181 + for (int32_t j = 0; j != dim_f; ++j) {
  182 + for (int32_t k = 0; k != num_frames; ++k) {
  183 + *px = p_imag_ch1[k * n_fft_bin + j];
  184 + ++px;
  185 + }
  186 + }
  187 + } // for (int32_t i = 0; i !=
  188 +
  189 + auto memory_info =
  190 + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
  191 +
  192 + std::array<int64_t, 4> x_shape{
  193 + static_cast<int32_t>(stft_results_ch0.size()) * 4 / meta_.dim_c,
  194 + meta_.dim_c, dim_f, dim_t};
  195 +
  196 + Ort::Value x_tensor = Ort::Value::CreateTensor(
  197 + memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());
  198 +
  199 + Ort::Value spec = model_.Run(std::move(x_tensor));
  200 +
  201 + const float *p_spec = spec.GetTensorData<float>();
  202 +
  203 + for (int32_t i = 0; i != static_cast<int32_t>(stft_results_ch0.size());
  204 + ++i) {
  205 + auto &ch0 = stft_results_ch0[i];
  206 + auto &ch1 = stft_results_ch1[i];
  207 +
  208 + float *p_real_ch0 = ch0.real.data();
  209 + float *p_imag_ch0 = ch0.imag.data();
  210 +
  211 + float *p_real_ch1 = ch1.real.data();
  212 + float *p_imag_ch1 = ch1.imag.data();
  213 +
  214 + for (int32_t j = 0; j != dim_f; ++j) {
  215 + for (int32_t k = 0; k != num_frames; ++k) {
  216 + p_real_ch0[k * n_fft_bin + j] = *p_spec;
  217 + ++p_spec;
  218 + }
  219 + }
  220 +
  221 + for (int32_t j = 0; j != dim_f; ++j) {
  222 + for (int32_t k = 0; k != num_frames; ++k) {
  223 + p_imag_ch0[k * n_fft_bin + j] = *p_spec;
  224 + ++p_spec;
  225 + }
  226 + }
  227 +
  228 + for (int32_t j = 0; j != dim_f; ++j) {
  229 + for (int32_t k = 0; k != num_frames; ++k) {
  230 + p_real_ch1[k * n_fft_bin + j] = *p_spec;
  231 + ++p_spec;
  232 + }
  233 + }
  234 +
  235 + for (int32_t j = 0; j != dim_f; ++j) {
  236 + for (int32_t k = 0; k != num_frames; ++k) {
  237 + p_imag_ch1[k * n_fft_bin + j] = *p_spec;
  238 + ++p_spec;
  239 + }
  240 + }
  241 +
  242 + for (int32_t k = 0; k != num_frames; ++k) {
  243 + for (int32_t j = dim_f; j != n_fft_bin; ++j) {
  244 + p_real_ch0[k * n_fft_bin + j] = 0;
  245 + p_real_ch1[k * n_fft_bin + j] = 0;
  246 +
  247 + p_imag_ch0[k * n_fft_bin + j] = 0;
  248 + p_imag_ch1[k * n_fft_bin + j] = 0;
  249 + }
  250 + }
  251 + }
  252 +
  253 + auto samples_ch0 = ComputeInverseStft(stft_results_ch0, pad0,
  254 + is_first_chunk, is_last_chunk);
  255 +
  256 + auto samples_ch1 = ComputeInverseStft(stft_results_ch1, pad1,
  257 + is_first_chunk, is_last_chunk);
  258 +
  259 + return {std::move(samples_ch0), std::move(samples_ch1)};
  260 + }
  261 +
  262 + std::vector<float> ComputeInverseStft(
  263 + const std::vector<knf::StftResult> &stft_result, int32_t pad,
  264 + bool is_first_chunk, bool is_last_chunk) const {
  265 + const auto &meta_ = model_.GetMetaData();
  266 + int32_t trim = meta_.n_fft / 2;
  267 +
  268 + int32_t margin = meta_.margin;
  269 +
  270 + int32_t chunk_size = meta_.num_chunks * meta_.sample_rate;
  271 +
  272 + if (margin > chunk_size) {
  273 + margin = chunk_size;
  274 + }
  275 +
  276 + auto stft_config = GetStftConfig();
  277 + knf::IStft istft(stft_config);
  278 +
  279 + std::vector<float> ans;
  280 +
  281 + for (int32_t i = 0; i != static_cast<int32_t>(stft_result.size()); ++i) {
  282 + auto samples = istft.Compute(stft_result[i]);
  283 + int32_t num_samples = static_cast<int32_t>(samples.size());
  284 +
  285 + ans.insert(ans.end(), samples.begin() + trim,
  286 + samples.begin() + (num_samples - trim));
  287 + }
  288 +
  289 + int32_t start = is_first_chunk ? 0 : margin;
  290 + int32_t end =
  291 + is_last_chunk ? (ans.size() - pad) : (ans.size() - pad - margin);
  292 +
  293 + return {ans.begin() + start, ans.begin() + end};
  294 + }
  295 +
  296 + std::vector<knf::StftResult> ComputeStft(const std::vector<float> &chunk,
  297 + int32_t *pad) const {
  298 + const auto &meta_ = model_.GetMetaData();
  299 +
  300 + int32_t num_samples = static_cast<int32_t>(chunk.size());
  301 + int32_t trim = meta_.n_fft / 2;
  302 + int32_t chunk_size = meta_.hop_length * (meta_.dim_t - 1);
  303 + int32_t gen_size = chunk_size - 2 * trim;
  304 + *pad = gen_size - num_samples % gen_size;
  305 +
  306 + std::vector<float> samples(trim + chunk.size() + *pad + trim);
  307 + std::copy(chunk.begin(), chunk.end(), samples.begin() + trim);
  308 +
  309 + auto stft_config = GetStftConfig();
  310 + knf::Stft stft(stft_config);
  311 +
  312 + std::vector<knf::StftResult> stft_results;
  313 + // split the chunk into short segments
  314 + for (int32_t i = 0; i < num_samples + *pad; i += gen_size) {
  315 + auto r = stft.Compute(samples.data() + i, chunk_size);
  316 + stft_results.push_back(std::move(r));
  317 + }
  318 +
  319 + return stft_results;
  320 + }
  321 +
  322 + std::vector<std::vector<float>> SplitIntoChunks(
  323 + const std::vector<float> &samples) const {
  324 + std::vector<std::vector<float>> ans;
  325 +
  326 + if (samples.empty()) {
  327 + return ans;
  328 + }
  329 +
  330 + const auto &meta_ = model_.GetMetaData();
  331 + int32_t margin = meta_.margin;
  332 +
  333 + int32_t chunk_size = meta_.num_chunks * meta_.sample_rate;
  334 +
  335 + if (static_cast<int32_t>(samples.size()) < chunk_size) {
  336 + chunk_size = samples.size();
  337 + }
  338 +
  339 + if (margin > chunk_size) {
  340 + margin = chunk_size;
  341 + }
  342 +
  343 + for (int32_t i = 0; i < static_cast<int32_t>(samples.size());
  344 + i += chunk_size) {
  345 + int32_t start = std::max<int32_t>(0, i - margin);
  346 + int32_t end = std::min<int32_t>(i + chunk_size + margin,
  347 + static_cast<int32_t>(samples.size()));
  348 + if (start >= end) {
  349 + break;
  350 + }
  351 +
  352 + ans.emplace_back(samples.begin() + start, samples.begin() + end);
  353 +
  354 + if (end == static_cast<int32_t>(samples.size())) {
  355 + break;
  356 + }
  357 + }
  358 +
  359 + return ans;
  360 + }
  361 +
  362 + knf::StftConfig GetStftConfig() const {
  363 + const auto &meta = model_.GetMetaData();
  364 +
  365 + knf::StftConfig stft_config;
  366 + stft_config.n_fft = meta.n_fft;
  367 + stft_config.hop_length = meta.hop_length;
  368 + stft_config.win_length = meta.window_length;
  369 + stft_config.window_type = meta.window_type;
  370 + stft_config.center = meta.center;
  371 +
  372 + return stft_config;
  373 + }
  374 +
  375 + private:
  376 + OfflineSourceSeparationConfig config_;
  377 + OfflineSourceSeparationUvrModel model_;
  378 +};
  379 +
  380 +} // namespace sherpa_onnx
  381 +
  382 +#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_
  1 +// sherpa-onnx/csrc/offline-source-separation-uvr-model-config.cc
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"
  6 +
  7 +#include "sherpa-onnx/csrc/file-utils.h"
  8 +#include "sherpa-onnx/csrc/macros.h"
  9 +
  10 +namespace sherpa_onnx {
  11 +
  12 +void OfflineSourceSeparationUvrModelConfig::Register(ParseOptions *po) {
  13 + po->Register("uvr-model", &model, "Path to the UVR model");
  14 +}
  15 +
  16 +bool OfflineSourceSeparationUvrModelConfig::Validate() const {
  17 + if (model.empty()) {
  18 + SHERPA_ONNX_LOGE("Please provide --uvr-model");
  19 + return false;
  20 + }
  21 +
  22 + if (!FileExists(model)) {
  23 + SHERPA_ONNX_LOGE("UVR model '%s' does not exist. ", model.c_str());
  24 + return false;
  25 + }
  26 +
  27 + return true;
  28 +}
  29 +
  30 +std::string OfflineSourceSeparationUvrModelConfig::ToString() const {
  31 + std::ostringstream os;
  32 +
  33 + os << "OfflineSourceSeparationUvrModelConfig(";
  34 + os << "model=\"" << model << "\")";
  35 +
  36 + return os.str();
  37 +}
  38 +
  39 +} // namespace sherpa_onnx
  1 +// sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_
  6 +#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_
  7 +
  8 +#include <string>
  9 +
  10 +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"
  11 +#include "sherpa-onnx/csrc/parse-options.h"
  12 +
  13 +namespace sherpa_onnx {
  14 +
  15 +struct OfflineSourceSeparationUvrModelConfig {
  16 + std::string model;
  17 +
  18 + OfflineSourceSeparationUvrModelConfig() = default;
  19 +
  20 + explicit OfflineSourceSeparationUvrModelConfig(const std::string &model)
  21 + : model(model) {}
  22 +
  23 + void Register(ParseOptions *po);
  24 +
  25 + bool Validate() const;
  26 +
  27 + std::string ToString() const;
  28 +};
  29 +
  30 +} // namespace sherpa_onnx
  31 +
  32 +#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_
  1 +// sherpa-onnx/csrc/offline-source-separation-uvr-model-meta-data.h
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_
  5 +#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_
  6 +
  7 +#include <string>
  8 +#include <unordered_map>
  9 +#include <vector>
  10 +
  11 +namespace sherpa_onnx {
  12 +
  13 +// See also
  14 +// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/test.py
  15 +// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/add_meta_data_and_quantize.py
// Meta data of a UVR model. The values below are defaults; the model loader
// in ./offline-source-separation-uvr-model.cc overwrites most of them.
struct OfflineSourceSeparationUvrModelMetaData {
  int32_t sample_rate = 44100;
  int32_t num_stems = 2;

  // -1 means "not set yet".
  int32_t dim_c = -1;
  int32_t dim_f = -1;
  int32_t dim_t = -1;

  int32_t n_fft = -1;
  int32_t hop_length = 1024;

  int32_t window_length = -1;
  int32_t center = 1;
  std::string window_type = "hann";

  // the following fields are preconfigured. Please see
  // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/test.py
  int32_t margin = 0;  // changed in ./offline-source-separation-uvr-model.cc

  // A compile-time constant instead of a const data member, so that the
  // struct stays copy- and move-assignable. It can still be read as
  // `meta.num_chunks`.
  static constexpr int32_t num_chunks = 15;
};
  35 +
  36 +} // namespace sherpa_onnx
  37 +
  38 +#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_
  1 +// sherpa-onnx/csrc/offline-source-separation-uvr-model.cc
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model.h"
  6 +
  7 +#include <memory>
  8 +#include <string>
  9 +#include <utility>
  10 +#include <vector>
  11 +
  12 +#if __ANDROID_API__ >= 9
  13 +#include "android/asset_manager.h"
  14 +#include "android/asset_manager_jni.h"
  15 +#endif
  16 +
  17 +#if __OHOS__
  18 +#include "rawfile/raw_file_manager.h"
  19 +#endif
  20 +
  21 +#include "sherpa-onnx/csrc/file-utils.h"
  22 +#include "sherpa-onnx/csrc/onnx-utils.h"
  23 +#include "sherpa-onnx/csrc/session.h"
  24 +#include "sherpa-onnx/csrc/text-utils.h"
  25 +
  26 +namespace sherpa_onnx {
  27 +
  28 +class OfflineSourceSeparationUvrModel::Impl {
  29 + public:
  30 + explicit Impl(const OfflineSourceSeparationModelConfig &config)
  31 + : config_(config),
  32 + env_(ORT_LOGGING_LEVEL_ERROR),
  33 + sess_opts_(GetSessionOptions(config)),
  34 + allocator_{} {
  35 + auto buf = ReadFile(config.uvr.model);
  36 + Init(buf.data(), buf.size());
  37 + }
  38 +
  39 + template <typename Manager>
  40 + Impl(Manager *mgr, const OfflineSourceSeparationModelConfig &config)
  41 + : config_(config),
  42 + env_(ORT_LOGGING_LEVEL_ERROR),
  43 + sess_opts_(GetSessionOptions(config)),
  44 + allocator_{} {
  45 + auto buf = ReadFile(mgr, config.uvr.model);
  46 + Init(buf.data(), buf.size());
  47 + }
  48 +
  49 + const OfflineSourceSeparationUvrModelMetaData &GetMetaData() const {
  50 + return meta_;
  51 + }
  52 +
  53 + Ort::Value Run(Ort::Value x) const {
  54 + auto out = sess_->Run({}, input_names_ptr_.data(), &x, 1,
  55 + output_names_ptr_.data(), output_names_ptr_.size());
  56 + return std::move(out[0]);
  57 + }
  58 +
  59 + private:
  60 + void Init(void *model_data, size_t model_data_length) {
  61 + sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
  62 + sess_opts_);
  63 +
  64 + GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
  65 +
  66 + GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);
  67 +
  68 + Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
  69 + if (config_.debug) {
  70 + std::ostringstream os;
  71 + os << "---UVR model---\n";
  72 + PrintModelMetadata(os, meta_data);
  73 +
  74 + os << "----------input names----------\n";
  75 + int32_t i = 0;
  76 + for (const auto &s : input_names_) {
  77 + os << i << " " << s << "\n";
  78 + ++i;
  79 + }
  80 + os << "----------output names----------\n";
  81 + i = 0;
  82 + for (const auto &s : output_names_) {
  83 + os << i << " " << s << "\n";
  84 + ++i;
  85 + }
  86 +
  87 +#if __OHOS__
  88 + SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
  89 +#else
  90 + SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
  91 +#endif
  92 + }
  93 +
  94 + Ort::AllocatorWithDefaultOptions allocator; // used in the macro below
  95 +
  96 + std::string model_type;
  97 + SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type");
  98 + if (model_type != "UVR") {
  99 + SHERPA_ONNX_LOGE("Expect model type 'UVR'. Given: '%s'",
  100 + model_type.c_str());
  101 + SHERPA_ONNX_EXIT(-1);
  102 + }
  103 +
  104 + SHERPA_ONNX_READ_META_DATA(meta_.num_stems, "stems");
  105 + if (meta_.num_stems != 2) {
  106 + SHERPA_ONNX_LOGE("Only 2stems is supported. Given %d stems",
  107 + meta_.num_stems);
  108 + SHERPA_ONNX_EXIT(-1);
  109 + }
  110 +
  111 + SHERPA_ONNX_READ_META_DATA(meta_.sample_rate, "sample_rate");
  112 + SHERPA_ONNX_READ_META_DATA(meta_.n_fft, "n_fft");
  113 + SHERPA_ONNX_READ_META_DATA(meta_.center, "center");
  114 + SHERPA_ONNX_READ_META_DATA(meta_.window_length, "win_length");
  115 + SHERPA_ONNX_READ_META_DATA(meta_.hop_length, "hop_length");
  116 + SHERPA_ONNX_READ_META_DATA(meta_.dim_t, "dim_t");
  117 + SHERPA_ONNX_READ_META_DATA(meta_.dim_f, "dim_f");
  118 + SHERPA_ONNX_READ_META_DATA(meta_.dim_c, "dim_c");
  119 + SHERPA_ONNX_READ_META_DATA_STR(meta_.window_type, "window_type");
  120 +
  121 + meta_.margin = meta_.sample_rate;
  122 + }
  123 +
  124 + private:
  125 + OfflineSourceSeparationModelConfig config_;
  126 + OfflineSourceSeparationUvrModelMetaData meta_;
  127 +
  128 + Ort::Env env_;
  129 + Ort::SessionOptions sess_opts_;
  130 + Ort::AllocatorWithDefaultOptions allocator_;
  131 +
  132 + std::unique_ptr<Ort::Session> sess_;
  133 +
  134 + std::vector<std::string> input_names_;
  135 + std::vector<const char *> input_names_ptr_;
  136 +
  137 + std::vector<std::string> output_names_;
  138 + std::vector<const char *> output_names_ptr_;
  139 +};
  140 +
  141 +OfflineSourceSeparationUvrModel::~OfflineSourceSeparationUvrModel() = default;
  142 +
  143 +OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
  144 + const OfflineSourceSeparationModelConfig &config)
  145 + : impl_(std::make_unique<Impl>(config)) {}
  146 +
  147 +template <typename Manager>
  148 +OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
  149 + Manager *mgr, const OfflineSourceSeparationModelConfig &config)
  150 + : impl_(std::make_unique<Impl>(mgr, config)) {}
  151 +
  152 +Ort::Value OfflineSourceSeparationUvrModel::Run(Ort::Value x) const {
  153 + return impl_->Run(std::move(x));
  154 +}
  155 +
  156 +const OfflineSourceSeparationUvrModelMetaData &
  157 +OfflineSourceSeparationUvrModel::GetMetaData() const {
  158 + return impl_->GetMetaData();
  159 +}
  160 +
// Explicit instantiations of the templated constructor, which is defined in
// this file, for the resource manager types of the supported platforms.
#if __ANDROID_API__ >= 9
template OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
    AAssetManager *mgr, const OfflineSourceSeparationModelConfig &config);
#endif

#if __OHOS__
template OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
    NativeResourceManager *mgr,
    const OfflineSourceSeparationModelConfig &config);
#endif
  171 +
  172 +} // namespace sherpa_onnx
  1 +// sherpa-onnx/csrc/offline-source-separation-uvr-model.h
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_
  5 +#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_
  6 +#include <memory>
  7 +
  8 +#include "onnxruntime_cxx_api.h" // NOLINT
  9 +#include "sherpa-onnx/csrc/offline-source-separation-model-config.h"
  10 +#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-meta-data.h"
  11 +
  12 +namespace sherpa_onnx {
  13 +
  14 +class OfflineSourceSeparationUvrModel {
  15 + public:
  16 + ~OfflineSourceSeparationUvrModel();
  17 +
  18 + explicit OfflineSourceSeparationUvrModel(
  19 + const OfflineSourceSeparationModelConfig &config);
  20 +
  21 + template <typename Manager>
  22 + OfflineSourceSeparationUvrModel(
  23 + Manager *mgr, const OfflineSourceSeparationModelConfig &config);
  24 +
  25 + Ort::Value Run(Ort::Value x) const;
  26 +
  27 + const OfflineSourceSeparationUvrModelMetaData &GetMetaData() const;
  28 +
  29 + private:
  30 + class Impl;
  31 + std::unique_ptr<Impl> impl_;
  32 +};
  33 +
  34 +} // namespace sherpa_onnx
  35 +
  36 +#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_
@@ -19,7 +19,8 @@ struct OfflineSourceSeparationConfig { @@ -19,7 +19,8 @@ struct OfflineSourceSeparationConfig {
19 19
20 OfflineSourceSeparationConfig() = default; 20 OfflineSourceSeparationConfig() = default;
21 21
22 - OfflineSourceSeparationConfig(const OfflineSourceSeparationModelConfig &model) 22 + explicit OfflineSourceSeparationConfig(
  23 + const OfflineSourceSeparationModelConfig &model)
23 : model(model) {} 24 : model(model) {}
24 25
25 void Register(ParseOptions *po); 26 void Register(ParseOptions *po);
@@ -54,7 +55,7 @@ class OfflineSourceSeparation { @@ -54,7 +55,7 @@ class OfflineSourceSeparation {
54 public: 55 public:
55 ~OfflineSourceSeparation(); 56 ~OfflineSourceSeparation();
56 57
57 - OfflineSourceSeparation(const OfflineSourceSeparationConfig &config); 58 + explicit OfflineSourceSeparation(const OfflineSourceSeparationConfig &config);
58 59
59 template <typename Manager> 60 template <typename Manager>
60 OfflineSourceSeparation(Manager *mgr, 61 OfflineSourceSeparation(Manager *mgr,
@@ -101,8 +101,8 @@ for a list of pre-trained models to download. @@ -101,8 +101,8 @@ for a list of pre-trained models to download.
101 mic_sample_rate = atof(pSampleRateStr); 101 mic_sample_rate = atof(pSampleRateStr);
102 } 102 }
103 103
104 - if(!mic.OpenDevice(device_index, mic_sample_rate, 1,  
105 - RecordCallback, s.get())) { 104 + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
  105 + s.get())) {
106 fprintf(stderr, "portaudio error: %d\n", device_index); 106 fprintf(stderr, "portaudio error: %d\n", device_index);
107 exit(EXIT_FAILURE); 107 exit(EXIT_FAILURE);
108 } 108 }
@@ -142,8 +142,8 @@ for more models. @@ -142,8 +142,8 @@ for more models.
142 mic_sample_rate = atof(pSampleRateStr); 142 mic_sample_rate = atof(pSampleRateStr);
143 } 143 }
144 144
145 - if (!mic.OpenDevice(device_index, mic_sample_rate, 1,  
146 - RecordCallback, nullptr /* user_data */)){ 145 + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
  146 + nullptr /* user_data */)) {
147 fprintf(stderr, "portaudio error: %d\n", device_index); 147 fprintf(stderr, "portaudio error: %d\n", device_index);
148 exit(EXIT_FAILURE); 148 exit(EXIT_FAILURE);
149 } 149 }
@@ -244,8 +244,8 @@ Note that `zh` means Chinese, while `en` means English. @@ -244,8 +244,8 @@ Note that `zh` means Chinese, while `en` means English.
244 mic_sample_rate = atof(pSampleRateStr); 244 mic_sample_rate = atof(pSampleRateStr);
245 } 245 }
246 246
247 - if (!mic.OpenDevice(device_index, mic_sample_rate, 1,  
248 - RecordCallback, nullptr /* user_data */)){ 247 + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
  248 + nullptr /* user_data */)) {
249 fprintf(stderr, "portaudio error: %d\n", device_index); 249 fprintf(stderr, "portaudio error: %d\n", device_index);
250 exit(EXIT_FAILURE); 250 exit(EXIT_FAILURE);
251 } 251 }
@@ -159,8 +159,8 @@ for a list of pre-trained models to download. @@ -159,8 +159,8 @@ for a list of pre-trained models to download.
159 mic_sample_rate = atof(pSampleRateStr); 159 mic_sample_rate = atof(pSampleRateStr);
160 } 160 }
161 161
162 - if (!mic.OpenDevice(device_index, mic_sample_rate, 1,  
163 - RecordCallback, nullptr /* user_data */)){ 162 + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
  163 + nullptr /* user_data */)) {
164 fprintf(stderr, "portaudio error: %d\n", device_index); 164 fprintf(stderr, "portaudio error: %d\n", device_index);
165 exit(EXIT_FAILURE); 165 exit(EXIT_FAILURE);
166 } 166 }
@@ -129,8 +129,8 @@ for a list of pre-trained models to download. @@ -129,8 +129,8 @@ for a list of pre-trained models to download.
129 mic_sample_rate = atof(pSampleRateStr); 129 mic_sample_rate = atof(pSampleRateStr);
130 } 130 }
131 131
132 - if (!mic.OpenDevice(device_index, mic_sample_rate, 1,  
133 - RecordCallback, nullptr /* user_data */)){ 132 + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
  133 + nullptr /* user_data */)) {
134 fprintf(stderr, "portaudio error: %d\n", device_index); 134 fprintf(stderr, "portaudio error: %d\n", device_index);
135 exit(EXIT_FAILURE); 135 exit(EXIT_FAILURE);
136 } 136 }
@@ -33,6 +33,17 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-m @@ -33,6 +33,17 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-m
33 --input-wav=audio_example.wav \ 33 --input-wav=audio_example.wav \
34 --output-vocals-wav=output_vocals.wav \ 34 --output-vocals-wav=output_vocals.wav \
35 --output-accompaniment-wav=output_accompaniment.wav 35 --output-accompaniment-wav=output_accompaniment.wav
  36 +
  37 +(2) Use UVR models
  38 +
  39 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR_MDXNET_1_9703.onnx
  40 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/audio_example.wav
  41 +
  42 +./bin/sherpa-onnx-offline-source-separation \
  43 + --uvr-model=./UVR_MDXNET_1_9703.onnx \
  44 + --input-wav=audio_example.wav \
  45 + --output-vocals-wav=output_vocals.wav \
  46 + --output-accompaniment-wav=output_accompaniment.wav
36 )usage"; 47 )usage";
37 48
38 sherpa_onnx::ParseOptions po(kUsageMessage); 49 sherpa_onnx::ParseOptions po(kUsageMessage);
@@ -136,7 +136,8 @@ to download models for offline ASR. @@ -136,7 +136,8 @@ to download models for offline ASR.
136 mic_sample_rate = atof(pSampleRateStr); 136 mic_sample_rate = atof(pSampleRateStr);
137 } 137 }
138 138
139 - if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, nullptr)) { 139 + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
  140 + nullptr)) {
140 fprintf(stderr, "Failed to open device %d\n", device_index); 141 fprintf(stderr, "Failed to open device %d\n", device_index);
141 exit(EXIT_FAILURE); 142 exit(EXIT_FAILURE);
142 } 143 }
@@ -74,7 +74,6 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler @@ -74,7 +74,6 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler
74 74
75 sherpa_onnx::Microphone mic; 75 sherpa_onnx::Microphone mic;
76 76
77 -  
78 int32_t device_index = Pa_GetDefaultInputDevice(); 77 int32_t device_index = Pa_GetDefaultInputDevice();
79 if (device_index == paNoDevice) { 78 if (device_index == paNoDevice) {
80 fprintf(stderr, "No default input device found\n"); 79 fprintf(stderr, "No default input device found\n");
@@ -96,7 +95,8 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler @@ -96,7 +95,8 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler
96 fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate); 95 fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
97 mic_sample_rate = atof(pSampleRateStr); 96 mic_sample_rate = atof(pSampleRateStr);
98 } 97 }
99 - if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, nullptr)) { 98 + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
  99 + nullptr)) {
100 fprintf(stderr, "Failed to open microphone device %d\n", device_index); 100 fprintf(stderr, "Failed to open microphone device %d\n", device_index);
101 exit(EXIT_FAILURE); 101 exit(EXIT_FAILURE);
102 } 102 }
@@ -5,6 +5,7 @@ @@ -5,6 +5,7 @@
5 #include "sherpa-onnx/csrc/offline-tts.h" 5 #include "sherpa-onnx/csrc/offline-tts.h"
6 6
7 #include "sherpa-onnx/csrc/macros.h" 7 #include "sherpa-onnx/csrc/macros.h"
  8 +#include "sherpa-onnx/csrc/text-utils.h"
8 #include "sherpa-onnx/csrc/wave-writer.h" 9 #include "sherpa-onnx/csrc/wave-writer.h"
9 #include "sherpa-onnx/jni/common.h" 10 #include "sherpa-onnx/jni/common.h"
10 11
@@ -207,7 +208,10 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromAsset( @@ -207,7 +208,10 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromAsset(
207 } 208 }
208 #endif 209 #endif
209 auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); 210 auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config);
210 - SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); 211 + auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);
  212 + for (const auto &s : str_vec) {
  213 + SHERPA_ONNX_LOGE("%s", s.c_str());
  214 + }
211 215
212 auto tts = new sherpa_onnx::OfflineTts( 216 auto tts = new sherpa_onnx::OfflineTts(
213 #if __ANDROID_API__ >= 9 217 #if __ANDROID_API__ >= 9