Fangjun Kuang
Committed by GitHub

Add C++ support for UVR models (#2269)

正在显示 35 个修改的文件 包含 967 行增加101 行删除
#!/usr/bin/env bash
set -ex
log() {
  # Timestamped logger borrowed from espnet. Prefixes the message with the
  # current time plus the caller's file name, line number and function name.
  local caller_file=${BASH_SOURCE[1]##*/}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local now
  now=$(date '+%Y-%m-%d %H:%M:%S')
  echo -e "${now} (${caller_file}:${caller_line}:${caller_func}) $*"
}
# Allow the caller to choose the binary under test through $EXE; fall back to
# the in-tree build output when it is unset or empty.
# The expansions are quoted so that a value containing whitespace does not
# break the test expression or get split into several arguments.
if [ -z "${EXE:-}" ]; then
  EXE=./build/bin/sherpa-onnx-offline-source-separation
fi

echo "EXE is $EXE"
echo "PATH: $PATH"

# With `set -e` this aborts the script early when the binary cannot be found.
which "$EXE"
log "------------------------------------------------------------"
log "Run spleeter"
log "------------------------------------------------------------"
# Download and unpack the 2-stem fp16 spleeter models, then delete the archive.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/sherpa-onnx-spleeter-2stems-fp16.tar.bz2
tar xvf sherpa-onnx-spleeter-2stems-fp16.tar.bz2
rm sherpa-onnx-spleeter-2stems-fp16.tar.bz2
# Download the test recording. It is kept on disk because the UVR run later in
# this script reads the same file.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav
# Separate the recording into a vocals wav and an accompaniment wav.
$EXE \
--spleeter-vocals=sherpa-onnx-spleeter-2stems-fp16/vocals.fp16.onnx \
--spleeter-accompaniment=sherpa-onnx-spleeter-2stems-fp16/accompaniment.fp16.onnx \
--num-threads=2 \
--debug=1 \
--input-wav=./qi-feng-le-zh.wav \
--output-vocals-wav=spleeter_output_vocals.wav \
--output-accompaniment-wav=spleeter_output_accompaniment.wav
# The unpacked model directory is no longer needed once the run has finished.
rm -rf sherpa-onnx-spleeter-2stems-fp16
log "------------------------------------------------------------"
log "Run UVR"
log "------------------------------------------------------------"
# Fetch the UVR model used for this run.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR-MDX-NET-Voc_FT.onnx

# The input recording qi-feng-le-zh.wav was downloaded earlier in this script.
$EXE \
  --debug=1 \
  --num-threads=2 \
  --uvr-model=./UVR-MDX-NET-Voc_FT.onnx \
  --input-wav=./qi-feng-le-zh.wav \
  --output-vocals-wav=uvr_output_vocals.wav \
  --output-accompaniment-wav=uvr_output_non_vocals.wav

# NOTE: this command must not end with a backslash. A line continuation here
# would append "mkdir source-separation-wavs" to the rm arguments, so rm would
# fail (aborting the script under `set -e`) and the directory would never be
# created.
rm ./UVR-MDX-NET-Voc_FT.onnx

# Gather the input recording and every generated wav in one directory.
mkdir source-separation-wavs
mv qi-feng-le-zh.wav source-separation-wavs
mv spleeter_*.wav ./source-separation-wavs
mv uvr_*.wav ./source-separation-wavs
... ...
... ... @@ -11,6 +11,7 @@ on:
- '.github/scripts/test-kws.sh'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-offline-speech-denoiser.sh'
- '.github/scripts/test-offline-source-separation.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
... ... @@ -33,6 +34,7 @@ on:
- '.github/workflows/linux.yaml'
- '.github/scripts/test-kws.sh'
- '.github/scripts/test-offline-speech-denoiser.sh'
- '.github/scripts/test-offline-source-separation.sh'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
... ... @@ -205,6 +207,20 @@ jobs:
overwrite: true
file: sherpa-onnx-*.tar.bz2
- name: Test offline source separation
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-source-separation
.github/scripts/test-offline-source-separation.sh
- uses: actions/upload-artifact@v4
with:
name: source-separation-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: ./source-separation-wavs/*.wav
- name: Test offline CTC
shell: bash
run: |
... ...
### Supported functions
|Speech recognition| Speech synthesis |
|------------------|------------------|
| ✔️ | ✔️ |
|Speech recognition| Speech synthesis | Source separation |
|------------------|------------------|-------------------|
| ✔️ | ✔️ | ✔️ |
|Speaker identification| Speaker diarization | Speaker verification |
|----------------------|-------------------- |------------------------|
... ... @@ -16,6 +16,7 @@
|------------------|-----------------|--------------------|
| ✔️ | ✔️ | ✔️ |
### Supported platforms
|Architecture| Android | iOS | Windows | macOS | linux | HarmonyOS |
... ... @@ -56,7 +57,9 @@ This repository supports running the following functions **locally**
- Spoken language identification
- Audio tagging
- VAD (e.g., [silero-vad][silero-vad])
- Speech enhancement (e.g., [gtcrn][gtcrn])
- Keyword spotting
- Source separation (e.g., [spleeter][spleeter], [UVR][UVR])
on the following platforms and operating systems:
... ... @@ -75,6 +78,7 @@ on the following platforms and operating systems:
- [VisionFive 2][VisionFive 2]
- [旭日X3派][旭日X3派]
- [爱芯派][爱芯派]
- [RK3588][RK3588]
- etc
with the following APIs
... ... @@ -200,6 +204,7 @@ We also have spaces built using WebAssembly. They are listed below:
| Punctuation | [Address][punct-models] |
| Speaker segmentation | [Address][speaker-segmentation-models] |
| Speech enhancement | [Address][speech-enhancement-models] |
| Source separation | [Address][source-separation-models] |
</details>
... ... @@ -481,3 +486,8 @@ It uses sherpa-onnx for speech-to-text and text-to-speech.
[NVIDIA Jetson Orin NX]: https://developer.download.nvidia.com/assets/embedded/secure/jetson/orin_nx/docs/Jetson_Orin_NX_DS-10712-001_v0.5.pdf?RCPGu9Q6OVAOv7a7vgtwc9-BLScXRIWq6cSLuditMALECJ_dOj27DgnqAPGVnT2VpiNpQan9SyFy-9zRykR58CokzbXwjSA7Gj819e91AXPrWkGZR3oS1VLxiDEpJa_Y0lr7UT-N4GnXtb8NlUkP4GkCkkF_FQivGPrAucCUywL481GH_WpP_p7ziHU1Wg==&t=eyJscyI6ImdzZW8iLCJsc2QiOiJodHRwczovL3d3dy5nb29nbGUuY29tLmhrLyJ9
[NVIDIA Jetson Nano B01]: https://www.seeedstudio.com/blog/2020/01/16/new-revision-of-jetson-nano-dev-kit-now-supports-new-jetson-nano-module/
[speech-enhancement-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
[source-separation-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/source-separation-models
[RK3588]: https://www.rock-chips.com/uploads/pdf/2022.8.26/192/RK3588%20Brief%20Datasheet.pdf
[spleeter]: https://github.com/deezer/spleeter
[UVR]: https://github.com/Anjok07/ultimatevocalremovergui
[gtcrn]: https://github.com/Xiaobin-Rong/gtcrn
... ...
... ... @@ -136,8 +136,8 @@ int32_t main() {
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
mic_sample_rate = atof(sample_rate_str);
}
// OpenDevice() returns false when the device cannot be opened, so its result
// must be compared against false directly. Negating it first inverts the
// check and reports a failure exactly when the device was opened.
if(mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr) == false) {
if (mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr) == false) {
std::cerr << "Failed to open microphone device\n";
return -1;
}
... ...
... ... @@ -24,7 +24,7 @@
#include <iostream>
#include <mutex> // NOLINT
#include <queue>
#include <thread>
#include <thread> // NOLINT
#include <vector>
#include "portaudio.h" // NOLINT
... ...
... ... @@ -143,7 +143,7 @@ int32_t main() {
lowpass_cutoff, lowpass_filter_width);
}
if (mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr) == false) {
nullptr) == false) {
std::cerr << "Failed to open microphone device\n";
return -1;
}
... ...
// cxx-api-examples/sherpa-display.cc
// Copyright (c) 2025 Xiaomi Corporation
#pragma once
#include <stdlib.h>
... ... @@ -6,6 +8,8 @@
#include <iomanip>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
namespace sherpa_onnx::cxx {
... ...
... ... @@ -159,14 +159,15 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig(
recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, "");
if (config->model_config.debug) {
#if __OHOS__
auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128);
for (const auto &s : str_vec) {
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s\n", s.c_str());
#else
SHERPA_ONNX_LOGE("%s\n", s.c_str());
#endif
}
#else
SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
#endif
}
return recognizer_config;
... ... @@ -507,14 +508,15 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, "");
if (config->model_config.debug) {
#if __OHOS__
auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128);
for (const auto &s : str_vec) {
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s\n", s.c_str());
#else
SHERPA_ONNX_LOGE("%s\n", s.c_str());
#endif
}
#else
SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
#endif
}
return recognizer_config;
... ...
... ... @@ -55,6 +55,8 @@ set(sources
offline-source-separation-model-config.cc
offline-source-separation-spleeter-model-config.cc
offline-source-separation-spleeter-model.cc
offline-source-separation-uvr-model-config.cc
offline-source-separation-uvr-model.cc
offline-source-separation.cc
offline-stream.cc
... ...
... ... @@ -25,9 +25,7 @@ Microphone::~Microphone() {
}
}
int Microphone::GetDeviceCount() const {
return Pa_GetDeviceCount();
}
int Microphone::GetDeviceCount() const { return Pa_GetDeviceCount(); }
int Microphone::GetDefaultInputDevice() const {
return Pa_GetDefaultInputDevice();
... ... @@ -43,7 +41,8 @@ void Microphone::PrintDevices(int device_index) const {
}
}
bool Microphone::OpenDevice(int index, int sample_rate, int channel, PaStreamCallback cb, void* userdata) {
bool Microphone::OpenDevice(int index, int sample_rate, int channel,
PaStreamCallback cb, void *userdata) {
if (index < 0 || index >= Pa_GetDeviceCount()) {
fprintf(stderr, "Invalid device index: %d\n", index);
return false;
... ... @@ -68,7 +67,8 @@ bool Microphone::OpenDevice(int index, int sample_rate, int channel, PaStreamCal
param.suggestedLatency = info->defaultLowInputLatency;
param.hostApiSpecificStreamInfo = nullptr;
PaError err = Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
PaError err =
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
sample_rate,
0, // frames per buffer
paClipOff, // we won't output out of range samples
... ...
... ... @@ -4,22 +4,27 @@
#ifndef SHERPA_ONNX_CSRC_MICROPHONE_H_
#define SHERPA_ONNX_CSRC_MICROPHONE_H_
#include "portaudio.h" // NOLINT
#include <cstdint>
#include "portaudio.h" // NOLINT
namespace sherpa_onnx {
class Microphone {
PaStream *stream = nullptr;
public:
Microphone();
~Microphone();
int GetDeviceCount() const;
int GetDefaultInputDevice() const;
void PrintDevices(int sel) const;
bool OpenDevice(int index, int sample_rate, int channel, PaStreamCallback cb, void* userdata);
int32_t GetDeviceCount() const;
int32_t GetDefaultInputDevice() const;
void PrintDevices(int32_t sel) const;
bool OpenDevice(int32_t index, int32_t sample_rate, int32_t channel,
PaStreamCallback cb, void *userdata);
void CloseDevice();
private:
PaStream *stream = nullptr;
};
} // namespace sherpa_onnx
... ...
... ... @@ -4,7 +4,9 @@
#include "sherpa-onnx/csrc/offline-source-separation-impl.h"
#include <algorithm>
#include <memory>
#include <utility>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
... ... @@ -16,22 +18,93 @@
#endif
#include "sherpa-onnx/csrc/offline-source-separation-spleeter-impl.h"
#include "sherpa-onnx/csrc/offline-source-separation-uvr-impl.h"
#include "sherpa-onnx/csrc/resample.h"
namespace sherpa_onnx {
std::unique_ptr<OfflineSourceSeparationImpl>
OfflineSourceSeparationImpl::Create(
const OfflineSourceSeparationConfig &config) {
// TODO(fangjun): Support other models
return std::make_unique<OfflineSourceSeparationSpleeterImpl>(config);
if (!config.model.spleeter.vocals.empty()) {
return std::make_unique<OfflineSourceSeparationSpleeterImpl>(config);
}
if (!config.model.uvr.model.empty()) {
return std::make_unique<OfflineSourceSeparationUvrImpl>(config);
}
SHERPA_ONNX_LOGE("Please provide a separation model!");
return nullptr;
}
template <typename Manager>
std::unique_ptr<OfflineSourceSeparationImpl>
OfflineSourceSeparationImpl::Create(
Manager *mgr, const OfflineSourceSeparationConfig &config) {
// TODO(fangjun): Support other models
return std::make_unique<OfflineSourceSeparationSpleeterImpl>(mgr, config);
if (!config.model.spleeter.vocals.empty()) {
return std::make_unique<OfflineSourceSeparationSpleeterImpl>(mgr, config);
}
if (!config.model.uvr.model.empty()) {
return std::make_unique<OfflineSourceSeparationUvrImpl>(mgr, config);
}
SHERPA_ONNX_LOGE("Please provide a separation model!");
return nullptr;
}
// Returns a copy of `input` whose sample rate equals GetOutputSampleRate().
//
// If the rates differ, every channel is resampled independently; otherwise
// the input is copied through unchanged. When the result has more than one
// channel, channel 0 and channel 1 must contain the same number of samples,
// otherwise an error is logged and the process exits.
//
// @param input Audio to convert. It is not modified.
// @param debug If true, log the number of samples in channel 1.
// @return The audio at the output sample rate.
OfflineSourceSeparationInput OfflineSourceSeparationImpl::Resample(
    const OfflineSourceSeparationInput &input, bool debug /*= false*/) const {
  // Built in place and returned directly, so the (possibly large) resampled
  // audio is not deep-copied a second time on return.
  OfflineSourceSeparationInput ans;

  int32_t output_sample_rate = GetOutputSampleRate();

  if (input.sample_rate != output_sample_rate) {
    SHERPA_ONNX_LOGE(
        "Creating a resampler:\n"
        "   in_sample_rate: %d\n"
        "   output_sample_rate: %d\n",
        input.sample_rate, output_sample_rate);

    float min_freq = std::min<int32_t>(input.sample_rate, output_sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;
    int32_t lowpass_filter_width = 6;

    auto resampler =
        std::make_unique<LinearResample>(input.sample_rate, output_sample_rate,
                                         lowpass_cutoff, lowpass_filter_width);

    ans.samples.data.reserve(input.samples.data.size());
    for (const auto &samples : input.samples.data) {
      // A fresh buffer per channel; a moved-from vector is never reused.
      std::vector<float> s;

      // Channels are independent signals, so the filter state is cleared
      // before each of them.
      resampler->Reset();
      resampler->Resample(samples.data(), samples.size(), true, &s);
      ans.samples.data.push_back(std::move(s));
    }

    ans.sample_rate = output_sample_rate;
  } else {
    ans = input;
  }

  const auto &channels = ans.samples.data;
  if (channels.size() > 1) {
    if (debug) {
      SHERPA_ONNX_LOGE("input ch1 samples size: %d",
                       static_cast<int32_t>(channels[1].size()));
    }

    if (channels[0].size() != channels[1].size()) {
      SHERPA_ONNX_LOGE("ch0 samples size %d vs ch1 samples size %d",
                       static_cast<int32_t>(channels[0].size()),
                       static_cast<int32_t>(channels[1].size()));
      SHERPA_ONNX_EXIT(-1);
    }
  }

  return ans;
}
#if __ANDROID_API__ >= 9
... ...
... ... @@ -5,6 +5,7 @@
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_
#include <memory>
#include <vector>
#include "sherpa-onnx/csrc/offline-source-separation.h"
... ... @@ -28,6 +29,9 @@ class OfflineSourceSeparationImpl {
virtual int32_t GetOutputSampleRate() const = 0;
virtual int32_t GetNumberOfStems() const = 0;
OfflineSourceSeparationInput Resample(
const OfflineSourceSeparationInput &input, bool debug = false) const;
};
} // namespace sherpa_onnx
... ...
... ... @@ -4,10 +4,13 @@
#include "sherpa-onnx/csrc/offline-source-separation-model-config.h"
#include "sherpa-onnx/csrc/macros.h"
namespace sherpa_onnx {
void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) {
spleeter.Register(po);
uvr.Register(po);
po->Register("num-threads", &num_threads,
"Number of threads to run the neural network");
... ... @@ -20,7 +23,17 @@ void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) {
}
bool OfflineSourceSeparationModelConfig::Validate() const {
return spleeter.Validate();
if (!spleeter.vocals.empty()) {
return spleeter.Validate();
}
if (!uvr.model.empty()) {
return uvr.Validate();
}
SHERPA_ONNX_LOGE("Please specify a source separation model");
return false;
}
std::string OfflineSourceSeparationModelConfig::ToString() const {
... ... @@ -28,6 +41,7 @@ std::string OfflineSourceSeparationModelConfig::ToString() const {
os << "OfflineSourceSeparationModelConfig(";
os << "spleeter=" << spleeter.ToString() << ", ";
os << "uvr=" << uvr.ToString() << ", ";
os << "num_threads=" << num_threads << ", ";
os << "debug=" << (debug ? "True" : "False") << ", ";
os << "provider=\"" << provider << "\")";
... ...
... ... @@ -8,12 +8,14 @@
#include <string>
#include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h"
#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"
namespace sherpa_onnx {
struct OfflineSourceSeparationModelConfig {
OfflineSourceSeparationSpleeterModelConfig spleeter;
OfflineSourceSeparationUvrModelConfig uvr;
int32_t num_threads = 1;
bool debug = false;
... ... @@ -23,8 +25,10 @@ struct OfflineSourceSeparationModelConfig {
OfflineSourceSeparationModelConfig(
const OfflineSourceSeparationSpleeterModelConfig &spleeter,
int32_t num_threads, bool debug, const std::string &provider)
const OfflineSourceSeparationUvrModelConfig &uvr, int32_t num_threads,
bool debug, const std::string &provider)
: spleeter(spleeter),
uvr(uvr),
num_threads(num_threads),
debug(debug),
provider(provider) {}
... ...
... ... @@ -5,6 +5,10 @@
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_
#include <algorithm>
#include <utility>
#include <vector>
#include "Eigen/Dense"
#include "kaldi-native-fbank/csrc/istft.h"
#include "kaldi-native-fbank/csrc/stft.h"
... ... @@ -12,13 +16,12 @@
#include "sherpa-onnx/csrc/offline-source-separation-spleeter-model.h"
#include "sherpa-onnx/csrc/offline-source-separation.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/resample.h"
namespace sherpa_onnx {
class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl {
public:
OfflineSourceSeparationSpleeterImpl(
explicit OfflineSourceSeparationSpleeterImpl(
const OfflineSourceSeparationConfig &config)
: config_(config), model_(config_.model) {}
... ... @@ -28,56 +31,12 @@ class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl {
: config_(config), model_(mgr, config_.model) {}
OfflineSourceSeparationOutput Process(
const OfflineSourceSeparationInput &input) const override {
const OfflineSourceSeparationInput *p_input = &input;
OfflineSourceSeparationInput tmp_input;
int32_t output_sample_rate = GetOutputSampleRate();
if (input.sample_rate != output_sample_rate) {
SHERPA_ONNX_LOGE(
"Creating a resampler:\n"
" in_sample_rate: %d\n"
" output_sample_rate: %d\n",
input.sample_rate, output_sample_rate);
float min_freq = std::min<int32_t>(input.sample_rate, output_sample_rate);
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
int32_t lowpass_filter_width = 6;
auto resampler = std::make_unique<LinearResample>(
input.sample_rate, output_sample_rate, lowpass_cutoff,
lowpass_filter_width);
std::vector<float> s;
for (const auto &samples : input.samples.data) {
resampler->Reset();
resampler->Resample(samples.data(), samples.size(), true, &s);
tmp_input.samples.data.push_back(std::move(s));
}
tmp_input.sample_rate = output_sample_rate;
p_input = &tmp_input;
}
if (p_input->samples.data.size() > 1) {
if (config_.model.debug) {
SHERPA_ONNX_LOGE("input ch1 samples size: %d",
static_cast<int32_t>(p_input->samples.data[1].size()));
}
if (p_input->samples.data[0].size() != p_input->samples.data[1].size()) {
SHERPA_ONNX_LOGE("ch0 samples size %d vs ch1 samples size %d",
static_cast<int32_t>(p_input->samples.data[0].size()),
static_cast<int32_t>(p_input->samples.data[1].size()));
SHERPA_ONNX_EXIT(-1);
}
}
const OfflineSourceSeparationInput &_input) const override {
auto input = Resample(_input, config_.model.debug);
auto stft_ch0 = ComputeStft(*p_input, 0);
auto stft_ch0 = ComputeStft(input, 0);
auto stft_ch1 = ComputeStft(*p_input, 1);
auto stft_ch1 = ComputeStft(input, 1);
knf::StftResult *p_stft_ch1 = stft_ch1.real.empty() ? &stft_ch0 : &stft_ch1;
int32_t num_frames = stft_ch0.num_frames;
... ... @@ -261,7 +220,6 @@ class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl {
stft_config.win_length = meta.window_length;
stft_config.window_type = meta.window_type;
stft_config.center = meta.center;
stft_config.center = false;
return stft_config;
}
... ...
// sherpa-onnx/csrc/offline-source-separation-spleeter_model-config.cc
// sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation
... ...
// sherpa-onnx/csrc/offline-source-separation-spleeter_model-config.h
// sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation
... ...
// sherpa-onnx/csrc/offline-source-separation-spleeter-model-meta-data.h
//
// Copyright (c) 2024 Xiaomi Corporation
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_
... ...
// sherpa-onnx/csrc/offline-source-separation-uvr-impl.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_
#include <algorithm>
#include <array>
#include <utility>
#include <vector>
#include "Eigen/Dense"
#include "kaldi-native-fbank/csrc/istft.h"
#include "kaldi-native-fbank/csrc/stft.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-source-separation-uvr-model.h"
#include "sherpa-onnx/csrc/offline-source-separation.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/resample.h"
namespace sherpa_onnx {
class OfflineSourceSeparationUvrImpl : public OfflineSourceSeparationImpl {
public:
// Creates the separator from `config`.
//
// A copy of `config` is stored, and the UVR model wrapper is constructed from
// the model section of that stored copy (config_.model). This relies on
// config_ being declared before model_ in this class, because members are
// initialized in declaration order.
explicit OfflineSourceSeparationUvrImpl(
const OfflineSourceSeparationConfig &config)
: config_(config), model_(config_.model) {}
// Same as the single-argument constructor, but additionally forwards `mgr`
// to the model wrapper.
// NOTE(review): `mgr` is presumably a platform asset/resource manager used to
// read the model file -- confirm against OfflineSourceSeparationUvrModel.
template <typename Manager>
OfflineSourceSeparationUvrImpl(Manager *mgr,
const OfflineSourceSeparationConfig &config)
: config_(config), model_(mgr, config_.model) {}
// Separates `_input` into a vocals stem and a non-vocals stem.
//
// The input is first converted to the model's sample rate. Each channel is
// then split into chunks, every chunk is run through ProcessChunk(), and the
// returned samples are concatenated to form the vocals. The non-vocals stem
// is the resampled input minus the vocals. For mono input, channel 0 serves
// as the reference signal for both output channels.
//
// Exits the process if the input contains no channels.
//
// @param _input Audio to separate; it is not modified.
// @return Two stems (index 0: vocals, index 1: non-vocals) with two channels
//         each, at GetOutputSampleRate().
OfflineSourceSeparationOutput Process(
    const OfflineSourceSeparationInput &_input) const override {
  auto input = Resample(_input, config_.model.debug);

  if (input.samples.data.empty()) {
    SHERPA_ONNX_LOGE("The input contains no channels");
    SHERPA_ONNX_EXIT(-1);
  }

  const bool has_ch1 = input.samples.data.size() > 1;

  auto &in_ch0 = input.samples.data[0];
  // Mono input: channel 0 doubles as the reference for output channel 1.
  auto &in_ch1 = has_ch1 ? input.samples.data[1] : input.samples.data[0];

  auto chunks_ch0 = SplitIntoChunks(in_ch0);

  std::vector<std::vector<float>> chunks_ch1;
  if (has_ch1) {
    chunks_ch1 = SplitIntoChunks(in_ch1);
  }

  std::vector<float> vocals_ch0;
  std::vector<float> vocals_ch1;
  vocals_ch0.reserve(in_ch0.size());
  vocals_ch1.reserve(in_ch1.size());

  // Passed in place of a channel-1 chunk for mono input. Binding a reference
  // (instead of materializing a temporary through the conditional operator)
  // avoids copying the whole channel-1 chunk on every iteration.
  const std::vector<float> no_samples;

  const int32_t num_chunks = static_cast<int32_t>(chunks_ch0.size());
  for (int32_t i = 0; i != num_chunks; ++i) {
    const std::vector<float> &chunk_ch1 =
        chunks_ch1.empty() ? no_samples : chunks_ch1[i];

    auto s = ProcessChunk(chunks_ch0[i], chunk_ch1,
                          /*is_first_chunk=*/i == 0,
                          /*is_last_chunk=*/i == num_chunks - 1);

    vocals_ch0.insert(vocals_ch0.end(), s.first.begin(), s.first.end());
    vocals_ch1.insert(vocals_ch1.end(), s.second.begin(), s.second.end());
  }

  // non_vocals = input - vocals, computed per channel.
  std::vector<float> non_vocals_ch0(vocals_ch0.size());
  std::vector<float> non_vocals_ch1(vocals_ch1.size());

  Eigen::Map<Eigen::VectorXf>(non_vocals_ch0.data(), non_vocals_ch0.size()) =
      Eigen::Map<Eigen::VectorXf>(in_ch0.data(), in_ch0.size()).array() -
      Eigen::Map<Eigen::VectorXf>(vocals_ch0.data(), vocals_ch0.size())
          .array();

  Eigen::Map<Eigen::VectorXf>(non_vocals_ch1.data(), non_vocals_ch1.size()) =
      Eigen::Map<Eigen::VectorXf>(in_ch1.data(), in_ch1.size()).array() -
      Eigen::Map<Eigen::VectorXf>(vocals_ch1.data(), vocals_ch1.size())
          .array();

  OfflineSourceSeparationOutput ans;
  ans.sample_rate = GetOutputSampleRate();

  ans.stems.resize(2);
  ans.stems[0].data.reserve(2);
  ans.stems[1].data.reserve(2);

  ans.stems[0].data.push_back(std::move(vocals_ch0));
  ans.stems[0].data.push_back(std::move(vocals_ch1));

  ans.stems[1].data.push_back(std::move(non_vocals_ch0));
  ans.stems[1].data.push_back(std::move(non_vocals_ch1));

  return ans;
}
// Sample rate of the produced stems, as recorded in the model's meta data.
int32_t GetOutputSampleRate() const override {
  const auto &meta = model_.GetMetaData();
  return meta.sample_rate;
}
// Number of stems, as recorded in the model's meta data.
int32_t GetNumberOfStems() const override {
  const auto &meta = model_.GetMetaData();
  return meta.num_stems;
}
private:
// Runs the model on one chunk and returns the separated time-domain samples
// as {channel 0, channel 1}.
//
// The chunk is cut into STFT segments by ComputeStft(). For every segment,
// the lowest dim_f frequency bins of all frames are packed into the network
// input as four consecutive planes -- ch0 real, ch0 imag, ch1 real, ch1 imag
// -- each stored bin-major (dim_f x dim_t), whereas the STFT buffers are
// frame-major with n_fft / 2 + 1 bins per frame. The network output is
// unpacked with the same layout, bins at or above dim_f are set to zero, and
// ComputeInverseStft() converts the spectra back to samples.
//
// Exits the process if the STFT frame count differs from dim_t or if dim_f
// exceeds the number of STFT bins.
//
// @param chunk_ch0 Samples of channel 0.
// @param chunk_ch1 Samples of channel 1. If empty, the spectrum of channel 0
//                  is used for both channels.
// @param is_first_chunk Forwarded to ComputeInverseStft().
// @param is_last_chunk Forwarded to ComputeInverseStft().
std::pair<std::vector<float>, std::vector<float>> ProcessChunk(
    const std::vector<float> &chunk_ch0, const std::vector<float> &chunk_ch1,
    bool is_first_chunk, bool is_last_chunk) const {
  int32_t pad0 = 0;
  auto stft_results_ch0 = ComputeStft(chunk_ch0, &pad0);

  int32_t pad1 = pad0;
  std::vector<knf::StftResult> stft_results_ch1;
  if (!chunk_ch1.empty()) {
    stft_results_ch1 = ComputeStft(chunk_ch1, &pad1);
  } else {
    stft_results_ch1 = stft_results_ch0;
  }

  const auto &meta_ = model_.GetMetaData();

  const int32_t num_segments = static_cast<int32_t>(stft_results_ch0.size());
  int32_t num_frames = stft_results_ch0[0].num_frames;
  int32_t dim_f = meta_.dim_f;
  int32_t dim_t = meta_.dim_t;
  int32_t n_fft_bin = meta_.n_fft / 2 + 1;

  if (num_frames != dim_t) {
    SHERPA_ONNX_LOGE("num_frames(%d) != dim_t(%d)", num_frames, dim_t);
    SHERPA_ONNX_EXIT(-1);
  }

  // The packing loops below index bins [0, dim_f) of every frame, so dim_f
  // must not exceed the number of bins that the STFT produces.
  if (dim_f > n_fft_bin) {
    SHERPA_ONNX_LOGE("dim_f(%d) > n_fft / 2 + 1 (%d)", dim_f, n_fft_bin);
    SHERPA_ONNX_EXIT(-1);
  }

  // the first 2: number of channels
  // the second 2: real and imaginary parts
  std::vector<float> x(stft_results_ch0.size() * 2 * 2 * dim_f * dim_t);

  // Appends one (dim_f x dim_t) plane to x, transposing from the frame-major
  // STFT layout.
  float *px = x.data();
  auto pack_plane = [&px, dim_f, num_frames, n_fft_bin](const float *src) {
    for (int32_t j = 0; j != dim_f; ++j) {
      for (int32_t k = 0; k != num_frames; ++k) {
        *px = src[k * n_fft_bin + j];
        ++px;
      }
    }
  };

  for (int32_t i = 0; i != num_segments; ++i) {
    const auto &ch0 = stft_results_ch0[i];
    const auto &ch1 = stft_results_ch1[i];

    pack_plane(ch0.real.data());
    pack_plane(ch0.imag.data());
    pack_plane(ch1.real.data());
    pack_plane(ch1.imag.data());
  }

  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  std::array<int64_t, 4> x_shape{
      static_cast<int32_t>(stft_results_ch0.size()) * 4 / meta_.dim_c,
      meta_.dim_c, dim_f, dim_t};

  Ort::Value x_tensor = Ort::Value::CreateTensor(
      memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());

  Ort::Value spec = model_.Run(std::move(x_tensor));

  // Consumes one plane of the network output, writes it back into an STFT
  // buffer, and clears the bins that the network does not produce.
  const float *p_spec = spec.GetTensorData<float>();
  auto unpack_plane = [&p_spec, dim_f, num_frames, n_fft_bin](float *dst) {
    for (int32_t j = 0; j != dim_f; ++j) {
      for (int32_t k = 0; k != num_frames; ++k) {
        dst[k * n_fft_bin + j] = *p_spec;
        ++p_spec;
      }
    }

    for (int32_t k = 0; k != num_frames; ++k) {
      for (int32_t j = dim_f; j != n_fft_bin; ++j) {
        dst[k * n_fft_bin + j] = 0;
      }
    }
  };

  for (int32_t i = 0; i != num_segments; ++i) {
    auto &ch0 = stft_results_ch0[i];
    auto &ch1 = stft_results_ch1[i];

    unpack_plane(ch0.real.data());
    unpack_plane(ch0.imag.data());
    unpack_plane(ch1.real.data());
    unpack_plane(ch1.imag.data());
  }

  auto samples_ch0 = ComputeInverseStft(stft_results_ch0, pad0,
                                        is_first_chunk, is_last_chunk);

  auto samples_ch1 = ComputeInverseStft(stft_results_ch1, pad1,
                                        is_first_chunk, is_last_chunk);

  return {std::move(samples_ch0), std::move(samples_ch1)};
}
// Converts the STFT segments of one chunk back to time-domain samples.
//
// n_fft / 2 samples are dropped from both ends of every reconstructed
// segment before the segments are concatenated. From the concatenation, the
// trailing `pad` samples are removed, the leading margin is removed unless
// this is the first chunk, and the trailing margin is removed unless this is
// the last chunk.
//
// @param stft_result Segments of one chunk, in order.
// @param pad Number of padding samples that ComputeStft() appended.
std::vector<float> ComputeInverseStft(
    const std::vector<knf::StftResult> &stft_result, int32_t pad,
    bool is_first_chunk, bool is_last_chunk) const {
  const auto &meta = model_.GetMetaData();

  const int32_t trim = meta.n_fft / 2;

  // The margin can never be larger than a full chunk.
  const int32_t margin =
      std::min<int32_t>(meta.margin, meta.num_chunks * meta.sample_rate);

  auto stft_config = GetStftConfig();
  knf::IStft istft(stft_config);

  std::vector<float> joined;
  for (const auto &segment : stft_result) {
    auto wave = istft.Compute(segment);
    const int32_t wave_len = static_cast<int32_t>(wave.size());

    joined.insert(joined.end(), wave.begin() + trim,
                  wave.begin() + (wave_len - trim));
  }

  const int32_t head = is_first_chunk ? 0 : margin;
  const int32_t tail = is_last_chunk ? 0 : margin;

  const int32_t first = head;
  const int32_t last = joined.size() - pad - tail;

  return std::vector<float>(joined.begin() + first, joined.begin() + last);
}
std::vector<knf::StftResult> ComputeStft(const std::vector<float> &chunk,
int32_t *pad) const {
const auto &meta_ = model_.GetMetaData();
int32_t num_samples = static_cast<int32_t>(chunk.size());
int32_t trim = meta_.n_fft / 2;
int32_t chunk_size = meta_.hop_length * (meta_.dim_t - 1);
int32_t gen_size = chunk_size - 2 * trim;
*pad = gen_size - num_samples % gen_size;
std::vector<float> samples(trim + chunk.size() + *pad + trim);
std::copy(chunk.begin(), chunk.end(), samples.begin() + trim);
auto stft_config = GetStftConfig();
knf::Stft stft(stft_config);
std::vector<knf::StftResult> stft_results;
// split the chunk into short segments
for (int32_t i = 0; i < num_samples + *pad; i += gen_size) {
auto r = stft.Compute(samples.data() + i, chunk_size);
stft_results.push_back(std::move(r));
}
return stft_results;
}
// Splits one channel into chunks of num_chunks * sample_rate samples, each
// extended by up to `margin` samples of context on both sides (clipped to
// the bounds of `samples`).
//
// The chunk size is capped at the number of samples, and the margin is
// capped at the chunk size. Returns an empty list for empty input.
std::vector<std::vector<float>> SplitIntoChunks(
    const std::vector<float> &samples) const {
  std::vector<std::vector<float>> chunks;
  if (samples.empty()) {
    return chunks;
  }

  const auto &meta = model_.GetMetaData();
  const int32_t total = static_cast<int32_t>(samples.size());

  const int32_t chunk_size =
      std::min<int32_t>(meta.num_chunks * meta.sample_rate, total);
  const int32_t margin = std::min<int32_t>(meta.margin, chunk_size);

  for (int32_t offset = 0; offset < total; offset += chunk_size) {
    const int32_t first = std::max<int32_t>(0, offset - margin);
    const int32_t last =
        std::min<int32_t>(offset + chunk_size + margin, total);

    if (first >= last) {
      break;
    }

    chunks.emplace_back(samples.begin() + first, samples.begin() + last);

    // This chunk already reaches the end of the signal; nothing is left.
    if (last == total) {
      break;
    }
  }

  return chunks;
}
// Builds the STFT/ISTFT configuration from the model's meta data.
knf::StftConfig GetStftConfig() const {
  const auto &m = model_.GetMetaData();

  knf::StftConfig cfg;
  cfg.window_type = m.window_type;
  cfg.win_length = m.window_length;
  cfg.hop_length = m.hop_length;
  cfg.n_fft = m.n_fft;
  cfg.center = m.center;

  return cfg;
}
private:
OfflineSourceSeparationConfig config_;
OfflineSourceSeparationUvrModel model_;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_
... ...
// sherpa-onnx/csrc/offline-source-separation-uvr-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
namespace sherpa_onnx {
// Registers the command-line option --uvr-model on `po` and binds it to the
// `model` member.
void OfflineSourceSeparationUvrModelConfig::Register(ParseOptions *po) {
po->Register("uvr-model", &model, "Path to the UVR model");
}
// Checks that a model path was given and that the file exists.
//
// @return true if the config is usable; otherwise logs the reason and
//         returns false.
bool OfflineSourceSeparationUvrModelConfig::Validate() const {
  bool ok = true;

  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --uvr-model");
    ok = false;
  } else if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("UVR model '%s' does not exist. ", model.c_str());
    ok = false;
  }

  return ok;
}
// Returns a human-readable description of this config, e.g. for logging.
std::string OfflineSourceSeparationUvrModelConfig::ToString() const {
  std::ostringstream out;

  out << "OfflineSourceSeparationUvrModelConfig("
      << "model=\"" << model << "\")";

  return out.str();
}
} // namespace sherpa_onnx
... ...
// sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_
#include <string>
#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"
namespace sherpa_onnx {
// Configuration of a UVR source-separation model.
struct OfflineSourceSeparationUvrModelConfig {
// Path to the UVR model file. Validate() requires it to be non-empty and to
// refer to an existing file.
std::string model;
OfflineSourceSeparationUvrModelConfig() = default;
explicit OfflineSourceSeparationUvrModelConfig(const std::string &model)
: model(model) {}
// Registers the command-line option(s) of this config on `po`.
void Register(ParseOptions *po);
// Returns true if the config is usable; otherwise logs the reason and
// returns false.
bool Validate() const;
// Returns a human-readable description of this config.
std::string ToString() const;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_
... ...
// sherpa-onnx/csrc/offline-source-separation-uvr-model-meta-data.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_
#include <string>
#include <unordered_map>
#include <vector>
namespace sherpa_onnx {
// See also
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/test.py
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/add_meta_data_and_quantize.py
// Parameters describing a UVR source-separation model.
//
// The values given here are defaults. When a model is loaded,
// offline-source-separation-uvr-model.cc overwrites most of them with the
// custom metadata stored in the ONNX file.
struct OfflineSourceSeparationUvrModelMetaData {
  // Read from the "sample_rate" metadata key.
  int32_t sample_rate{44100};

  // Read from the "stems" metadata key. The loader accepts only 2.
  int32_t num_stems{2};

  // Read from the "dim_c", "dim_f" and "dim_t" metadata keys.
  // -1 means "not set yet".
  int32_t dim_c{-1};
  int32_t dim_f{-1};
  int32_t dim_t{-1};

  // Read from the "n_fft", "hop_length", "win_length", "center" and
  // "window_type" metadata keys, respectively.
  int32_t n_fft{-1};
  int32_t hop_length{1024};
  int32_t window_length{-1};
  int32_t center{1};
  std::string window_type{"hann"};

  // The remaining fields are preconfigured rather than read from the model.
  // Please see
  // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/test.py

  // Set to sample_rate in ./offline-source-separation-uvr-model.cc
  int32_t margin{0};

  const int32_t num_chunks{15};
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_
... ...
// sherpa-onnx/csrc/offline-source-separation-uvr-model.cc
//
// Copyright (c) 2025 Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-source-separation-uvr-model.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
namespace sherpa_onnx {
// Private implementation of OfflineSourceSeparationUvrModel.
//
// Owns the ONNX Runtime environment/session for a UVR model and the metadata
// parsed from it. Construction reads the model file, creates the session and
// validates the model's custom metadata; the process exits via
// SHERPA_ONNX_EXIT(-1) if the model is not a 2-stem "UVR" model.
class OfflineSourceSeparationUvrModel::Impl {
 public:
  // Loads the model from the file at config.uvr.model.
  explicit Impl(const OfflineSourceSeparationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto model_bytes = ReadFile(config.uvr.model);
    Init(model_bytes.data(), model_bytes.size());
  }

  // Same as above, but reads config.uvr.model through a platform resource
  // manager (see the explicit instantiations at the end of this file).
  template <typename Manager>
  Impl(Manager *mgr, const OfflineSourceSeparationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto model_bytes = ReadFile(mgr, config.uvr.model);
    Init(model_bytes.data(), model_bytes.size());
  }

  // Metadata populated by Init().
  const OfflineSourceSeparationUvrModelMetaData &GetMetaData() const {
    return meta_;
  }

  // Feeds `x` to the model as its single input and returns the first output.
  Ort::Value Run(Ort::Value x) const {
    const size_t num_outputs = output_names_ptr_.size();
    auto outputs = sess_->Run({}, input_names_ptr_.data(), &x, 1,
                              output_names_ptr_.data(), num_outputs);
    return std::move(outputs[0]);
  }

 private:
  // Creates the session from an in-memory model, caches the input/output
  // names and fills in meta_ from the model's custom metadata.
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // NOTE: keep the names `meta_data` and `allocator` unchanged. `allocator`
    // is used by the SHERPA_ONNX_READ_META_DATA* macros below, and those
    // macros presumably refer to `meta_data` by name as well.
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();

    if (config_.debug) {
      std::ostringstream os;
      os << "---UVR model---\n";
      PrintModelMetadata(os, meta_data);

      os << "----------input names----------\n";
      for (size_t k = 0; k != input_names_.size(); ++k) {
        os << k << " " << input_names_[k] << "\n";
      }

      os << "----------output names----------\n";
      for (size_t k = 0; k != output_names_.size(); ++k) {
        os << k << " " << output_names_[k] << "\n";
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    // Refuse anything that is not tagged as a UVR model.
    std::string model_type;
    SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type");
    const bool is_uvr = (model_type == "UVR");
    if (!is_uvr) {
      SHERPA_ONNX_LOGE("Expect model type 'UVR'. Given: '%s'",
                       model_type.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    // Only two-stem models are handled.
    SHERPA_ONNX_READ_META_DATA(meta_.num_stems, "stems");
    const bool has_two_stems = (meta_.num_stems == 2);
    if (!has_two_stems) {
      SHERPA_ONNX_LOGE("Only 2stems is supported. Given %d stems",
                       meta_.num_stems);
      SHERPA_ONNX_EXIT(-1);
    }

    // Remaining metadata, read in a fixed order.
    SHERPA_ONNX_READ_META_DATA(meta_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA(meta_.n_fft, "n_fft");
    SHERPA_ONNX_READ_META_DATA(meta_.center, "center");
    SHERPA_ONNX_READ_META_DATA(meta_.window_length, "win_length");
    SHERPA_ONNX_READ_META_DATA(meta_.hop_length, "hop_length");
    SHERPA_ONNX_READ_META_DATA(meta_.dim_t, "dim_t");
    SHERPA_ONNX_READ_META_DATA(meta_.dim_f, "dim_f");
    SHERPA_ONNX_READ_META_DATA(meta_.dim_c, "dim_c");
    SHERPA_ONNX_READ_META_DATA_STR(meta_.window_type, "window_type");

    // The margin is not stored in the model; it is set to the sample rate.
    meta_.margin = meta_.sample_rate;
  }

  // NOTE: the declaration order below determines construction and destruction
  // order; do not reorder.
  OfflineSourceSeparationModelConfig config_;
  OfflineSourceSeparationUvrModelMetaData meta_;

  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  // The *_ptr_ vectors are the `const char *` views of the names that are
  // passed to Ort::Session::Run().
  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;
};
// The destructor is defaulted here rather than in the header: the header only
// forward-declares Impl, and destroying std::unique_ptr<Impl> requires Impl to
// be a complete type, which it is only in this file.
OfflineSourceSeparationUvrModel::~OfflineSourceSeparationUvrModel() = default;
// Creates the model by constructing the implementation from `config`.
OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
const OfflineSourceSeparationModelConfig &config)
: impl_(std::make_unique<Impl>(config)) {}
// Same as above, but forwards a platform resource manager `mgr` to the
// implementation, which uses it to read the model file.
template <typename Manager>
OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
Manager *mgr, const OfflineSourceSeparationModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
// Runs the model on `x` by forwarding to the implementation. Ownership of `x`
// is moved into the call; the returned value is the model's first output.
Ort::Value OfflineSourceSeparationUvrModel::Run(Ort::Value x) const {
return impl_->Run(std::move(x));
}
// Returns a reference to the metadata held by the implementation. The
// reference refers to a member of the implementation object owned by this
// model.
const OfflineSourceSeparationUvrModelMetaData &
OfflineSourceSeparationUvrModel::GetMetaData() const {
return impl_->GetMetaData();
}
// The templated constructor is defined in this .cc file, so it is explicitly
// instantiated here for each resource-manager type that is supported:
// AAssetManager when __ANDROID_API__ >= 9 and NativeResourceManager when
// __OHOS__ is set.
#if __ANDROID_API__ >= 9
template OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
AAssetManager *mgr, const OfflineSourceSeparationModelConfig &config);
#endif
#if __OHOS__
template OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
NativeResourceManager *mgr,
const OfflineSourceSeparationModelConfig &config);
#endif
} // namespace sherpa_onnx
... ...
// sherpa-onnx/csrc/offline-source-separation-uvr-model.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_
#include <memory>
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/offline-source-separation-model-config.h"
#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-meta-data.h"
namespace sherpa_onnx {
// Wrapper around a UVR source-separation ONNX model. All state lives in a
// private Impl class that is defined in the corresponding .cc file.
class OfflineSourceSeparationUvrModel {
public:
// Declared here and defined in the .cc file, where Impl is a complete type.
~OfflineSourceSeparationUvrModel();
// Creates the model from `config`.
explicit OfflineSourceSeparationUvrModel(
const OfflineSourceSeparationModelConfig &config);
// Creates the model from `config`, using the resource manager `mgr`.
// Only declared here; the definition and its explicit instantiations live in
// the .cc file.
template <typename Manager>
OfflineSourceSeparationUvrModel(
Manager *mgr, const OfflineSourceSeparationModelConfig &config);
// Runs the model on the input `x` and returns its output.
Ort::Value Run(Ort::Value x) const;
// Returns the metadata associated with the loaded model.
const OfflineSourceSeparationUvrModelMetaData &GetMetaData() const;
private:
class Impl;
std::unique_ptr<Impl> impl_;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_
... ...
... ... @@ -19,7 +19,8 @@ struct OfflineSourceSeparationConfig {
OfflineSourceSeparationConfig() = default;
OfflineSourceSeparationConfig(const OfflineSourceSeparationModelConfig &model)
explicit OfflineSourceSeparationConfig(
const OfflineSourceSeparationModelConfig &model)
: model(model) {}
void Register(ParseOptions *po);
... ... @@ -54,7 +55,7 @@ class OfflineSourceSeparation {
public:
~OfflineSourceSeparation();
OfflineSourceSeparation(const OfflineSourceSeparationConfig &config);
explicit OfflineSourceSeparation(const OfflineSourceSeparationConfig &config);
template <typename Manager>
OfflineSourceSeparation(Manager *mgr,
... ...
... ... @@ -101,8 +101,8 @@ for a list of pre-trained models to download.
mic_sample_rate = atof(pSampleRateStr);
}
if(!mic.OpenDevice(device_index, mic_sample_rate, 1,
RecordCallback, s.get())) {
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
s.get())) {
fprintf(stderr, "portaudio error: %d\n", device_index);
exit(EXIT_FAILURE);
}
... ...
... ... @@ -142,8 +142,8 @@ for more models.
mic_sample_rate = atof(pSampleRateStr);
}
if (!mic.OpenDevice(device_index, mic_sample_rate, 1,
RecordCallback, nullptr /* user_data */)){
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr /* user_data */)) {
fprintf(stderr, "portaudio error: %d\n", device_index);
exit(EXIT_FAILURE);
}
... ...
... ... @@ -244,8 +244,8 @@ Note that `zh` means Chinese, while `en` means English.
mic_sample_rate = atof(pSampleRateStr);
}
if (!mic.OpenDevice(device_index, mic_sample_rate, 1,
RecordCallback, nullptr /* user_data */)){
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr /* user_data */)) {
fprintf(stderr, "portaudio error: %d\n", device_index);
exit(EXIT_FAILURE);
}
... ...
... ... @@ -159,8 +159,8 @@ for a list of pre-trained models to download.
mic_sample_rate = atof(pSampleRateStr);
}
if (!mic.OpenDevice(device_index, mic_sample_rate, 1,
RecordCallback, nullptr /* user_data */)){
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr /* user_data */)) {
fprintf(stderr, "portaudio error: %d\n", device_index);
exit(EXIT_FAILURE);
}
... ...
... ... @@ -129,8 +129,8 @@ for a list of pre-trained models to download.
mic_sample_rate = atof(pSampleRateStr);
}
if (!mic.OpenDevice(device_index, mic_sample_rate, 1,
RecordCallback, nullptr /* user_data */)){
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr /* user_data */)) {
fprintf(stderr, "portaudio error: %d\n", device_index);
exit(EXIT_FAILURE);
}
... ...
... ... @@ -33,6 +33,17 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-m
--input-wav=audio_example.wav \
--output-vocals-wav=output_vocals.wav \
--output-accompaniment-wav=output_accompaniment.wav
(2) Use UVR models
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR_MDXNET_1_9703.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/audio_example.wav
./bin/sherpa-onnx-offline-source-separation \
--uvr-model=./UVR_MDXNET_1_9703.onnx \
--input-wav=audio_example.wav \
--output-vocals-wav=output_vocals.wav \
--output-accompaniment-wav=output_accompaniment.wav
)usage";
sherpa_onnx::ParseOptions po(kUsageMessage);
... ...
... ... @@ -136,7 +136,8 @@ to download models for offline ASR.
mic_sample_rate = atof(pSampleRateStr);
}
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, nullptr)) {
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr)) {
fprintf(stderr, "Failed to open device %d\n", device_index);
exit(EXIT_FAILURE);
}
... ...
... ... @@ -74,7 +74,6 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler
sherpa_onnx::Microphone mic;
int32_t device_index = Pa_GetDefaultInputDevice();
if (device_index == paNoDevice) {
fprintf(stderr, "No default input device found\n");
... ... @@ -96,7 +95,8 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
mic_sample_rate = atof(pSampleRateStr);
}
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, nullptr)) {
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr)) {
fprintf(stderr, "Failed to open microphone device %d\n", device_index);
exit(EXIT_FAILURE);
}
... ...
... ... @@ -5,6 +5,7 @@
#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/wave-writer.h"
#include "sherpa-onnx/jni/common.h"
... ... @@ -207,7 +208,10 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromAsset(
}
#endif
auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config);
SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);
for (const auto &s : str_vec) {
SHERPA_ONNX_LOGE("%s", s.c_str());
}
auto tts = new sherpa_onnx::OfflineTts(
#if __ANDROID_API__ >= 9
... ...