Fangjun Kuang
Committed by GitHub

Add Python API for source separation (#2283)

... ... @@ -8,6 +8,32 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
log "test spleeter"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/sherpa-onnx-spleeter-2stems-fp16.tar.bz2
tar xvf sherpa-onnx-spleeter-2stems-fp16.tar.bz2
rm sherpa-onnx-spleeter-2stems-fp16.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav
./python-api-examples/offline-source-separation-spleeter.py
rm -rf sherpa-onnx-spleeter-2stems-fp16
rm qi-feng-le-zh.wav
log "test UVR"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR_MDXNET_9482.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav
./python-api-examples/offline-source-separation-uvr.py
rm UVR_MDXNET_9482.onnx
rm qi-feng-le-zh.wav
mkdir source-separation
mv spleeter-*.wav source-separation
mv uvr-*.wav source-separation
ls -lh source-separation
log "test offline dolphin ctc"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
... ...
... ... @@ -99,5 +99,10 @@ jobs:
- uses: actions/upload-artifact@v4
with:
name: source-separation-${{ matrix.os }}-${{ matrix.python-version }}
path: ./source-separation
- uses: actions/upload-artifact@v4
with:
name: tts-generated-test-files-${{ matrix.os }}-${{ matrix.python-version }}
path: tts
... ...
... ... @@ -36,22 +36,18 @@ jobs:
fail-fast: false
matrix:
include:
# it fails to install ffmpeg on ubuntu 20.04
#
# - os: ubuntu-20.04
# python-version: "3.7"
# - os: ubuntu-20.04
# python-version: "3.8"
# - os: ubuntu-20.04
# python-version: "3.9"
- os: ubuntu-22.04
- os: ubuntu-24.04
python-version: "3.8"
- os: ubuntu-24.04
python-version: "3.9"
- os: ubuntu-24.04
python-version: "3.10"
- os: ubuntu-22.04
- os: ubuntu-24.04
python-version: "3.11"
- os: ubuntu-22.04
- os: ubuntu-24.04
python-version: "3.12"
- os: ubuntu-22.04
- os: ubuntu-24.04
python-version: "3.13"
steps:
... ... @@ -81,10 +77,12 @@ jobs:
python3 -m pip install --upgrade pip numpy pypinyin sentencepiece>=0.1.96 soundfile
python3 -m pip install wheel twine setuptools
- name: Install ffmpeg
shell: bash
run: |
sudo apt-get install ffmpeg
- uses: afoley587/setup-ffmpeg@main
id: setup-ffmpeg
with:
ffmpeg-version: release
architecture: ''
github-token: ${{ github.server_url == 'https://github.com' && github.token || '' }}
- name: Install ninja
shell: bash
... ... @@ -191,5 +189,10 @@ jobs:
- uses: actions/upload-artifact@v4
with:
name: source-separation-${{ matrix.os }}-${{ matrix.python-version }}-whl
path: ./source-separation
- uses: actions/upload-artifact@v4
with:
name: tts-generated-test-files-${{ matrix.os }}-${{ matrix.python-version }}
path: tts
... ...
#!/usr/bin/env python3
# Copyright (c) 2025 Xiaomi Corporation
"""
This file shows how to use spleeter for source separation.
Please first download a spleeter model from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/source-separation-models
The following is an example:
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/sherpa-onnx-spleeter-2stems-fp16.tar.bz2
Please also download a test file
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav
The test wav file is 16-bit encoded with 2 channels. If you have other
formats, e.g., .mp4 or .mp3, please first use ffmpeg to convert it.
For instance
ffmpeg -i your.mp4 -vn -acodec pcm_s16le -ar 44100 -ac 2 out.wav
Then you can use out.wav as input for this example.
"""
import time
from pathlib import Path
import numpy as np
import sherpa_onnx
import soundfile as sf
def create_offline_source_separation():
    """Create an OfflineSourceSeparation object for the spleeter model.

    Returns:
      A configured sherpa_onnx.OfflineSourceSeparation instance.

    Raises:
      ValueError: If a model file is missing or the config fails validation.
    """
    # Please read the help message at the beginning of this file
    # to download the model files.
    vocals = "./sherpa-onnx-spleeter-2stems-fp16/vocals.fp16.onnx"
    accompaniment = "./sherpa-onnx-spleeter-2stems-fp16/accompaniment.fp16.onnx"

    for model_path in (vocals, accompaniment):
        if not Path(model_path).is_file():
            raise ValueError(f"{model_path} does not exist.")

    spleeter_config = sherpa_onnx.OfflineSourceSeparationSpleeterModelConfig(
        vocals=vocals,
        accompaniment=accompaniment,
    )
    config = sherpa_onnx.OfflineSourceSeparationConfig(
        model=sherpa_onnx.OfflineSourceSeparationModelConfig(
            spleeter=spleeter_config,
            num_threads=1,
            debug=False,
            provider="cpu",
        )
    )

    if not config.validate():
        raise ValueError("Please check your config.")

    return sherpa_onnx.OfflineSourceSeparation(config)
def load_audio():
    """Load the test wave file as float32 samples.

    Returns:
      A tuple ``(samples, sample_rate)`` where ``samples`` is a float32
      array of shape (num_channels, num_samples).

    Raises:
      ValueError: If the wave file does not exist.
    """
    # Please read the help message at the beginning of this file to download
    # the following wav_file
    wav_file = "./qi-feng-le-zh.wav"
    if not Path(wav_file).is_file():
        raise ValueError(f"{wav_file} does not exist")

    # soundfile returns an array of shape (num_samples, num_channels)
    samples, sample_rate = sf.read(wav_file, dtype="float32", always_2d=True)

    # Transpose so samples is of shape (num_channels, num_samples)
    samples = samples.T

    assert (
        samples.shape[1] > samples.shape[0]
    ), f"You should use (num_channels, num_samples). {samples.shape}"

    assert (
        samples.dtype == np.float32
    ), f"Expect np.float32 as dtype. Given: {samples.dtype}"

    return samples, sample_rate
def main():
    """Separate ./qi-feng-le-zh.wav into vocal and non-vocal stems.

    Saves ./spleeter-vocals.wav and ./spleeter-non-vocals.wav and prints
    timing statistics (RTF).
    """
    sp = create_offline_source_separation()

    samples, sample_rate = load_audio()
    # The pybind layer requires a C-contiguous array
    samples = np.ascontiguousarray(samples)

    # Consistent with the UVR example: tell the user processing has begun,
    # since separation may take a while.
    print("Started. Please wait")

    start = time.time()
    output = sp.process(sample_rate=sample_rate, samples=samples)
    end = time.time()

    print("output.sample_rate", output.sample_rate)

    # The 2-stems spleeter model produces exactly two stems:
    # vocals and accompaniment
    assert len(output.stems) == 2, len(output.stems)

    vocals = output.stems[0].data
    non_vocals = output.stems[1].data
    # vocals.shape (num_channels, num_samples)

    vocals = np.transpose(vocals)
    non_vocals = np.transpose(non_vocals)
    # vocals.shape (num_samples,num_channels) as expected by soundfile

    sf.write("./spleeter-vocals.wav", vocals, samplerate=output.sample_rate)
    sf.write("./spleeter-non-vocals.wav", non_vocals, samplerate=output.sample_rate)

    elapsed_seconds = end - start
    audio_duration = samples.shape[1] / sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    print("Saved to ./spleeter-vocals.wav and ./spleeter-non-vocals.wav")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()
... ...
#!/usr/bin/env python3
# Copyright (c) 2025 Xiaomi Corporation
"""
This file shows how to use UVR for source separation.
Please first download a UVR model from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/source-separation-models
The following is an example:
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR_MDXNET_9482.onnx
Please also download a test file
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav
The test wav file is 16-bit encoded with 2 channels. If you have other
formats, e.g., .mp4 or .mp3, please first use ffmpeg to convert it.
For instance
ffmpeg -i your.mp4 -vn -acodec pcm_s16le -ar 44100 -ac 2 out.wav
Then you can use out.wav as input for this example.
"""
import time
from pathlib import Path
import numpy as np
import sherpa_onnx
import soundfile as sf
def create_offline_source_separation():
    """Create an OfflineSourceSeparation object for the UVR model.

    Returns:
      A configured sherpa_onnx.OfflineSourceSeparation instance.

    Raises:
      ValueError: If the model file is missing or the config fails validation.
    """
    # Please read the help message at the beginning of this file
    # to download the model file.
    model = "./UVR_MDXNET_9482.onnx"
    if not Path(model).is_file():
        raise ValueError(f"{model} does not exist.")

    uvr_config = sherpa_onnx.OfflineSourceSeparationUvrModelConfig(model=model)
    config = sherpa_onnx.OfflineSourceSeparationConfig(
        model=sherpa_onnx.OfflineSourceSeparationModelConfig(
            uvr=uvr_config,
            num_threads=1,
            debug=False,
            provider="cpu",
        )
    )

    if not config.validate():
        raise ValueError("Please check your config.")

    return sherpa_onnx.OfflineSourceSeparation(config)
def load_audio():
    """Load the test wave file as float32 samples.

    Returns:
      A tuple ``(samples, sample_rate)`` where ``samples`` is a float32
      array of shape (num_channels, num_samples).

    Raises:
      ValueError: If the wave file does not exist.
    """
    # Please read the help message at the beginning of this file to download
    # the following wav_file
    wav_file = "./qi-feng-le-zh.wav"
    if not Path(wav_file).is_file():
        raise ValueError(f"{wav_file} does not exist")

    # soundfile returns an array of shape (num_samples, num_channels)
    samples, sample_rate = sf.read(wav_file, dtype="float32", always_2d=True)

    # Transpose so samples is of shape (num_channels, num_samples)
    samples = samples.T

    assert (
        samples.shape[1] > samples.shape[0]
    ), f"You should use (num_channels, num_samples). {samples.shape}"

    assert (
        samples.dtype == np.float32
    ), f"Expect np.float32 as dtype. Given: {samples.dtype}"

    return samples, sample_rate
def main():
    """Separate ./qi-feng-le-zh.wav into vocal and non-vocal stems.

    Saves ./uvr-vocals.wav and ./uvr-non-vocals.wav and prints timing
    statistics (RTF).
    """
    separator = create_offline_source_separation()

    samples, sample_rate = load_audio()
    # The pybind layer requires a C-contiguous array
    samples = np.ascontiguousarray(samples)

    print("Started. Please wait")

    t0 = time.time()
    output = separator.process(sample_rate=sample_rate, samples=samples)
    t1 = time.time()

    print("output.sample_rate", output.sample_rate)

    # The UVR model produces exactly two stems: vocals and non-vocals
    assert len(output.stems) == 2, len(output.stems)

    # Each stem is of shape (num_channels, num_samples); transpose to
    # (num_samples, num_channels) as expected by soundfile
    vocals = np.transpose(output.stems[0].data)
    non_vocals = np.transpose(output.stems[1].data)

    sf.write("./uvr-vocals.wav", vocals, samplerate=output.sample_rate)
    sf.write("./uvr-non-vocals.wav", non_vocals, samplerate=output.sample_rate)

    elapsed_seconds = t1 - t0
    audio_duration = samples.shape[1] / sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    print("Saved to ./uvr-vocals.wav and ./uvr-non-vocals.wav")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()
... ...
... ... @@ -20,6 +20,10 @@ set(srcs
offline-punctuation.cc
offline-recognizer.cc
offline-sense-voice-model-config.cc
offline-source-separation-model-config.cc
offline-source-separation-spleeter-model-config.cc
offline-source-separation-uvr-model-config.cc
offline-source-separation.cc
offline-speech-denoiser-gtcrn-model-config.cc
offline-speech-denoiser-model-config.cc
offline-speech-denoiser.cc
... ...
... ... @@ -9,6 +9,8 @@
#include "sherpa-onnx/csrc/fast-clustering.h"
#define C_CONTIGUOUS py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_
namespace sherpa_onnx {
static void PybindFastClusteringConfig(py::module *m) {
... ... @@ -32,6 +34,12 @@ void PybindFastClustering(py::module *m) {
"__call__",
[](const PyClass &self,
py::array_t<float> features) -> std::vector<int32_t> {
if (!(C_CONTIGUOUS == (features.flags() & C_CONTIGUOUS))) {
throw py::value_error(
"input features should be contiguous. Please use "
"np.ascontiguousarray(features)");
}
int num_dim = features.ndim();
if (num_dim != 2) {
std::ostringstream os;
... ...
... ... @@ -59,14 +59,14 @@ void PybindOfflineRecognizer(py::module *m) {
return self.CreateStream(hotwords);
},
py::arg("hotwords"), py::call_guard<py::gil_scoped_release>())
.def("decode_stream", &PyClass::DecodeStream,
.def("decode_stream", &PyClass::DecodeStream, py::arg("s"),
py::call_guard<py::gil_scoped_release>())
.def(
"decode_streams",
[](const PyClass &self, std::vector<OfflineStream *> ss) {
self.DecodeStreams(ss.data(), ss.size());
},
py::call_guard<py::gil_scoped_release>());
py::arg("ss"), py::call_guard<py::gil_scoped_release>());
}
} // namespace sherpa_onnx
... ...
// sherpa-onnx/python/csrc/offline-source-separation-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-source-separation-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-source-separation-model-config.h"
#include "sherpa-onnx/python/csrc/offline-source-separation-spleeter-model-config.h"
#include "sherpa-onnx/python/csrc/offline-source-separation-uvr-model-config.h"

namespace sherpa_onnx {

// Expose OfflineSourceSeparationModelConfig (and its spleeter/uvr
// sub-configs) to Python.
void PybindOfflineSourceSeparationModelConfig(py::module *m) {
  // Bind the sub-config classes first: they are used below as default
  // argument values, which requires their types to be registered already.
  PybindOfflineSourceSeparationSpleeterModelConfig(m);
  PybindOfflineSourceSeparationUvrModelConfig(m);

  using PyClass = OfflineSourceSeparationModelConfig;
  py::class_<PyClass>(*m, "OfflineSourceSeparationModelConfig")
      .def(py::init<const OfflineSourceSeparationSpleeterModelConfig &,
                    const OfflineSourceSeparationUvrModelConfig &, int32_t,
                    bool, const std::string &>(),
           py::arg("spleeter") = OfflineSourceSeparationSpleeterModelConfig{},
           py::arg("uvr") = OfflineSourceSeparationUvrModelConfig{},
           py::arg("num_threads") = 1, py::arg("debug") = false,
           py::arg("provider") = "cpu")
      .def_readwrite("spleeter", &PyClass::spleeter)
      .def_readwrite("uvr", &PyClass::uvr)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx
... ...
// sherpa-onnx/python/csrc/offline-source-separation-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

// Registers OfflineSourceSeparationModelConfig (and its spleeter/uvr
// sub-configs) with the given Python module.
void PybindOfflineSourceSeparationModelConfig(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_MODEL_CONFIG_H_
... ...
// sherpa-onnx/python/csrc/offline-source-separation-spleeter-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-source-separation-spleeter-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h"

namespace sherpa_onnx {

// Expose OfflineSourceSeparationSpleeterModelConfig to Python.
// It holds the paths to the two spleeter model files (vocals and
// accompaniment); both default to the empty string.
void PybindOfflineSourceSeparationSpleeterModelConfig(py::module *m) {
  using PyClass = OfflineSourceSeparationSpleeterModelConfig;
  py::class_<PyClass>(*m, "OfflineSourceSeparationSpleeterModelConfig")
      .def(py::init<const std::string &, const std::string &>(),
           py::arg("vocals") = "", py::arg("accompaniment") = "")
      .def_readwrite("vocals", &PyClass::vocals)
      .def_readwrite("accompaniment", &PyClass::accompaniment)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx
... ...
// sherpa-onnx/python/csrc/offline-source-separation-spleeter-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

// Registers OfflineSourceSeparationSpleeterModelConfig with the given
// Python module.
void PybindOfflineSourceSeparationSpleeterModelConfig(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_CONFIG_H_
... ...
// sherpa-onnx/python/csrc/offline-source-separation-uvr-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-source-separation-uvr-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"

namespace sherpa_onnx {

// Expose OfflineSourceSeparationUvrModelConfig to Python.
// It holds the path to the single UVR model file; defaults to the
// empty string.
void PybindOfflineSourceSeparationUvrModelConfig(py::module *m) {
  using PyClass = OfflineSourceSeparationUvrModelConfig;
  py::class_<PyClass>(*m, "OfflineSourceSeparationUvrModelConfig")
      .def(py::init<const std::string &>(), py::arg("model") = "")
      .def_readwrite("model", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx
... ...
// sherpa-onnx/python/csrc/offline-source-separation-uvr-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

// Registers OfflineSourceSeparationUvrModelConfig with the given
// Python module.
void PybindOfflineSourceSeparationUvrModelConfig(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_
... ...
// sherpa-onnx/python/csrc/offline-source-separation.cc
//
// Copyright (c) 2025 Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-source-separation.h"
#include <string>
#include "sherpa-onnx/python/csrc/offline-source-separation-model-config.h"
#include "sherpa-onnx/python/csrc/offline-source-separation.h"
#define C_CONTIGUOUS py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_
namespace sherpa_onnx {
// Expose OfflineSourceSeparationConfig to Python.
static void PybindOfflineSourceSeparationConfig(py::module *m) {
  // The nested model config must be registered first so it can be used
  // as a default argument value below.
  PybindOfflineSourceSeparationModelConfig(m);

  using PyClass = OfflineSourceSeparationConfig;
  py::class_<PyClass>(*m, "OfflineSourceSeparationConfig")
      .def(py::init<const OfflineSourceSeparationModelConfig &>(),
           py::arg("model") = OfflineSourceSeparationModelConfig{})
      .def_readwrite("model", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}
// Expose MultiChannelSamples to Python. Its only attribute is the
// read-only "data" property, which converts the underlying per-channel
// sample vectors into a 2-D NumPy float array, or returns None when
// there is no data.
static void PybindMultiChannelSamples(py::module *m) {
  using PyClass = MultiChannelSamples;
  py::class_<PyClass>(*m, "MultiChannelSamples")
      .def_property_readonly("data", [](PyClass &self) -> py::object {
        // if data is not empty, return a float array of
        // shape (num_channels, num_samples)
        int32_t num_channels = self.data.size();
        if (num_channels == 0) {
          return py::none();
        }

        int32_t num_samples = self.data[0].size();
        if (num_samples == 0) {
          return py::none();
        }

        py::array_t<float> ans({num_channels, num_samples});
        py::buffer_info buf = ans.request();
        auto p = static_cast<float *>(buf.ptr);

        // Copy each channel into one row of the output array.
        // NOTE(review): this assumes every channel holds num_samples
        // samples; only data[0] is inspected above — confirm upstream
        // guarantees equal-length channels.
        for (int32_t i = 0; i != num_channels; ++i) {
          std::copy(self.data[i].begin(), self.data[i].end(),
                    p + i * num_samples);
        }

        return ans;
      });
}
// Expose OfflineSourceSeparationOutput to Python with read-only
// "sample_rate" and "stems" properties.
static void PybindOfflineSourceSeparationOutput(py::module *m) {
  using PyClass = OfflineSourceSeparationOutput;
  py::class_<PyClass>(*m, "OfflineSourceSeparationOutput")
      .def_property_readonly(
          "sample_rate", [](const PyClass &self) { return self.sample_rate; })
      .def_property_readonly("stems",
                             [](const PyClass &self) { return self.stems; });
}
// Expose the OfflineSourceSeparation class itself, together with all of
// its helper types (config, output, multi-channel samples).
void PybindOfflineSourceSeparation(py::module *m) {
  PybindOfflineSourceSeparationConfig(m);
  PybindOfflineSourceSeparationOutput(m);
  PybindMultiChannelSamples(m);

  using PyClass = OfflineSourceSeparation;
  py::class_<PyClass>(*m, "OfflineSourceSeparation")
      .def(py::init<const OfflineSourceSeparationConfig &>(),
           py::arg("config") = OfflineSourceSeparationConfig{})
      .def(
          "process",
          [](const PyClass &self, int32_t sample_rate,
             const py::array_t<float> &samples) {
            // Rows are copied out with raw pointer arithmetic below,
            // so the input must be C-contiguous.
            if (!(C_CONTIGUOUS == (samples.flags() & C_CONTIGUOUS))) {
              throw py::value_error(
                  "input samples should be contiguous. Please use "
                  "np.ascontiguousarray(samples)");
            }

            int num_dim = samples.ndim();
            // Use the cached num_dim instead of calling ndim() again.
            if (num_dim != 2) {
              std::ostringstream os;
              os << "Expect an array of 2 dimensions [num_channels x "
                    "num_samples]. "
                    "Given dim: "
                 << num_dim << "\n";
              throw py::value_error(os.str());
            }

            // if num_samples is less than 10, it is very likely the user
            // has swapped num_channels and num_samples.
            if (samples.shape(1) < 10) {
              std::ostringstream os;
              os << "Expect an array of 2 dimensions [num_channels x "
                    "num_samples]. "
                    "Given ["
                 << samples.shape(0) << " x " << samples.shape(1) << "]"
                 << "\n";
              throw py::value_error(os.str());
            }

            int32_t num_channels = samples.shape(0);
            int32_t num_samples = samples.shape(1);
            const float *p = samples.data();

            OfflineSourceSeparationInput input;
            input.samples.data.resize(num_channels);
            input.sample_rate = sample_rate;

            // Copy each channel (one contiguous row) into the input.
            for (int32_t i = 0; i != num_channels; ++i) {
              input.samples.data[i] = {p + i * num_samples,
                                       p + (i + 1) * num_samples};
            }

            // Release the GIL during the potentially long-running
            // separation so other Python threads can make progress.
            // Note: all reads of `samples` happen before this point.
            pybind11::gil_scoped_release release;

            return self.Process(input);
          },
          py::arg("sample_rate"), py::arg("samples"),
          "samples is of shape (num_channels, num-samples) with dtype "
          "np.float32");
}
} // namespace sherpa_onnx
... ...
// sherpa-onnx/python/csrc/offline-source-separation.h
//
// Copyright (c) 2025 Xiaomi Corporation

// NOTE: the filename comment and include guard previously said
// "offline-source-separation-config"; this header is included as
// offline-source-separation.h and declares the top-level binder, so
// both are made consistent here.
#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

// Registers OfflineSourceSeparation and all of its helper classes
// (config, output, multi-channel samples) with the given Python module.
void PybindOfflineSourceSeparation(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_H_
... ...
... ... @@ -47,6 +47,7 @@ void PybindOfflineSpeechDenoiser(py::module *m) {
int32_t sample_rate) {
return self.Run(samples.data(), samples.size(), sample_rate);
},
py::arg("samples"), py::arg("sample_rate"),
py::call_guard<py::gil_scoped_release>())
.def(
"run",
... ... @@ -54,6 +55,7 @@ void PybindOfflineSpeechDenoiser(py::module *m) {
int32_t sample_rate) {
return self.Run(samples.data(), samples.size(), sample_rate);
},
py::arg("samples"), py::arg("sample_rate"),
py::call_guard<py::gil_scoped_release>())
.def_property_readonly("sample_rate", &PyClass::GetSampleRate);
}
... ...
... ... @@ -109,19 +109,20 @@ void PybindOnlineRecognizer(py::module *m) {
py::arg("hotwords"), py::call_guard<py::gil_scoped_release>())
.def("is_ready", &PyClass::IsReady,
py::call_guard<py::gil_scoped_release>())
.def("decode_stream", &PyClass::DecodeStream,
.def("decode_stream", &PyClass::DecodeStream, py::arg("s"),
py::call_guard<py::gil_scoped_release>())
.def(
"decode_streams",
[](PyClass &self, std::vector<OnlineStream *> ss) {
self.DecodeStreams(ss.data(), ss.size());
},
py::call_guard<py::gil_scoped_release>())
.def("get_result", &PyClass::GetResult,
py::arg("ss"), py::call_guard<py::gil_scoped_release>())
.def("get_result", &PyClass::GetResult, py::arg("s"),
py::call_guard<py::gil_scoped_release>())
.def("is_endpoint", &PyClass::IsEndpoint,
.def("is_endpoint", &PyClass::IsEndpoint, py::arg("s"),
py::call_guard<py::gil_scoped_release>())
.def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>());
.def("reset", &PyClass::Reset, py::arg("s"),
py::call_guard<py::gil_scoped_release>());
}
} // namespace sherpa_onnx
... ...
... ... @@ -17,6 +17,7 @@
#include "sherpa-onnx/python/csrc/offline-model-config.h"
#include "sherpa-onnx/python/csrc/offline-punctuation.h"
#include "sherpa-onnx/python/csrc/offline-recognizer.h"
#include "sherpa-onnx/python/csrc/offline-source-separation.h"
#include "sherpa-onnx/python/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/python/csrc/offline-stream.h"
#include "sherpa-onnx/python/csrc/online-ctc-fst-decoder-config.h"
... ... @@ -110,6 +111,7 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
PybindAlsa(&m);
PybindOfflineSpeechDenoiser(&m);
PybindOfflineSourceSeparation(&m);
}
} // namespace sherpa_onnx
... ...
... ... @@ -11,6 +11,11 @@ from _sherpa_onnx import (
OfflinePunctuation,
OfflinePunctuationConfig,
OfflinePunctuationModelConfig,
OfflineSourceSeparation,
OfflineSourceSeparationConfig,
OfflineSourceSeparationModelConfig,
OfflineSourceSeparationSpleeterModelConfig,
OfflineSourceSeparationUvrModelConfig,
OfflineSpeakerDiarization,
OfflineSpeakerDiarizationConfig,
OfflineSpeakerDiarizationResult,
... ...