offline-speaker-diarization.cc
3.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
// sherpa-onnx/python/csrc/offline-speaker-diarization.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include "sherpa-onnx/python/csrc/offline-speaker-diarization.h"
#include <string>
#include <vector>
#include "sherpa-onnx/csrc/offline-speaker-diarization.h"
#include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h"
#include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-config.h"
namespace sherpa_onnx {
// Registers OfflineSpeakerSegmentationPyannoteModelConfig on the Python
// module `m`.
//
// The Python class receives:
//   - a no-argument constructor and a constructor taking `model`,
//   - a read/write `model` attribute,
//   - `__str__` (bound to ToString) and `validate` (bound to Validate).
static void PybindOfflineSpeakerSegmentationPyannoteModelConfig(py::module *m) {
  using Config = OfflineSpeakerSegmentationPyannoteModelConfig;

  py::class_<Config> binding(*m,
                             "OfflineSpeakerSegmentationPyannoteModelConfig");

  // Constructors.
  binding.def(py::init<>());
  binding.def(py::init<const std::string &>(), py::arg("model"));

  // Data members.
  binding.def_readwrite("model", &Config::model);

  // Member functions.
  binding.def("__str__", &Config::ToString);
  binding.def("validate", &Config::Validate);
}
// Registers OfflineSpeakerSegmentationModelConfig on the Python module `m`.
//
// The nested pyannote config class is registered first, before this class
// (which has a `pyannote` member of that type) is bound.
//
// The Python class receives:
//   - a no-argument constructor,
//   - a constructor (pyannote, num_threads=1, debug=False, provider="cpu"),
//   - read/write attributes `pyannote`, `num_threads`, `debug`, `provider`,
//   - `__str__` (bound to ToString) and `validate` (bound to Validate).
static void PybindOfflineSpeakerSegmentationModelConfig(py::module *m) {
  PybindOfflineSpeakerSegmentationPyannoteModelConfig(m);

  using Config = OfflineSpeakerSegmentationModelConfig;

  py::class_<Config> binding(*m, "OfflineSpeakerSegmentationModelConfig");

  // Constructors.
  binding.def(py::init<>());
  binding.def(
      py::init<const OfflineSpeakerSegmentationPyannoteModelConfig &, int32_t,
               bool, const std::string &>(),
      py::arg("pyannote"), py::arg("num_threads") = 1,
      py::arg("debug") = false, py::arg("provider") = "cpu");

  // Data members.
  binding.def_readwrite("pyannote", &Config::pyannote);
  binding.def_readwrite("num_threads", &Config::num_threads);
  binding.def_readwrite("debug", &Config::debug);
  binding.def_readwrite("provider", &Config::provider);

  // Member functions.
  binding.def("__str__", &Config::ToString);
  binding.def("validate", &Config::Validate);
}
// Registers OfflineSpeakerDiarizationConfig on the Python module `m`.
//
// The segmentation model config classes are registered first, before this
// class (which has a `segmentation` member of that type) is bound.
//
// The Python class receives:
//   - a constructor (segmentation, embedding, clustering,
//     min_duration_on=0.3, min_duration_off=0.5),
//   - read/write attributes for each of those five fields,
//   - `__str__` (bound to ToString) and `validate` (bound to Validate).
static void PybindOfflineSpeakerDiarizationConfig(py::module *m) {
  PybindOfflineSpeakerSegmentationModelConfig(m);

  using Config = OfflineSpeakerDiarizationConfig;

  py::class_<Config> binding(*m, "OfflineSpeakerDiarizationConfig");

  // Constructor.
  binding.def(
      py::init<const OfflineSpeakerSegmentationModelConfig &,
               const SpeakerEmbeddingExtractorConfig &,
               const FastClusteringConfig &, float, float>(),
      py::arg("segmentation"), py::arg("embedding"), py::arg("clustering"),
      py::arg("min_duration_on") = 0.3, py::arg("min_duration_off") = 0.5);

  // Data members.
  binding.def_readwrite("segmentation", &Config::segmentation);
  binding.def_readwrite("embedding", &Config::embedding);
  binding.def_readwrite("clustering", &Config::clustering);
  binding.def_readwrite("min_duration_on", &Config::min_duration_on);
  binding.def_readwrite("min_duration_off", &Config::min_duration_off);

  // Member functions.
  binding.def("__str__", &Config::ToString);
  binding.def("validate", &Config::Validate);
}
// Registers OfflineSpeakerDiarization (after its config classes) on the
// Python module `m`.
//
// Python-visible API registered here:
//   - OfflineSpeakerDiarization(config)
//   - sample_rate            read-only property bound to SampleRate()
//   - set_config(config)     bound to SetConfig()
//   - process(samples, callback=None)
//       Runs Process() over `samples`. When a callback is supplied it is
//       invoked as callback(processed_chunks, num_chunks); its return value
//       is discarded and 0 is always reported back to Process().
void PybindOfflineSpeakerDiarization(py::module *m) {
  PybindOfflineSpeakerDiarizationConfig(m);

  using PyClass = OfflineSpeakerDiarization;
  py::class_<PyClass>(*m, "OfflineSpeakerDiarization")
      .def(py::init<const OfflineSpeakerDiarizationConfig &>(),
           py::arg("config"))
      .def_property_readonly("sample_rate", &PyClass::SampleRate)
      .def("set_config", &PyClass::SetConfig, py::arg("config"))
      .def(
          "process",
          // Both arguments are only read here, so bind them by const
          // reference instead of taking an extra by-value copy/move.
          [](const PyClass &self, const std::vector<float> &samples,
             const std::function<int32_t(int32_t, int32_t)> &callback) {
            // No callback given (the default None is expected to arrive as
            // an empty std::function): use the callback-free call.
            if (!callback) {
              return self.Process(samples.data(), samples.size());
            }

            // Adapt the two-argument Python callback to the three-argument
            // form passed to Process(); the trailing void * is unused.
            // The callback is captured by copy so the wrapper owns it.
            std::function<int32_t(int32_t, int32_t, void *)> callback_wrapper =
                [callback](int32_t processed_chunks, int32_t num_chunks,
                           void *) -> int32_t {
              callback(processed_chunks, num_chunks);
              return 0;
            };

            return self.Process(samples.data(), samples.size(),
                                callback_wrapper);
          },
          py::arg("samples"), py::arg("callback") = py::none());
}
} // namespace sherpa_onnx