offline-tts.cc
5.0 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// sherpa-onnx/python/csrc/offline-tts.cc
//
// Copyright (c) 2023 Xiaomi Corporation
#include "sherpa-onnx/python/csrc/offline-tts.h"

#include <algorithm>
#include <cstdint>
#include <functional>
#include <sstream>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/python/csrc/offline-tts-model-config.h"
namespace sherpa_onnx {
// Registers the Python class "GeneratedAudio" on module `m`.
//
// The Python class has a default constructor, read/write attributes
// `samples` and `sample_rate`, and a `__str__` that prints the sample rate
// and the number of samples (not the sample values themselves).
//
// @param m  The module that receives the class definition.
static void PybindGeneratedAudio(py::module *m) {
  using PyClass = GeneratedAudio;
  py::class_<PyClass>(*m, "GeneratedAudio")
      .def(py::init<>())
      .def_readwrite("samples", &PyClass::samples)
      .def_readwrite("sample_rate", &PyClass::sample_rate)
      // __str__ only reads the object, so it takes it by const reference.
      .def("__str__", [](const PyClass &self) {
        std::ostringstream os;
        os << "GeneratedAudio(sample_rate=" << self.sample_rate << ", "
           << "num_samples=" << self.samples.size() << ")";
        return os.str();
      });
}
// Registers the Python class "OfflineTtsConfig" on module `m`.
//
// PybindOfflineTtsModelConfig(m) is invoked first (presumably so that the
// type of the `model` field is known to Python before it is used as a
// constructor argument -- confirm against that function's definition).
//
// The Python class offers:
//   - a default constructor;
//   - a keyword constructor (model, rule_fsts="", rule_fars="",
//     max_num_sentences=1, silence_scale=0.2);
//   - read/write attributes mirroring the constructor arguments;
//   - validate() and __str__(), bound to Validate() and ToString().
//
// @param m  The module that receives the class definition.
static void PybindOfflineTtsConfig(py::module *m) {
  PybindOfflineTtsModelConfig(m);

  using PyClass = OfflineTtsConfig;
  py::class_<PyClass> config_class(*m, "OfflineTtsConfig");

  // Constructors.
  config_class.def(py::init<>());
  config_class.def(
      py::init<const OfflineTtsModelConfig &, const std::string &,
               const std::string &, int32_t, float>(),
      py::arg("model"), py::arg("rule_fsts") = "", py::arg("rule_fars") = "",
      py::arg("max_num_sentences") = 1, py::arg("silence_scale") = 0.2);

  // Data members exposed as read/write attributes.
  config_class.def_readwrite("model", &PyClass::model);
  config_class.def_readwrite("rule_fsts", &PyClass::rule_fsts);
  config_class.def_readwrite("rule_fars", &PyClass::rule_fars);
  config_class.def_readwrite("max_num_sentences",
                             &PyClass::max_num_sentences);
  config_class.def_readwrite("silence_scale", &PyClass::silence_scale);

  // Member functions.
  config_class.def("validate", &PyClass::Validate);
  config_class.def("__str__", &PyClass::ToString);
}
void PybindOfflineTts(py::module *m) {
PybindOfflineTtsConfig(m);
PybindGeneratedAudio(m);
using PyClass = OfflineTts;
py::class_<PyClass>(*m, "OfflineTts")
.def(py::init<const OfflineTtsConfig &>(), py::arg("config"),
py::call_guard<py::gil_scoped_release>())
.def_property_readonly("sample_rate", &PyClass::SampleRate)
.def_property_readonly("num_speakers", &PyClass::NumSpeakers)
.def(
"generate",
[](const PyClass &self, const std::string &text, int64_t sid,
float speed,
std::function<int32_t(py::array_t<float>, float)> callback)
-> GeneratedAudio {
if (!callback) {
return self.Generate(text, sid, speed);
}
std::function<int32_t(const float *, int32_t, float)>
callback_wrapper = [callback](const float *samples, int32_t n,
float progress) {
// CAUTION(fangjun): we have to copy samples since it is
// freed once the call back returns.
pybind11::gil_scoped_acquire acquire;
pybind11::array_t<float> array(n);
py::buffer_info buf = array.request();
auto p = static_cast<float *>(buf.ptr);
std::copy(samples, samples + n, p);
return callback(array, progress);
};
return self.Generate(text, sid, speed, callback_wrapper);
},
py::arg("text"), py::arg("sid") = 0, py::arg("speed") = 1.0,
py::arg("callback") = py::none(),
py::call_guard<py::gil_scoped_release>())
.def(
"generate",
[](const PyClass &self, const std::string &text,
const std::string &prompt_text,
const std::vector<float> &prompt_samples, int32_t sample_rate,
float speed, int32_t num_steps,
std::function<int32_t(py::array_t<float>, float)> callback)
-> GeneratedAudio {
if (!callback) {
return self.Generate(text, prompt_text, prompt_samples,
sample_rate, speed, num_steps);
}
std::function<int32_t(const float *, int32_t, float)>
callback_wrapper = [callback](const float *samples, int32_t n,
float progress) {
// CAUTION(fangjun): we have to copy samples since it is
// freed once the call back returns.
pybind11::gil_scoped_acquire acquire;
pybind11::array_t<float> array(n);
py::buffer_info buf = array.request();
auto p = static_cast<float *>(buf.ptr);
std::copy(samples, samples + n, p);
return callback(array, progress);
};
return self.Generate(text, prompt_text, prompt_samples, sample_rate,
speed, num_steps, callback_wrapper);
},
py::arg("text"), py::arg("prompt_text"), py::arg("prompt_samples"),
py::arg("sample_rate"), py::arg("speed") = 1.0,
py::arg("num_steps") = 4, py::arg("callback") = py::none(),
py::call_guard<py::gil_scoped_release>());
}
} // namespace sherpa_onnx