offline-tts-zipvoice-model-config.h
1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
// sherpa-onnx/csrc/offline-tts-zipvoice-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_CONFIG_H_
#include <cstdint>
#include <string>
#include "sherpa-onnx/csrc/parse-options.h"
namespace sherpa_onnx {
// Configuration for an offline ZipVoice TTS model.
//
// It holds a set of string fields (model/resource locations) and a few
// floating-point tuning values. How each field is consumed is decided by the
// code that reads this struct, which is not part of this header.
struct OfflineTtsZipvoiceModelConfig {
  std::string tokens;
  std::string text_model;
  std::string flow_matching_model;
  std::string vocoder;

  // data_dir is for piper-phonemize, which uses espeak-ng.
  //
  // NOTE(review): this comment used to read "If data_dir is given, lexicon is
  // ignored", but this struct has no `lexicon` member. The sentence was
  // presumably carried over from another TTS config -- confirm and drop.
  std::string data_dir;

  // Used for converting Chinese characters to pinyin
  std::string pinyin_dict;

  // The literals carry the `f` suffix so that no implicit double -> float
  // narrowing conversion takes place; the stored values are unchanged.
  float feat_scale = 0.1f;
  float t_shift = 0.5f;
  float target_rms = 0.1f;
  float guidance_scale = 1.0f;

  // Leaves the strings empty and the floats at the defaults listed above.
  OfflineTtsZipvoiceModelConfig() = default;

  // Copies every argument into the member of the same name.
  //
  // The four trailing float parameters are optional; their defaults are the
  // same values as the in-class member initializers above, so the two lists
  // must be kept in sync when either one is changed.
  OfflineTtsZipvoiceModelConfig(
      const std::string &tokens, const std::string &text_model,
      const std::string &flow_matching_model, const std::string &vocoder,
      const std::string &data_dir, const std::string &pinyin_dict,
      float feat_scale = 0.1f, float t_shift = 0.5f, float target_rms = 0.1f,
      float guidance_scale = 1.0f)
      : tokens(tokens),
        text_model(text_model),
        flow_matching_model(flow_matching_model),
        vocoder(vocoder),
        data_dir(data_dir),
        pinyin_dict(pinyin_dict),
        feat_scale(feat_scale),
        t_shift(t_shift),
        target_rms(target_rms),
        guidance_scale(guidance_scale) {}

  // The three functions below are only declared here; their definitions are
  // not visible in this header.

  // Presumably exposes the fields above as options on `po` -- see the
  // definition to confirm the option names.
  void Register(ParseOptions *po);

  // Presumably returns true when the configuration is usable -- see the
  // definition for the exact checks.
  bool Validate() const;

  // Returns a string form of this configuration (exact format is determined
  // by the definition).
  std::string ToString() const;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_CONFIG_H_