正在显示 2 个修改的文件,包含 35 行增加和 2 行删除
| @@ -69,7 +69,9 @@ class KokoroMultiLangLexicon::Impl { | @@ -69,7 +69,9 @@ class KokoroMultiLangLexicon::Impl { | ||
| 69 | 69 | ||
| 70 | std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &_text, | 70 | std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &_text, |
| 71 | const std::string &voice) const { | 71 | const std::string &voice) const { |
| 72 | - std::string text = ToLowerCase(_text); | 72 | + // we cannot convert text to lowercase here since it will affect |
| 73 | + // how piper_phonemize handles punctuations inside the text | ||
| 74 | + std::string text = _text; | ||
| 73 | if (debug_) { | 75 | if (debug_) { |
| 74 | SHERPA_ONNX_LOGE("After converting to lowercase:\n%s", text.c_str()); | 76 | SHERPA_ONNX_LOGE("After converting to lowercase:\n%s", text.c_str()); |
| 75 | } | 77 | } |
| @@ -300,7 +302,8 @@ class KokoroMultiLangLexicon::Impl { | @@ -300,7 +302,8 @@ class KokoroMultiLangLexicon::Impl { | ||
| 300 | 302 | ||
| 301 | this_sentence.push_back(0); | 303 | this_sentence.push_back(0); |
| 302 | 304 | ||
| 303 | - for (const auto &word : words) { | 305 | + for (const auto &_word : words) { |
| 306 | + auto word = ToLowerCase(_word); | ||
| 304 | if (IsPunctuation(word)) { | 307 | if (IsPunctuation(word)) { |
| 305 | this_sentence.push_back(token2id_.at(word)); | 308 | this_sentence.push_back(token2id_.at(word)); |
| 306 | 309 |
| @@ -32,6 +32,32 @@ | @@ -32,6 +32,32 @@ | ||
| 32 | 32 | ||
| 33 | namespace sherpa_onnx { | 33 | namespace sherpa_onnx { |
| 34 | 34 | ||
| 35 | +// Encode a single Unicode code point (char32_t) as a UTF-8 string of 1 to 4 bytes. Code points above 0x10FFFF are reported via SHERPA_ONNX_LOGE and yield an empty string. For debugging only | |
| 36 | +static std::string ToString(char32_t cp) { | |
| 37 | + std::string result; | |
| 38 | + | |
| 39 | + if (cp <= 0x7F) { | |
| 40 | + result += static_cast<char>(cp); | |
| 41 | + } else if (cp <= 0x7FF) { | |
| 42 | + result += static_cast<char>(0xC0 | ((cp >> 6) & 0x1F)); | |
| 43 | + result += static_cast<char>(0x80 | (cp & 0x3F)); | |
| 44 | + } else if (cp <= 0xFFFF) { | |
| 45 | + result += static_cast<char>(0xE0 | ((cp >> 12) & 0x0F)); | |
| 46 | + result += static_cast<char>(0x80 | ((cp >> 6) & 0x3F)); | |
| 47 | + result += static_cast<char>(0x80 | (cp & 0x3F)); | |
| 48 | + } else if (cp <= 0x10FFFF) { | |
| 49 | + result += static_cast<char>(0xF0 | ((cp >> 18) & 0x07)); | |
| 50 | + result += static_cast<char>(0x80 | ((cp >> 12) & 0x3F)); | |
| 51 | + result += static_cast<char>(0x80 | ((cp >> 6) & 0x3F)); | |
| 52 | + result += static_cast<char>(0x80 | (cp & 0x3F)); | |
| 53 | + } else { | |
| 54 | + SHERPA_ONNX_LOGE("Invalid Unicode code point: %d", | |
| 55 | + static_cast<int32_t>(cp)); | |
| 56 | + } | |
| 57 | + | |
| 58 | + return result; | |
| 59 | +} | |
| 60 | + | ||
| 35 | void CallPhonemizeEspeak(const std::string &text, | 61 | void CallPhonemizeEspeak(const std::string &text, |
| 36 | piper::eSpeakPhonemeConfig &config, // NOLINT | 62 | piper::eSpeakPhonemeConfig &config, // NOLINT |
| 37 | std::vector<std::vector<piper::Phoneme>> *phonemes) { | 63 | std::vector<std::vector<piper::Phoneme>> *phonemes) { |
| @@ -165,6 +191,7 @@ static std::vector<std::vector<int64_t>> PiperPhonemesToIdsKokoro( | @@ -165,6 +191,7 @@ static std::vector<std::vector<int64_t>> PiperPhonemesToIdsKokoro( | ||
| 165 | current.push_back(0); | 191 | current.push_back(0); |
| 166 | 192 | ||
| 167 | for (auto p : phonemes) { | 193 | for (auto p : phonemes) { |
| 194 | + // SHERPA_ONNX_LOGE("%d %s", static_cast<int32_t>(p), ToString(p).c_str()); | ||
| 168 | if (token2id.count(p)) { | 195 | if (token2id.count(p)) { |
| 169 | if (current.size() > max_len - 1) { | 196 | if (current.size() > max_len - 1) { |
| 170 | current.push_back(0); | 197 | current.push_back(0); |
| @@ -175,6 +202,9 @@ static std::vector<std::vector<int64_t>> PiperPhonemesToIdsKokoro( | @@ -175,6 +202,9 @@ static std::vector<std::vector<int64_t>> PiperPhonemesToIdsKokoro( | ||
| 175 | } | 202 | } |
| 176 | 203 | ||
| 177 | current.push_back(token2id.at(p)); | 204 | current.push_back(token2id.at(p)); |
| 205 | + if (p == '.') { | ||
| 206 | + current.push_back(token2id.at(' ')); | ||
| 207 | + } | ||
| 178 | } else { | 208 | } else { |
| 179 | SHERPA_ONNX_LOGE("Skip unknown phonemes. Unicode codepoint: \\U+%04x.", | 209 | SHERPA_ONNX_LOGE("Skip unknown phonemes. Unicode codepoint: \\U+%04x.", |
| 180 | static_cast<uint32_t>(p)); | 210 | static_cast<uint32_t>(p)); |
-
请 注册 或 登录 后发表评论