Committed by
GitHub
Support specifying pronunciations of phrases in Chinese TTS. (#2507)
This PR implements support for specifying pronunciations of phrases in Chinese TTS by modifying the lexicon processing logic. The change introduces a greedy longest-match algorithm that attempts to match multi-word phrases before falling back to individual word processing.
正在显示
1 个修改的文件
包含
56 行增加
和
1 行删除
| @@ -36,6 +36,22 @@ static bool IsPunct(const std::string &s) { | @@ -36,6 +36,22 @@ static bool IsPunct(const std::string &s) { | ||
| 36 | return puncts.count(s); | 36 | return puncts.count(s); |
| 37 | } | 37 | } |
| 38 | 38 | ||
// Concatenates the words in the closed range [start, end] into one string.
//
// @param words  The sequence of words to join.
// @param start  Index of the first word to include.
// @param end    Index of the last word to include (inclusive).
// @return words[start] + ... + words[end] with no separator. Returns an empty
//         string if either index is outside [0, words.size()) or if
//         start > end.
static std::string GetWord(const std::vector<std::string> &words, int32_t start,
                           int32_t end) {
  std::string ans;

  // Do the range checks in a signed type. Comparing int32_t directly against
  // size_t converts the index to unsigned, which only rejects negative values
  // by accident (wraparound) and triggers -Wsign-compare.
  const int64_t num_words = static_cast<int64_t>(words.size());
  if (start < 0 || end < 0 || start >= num_words || end >= num_words ||
      start > end) {
    return ans;
  }

  // Reserve once so that appending does not repeatedly reallocate.
  std::size_t total_size = 0;
  for (int32_t i = start; i <= end; ++i) {
    total_size += words[i].size();
  }
  ans.reserve(total_size);

  for (int32_t i = start; i <= end; ++i) {
    ans += words[i];
  }

  return ans;
}
| 54 | + | ||
| 39 | class JiebaLexicon::Impl { | 55 | class JiebaLexicon::Impl { |
| 40 | public: | 56 | public: |
| 41 | Impl(const std::string &lexicon, const std::string &tokens, | 57 | Impl(const std::string &lexicon, const std::string &tokens, |
| @@ -160,7 +176,46 @@ class JiebaLexicon::Impl { | @@ -160,7 +176,46 @@ class JiebaLexicon::Impl { | ||
| 160 | std::vector<TokenIDs> ans; | 176 | std::vector<TokenIDs> ans; |
| 161 | std::vector<int64_t> this_sentence; | 177 | std::vector<int64_t> this_sentence; |
| 162 | 178 | ||
| 163 | - for (const auto &w : words) { | 179 | + int32_t num_words = static_cast<int32_t>(words.size()); |
| 180 | + int32_t max_len = 10; | ||
| 181 | + | ||
| 182 | + for (int32_t i = 0; i < num_words;) { | ||
| 183 | + int32_t start = i; | ||
| 184 | + int32_t end = std::min(i + max_len, num_words - 1); | ||
| 185 | + | ||
| 186 | + std::string w; | ||
| 187 | + while (end > start) { | ||
| 188 | + auto this_word = GetWord(words, start, end); | ||
| 189 | + if (debug_) { | ||
| 190 | +#if __OHOS__ | ||
| 191 | + SHERPA_ONNX_LOGE("%{public}d-%{public}d: %{public}s", start, end, | ||
| 192 | + this_word.c_str()); | ||
| 193 | +#else | ||
| 194 | + SHERPA_ONNX_LOGE("%d-%d: %s", start, end, this_word.c_str()); | ||
| 195 | +#endif | ||
| 196 | + } | ||
| 197 | + if (word2ids_.count(this_word)) { | ||
| 198 | + i = end + 1; | ||
| 199 | + w = std::move(this_word); | ||
| 200 | + if (debug_) { | ||
| 201 | +#if __OHOS__ | ||
| 202 | + SHERPA_ONNX_LOGE("matched %{public}d-%{public}d: %{public}s", start, | ||
| 203 | + end, w.c_str()); | ||
| 204 | +#else | ||
| 205 | + SHERPA_ONNX_LOGE("matched %d-%d: %s", start, end, w.c_str()); | ||
| 206 | +#endif | ||
| 207 | + } | ||
| 208 | + break; | ||
| 209 | + } | ||
| 210 | + | ||
| 211 | + end -= 1; | ||
| 212 | + } | ||
| 213 | + | ||
| 214 | + if (w.empty()) { | ||
| 215 | + w = words[i]; | ||
| 216 | + i += 1; | ||
| 217 | + } | ||
| 218 | + | ||
| 164 | auto ids = ConvertWordToIds(w); | 219 | auto ids = ConvertWordToIds(w); |
| 165 | if (ids.empty()) { | 220 | if (ids.empty()) { |
| 166 | #if __OHOS__ | 221 | #if __OHOS__ |
-
请 注册 或 登录 后发表评论