Committed by
GitHub
Fix looking up OOVs in lexicon.txt for MeloTTS models. (#1266)
If an English word does not exist in the lexicon, we split it into characters. For instance, if the word TTS does not exist in lexicon.txt, we split it into 3 characters: T, T, and S.
正在显示
1 个修改的文件
包含
16 行增加
和
0 行删除
| @@ -136,6 +136,22 @@ class MeloTtsLexicon::Impl { | @@ -136,6 +136,22 @@ class MeloTtsLexicon::Impl { | ||
| 136 | ans.tokens.insert(ans.tokens.end(), ids.tokens.begin(), | 136 | ans.tokens.insert(ans.tokens.end(), ids.tokens.begin(), |
| 137 | ids.tokens.end()); | 137 | ids.tokens.end()); |
| 138 | ans.tones.insert(ans.tones.end(), ids.tones.begin(), ids.tones.end()); | 138 | ans.tones.insert(ans.tones.end(), ids.tones.begin(), ids.tones.end()); |
| 139 | + } else { | ||
| 140 | + // If the lexicon does not contain the word, we split the word into | ||
| 141 | + // characters. | ||
| 142 | + // | ||
| 143 | + // For instance, if the word is TTS and it does not exist | ||
| 144 | + // in the lexicon, we split it into 3 characters: T T S | ||
| 145 | + std::string s; | ||
| 146 | + for (char c : word) { | ||
| 147 | + s = c; | ||
| 148 | + if (word2ids_.count(s)) { | ||
| 149 | + const auto &t = word2ids_.at(s); | ||
| 150 | + ans.tokens.insert(ans.tokens.end(), t.tokens.begin(), | ||
| 151 | + t.tokens.end()); | ||
| 152 | + ans.tones.insert(ans.tones.end(), t.tones.begin(), t.tones.end()); | ||
| 153 | + } | ||
| 154 | + } | ||
| 139 | } | 155 | } |
| 140 | } | 156 | } |
| 141 | 157 |
-
请 注册 或 登录 后发表评论