Fangjun Kuang
Committed by GitHub

Fix looking up OOVs in lexicon.txt for MeloTTS models. (#1266)

If an English word does not exist in the lexicon, we split
it into characters. For instance, if the word TTS does not
exist in lexicon.txt, we split it into 3 characters T, T, and S.
@@ -136,6 +136,22 @@ class MeloTtsLexicon::Impl { @@ -136,6 +136,22 @@ class MeloTtsLexicon::Impl {
136 ans.tokens.insert(ans.tokens.end(), ids.tokens.begin(), 136 ans.tokens.insert(ans.tokens.end(), ids.tokens.begin(),
137 ids.tokens.end()); 137 ids.tokens.end());
138 ans.tones.insert(ans.tones.end(), ids.tones.begin(), ids.tones.end()); 138 ans.tones.insert(ans.tones.end(), ids.tones.begin(), ids.tones.end());
  139 + } else {
  140 + // If the lexicon does not contain the word, we split the word into
  141 + // characters.
  142 + //
  143 + // For instance, if the word is TTS and it does not exist
  144 + // in the lexicon, we split it into 3 characters: T T S
  145 + std::string s;
  146 + for (char c : word) {
  147 + s = c;
  148 + if (word2ids_.count(s)) {
  149 + const auto &t = word2ids_.at(s);
  150 + ans.tokens.insert(ans.tokens.end(), t.tokens.begin(),
  151 + t.tokens.end());
  152 + ans.tones.insert(ans.tones.end(), t.tones.begin(), t.tones.end());
  153 + }
  154 + }
139 } 155 }
140 } 156 }
141 157