Fangjun Kuang
Committed by GitHub

Support Spanish in TTS (#396)

@@ -131,6 +131,8 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIds( @@ -131,6 +131,8 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIds(
131 return ConvertTextToTokenIdsEnglish(text); 131 return ConvertTextToTokenIdsEnglish(text);
132 case Language::kGerman: 132 case Language::kGerman:
133 return ConvertTextToTokenIdsGerman(text); 133 return ConvertTextToTokenIdsGerman(text);
  134 + case Language::kSpanish:
  135 + return ConvertTextToTokenIdsSpanish(text);
134 case Language::kChinese: 136 case Language::kChinese:
135 return ConvertTextToTokenIdsChinese(text); 137 return ConvertTextToTokenIdsChinese(text);
136 default: 138 default:
@@ -250,6 +252,8 @@ void Lexicon::InitLanguage(const std::string &_lang) { @@ -250,6 +252,8 @@ void Lexicon::InitLanguage(const std::string &_lang) {
250 language_ = Language::kEnglish; 252 language_ = Language::kEnglish;
251 } else if (lang == "german") { 253 } else if (lang == "german") {
252 language_ = Language::kGerman; 254 language_ = Language::kGerman;
  255 + } else if (lang == "spanish") {
  256 + language_ = Language::kSpanish;
253 } else if (lang == "chinese") { 257 } else if (lang == "chinese") {
254 language_ = Language::kChinese; 258 language_ = Language::kChinese;
255 } else { 259 } else {
@@ -41,6 +41,11 @@ class Lexicon { @@ -41,6 +41,11 @@ class Lexicon {
41 return ConvertTextToTokenIdsEnglish(text); 41 return ConvertTextToTokenIdsEnglish(text);
42 } 42 }
43 43
  44 + std::vector<int64_t> ConvertTextToTokenIdsSpanish(
  45 + const std::string &text) const {
  46 + return ConvertTextToTokenIdsEnglish(text);
  47 + }
  48 +
44 std::vector<int64_t> ConvertTextToTokenIdsEnglish( 49 std::vector<int64_t> ConvertTextToTokenIdsEnglish(
45 const std::string &text) const; 50 const std::string &text) const;
46 51
@@ -56,6 +61,7 @@ class Lexicon { @@ -56,6 +61,7 @@ class Lexicon {
56 enum class Language { 61 enum class Language {
57 kEnglish, 62 kEnglish,
58 kGerman, 63 kGerman,
  64 + kSpanish,
59 kChinese, 65 kChinese,
60 kUnknown, 66 kUnknown,
61 }; 67 };
@@ -164,7 +164,7 @@ template bool SplitStringToFloats(const std::string &full, const char *delim, @@ -164,7 +164,7 @@ template bool SplitStringToFloats(const std::string &full, const char *delim,
164 std::vector<double> *out); 164 std::vector<double> *out);
165 165
166 static bool IsPunct(char c) { return c != '\'' && std::ispunct(c); } 166 static bool IsPunct(char c) { return c != '\'' && std::ispunct(c); }
167 -static bool IsGermanUmlauts(const std::string &words) { 167 +static bool IsGermanUmlauts(const std::string &word) {
168 // ä 0xC3 0xA4 168 // ä 0xC3 0xA4
169 // ö 0xC3 0xB6 169 // ö 0xC3 0xB6
170 // ü 0xC3 0xBC 170 // ü 0xC3 0xBC
@@ -173,12 +173,12 @@ static bool IsGermanUmlauts(const std::string &words) { @@ -173,12 +173,12 @@ static bool IsGermanUmlauts(const std::string &words) {
173 // Ü 0xC3 0x9C 173 // Ü 0xC3 0x9C
174 // ß 0xC3 0x9F 174 // ß 0xC3 0x9F
175 175
176 - if (words.size() != 2 || static_cast<uint8_t>(words[0]) != 0xc3) { 176 + if (word.size() != 2 || static_cast<uint8_t>(word[0]) != 0xc3) {
177 return false; 177 return false;
178 } 178 }
179 179
180 - auto c = static_cast<uint8_t>(words[1]);  
181 - if (c == 0xa4 || c == 0xb6 || c == 0xbC || c == 0x84 || c == 0x96 || 180 + auto c = static_cast<uint8_t>(word[1]);
  181 + if (c == 0xa4 || c == 0xb6 || c == 0xbc || c == 0x84 || c == 0x96 ||
182 c == 0x9c || c == 0x9f) { 182 c == 0x9c || c == 0x9f) {
183 return true; 183 return true;
184 } 184 }
@@ -186,6 +186,33 @@ static bool IsGermanUmlauts(const std::string &words) { @@ -186,6 +186,33 @@ static bool IsGermanUmlauts(const std::string &words) {
186 return false; 186 return false;
187 } 187 }
188 188
  189 +// see https://www.tandem.net/blog/spanish-accents
  190 +static bool IsSpanishDiacritic(const std::string &word) {
  191 + // á 0xC3 0xA1
  192 + // é 0xC3 0xA9
  193 + // í 0xC3 0xAD
  194 + // ó 0xC3 0xB3
  195 + // ú 0xC3 0xBA
  196 + // ü 0xC3 0xBC
  197 + // ñ 0xC3 0xB1
  198 +
  199 + if (word.size() != 2 || static_cast<uint8_t>(word[0]) != 0xc3) {
  200 + return false;
  201 + }
  202 +
  203 + auto c = static_cast<uint8_t>(word[1]);
  204 + if (c == 0xa1 || c == 0xa9 || c == 0xad || c == 0xb3 || c == 0xba ||
  205 + c == 0xbc || c == 0xb1) {
  206 + return true;
  207 + }
  208 +
  209 + return false;
  210 +}
  211 +
  212 +static bool IsSpecial(const std::string &w) {
  213 + return IsGermanUmlauts(w) || IsSpanishDiacritic(w);
  214 +}
  215 +
189 static std::vector<std::string> MergeCharactersIntoWords( 216 static std::vector<std::string> MergeCharactersIntoWords(
190 const std::vector<std::string> &words) { 217 const std::vector<std::string> &words) {
191 std::vector<std::string> ans; 218 std::vector<std::string> ans;
@@ -196,7 +223,7 @@ static std::vector<std::string> MergeCharactersIntoWords( @@ -196,7 +223,7 @@ static std::vector<std::string> MergeCharactersIntoWords(
196 223
197 while (i < n) { 224 while (i < n) {
198 const auto &w = words[i]; 225 const auto &w = words[i];
199 - if (w.size() >= 3 || (w.size() == 2 && !IsGermanUmlauts(w)) || 226 + if (w.size() >= 3 || (w.size() == 2 && !IsSpecial(w)) ||
200 (w.size() == 1 && (IsPunct(w[0]) || std::isspace(w[0])))) { 227 (w.size() == 1 && (IsPunct(w[0]) || std::isspace(w[0])))) {
201 if (prev != -1) { 228 if (prev != -1) {
202 std::string t; 229 std::string t;
@@ -215,7 +242,7 @@ static std::vector<std::string> MergeCharactersIntoWords( @@ -215,7 +242,7 @@ static std::vector<std::string> MergeCharactersIntoWords(
215 } 242 }
216 243
217 // e.g., öffnen 244 // e.g., öffnen
218 - if (w.size() == 1 || (w.size() == 2 && IsGermanUmlauts(w))) { 245 + if (w.size() == 1 || (w.size() == 2 && IsSpecial(w))) {
219 if (prev == -1) { 246 if (prev == -1) {
220 prev = i; 247 prev = i;
221 } 248 }