Fangjun Kuang
Committed by GitHub

Support VITS models from icefall. (#625)

@@ -205,7 +205,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -205,7 +205,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
205 if (meta_data.frontend == "characters") { 205 if (meta_data.frontend == "characters") {
206 frontend_ = std::make_unique<OfflineTtsCharacterFrontend>( 206 frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
207 mgr, config_.model.vits.tokens, meta_data); 207 mgr, config_.model.vits.tokens, meta_data);
208 - } else if ((meta_data.is_piper || meta_data.is_coqui) && 208 + } else if ((meta_data.is_piper || meta_data.is_coqui ||
  209 + meta_data.is_icefall) &&
209 !config_.model.vits.data_dir.empty()) { 210 !config_.model.vits.data_dir.empty()) {
210 frontend_ = std::make_unique<PiperPhonemizeLexicon>( 211 frontend_ = std::make_unique<PiperPhonemizeLexicon>(
211 mgr, config_.model.vits.tokens, config_.model.vits.data_dir, 212 mgr, config_.model.vits.tokens, config_.model.vits.data_dir,
@@ -231,7 +232,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -231,7 +232,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
231 if (meta_data.frontend == "characters") { 232 if (meta_data.frontend == "characters") {
232 frontend_ = std::make_unique<OfflineTtsCharacterFrontend>( 233 frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
233 config_.model.vits.tokens, meta_data); 234 config_.model.vits.tokens, meta_data);
234 - } else if ((meta_data.is_piper || meta_data.is_coqui) && 235 + } else if ((meta_data.is_piper || meta_data.is_coqui ||
  236 + meta_data.is_icefall) &&
235 !config_.model.vits.data_dir.empty()) { 237 !config_.model.vits.data_dir.empty()) {
236 frontend_ = std::make_unique<PiperPhonemizeLexicon>( 238 frontend_ = std::make_unique<PiperPhonemizeLexicon>(
237 config_.model.vits.tokens, config_.model.vits.data_dir, 239 config_.model.vits.tokens, config_.model.vits.data_dir,
@@ -20,6 +20,7 @@ struct OfflineTtsVitsModelMetaData { @@ -20,6 +20,7 @@ struct OfflineTtsVitsModelMetaData {
20 20
21 bool is_piper = false; 21 bool is_piper = false;
22 bool is_coqui = false; 22 bool is_coqui = false;
  23 + bool is_icefall = false;
23 24
24 // the following options are for models from coqui-ai/TTS 25 // the following options are for models from coqui-ai/TTS
25 int32_t blank_id = 0; 26 int32_t blank_id = 0;
@@ -110,6 +110,10 @@ class OfflineTtsVitsModel::Impl { @@ -110,6 +110,10 @@ class OfflineTtsVitsModel::Impl {
110 if (comment.find("coqui") != std::string::npos) { 110 if (comment.find("coqui") != std::string::npos) {
111 meta_data_.is_coqui = true; 111 meta_data_.is_coqui = true;
112 } 112 }
  113 +
  114 + if (comment.find("icefall") != std::string::npos) {
  115 + meta_data_.is_icefall = true;
  116 + }
113 } 117 }
114 118
115 Ort::Value RunVitsPiperOrCoqui(Ort::Value x, int64_t sid, float speed) { 119 Ort::Value RunVitsPiperOrCoqui(Ort::Value x, int64_t sid, float speed) {
@@ -236,7 +236,7 @@ std::vector<std::vector<int64_t>> PiperPhonemizeLexicon::ConvertTextToTokenIds( @@ -236,7 +236,7 @@ std::vector<std::vector<int64_t>> PiperPhonemizeLexicon::ConvertTextToTokenIds(
236 236
237 std::vector<int64_t> phoneme_ids; 237 std::vector<int64_t> phoneme_ids;
238 238
239 - if (meta_data_.is_piper) { 239 + if (meta_data_.is_piper || meta_data_.is_icefall) {
240 for (const auto &p : phonemes) { 240 for (const auto &p : phonemes) {
241 phoneme_ids = PiperPhonemesToIds(token2id_, p); 241 phoneme_ids = PiperPhonemesToIds(token2id_, p);
242 ans.push_back(std::move(phoneme_ids)); 242 ans.push_back(std::move(phoneme_ids));
@@ -105,11 +105,13 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, @@ -105,11 +105,13 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
105 } else { 105 } else {
106 SHERPA_ONNX_LOGE("Use nnapi"); 106 SHERPA_ONNX_LOGE("Use nnapi");
107 } 107 }
108 -#else 108 +#elif defined(__ANDROID_API__)
109 SHERPA_ONNX_LOGE( 109 SHERPA_ONNX_LOGE(
110 "Android NNAPI requires API level >= 27. Current API level %d " 110 "Android NNAPI requires API level >= 27. Current API level %d "
111 "Fallback to cpu!", 111 "Fallback to cpu!",
112 (int32_t)__ANDROID_API__); 112 (int32_t)__ANDROID_API__);
  113 +#else
  114 + SHERPA_ONNX_LOGE("NNAPI is for Android only. Fallback to cpu");
113 #endif 115 #endif
114 break; 116 break;
115 } 117 }