Fangjun Kuang
Committed by GitHub

Support Ukrainian VITS models from coqui-ai/TTS (#469)

@@ -431,15 +431,12 @@ void CNonStreamingTextToSpeechDlg::Init() { @@ -431,15 +431,12 @@ void CNonStreamingTextToSpeechDlg::Init() {
431 ok = false; 431 ok = false;
432 } 432 }
433 433
434 - if (!Exists("./lexicon.txt") && !Exists("./espeak-ng-data/phontab")) {  
435 - error_message += "Cannot find espeak-ng-data directory or ./lexicon.txt\r\n";  
436 - ok = false;  
437 - }  
438 -  
439 if (!Exists("./tokens.txt")) { 434 if (!Exists("./tokens.txt")) {
440 error_message += "Cannot find ./tokens.txt\r\n"; 435 error_message += "Cannot find ./tokens.txt\r\n";
441 ok = false; 436 ok = false;
442 } 437 }
  438 + // it is OK to leave lexicon.txt and espeak-ng-data empty
  439 + // since models using characters don't need them
443 440
444 if (!ok) { 441 if (!ok) {
445 generate_btn_.EnableWindow(FALSE); 442 generate_btn_.EnableWindow(FALSE);
@@ -470,7 +467,7 @@ void CNonStreamingTextToSpeechDlg::Init() { @@ -470,7 +467,7 @@ void CNonStreamingTextToSpeechDlg::Init() {
470 config.model.vits.model = "./model.onnx"; 467 config.model.vits.model = "./model.onnx";
471 if (Exists("./espeak-ng-data/phontab")) { 468 if (Exists("./espeak-ng-data/phontab")) {
472 config.model.vits.data_dir = "./espeak-ng-data"; 469 config.model.vits.data_dir = "./espeak-ng-data";
473 - } else { 470 + } else if (Exists("./lexicon.txt")) {
474 config.model.vits.lexicon = "./lexicon.txt"; 471 config.model.vits.lexicon = "./lexicon.txt";
475 } 472 }
476 config.model.vits.tokens = "./tokens.txt"; 473 config.model.vits.tokens = "./tokens.txt";
@@ -41,6 +41,7 @@ set(sources @@ -41,6 +41,7 @@ set(sources
41 offline-transducer-model-config.cc 41 offline-transducer-model-config.cc
42 offline-transducer-model.cc 42 offline-transducer-model.cc
43 offline-transducer-modified-beam-search-decoder.cc 43 offline-transducer-modified-beam-search-decoder.cc
  44 + offline-tts-character-frontend.cc
44 offline-wenet-ctc-model-config.cc 45 offline-wenet-ctc-model-config.cc
45 offline-wenet-ctc-model.cc 46 offline-wenet-ctc-model.cc
46 offline-whisper-greedy-search-decoder.cc 47 offline-whisper-greedy-search-decoder.cc
  1 +// sherpa-onnx/csrc/offline-tts-character-frontend.cc
  2 +//
  3 +// Copyright (c) 2023 Xiaomi Corporation
  4 +
  5 +#if __ANDROID_API__ >= 9
  6 +#include <strstream>
  7 +
  8 +#include "android/asset_manager.h"
  9 +#include "android/asset_manager_jni.h"
  10 +#endif
  11 +#include <algorithm>
  12 +#include <cctype>
  13 +#include <codecvt>
  14 +#include <fstream>
  15 +#include <locale>
  16 +#include <sstream>
  17 +#include <utility>
  18 +
  19 +#include "sherpa-onnx/csrc/macros.h"
  20 +#include "sherpa-onnx/csrc/offline-tts-character-frontend.h"
  21 +#include "sherpa-onnx/csrc/onnx-utils.h"
  22 +
  23 +namespace sherpa_onnx {
  24 +
  25 +static std::unordered_map<char32_t, int32_t> ReadTokens(std::istream &is) {
  26 + std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv;
  27 + std::unordered_map<char32_t, int32_t> token2id;
  28 +
  29 + std::string line;
  30 +
  31 + std::string sym;
  32 + std::u32string s;
  33 + int32_t id;
  34 + while (std::getline(is, line)) {
  35 + std::istringstream iss(line);
  36 + iss >> sym;
  37 + if (iss.eof()) {
  38 + id = atoi(sym.c_str());
  39 + sym = " ";
  40 + } else {
  41 + iss >> id;
  42 + }
  43 +
  44 + // eat the trailing \r\n on windows
  45 + iss >> std::ws;
  46 + if (!iss.eof()) {
  47 + SHERPA_ONNX_LOGE("Error when reading tokens: %s", line.c_str());
  48 + exit(-1);
  49 + }
  50 +
  51 + // Form models from coqui-ai/TTS, we have saved the IDs of the following
  52 + // symbols in OfflineTtsVitsModelMetaData, so it is safe to skip them here.
  53 + if (sym == "<PAD>" || sym == "<EOS>" || sym == "<BOS>" || sym == "<BLNK>") {
  54 + continue;
  55 + }
  56 +
  57 + s = conv.from_bytes(sym);
  58 + if (s.size() != 1) {
  59 + SHERPA_ONNX_LOGE("Error when reading tokens at Line %s. size: %d",
  60 + line.c_str(), static_cast<int32_t>(s.size()));
  61 + exit(-1);
  62 + }
  63 +
  64 + char32_t c = s[0];
  65 +
  66 + if (token2id.count(c)) {
  67 + SHERPA_ONNX_LOGE("Duplicated token %s. Line %s. Existing ID: %d",
  68 + sym.c_str(), line.c_str(), token2id.at(c));
  69 + exit(-1);
  70 + }
  71 +
  72 + token2id.insert({c, id});
  73 + }
  74 +
  75 + return token2id;
  76 +}
  77 +
  78 +OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend(
  79 + const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data)
  80 + : meta_data_(meta_data) {
  81 + std::ifstream is(tokens);
  82 + token2id_ = ReadTokens(is);
  83 +}
  84 +
#if __ANDROID_API__ >= 9
// Android variant: tokens.txt is bundled inside the APK, so it must be read
// through AAssetManager rather than std::ifstream.
OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend(
    AAssetManager *mgr, const std::string &tokens,
    const OfflineTtsVitsModelMetaData &meta_data)
    : meta_data_(meta_data) {
  auto buf = ReadFile(mgr, tokens);
  // NOTE(review): std::istrstream is deprecated; it is used here because it
  // wraps the in-memory asset buffer without copying. Consider
  // std::ispanstream (C++23) or an istringstream copy if this ever needs to
  // move off <strstream>.
  std::istrstream is(buf.data(), buf.size());
  token2id_ = ReadTokens(is);
}

#endif
  96 +
  97 +std::vector<std::vector<int64_t>>
  98 +OfflineTtsCharacterFrontend::ConvertTextToTokenIds(
  99 + const std::string &_text, const std::string &voice /*= ""*/) const {
  100 + // see
  101 + // https://github.com/coqui-ai/TTS/blob/dev/TTS/tts/utils/text/tokenizer.py#L87
  102 + int32_t use_eos_bos = meta_data_.use_eos_bos;
  103 + int32_t bos_id = meta_data_.bos_id;
  104 + int32_t eos_id = meta_data_.eos_id;
  105 + int32_t blank_id = meta_data_.blank_id;
  106 + int32_t add_blank = meta_data_.add_blank;
  107 +
  108 + std::string text(_text.size(), 0);
  109 + std::transform(_text.begin(), _text.end(), text.begin(),
  110 + [](auto c) { return std::tolower(c); });
  111 +
  112 + std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv;
  113 + std::u32string s = conv.from_bytes(text);
  114 +
  115 + std::vector<std::vector<int64_t>> ans;
  116 +
  117 + std::vector<int64_t> this_sentence;
  118 + if (add_blank) {
  119 + if (use_eos_bos) {
  120 + this_sentence.push_back(bos_id);
  121 + }
  122 +
  123 + this_sentence.push_back(blank_id);
  124 +
  125 + for (char32_t c : s) {
  126 + if (token2id_.count(c)) {
  127 + this_sentence.push_back(token2id_.at(c));
  128 + this_sentence.push_back(blank_id);
  129 + } else {
  130 + SHERPA_ONNX_LOGE("Skip unknown character. Unicode codepoint: \\U+%04x.",
  131 + static_cast<uint32_t>(c));
  132 + }
  133 +
  134 + if (c == '.' || c == ':' || c == '?' || c == '!') {
  135 + // end of a sentence
  136 + if (use_eos_bos) {
  137 + this_sentence.push_back(eos_id);
  138 + }
  139 +
  140 + ans.push_back(std::move(this_sentence));
  141 +
  142 + // re-initialize this_sentence
  143 + if (use_eos_bos) {
  144 + this_sentence.push_back(bos_id);
  145 + }
  146 + this_sentence.push_back(blank_id);
  147 + }
  148 + }
  149 +
  150 + if (use_eos_bos) {
  151 + this_sentence.push_back(eos_id);
  152 + }
  153 +
  154 + if (this_sentence.size() > 1 + use_eos_bos) {
  155 + ans.push_back(std::move(this_sentence));
  156 + }
  157 + } else {
  158 + // not adding blank
  159 + if (use_eos_bos) {
  160 + this_sentence.push_back(bos_id);
  161 + }
  162 +
  163 + for (char32_t c : s) {
  164 + if (token2id_.count(c)) {
  165 + this_sentence.push_back(token2id_.at(c));
  166 + }
  167 +
  168 + if (c == '.' || c == ':' || c == '?' || c == '!') {
  169 + // end of a sentence
  170 + if (use_eos_bos) {
  171 + this_sentence.push_back(eos_id);
  172 + }
  173 +
  174 + ans.push_back(std::move(this_sentence));
  175 +
  176 + // re-initialize this_sentence
  177 + if (use_eos_bos) {
  178 + this_sentence.push_back(bos_id);
  179 + }
  180 + }
  181 + }
  182 +
  183 + if (this_sentence.size() > 1) {
  184 + ans.push_back(std::move(this_sentence));
  185 + }
  186 + }
  187 +
  188 + return ans;
  189 +}
  190 +
  191 +} // namespace sherpa_onnx
  1 +// sherpa-onnx/csrc/offline-tts-character-frontend.h
  2 +//
  3 +// Copyright (c) 2023 Xiaomi Corporation
  4 +
  5 +#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_CHARACTER_FRONTEND_H_
  6 +#define SHERPA_ONNX_CSRC_OFFLINE_TTS_CHARACTER_FRONTEND_H_
  7 +#include <cstdint>
  8 +#include <string>
  9 +#include <unordered_map>
  10 +#include <vector>
  11 +
  12 +#if __ANDROID_API__ >= 9
  13 +#include "android/asset_manager.h"
  14 +#include "android/asset_manager_jni.h"
  15 +#endif
  16 +
  17 +#include "sherpa-onnx/csrc/offline-tts-frontend.h"
  18 +#include "sherpa-onnx/csrc/offline-tts-vits-model-metadata.h"
  19 +
  20 +namespace sherpa_onnx {
  21 +
// Frontend for VITS models whose modeling unit is a single character
// (e.g., models from coqui-ai/TTS): maps each Unicode codepoint of the
// input text directly to a token ID via tokens.txt, with no lexicon or
// phonemizer involved.
class OfflineTtsCharacterFrontend : public OfflineTtsFrontend {
 public:
  // Load the codepoint -> ID map from a tokens.txt file on disk.
  //
  // @param tokens Path to tokens.txt (one "<symbol> <id>" entry per line).
  // @param meta_data Metadata read from the VITS model (blank/bos/eos IDs,
  //                  add_blank, use_eos_bos, ...); copied into this object.
  OfflineTtsCharacterFrontend(const std::string &tokens,
                              const OfflineTtsVitsModelMetaData &meta_data);

#if __ANDROID_API__ >= 9
  // Same as above, but reads tokens.txt from the APK assets via `mgr`.
  OfflineTtsCharacterFrontend(AAssetManager *mgr, const std::string &tokens,
                              const OfflineTtsVitsModelMetaData &meta_data);

#endif
  /** Convert a string to token IDs.
   *
   * @param text The input text.
   *             Example 1: "This is the first sample sentence; this is the
   *             second one." Example 2: "这是第一句。这是第二句。"
   * @param voice Optional. It is for espeak-ng.
   *
   * @return Return a vector-of-vector of token IDs. Each subvector contains
   *         a sentence that can be processed independently.
   *         If a frontend does not support splitting the text into
   *         sentences, the resulting vector contains only one subvector.
   */
  std::vector<std::vector<int64_t>> ConvertTextToTokenIds(
      const std::string &text, const std::string &voice = "") const override;

 private:
  OfflineTtsVitsModelMetaData meta_data_;
  // Maps one Unicode codepoint (a single character) to its token ID.
  std::unordered_map<char32_t, int32_t> token2id_;
};
  51 +
  52 +} // namespace sherpa_onnx
  53 +
  54 +#endif // SHERPA_ONNX_CSRC_OFFLINE_TTS_CHARACTER_FRONTEND_H_
@@ -18,6 +18,7 @@ @@ -18,6 +18,7 @@
18 #include "kaldifst/csrc/text-normalizer.h" 18 #include "kaldifst/csrc/text-normalizer.h"
19 #include "sherpa-onnx/csrc/lexicon.h" 19 #include "sherpa-onnx/csrc/lexicon.h"
20 #include "sherpa-onnx/csrc/macros.h" 20 #include "sherpa-onnx/csrc/macros.h"
  21 +#include "sherpa-onnx/csrc/offline-tts-character-frontend.h"
21 #include "sherpa-onnx/csrc/offline-tts-frontend.h" 22 #include "sherpa-onnx/csrc/offline-tts-frontend.h"
22 #include "sherpa-onnx/csrc/offline-tts-impl.h" 23 #include "sherpa-onnx/csrc/offline-tts-impl.h"
23 #include "sherpa-onnx/csrc/offline-tts-vits-model.h" 24 #include "sherpa-onnx/csrc/offline-tts-vits-model.h"
@@ -116,7 +117,9 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -116,7 +117,9 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
116 return {}; 117 return {};
117 } 118 }
118 119
119 - if (meta_data.add_blank && config_.model.vits.data_dir.empty()) { 120 + // TODO(fangjun): add blank inside the frontend, not here
  121 + if (meta_data.add_blank && config_.model.vits.data_dir.empty() &&
  122 + meta_data.frontend != "characters") {
120 for (auto &k : x) { 123 for (auto &k : x) {
121 k = AddBlank(k); 124 k = AddBlank(k);
122 } 125 }
@@ -195,12 +198,22 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -195,12 +198,22 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
195 void InitFrontend(AAssetManager *mgr) { 198 void InitFrontend(AAssetManager *mgr) {
196 const auto &meta_data = model_->GetMetaData(); 199 const auto &meta_data = model_->GetMetaData();
197 200
198 - if ((meta_data.is_piper || meta_data.is_coqui) &&  
199 - !config_.model.vits.data_dir.empty()) { 201 + if (meta_data.frontend == "characters") {
  202 + frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
  203 + mgr, config_.model.vits.tokens, meta_data);
  204 + } else if ((meta_data.is_piper || meta_data.is_coqui) &&
  205 + !config_.model.vits.data_dir.empty()) {
200 frontend_ = std::make_unique<PiperPhonemizeLexicon>( 206 frontend_ = std::make_unique<PiperPhonemizeLexicon>(
201 mgr, config_.model.vits.tokens, config_.model.vits.data_dir, 207 mgr, config_.model.vits.tokens, config_.model.vits.data_dir,
202 meta_data); 208 meta_data);
203 } else { 209 } else {
  210 + if (config_.model.vits.lexicon.empty()) {
  211 + SHERPA_ONNX_LOGE(
  212 + "Not a model using characters as modeling unit. Please provide "
  213 + "--vits-lexicon if you leave --vits-data-dir empty");
  214 + exit(-1);
  215 + }
  216 +
204 frontend_ = std::make_unique<Lexicon>( 217 frontend_ = std::make_unique<Lexicon>(
205 mgr, config_.model.vits.lexicon, config_.model.vits.tokens, 218 mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
206 meta_data.punctuations, meta_data.language, config_.model.debug); 219 meta_data.punctuations, meta_data.language, config_.model.debug);
@@ -211,12 +224,21 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -211,12 +224,21 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
211 void InitFrontend() { 224 void InitFrontend() {
212 const auto &meta_data = model_->GetMetaData(); 225 const auto &meta_data = model_->GetMetaData();
213 226
214 - if ((meta_data.is_piper || meta_data.is_coqui) &&  
215 - !config_.model.vits.data_dir.empty()) { 227 + if (meta_data.frontend == "characters") {
  228 + frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
  229 + config_.model.vits.tokens, meta_data);
  230 + } else if ((meta_data.is_piper || meta_data.is_coqui) &&
  231 + !config_.model.vits.data_dir.empty()) {
216 frontend_ = std::make_unique<PiperPhonemizeLexicon>( 232 frontend_ = std::make_unique<PiperPhonemizeLexicon>(
217 config_.model.vits.tokens, config_.model.vits.data_dir, 233 config_.model.vits.tokens, config_.model.vits.data_dir,
218 model_->GetMetaData()); 234 model_->GetMetaData());
219 } else { 235 } else {
  236 + if (config_.model.vits.lexicon.empty()) {
  237 + SHERPA_ONNX_LOGE(
  238 + "Not a model using characters as modeling unit. Please provide "
  239 + "--vits-lexicon if you leave --vits-data-dir empty");
  240 + exit(-1);
  241 + }
220 frontend_ = std::make_unique<Lexicon>( 242 frontend_ = std::make_unique<Lexicon>(
221 config_.model.vits.lexicon, config_.model.vits.tokens, 243 config_.model.vits.lexicon, config_.model.vits.tokens,
222 meta_data.punctuations, meta_data.language, config_.model.debug); 244 meta_data.punctuations, meta_data.language, config_.model.debug);
@@ -44,19 +44,7 @@ bool OfflineTtsVitsModelConfig::Validate() const { @@ -44,19 +44,7 @@ bool OfflineTtsVitsModelConfig::Validate() const {
44 return false; 44 return false;
45 } 45 }
46 46
47 - if (data_dir.empty()) {  
48 - if (lexicon.empty()) {  
49 - SHERPA_ONNX_LOGE(  
50 - "Please provide --vits-lexicon if you leave --vits-data-dir empty");  
51 - return false;  
52 - }  
53 -  
54 - if (!FileExists(lexicon)) {  
55 - SHERPA_ONNX_LOGE("--vits-lexicon: %s does not exist", lexicon.c_str());  
56 - return false;  
57 - }  
58 -  
59 - } else { 47 + if (!data_dir.empty()) {
60 if (!FileExists(data_dir + "/phontab")) { 48 if (!FileExists(data_dir + "/phontab")) {
61 SHERPA_ONNX_LOGE("%s/phontab does not exist. Skipping test", 49 SHERPA_ONNX_LOGE("%s/phontab does not exist. Skipping test",
62 data_dir.c_str()); 50 data_dir.c_str());
@@ -10,15 +10,14 @@ @@ -10,15 +10,14 @@
10 10
11 namespace sherpa_onnx { 11 namespace sherpa_onnx {
12 12
  13 +// If you are not sure what each field means, please
  14 +// have a look of the Python file in the model directory that
  15 +// you have downloaded.
13 struct OfflineTtsVitsModelMetaData { 16 struct OfflineTtsVitsModelMetaData {
14 - int32_t sample_rate; 17 + int32_t sample_rate = 0;
15 int32_t add_blank = 0; 18 int32_t add_blank = 0;
16 int32_t num_speakers = 0; 19 int32_t num_speakers = 0;
17 20
18 - std::string punctuations;  
19 - std::string language;  
20 - std::string voice;  
21 -  
22 bool is_piper = false; 21 bool is_piper = false;
23 bool is_coqui = false; 22 bool is_coqui = false;
24 23
@@ -27,6 +26,12 @@ struct OfflineTtsVitsModelMetaData { @@ -27,6 +26,12 @@ struct OfflineTtsVitsModelMetaData {
27 int32_t bos_id = 0; 26 int32_t bos_id = 0;
28 int32_t eos_id = 0; 27 int32_t eos_id = 0;
29 int32_t use_eos_bos = 0; 28 int32_t use_eos_bos = 0;
  29 + int32_t pad_id = 0;
  30 +
  31 + std::string punctuations;
  32 + std::string language;
  33 + std::string voice;
  34 + std::string frontend; // characters
30 }; 35 };
31 36
32 } // namespace sherpa_onnx 37 } // namespace sherpa_onnx
@@ -87,13 +87,18 @@ class OfflineTtsVitsModel::Impl { @@ -87,13 +87,18 @@ class OfflineTtsVitsModel::Impl {
87 SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.punctuations, 87 SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.punctuations,
88 "punctuation", ""); 88 "punctuation", "");
89 SHERPA_ONNX_READ_META_DATA_STR(meta_data_.language, "language"); 89 SHERPA_ONNX_READ_META_DATA_STR(meta_data_.language, "language");
  90 +
90 SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.voice, "voice", ""); 91 SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.voice, "voice", "");
91 92
  93 + SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.frontend, "frontend",
  94 + "");
  95 +
92 SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.blank_id, "blank_id", 0); 96 SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.blank_id, "blank_id", 0);
93 SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.bos_id, "bos_id", 0); 97 SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.bos_id, "bos_id", 0);
94 SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.eos_id, "eos_id", 0); 98 SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.eos_id, "eos_id", 0);
95 SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.use_eos_bos, 99 SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.use_eos_bos,
96 "use_eos_bos", 0); 100 "use_eos_bos", 0);
  101 + SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.pad_id, "pad_id", 0);
97 102
98 std::string comment; 103 std::string comment;
99 SHERPA_ONNX_READ_META_DATA_STR(comment, "comment"); 104 SHERPA_ONNX_READ_META_DATA_STR(comment, "comment");
@@ -142,16 +147,25 @@ class OfflineTtsVitsModel::Impl { @@ -142,16 +147,25 @@ class OfflineTtsVitsModel::Impl {
142 Ort::Value sid_tensor = 147 Ort::Value sid_tensor =
143 Ort::Value::CreateTensor(memory_info, &sid, 1, &sid_shape, 1); 148 Ort::Value::CreateTensor(memory_info, &sid, 1, &sid_shape, 1);
144 149
  150 + int64_t lang_id_shape = 1;
  151 + int64_t lang_id = 0;
  152 + Ort::Value lang_id_tensor =
  153 + Ort::Value::CreateTensor(memory_info, &lang_id, 1, &lang_id_shape, 1);
  154 +
145 std::vector<Ort::Value> inputs; 155 std::vector<Ort::Value> inputs;
146 - inputs.reserve(4); 156 + inputs.reserve(5);
147 inputs.push_back(std::move(x)); 157 inputs.push_back(std::move(x));
148 inputs.push_back(std::move(x_length)); 158 inputs.push_back(std::move(x_length));
149 inputs.push_back(std::move(scales_tensor)); 159 inputs.push_back(std::move(scales_tensor));
150 160
151 - if (input_names_.size() == 4 && input_names_.back() == "sid") { 161 + if (input_names_.size() >= 4 && input_names_[3] == "sid") {
152 inputs.push_back(std::move(sid_tensor)); 162 inputs.push_back(std::move(sid_tensor));
153 } 163 }
154 164
  165 + if (input_names_.size() >= 5 && input_names_[4] == "langid") {
  166 + inputs.push_back(std::move(lang_id_tensor));
  167 + }
  168 +
155 auto out = 169 auto out =
156 sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(), 170 sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
157 output_names_ptr_.data(), output_names_ptr_.size()); 171 output_names_ptr_.data(), output_names_ptr_.size());
@@ -123,7 +123,6 @@ static std::vector<int64_t> CoquiPhonemesToIds( @@ -123,7 +123,6 @@ static std::vector<int64_t> CoquiPhonemesToIds(
123 int32_t blank_id = meta_data.blank_id; 123 int32_t blank_id = meta_data.blank_id;
124 int32_t add_blank = meta_data.add_blank; 124 int32_t add_blank = meta_data.add_blank;
125 int32_t comma_id = token2id.at(','); 125 int32_t comma_id = token2id.at(',');
126 - SHERPA_ONNX_LOGE("comma id: %d", comma_id);  
127 126
128 std::vector<int64_t> ans; 127 std::vector<int64_t> ans;
129 if (add_blank) { 128 if (add_blank) {