Fangjun Kuang
Committed by GitHub

Support printing input text and words after splitting (#376)

@@ -76,7 +76,9 @@ static std::vector<int32_t> ConvertTokensToIds( @@ -76,7 +76,9 @@ static std::vector<int32_t> ConvertTokensToIds(
76 } 76 }
77 77
78 Lexicon::Lexicon(const std::string &lexicon, const std::string &tokens, 78 Lexicon::Lexicon(const std::string &lexicon, const std::string &tokens,
79 - const std::string &punctuations, const std::string &language) { 79 + const std::string &punctuations, const std::string &language,
  80 + bool debug /*= false*/)
  81 + : debug_(debug) {
80 InitLanguage(language); 82 InitLanguage(language);
81 InitTokens(tokens); 83 InitTokens(tokens);
82 InitLexicon(lexicon); 84 InitLexicon(lexicon);
@@ -102,6 +104,20 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIdsChinese( @@ -102,6 +104,20 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIdsChinese(
102 const std::string &text) const { 104 const std::string &text) const {
103 std::vector<std::string> words = SplitUtf8(text); 105 std::vector<std::string> words = SplitUtf8(text);
104 106
  107 + if (debug_) {
  108 + fprintf(stderr, "Input text in string: %s\n", text.c_str());
  109 + fprintf(stderr, "Input text in bytes:");
  110 + for (uint8_t c : text) {
  111 + fprintf(stderr, " %02x", c);
  112 + }
  113 + fprintf(stderr, "\n");
  114 + fprintf(stderr, "After splitting to words:");
  115 + for (const auto &w : words) {
  116 + fprintf(stderr, " %s", w.c_str());
  117 + }
  118 + fprintf(stderr, "\n");
  119 + }
  120 +
105 std::vector<int64_t> ans; 121 std::vector<int64_t> ans;
106 122
107 auto sil = token2id_.at("sil"); 123 auto sil = token2id_.at("sil");
@@ -134,6 +150,21 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIdsEnglish( @@ -134,6 +150,21 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIdsEnglish(
134 ToLowerCase(&text); 150 ToLowerCase(&text);
135 151
136 std::vector<std::string> words = SplitUtf8(text); 152 std::vector<std::string> words = SplitUtf8(text);
  153 +
  154 + if (debug_) {
  155 + fprintf(stderr, "Input text (lowercase) in string: %s\n", text.c_str());
  156 + fprintf(stderr, "Input text in bytes:");
  157 + for (uint8_t c : text) {
  158 + fprintf(stderr, " %02x", c);
  159 + }
  160 + fprintf(stderr, "\n");
  161 + fprintf(stderr, "After splitting to words:");
  162 + for (const auto &w : words) {
  163 + fprintf(stderr, " %s", w.c_str());
  164 + }
  165 + fprintf(stderr, "\n");
  166 + }
  167 +
137 int32_t blank = token2id_.at(" "); 168 int32_t blank = token2id_.at(" ");
138 169
139 std::vector<int64_t> ans; 170 std::vector<int64_t> ans;
@@ -17,7 +17,8 @@ namespace sherpa_onnx { @@ -17,7 +17,8 @@ namespace sherpa_onnx {
17 class Lexicon { 17 class Lexicon {
18 public: 18 public:
19 Lexicon(const std::string &lexicon, const std::string &tokens, 19 Lexicon(const std::string &lexicon, const std::string &tokens,
20 - const std::string &punctuations, const std::string &language); 20 + const std::string &punctuations, const std::string &language,
  21 + bool debug = false);
21 22
22 std::vector<int64_t> ConvertTextToTokenIds(const std::string &text) const; 23 std::vector<int64_t> ConvertTextToTokenIds(const std::string &text) const;
23 24
@@ -45,6 +46,7 @@ class Lexicon { @@ -45,6 +46,7 @@ class Lexicon {
45 std::unordered_set<std::string> punctuations_; 46 std::unordered_set<std::string> punctuations_;
46 std::unordered_map<std::string, int32_t> token2id_; 47 std::unordered_map<std::string, int32_t> token2id_;
47 Language language_; 48 Language language_;
  49 + bool debug_;
48 // 50 //
49 }; 51 };
50 52
@@ -21,7 +21,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -21,7 +21,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
21 explicit OfflineTtsVitsImpl(const OfflineTtsConfig &config) 21 explicit OfflineTtsVitsImpl(const OfflineTtsConfig &config)
22 : model_(std::make_unique<OfflineTtsVitsModel>(config.model)), 22 : model_(std::make_unique<OfflineTtsVitsModel>(config.model)),
23 lexicon_(config.model.vits.lexicon, config.model.vits.tokens, 23 lexicon_(config.model.vits.lexicon, config.model.vits.tokens,
24 - model_->Punctuations(), model_->Language()) {} 24 + model_->Punctuations(), model_->Language(),
  25 + config.model.debug) {}
25 26
26 GeneratedAudio Generate(const std::string &text, 27 GeneratedAudio Generate(const std::string &text,
27 int64_t sid = 0) const override { 28 int64_t sid = 0) const override {