继续操作前请注册或者登录。
Fangjun Kuang
Committed by GitHub

Add more debug info for vits tts (#2491)

... ... @@ -188,23 +188,34 @@ class JiebaLexicon::Impl {
private:
std::vector<int32_t> ConvertWordToIds(const std::string &w) const {
if (word2ids_.count(w)) {
return word2ids_.at(w);
}
if (token2id_.count(w)) {
return {token2id_.at(w)};
}
std::vector<int32_t> ans;
std::vector<std::string> words = SplitUtf8(w);
for (const auto &word : words) {
if (word2ids_.count(word)) {
auto ids = ConvertWordToIds(word);
ans.insert(ans.end(), ids.begin(), ids.end());
if (word2ids_.count(w)) {
ans = word2ids_.at(w);
} else if (token2id_.count(w)) {
ans = {token2id_.at(w)};
} else {
std::vector<std::string> words = SplitUtf8(w);
for (const auto &word : words) {
if (word2ids_.count(word)) {
auto ids = ConvertWordToIds(word);
ans.insert(ans.end(), ids.begin(), ids.end());
}
}
}
if (debug_) {
std::ostringstream os;
os << w << ": ";
for (auto i : ans) {
os << id2token_.at(i) << " ";
}
os << "\n";
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
}
return ans;
}
... ... @@ -234,6 +245,12 @@ class JiebaLexicon::Impl {
if (!token2id_.count(";") && token2id_.count(",")) {
token2id_[";"] = token2id_[","];
}
if (debug_) {
for (const auto &p : token2id_) {
id2token_[p.second] = p.first;
}
}
}
void InitLexicon(std::istream &is) {
... ... @@ -272,6 +289,11 @@ class JiebaLexicon::Impl {
std::vector<int32_t> ids = ConvertTokensToIds(token2id_, token_list);
if (ids.empty()) {
#if __OHOS__
SHERPA_ONNX_LOGE("Empty token ids for %{public}s", line.c_str());
#else
SHERPA_ONNX_LOGE("Empty token ids for %s", line.c_str());
#endif
continue;
}
... ... @@ -286,6 +308,8 @@ class JiebaLexicon::Impl {
// tokens.txt is saved in token2id_
std::unordered_map<std::string, int32_t> token2id_;
std::unordered_map<int32_t, std::string> id2token_;
std::unique_ptr<cppjieba::Jieba> jieba_;
bool debug_ = false;
};
... ...
... ... @@ -85,6 +85,11 @@ std::vector<int32_t> ConvertTokensToIds(
ids.reserve(tokens.size());
for (const auto &s : tokens) {
if (!token2id.count(s)) {
#if __OHOS__
SHERPA_ONNX_LOGE("Unknown token: %{public}s", s.c_str());
#else
SHERPA_ONNX_LOGE("Unknown token: %s", s.c_str());
#endif
return {};
}
int32_t id = token2id.at(s);
... ... @@ -346,8 +351,12 @@ void Lexicon::InitLanguage(const std::string &_lang) {
} else if (!lang.empty()) {
language_ = Language::kNotChinese;
} else {
#if __OHOS__
SHERPA_ONNX_LOGE("Unknown language: %{public}s", _lang.c_str());
#else
SHERPA_ONNX_LOGE("Unknown language: %s", _lang.c_str());
exit(-1);
#endif
SHERPA_ONNX_EXIT(-1);
}
}
... ...