Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
继续操作前请注册或者登录。
Authored by
Fangjun Kuang
2025-08-14 17:39:13 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-08-14 17:39:13 +0800
Commit
bec3e3dc4afc506e79df6f1ea000dd456377a6af
bec3e3dc
1 parent
1357950a
Add more debug info for vits tts (#2491)
隐藏空白字符变更
内嵌
并排对比
正在显示
2 个修改的文件
包含
47 行增加
和
14 行删除
sherpa-onnx/csrc/jieba-lexicon.cc
sherpa-onnx/csrc/lexicon.cc
sherpa-onnx/csrc/jieba-lexicon.cc
查看文件 @
bec3e3d
...
...
@@ -188,23 +188,34 @@ class JiebaLexicon::Impl {
private
:
std
::
vector
<
int32_t
>
ConvertWordToIds
(
const
std
::
string
&
w
)
const
{
if
(
word2ids_
.
count
(
w
))
{
return
word2ids_
.
at
(
w
);
}
if
(
token2id_
.
count
(
w
))
{
return
{
token2id_
.
at
(
w
)};
}
std
::
vector
<
int32_t
>
ans
;
std
::
vector
<
std
::
string
>
words
=
SplitUtf8
(
w
);
for
(
const
auto
&
word
:
words
)
{
if
(
word2ids_
.
count
(
word
))
{
auto
ids
=
ConvertWordToIds
(
word
);
ans
.
insert
(
ans
.
end
(),
ids
.
begin
(),
ids
.
end
());
if
(
word2ids_
.
count
(
w
))
{
ans
=
word2ids_
.
at
(
w
);
}
else
if
(
token2id_
.
count
(
w
))
{
ans
=
{
token2id_
.
at
(
w
)};
}
else
{
std
::
vector
<
std
::
string
>
words
=
SplitUtf8
(
w
);
for
(
const
auto
&
word
:
words
)
{
if
(
word2ids_
.
count
(
word
))
{
auto
ids
=
ConvertWordToIds
(
word
);
ans
.
insert
(
ans
.
end
(),
ids
.
begin
(),
ids
.
end
());
}
}
}
if
(
debug_
)
{
std
::
ostringstream
os
;
os
<<
w
<<
": "
;
for
(
auto
i
:
ans
)
{
os
<<
id2token_
.
at
(
i
)
<<
" "
;
}
os
<<
"
\n
"
;
#if __OHOS__
SHERPA_ONNX_LOGE
(
"%{public}s"
,
os
.
str
().
c_str
());
#else
SHERPA_ONNX_LOGE
(
"%s"
,
os
.
str
().
c_str
());
#endif
}
return
ans
;
}
...
...
@@ -234,6 +245,12 @@ class JiebaLexicon::Impl {
if
(
!
token2id_
.
count
(
";"
)
&&
token2id_
.
count
(
","
))
{
token2id_
[
";"
]
=
token2id_
[
","
];
}
if
(
debug_
)
{
for
(
const
auto
&
p
:
token2id_
)
{
id2token_
[
p
.
second
]
=
p
.
first
;
}
}
}
void
InitLexicon
(
std
::
istream
&
is
)
{
...
...
@@ -272,6 +289,11 @@ class JiebaLexicon::Impl {
std
::
vector
<
int32_t
>
ids
=
ConvertTokensToIds
(
token2id_
,
token_list
);
if
(
ids
.
empty
())
{
#if __OHOS__
SHERPA_ONNX_LOGE
(
"Empty token ids for %{public}s"
,
line
.
c_str
());
#else
SHERPA_ONNX_LOGE
(
"Empty token ids for %s"
,
line
.
c_str
());
#endif
continue
;
}
...
...
@@ -286,6 +308,8 @@ class JiebaLexicon::Impl {
// tokens.txt is saved in token2id_
std
::
unordered_map
<
std
::
string
,
int32_t
>
token2id_
;
std
::
unordered_map
<
int32_t
,
std
::
string
>
id2token_
;
std
::
unique_ptr
<
cppjieba
::
Jieba
>
jieba_
;
bool
debug_
=
false
;
};
...
...
sherpa-onnx/csrc/lexicon.cc
查看文件 @
bec3e3d
...
...
@@ -85,6 +85,11 @@ std::vector<int32_t> ConvertTokensToIds(
ids
.
reserve
(
tokens
.
size
());
for
(
const
auto
&
s
:
tokens
)
{
if
(
!
token2id
.
count
(
s
))
{
#if __OHOS__
SHERPA_ONNX_LOGE
(
"Unknown token: %{public}s"
,
s
.
c_str
());
#else
SHERPA_ONNX_LOGE
(
"Unknown token: %s"
,
s
.
c_str
());
#endif
return
{};
}
int32_t
id
=
token2id
.
at
(
s
);
...
...
@@ -346,8 +351,12 @@ void Lexicon::InitLanguage(const std::string &_lang) {
}
else
if
(
!
lang
.
empty
())
{
language_
=
Language
::
kNotChinese
;
}
else
{
#if __OHOS__
SHERPA_ONNX_LOGE
(
"Unknown language: %{public}s"
,
_lang
.
c_str
());
#else
SHERPA_ONNX_LOGE
(
"Unknown language: %s"
,
_lang
.
c_str
());
exit
(
-
1
);
#endif
SHERPA_ONNX_EXIT
(
-
1
);
}
}
...
...
请
注册
或
登录
后发表评论