Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2023-10-20 12:06:30 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2023-10-20 12:06:30 +0800
Commit
3ba9a4932f52b86d668821e0036b44495cf649f9
3ba9a493
1 parent
2a932aca
Support printing input text and words after splitting (#376)
显示空白字符变更
内嵌
并排对比
正在显示
3 个修改的文件
包含
37 行增加
和
3 行删除
sherpa-onnx/csrc/lexicon.cc
sherpa-onnx/csrc/lexicon.h
sherpa-onnx/csrc/offline-tts-vits-impl.h
sherpa-onnx/csrc/lexicon.cc
查看文件 @
3ba9a49
...
...
@@ -76,7 +76,9 @@ static std::vector<int32_t> ConvertTokensToIds(
}
Lexicon
::
Lexicon
(
const
std
::
string
&
lexicon
,
const
std
::
string
&
tokens
,
const
std
::
string
&
punctuations
,
const
std
::
string
&
language
)
{
const
std
::
string
&
punctuations
,
const
std
::
string
&
language
,
bool
debug
/*= false*/
)
:
debug_
(
debug
)
{
InitLanguage
(
language
);
InitTokens
(
tokens
);
InitLexicon
(
lexicon
);
...
...
@@ -102,6 +104,20 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIdsChinese(
const
std
::
string
&
text
)
const
{
std
::
vector
<
std
::
string
>
words
=
SplitUtf8
(
text
);
if
(
debug_
)
{
fprintf
(
stderr
,
"Input text in string: %s
\n
"
,
text
.
c_str
());
fprintf
(
stderr
,
"Input text in bytes:"
);
for
(
uint8_t
c
:
text
)
{
fprintf
(
stderr
,
" %02x"
,
c
);
}
fprintf
(
stderr
,
"
\n
"
);
fprintf
(
stderr
,
"After splitting to words:"
);
for
(
const
auto
&
w
:
words
)
{
fprintf
(
stderr
,
" %s"
,
w
.
c_str
());
}
fprintf
(
stderr
,
"
\n
"
);
}
std
::
vector
<
int64_t
>
ans
;
auto
sil
=
token2id_
.
at
(
"sil"
);
...
...
@@ -134,6 +150,21 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIdsEnglish(
ToLowerCase
(
&
text
);
std
::
vector
<
std
::
string
>
words
=
SplitUtf8
(
text
);
if
(
debug_
)
{
fprintf
(
stderr
,
"Input text (lowercase) in string: %s
\n
"
,
text
.
c_str
());
fprintf
(
stderr
,
"Input text in bytes:"
);
for
(
uint8_t
c
:
text
)
{
fprintf
(
stderr
,
" %02x"
,
c
);
}
fprintf
(
stderr
,
"
\n
"
);
fprintf
(
stderr
,
"After splitting to words:"
);
for
(
const
auto
&
w
:
words
)
{
fprintf
(
stderr
,
" %s"
,
w
.
c_str
());
}
fprintf
(
stderr
,
"
\n
"
);
}
int32_t
blank
=
token2id_
.
at
(
" "
);
std
::
vector
<
int64_t
>
ans
;
...
...
sherpa-onnx/csrc/lexicon.h
查看文件 @
3ba9a49
...
...
@@ -17,7 +17,8 @@ namespace sherpa_onnx {
class
Lexicon
{
public
:
Lexicon
(
const
std
::
string
&
lexicon
,
const
std
::
string
&
tokens
,
const
std
::
string
&
punctuations
,
const
std
::
string
&
language
);
const
std
::
string
&
punctuations
,
const
std
::
string
&
language
,
bool
debug
=
false
);
std
::
vector
<
int64_t
>
ConvertTextToTokenIds
(
const
std
::
string
&
text
)
const
;
...
...
@@ -45,6 +46,7 @@ class Lexicon {
std
::
unordered_set
<
std
::
string
>
punctuations_
;
std
::
unordered_map
<
std
::
string
,
int32_t
>
token2id_
;
Language
language_
;
bool
debug_
;
//
};
...
...
sherpa-onnx/csrc/offline-tts-vits-impl.h
查看文件 @
3ba9a49
...
...
@@ -21,7 +21,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
explicit
OfflineTtsVitsImpl
(
const
OfflineTtsConfig
&
config
)
:
model_
(
std
::
make_unique
<
OfflineTtsVitsModel
>
(
config
.
model
)),
lexicon_
(
config
.
model
.
vits
.
lexicon
,
config
.
model
.
vits
.
tokens
,
model_
->
Punctuations
(),
model_
->
Language
())
{}
model_
->
Punctuations
(),
model_
->
Language
(),
config
.
model
.
debug
)
{}
GeneratedAudio
Generate
(
const
std
::
string
&
text
,
int64_t
sid
=
0
)
const
override
{
...
...
请
注册
或
登录
后发表评论