Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-08-26 12:03:02 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-08-26 12:03:02 +0800
Commit
9d0adcd3f53e4594112d66ebc6bfe213243121f6
9d0adcd3
1 parent
f45cd87a
Support BPE models with byte fallback. (#2531)
显示空白字符变更
内嵌
并排对比
正在显示
2 个修改的文件
包含
18 行增加
和
3 行删除
sherpa-onnx/csrc/symbol-table.cc
sherpa-onnx/csrc/symbol-table.h
sherpa-onnx/csrc/symbol-table.cc
查看文件 @
9d0adcd
...
...
@@ -171,6 +171,12 @@ SymbolTable::SymbolTable(Manager *mgr, const std::string &filename) {
// Populates the symbol table from the token stream `is`.
//
// Fills sym2id_ / id2sym_ via ReadTokens(), records whether the table is a
// byte-BPE table via IsByteBPE(), and then checks for byte-fallback tokens:
// when both "<0x00>" and "<0xFF>" are present and their IDs differ by
// exactly 255, is_bpe_with_byte_fallback_ is set and the ID of "<0x00>" is
// remembered in id_for_0x00_.
void SymbolTable::Init(std::istream &is) {
  sym2id_ = ReadTokens(is, &id2sym_);
  is_bbpe_ = IsByteBPE(sym2id_);

  const auto first_byte = sym2id_.find("<0x00>");
  if (first_byte == sym2id_.end()) {
    return;
  }

  const auto last_byte = sym2id_.find("<0xFF>");
  if (last_byte == sym2id_.end()) {
    return;
  }

  // Only the two end points of the byte-token range are compared here.
  if ((last_byte->second - first_byte->second) != 255) {
    return;
  }

  is_bpe_with_byte_fallback_ = true;
  id_for_0x00_ = first_byte->second;
}
std
::
string
SymbolTable
::
ToString
()
const
{
...
...
@@ -197,13 +203,13 @@ const std::string SymbolTable::operator[](int32_t id) const {
// id 0 is blank, id 1 is sos/eos, id 2 is unk
//
// Note: For moonshine models, 0 is <unk>, 1 is <s>, 2 is </s>
if
(
i
d
>=
3
&&
id
<=
258
&&
sym
.
size
()
==
6
&&
sym
[
0
]
==
'<'
&&
if
(
i
s_bpe_with_byte_fallback_
&&
sym
.
size
()
==
6
&&
sym
[
0
]
==
'<'
&&
sym
[
1
]
==
'0'
&&
sym
[
2
]
==
'x'
&&
sym
[
5
]
==
'>'
)
{
std
::
ostringstream
os
;
os
<<
std
::
hex
<<
std
::
uppercase
<<
(
id
-
3
);
os
<<
std
::
hex
<<
std
::
uppercase
<<
(
id
-
id_for_0x00_
);
if
(
std
::
string
(
sym
.
data
()
+
3
,
sym
.
data
()
+
5
)
==
os
.
str
())
{
uint8_t
i
=
id
-
3
;
uint8_t
i
=
id
-
id_for_0x00_
;
sym
=
std
::
string
(
&
i
,
&
i
+
1
);
}
}
...
...
sherpa-onnx/csrc/symbol-table.h
查看文件 @
9d0adcd
...
...
@@ -66,6 +66,15 @@ class SymbolTable {
private
:
std
::
unordered_map
<
std
::
string
,
int32_t
>
sym2id_
;
std
::
unordered_map
<
int32_t
,
std
::
string
>
id2sym_
;
// see https://github.com/k2-fsa/sherpa-onnx/issues/2524
bool
is_bpe_with_byte_fallback_
=
false
;
// used only when is_bpe_with_byte_fallback_ is true. It is the ID
// of <0x00> in tokens.txt
int32_t
id_for_0x00_
=
0
;
// true for byte BPE. false for non byte BPE.
bool
is_bbpe_
=
false
;
};
...
...
请
注册
或
登录
后发表评论