Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-01-03 16:03:24 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-01-03 16:03:24 +0800
Commit
e215d0c39aae2e3ab6ed71b0d3b809742d0f52a0
e215d0c3
1 parent
d0114217
Fix Byte BPE string results for Python. (#512)
It ignores invalid UTF8 strings.
隐藏空白字符变更
内嵌
并排对比
正在显示
6 个修改的文件
包含
54 行增加
和
3 行删除
sherpa-onnx/csrc/offline-recognizer-ctc-impl.h
sherpa-onnx/csrc/offline-recognizer-transducer-impl.h
sherpa-onnx/csrc/online-recognizer-ctc-impl.h
sherpa-onnx/csrc/online-recognizer-transducer-impl.h
sherpa-onnx/python/csrc/offline-stream.cc
sherpa-onnx/python/csrc/online-recognizer.cc
sherpa-onnx/csrc/offline-recognizer-ctc-impl.h
查看文件 @
e215d0c
...
...
@@ -5,7 +5,9 @@
#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_CTC_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_CTC_IMPL_H_
#include <ios>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
...
...
@@ -42,6 +44,15 @@ static OfflineRecognitionResult Convert(const OfflineCtcDecoderResult &src,
}
auto
sym
=
sym_table
[
src
.
tokens
[
i
]];
text
.
append
(
sym
);
if
(
sym
.
size
()
==
1
&&
sym
[
0
]
!=
' '
)
{
// for byte bpe models
std
::
ostringstream
os
;
os
<<
"<0x"
<<
std
::
hex
<<
std
::
uppercase
<<
(
static_cast
<
int32_t
>
(
sym
[
0
])
&
0xff
)
<<
">"
;
sym
=
os
.
str
();
}
r
.
tokens
.
push_back
(
std
::
move
(
sym
));
}
r
.
text
=
std
::
move
(
text
);
...
...
sherpa-onnx/csrc/offline-recognizer-transducer-impl.h
查看文件 @
e215d0c
...
...
@@ -6,8 +6,10 @@
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_TRANSDUCER_IMPL_H_
#include <fstream>
#include <ios>
#include <memory>
#include <regex> // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include <vector>
...
...
@@ -44,6 +46,14 @@ static OfflineRecognitionResult Convert(
auto
sym
=
sym_table
[
i
];
text
.
append
(
sym
);
if
(
sym
.
size
()
==
1
&&
sym
[
0
]
!=
' '
)
{
// for byte bpe models
std
::
ostringstream
os
;
os
<<
"<0x"
<<
std
::
hex
<<
std
::
uppercase
<<
(
static_cast
<
int32_t
>
(
sym
[
0
])
&
0xff
)
<<
">"
;
sym
=
os
.
str
();
}
r
.
tokens
.
push_back
(
std
::
move
(
sym
));
}
r
.
text
=
std
::
move
(
text
);
...
...
sherpa-onnx/csrc/online-recognizer-ctc-impl.h
查看文件 @
e215d0c
...
...
@@ -6,7 +6,9 @@
#define SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_CTC_IMPL_H_
#include <algorithm>
#include <ios>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
...
...
@@ -35,6 +37,15 @@ static OnlineRecognizerResult Convert(const OnlineCtcDecoderResult &src,
auto
sym
=
sym_table
[
i
];
r
.
text
.
append
(
sym
);
if
(
sym
.
size
()
==
1
&&
sym
[
0
]
!=
' '
)
{
// for byte bpe models
std
::
ostringstream
os
;
os
<<
"<0x"
<<
std
::
hex
<<
std
::
uppercase
<<
(
static_cast
<
int32_t
>
(
sym
[
0
])
&
0xff
)
<<
">"
;
sym
=
os
.
str
();
}
r
.
tokens
.
push_back
(
std
::
move
(
sym
));
}
...
...
sherpa-onnx/csrc/online-recognizer-transducer-impl.h
查看文件 @
e215d0c
...
...
@@ -6,8 +6,10 @@
#define SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_TRANSDUCER_IMPL_H_
#include <algorithm>
#include <ios>
#include <memory>
#include <regex> // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include <vector>
...
...
@@ -47,6 +49,15 @@ static OnlineRecognizerResult Convert(const OnlineTransducerDecoderResult &src,
auto
sym
=
sym_table
[
i
];
r
.
text
.
append
(
sym
);
if
(
sym
.
size
()
==
1
&&
sym
[
0
]
!=
' '
)
{
// for byte bpe models
std
::
ostringstream
os
;
os
<<
"<0x"
<<
std
::
hex
<<
std
::
uppercase
<<
(
static_cast
<
int32_t
>
(
sym
[
0
])
&
0xff
)
<<
">"
;
sym
=
os
.
str
();
}
r
.
tokens
.
push_back
(
std
::
move
(
sym
));
}
...
...
sherpa-onnx/python/csrc/offline-stream.cc
查看文件 @
e215d0c
...
...
@@ -23,8 +23,12 @@ Args:
static
void
PybindOfflineRecognitionResult
(
py
::
module
*
m
)
{
// NOLINT
using
PyClass
=
OfflineRecognitionResult
;
py
::
class_
<
PyClass
>
(
*
m
,
"OfflineRecognitionResult"
)
.
def_property_readonly
(
"text"
,
[](
const
PyClass
&
self
)
{
return
self
.
text
;
})
.
def_property_readonly
(
"text"
,
[](
const
PyClass
&
self
)
->
py
::
str
{
return
py
::
str
(
PyUnicode_DecodeUTF8
(
self
.
text
.
c_str
(),
self
.
text
.
size
(),
"ignore"
));
})
.
def_property_readonly
(
"tokens"
,
[](
const
PyClass
&
self
)
{
return
self
.
tokens
;
})
.
def_property_readonly
(
...
...
sherpa-onnx/python/csrc/online-recognizer.cc
查看文件 @
e215d0c
...
...
@@ -15,7 +15,11 @@ static void PybindOnlineRecognizerResult(py::module *m) {
using
PyClass
=
OnlineRecognizerResult
;
py
::
class_
<
PyClass
>
(
*
m
,
"OnlineRecognizerResult"
)
.
def_property_readonly
(
"text"
,
[](
PyClass
&
self
)
->
std
::
string
{
return
self
.
text
;
})
"text"
,
[](
PyClass
&
self
)
->
py
::
str
{
return
py
::
str
(
PyUnicode_DecodeUTF8
(
self
.
text
.
c_str
(),
self
.
text
.
size
(),
"ignore"
));
})
.
def_property_readonly
(
"tokens"
,
[](
PyClass
&
self
)
->
std
::
vector
<
std
::
string
>
{
return
self
.
tokens
;
})
...
...
请
注册
或
登录
后发表评论