Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-06-06 14:22:39 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-06-06 14:22:39 +0800
Commit
1a43d1e37f2a65a7326e75be4607b4996f9737a8
1a43d1e3
1 parent
69347ffc
Support getting word IDs for CTC HLG decoding. (#978)
显示空白字符变更
内嵌
并排对比
正在显示
13 个修改的文件
包含
59 行增加
和
12 行删除
sherpa-onnx/csrc/offline-ctc-decoder.h
sherpa-onnx/csrc/offline-ctc-fst-decoder.cc
sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc
sherpa-onnx/csrc/offline-recognizer-ctc-impl.h
sherpa-onnx/csrc/offline-stream.cc
sherpa-onnx/csrc/offline-stream.h
sherpa-onnx/csrc/online-ctc-decoder.h
sherpa-onnx/csrc/online-ctc-fst-decoder.cc
sherpa-onnx/csrc/online-recognizer-ctc-impl.h
sherpa-onnx/csrc/online-recognizer.cc
sherpa-onnx/csrc/online-recognizer.h
sherpa-onnx/python/csrc/offline-stream.cc
sherpa-onnx/python/csrc/online-recognizer.cc
sherpa-onnx/csrc/offline-ctc-decoder.h
查看文件 @
1a43d1e
...
...
@@ -15,8 +15,16 @@ struct OfflineCtcDecoderResult {
/// The decoded token IDs
std
::
vector
<
int64_t
>
tokens
;
/// The decoded word IDs
/// Note: tokens.size() is usually not equal to words.size()
/// words is empty for greedy search decoding.
/// It is not empty when an HLG graph is used.
std
::
vector
<
int32_t
>
words
;
/// timestamps[i] contains the output frame index where tokens[i] is decoded.
/// Note: The index is after subsampling
///
/// tokens.size() == timestamps.size()
std
::
vector
<
int32_t
>
timestamps
;
};
...
...
sherpa-onnx/csrc/offline-ctc-fst-decoder.cc
查看文件 @
1a43d1e
...
...
@@ -108,6 +108,9 @@ static OfflineCtcDecoderResult DecodeOne(kaldi_decoder::FasterDecoder *decoder,
// -1 here since the input labels are incremented during graph
// construction
r
.
tokens
.
push_back
(
arc
.
ilabel
-
1
);
if
(
arc
.
olabel
!=
0
)
{
r
.
words
.
push_back
(
arc
.
olabel
);
}
r
.
timestamps
.
push_back
(
t
);
prev
=
arc
.
ilabel
;
...
...
sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc
查看文件 @
1a43d1e
...
...
@@ -64,10 +64,6 @@ OfflineParaformerGreedySearchDecoder::Decode(
if
(
timestamps
.
size
()
==
results
[
i
].
tokens
.
size
())
{
results
[
i
].
timestamps
=
std
::
move
(
timestamps
);
}
else
{
SHERPA_ONNX_LOGE
(
"time stamp for batch: %d, %d vs %d"
,
i
,
static_cast
<
int32_t
>
(
results
[
i
].
tokens
.
size
()),
static_cast
<
int32_t
>
(
timestamps
.
size
()));
}
}
}
...
...
sherpa-onnx/csrc/offline-recognizer-ctc-impl.h
查看文件 @
1a43d1e
...
...
@@ -65,6 +65,8 @@ static OfflineRecognitionResult Convert(const OfflineCtcDecoderResult &src,
r
.
timestamps
.
push_back
(
time
);
}
r
.
words
=
std
::
move
(
src
.
words
);
return
r
;
}
...
...
sherpa-onnx/csrc/offline-stream.cc
查看文件 @
1a43d1e
...
...
@@ -339,6 +339,20 @@ std::string OfflineRecognitionResult::AsJsonString() const {
}
sep
=
", "
;
}
os
<<
"], "
;
sep
=
""
;
os
<<
"
\"
"
<<
"words"
<<
"
\"
"
<<
": "
;
os
<<
"["
;
for
(
int32_t
w
:
words
)
{
os
<<
sep
<<
w
;
sep
=
", "
;
}
os
<<
"]"
;
os
<<
"}"
;
...
...
sherpa-onnx/csrc/offline-stream.h
查看文件 @
1a43d1e
...
...
@@ -30,6 +30,8 @@ struct OfflineRecognitionResult {
/// timestamps[i] records the time in seconds when tokens[i] is decoded.
std
::
vector
<
float
>
timestamps
;
std
::
vector
<
int32_t
>
words
;
std
::
string
AsJsonString
()
const
;
};
...
...
sherpa-onnx/csrc/online-ctc-decoder.h
查看文件 @
1a43d1e
...
...
@@ -22,8 +22,16 @@ struct OnlineCtcDecoderResult {
/// The decoded token IDs
std
::
vector
<
int64_t
>
tokens
;
/// The decoded word IDs
/// Note: tokens.size() is usually not equal to words.size()
/// words is empty for greedy search decoding.
/// It is not empty when an HLG graph is used.
std
::
vector
<
int32_t
>
words
;
/// timestamps[i] contains the output frame index where tokens[i] is decoded.
/// Note: The index is after subsampling
///
/// tokens.size() == timestamps.size()
std
::
vector
<
int32_t
>
timestamps
;
int32_t
num_trailing_blanks
=
0
;
...
...
sherpa-onnx/csrc/online-ctc-fst-decoder.cc
查看文件 @
1a43d1e
...
...
@@ -51,9 +51,9 @@ static void DecodeOne(const float *log_probs, int32_t num_rows,
bool
ok
=
decoder
->
GetBestPath
(
&
fst_out
);
if
(
ok
)
{
std
::
vector
<
int32_t
>
isymbols_out
;
std
::
vector
<
int32_t
>
osymbols_out_unused
;
ok
=
fst
::
GetLinearSymbolSequence
(
fst_out
,
&
isymbols_out
,
&
osymbols_out_unused
,
nullptr
);
std
::
vector
<
int32_t
>
osymbols_out
;
ok
=
fst
::
GetLinearSymbolSequence
(
fst_out
,
&
isymbols_out
,
&
osymbols_out
,
nullptr
);
std
::
vector
<
int64_t
>
tokens
;
tokens
.
reserve
(
isymbols_out
.
size
());
...
...
@@ -83,6 +83,7 @@ static void DecodeOne(const float *log_probs, int32_t num_rows,
}
result
->
tokens
=
std
::
move
(
tokens
);
result
->
words
=
std
::
move
(
osymbols_out
);
result
->
timestamps
=
std
::
move
(
timestamps
);
// no need to set frame_offset
}
...
...
sherpa-onnx/csrc/online-recognizer-ctc-impl.h
查看文件 @
1a43d1e
...
...
@@ -59,6 +59,7 @@ static OnlineRecognizerResult Convert(const OnlineCtcDecoderResult &src,
}
r
.
segment
=
segment
;
r
.
words
=
std
::
move
(
src
.
words
);
r
.
start_time
=
frames_since_start
*
frame_shift_ms
/
1000
.;
return
r
;
...
...
sherpa-onnx/csrc/online-recognizer.cc
查看文件 @
1a43d1e
...
...
@@ -22,14 +22,16 @@ namespace sherpa_onnx {
template
<
typename
T
>
std
::
string
VecToString
(
const
std
::
vector
<
T
>
&
vec
,
int32_t
precision
=
6
)
{
std
::
ostringstream
oss
;
if
(
precision
!=
0
)
{
oss
<<
std
::
fixed
<<
std
::
setprecision
(
precision
);
oss
<<
"[ "
;
}
oss
<<
"["
;
std
::
string
sep
=
""
;
for
(
const
auto
&
item
:
vec
)
{
oss
<<
sep
<<
item
;
sep
=
", "
;
}
oss
<<
"
]"
;
oss
<<
"]"
;
return
oss
.
str
();
}
...
...
@@ -38,26 +40,29 @@ template <> // explicit specialization for T = std::string
std
::
string
VecToString
<
std
::
string
>
(
const
std
::
vector
<
std
::
string
>
&
vec
,
int32_t
)
{
// ignore 2nd arg
std
::
ostringstream
oss
;
oss
<<
"[
"
;
oss
<<
"["
;
std
::
string
sep
=
""
;
for
(
const
auto
&
item
:
vec
)
{
oss
<<
sep
<<
"
\"
"
<<
item
<<
"
\"
"
;
sep
=
", "
;
}
oss
<<
"
]"
;
oss
<<
"]"
;
return
oss
.
str
();
}
std
::
string
OnlineRecognizerResult
::
AsJsonString
()
const
{
std
::
ostringstream
os
;
os
<<
"{ "
;
os
<<
"
\"
text
\"
: "
<<
"
\"
"
<<
text
<<
"
\"
"
<<
", "
;
os
<<
"
\"
text
\"
: "
<<
"
\"
"
<<
text
<<
"
\"
"
<<
", "
;
os
<<
"
\"
tokens
\"
: "
<<
VecToString
(
tokens
)
<<
", "
;
os
<<
"
\"
timestamps
\"
: "
<<
VecToString
(
timestamps
,
2
)
<<
", "
;
os
<<
"
\"
ys_probs
\"
: "
<<
VecToString
(
ys_probs
,
6
)
<<
", "
;
os
<<
"
\"
lm_probs
\"
: "
<<
VecToString
(
lm_probs
,
6
)
<<
", "
;
os
<<
"
\"
context_scores
\"
: "
<<
VecToString
(
context_scores
,
6
)
<<
", "
;
os
<<
"
\"
segment
\"
: "
<<
segment
<<
", "
;
os
<<
"
\"
words
\"
: "
<<
VecToString
(
words
,
0
)
<<
", "
;
os
<<
"
\"
start_time
\"
: "
<<
std
::
fixed
<<
std
::
setprecision
(
2
)
<<
start_time
<<
", "
;
os
<<
"
\"
is_final
\"
: "
<<
(
is_final
?
"true"
:
"false"
);
...
...
sherpa-onnx/csrc/online-recognizer.h
查看文件 @
1a43d1e
...
...
@@ -47,6 +47,8 @@ struct OnlineRecognizerResult {
/// log-domain scores from "hot-phrase" contextual boosting
std
::
vector
<
float
>
context_scores
;
std
::
vector
<
int32_t
>
words
;
/// ID of this segment
/// When an endpoint is detected, it is incremented
int32_t
segment
=
0
;
...
...
sherpa-onnx/python/csrc/offline-stream.cc
查看文件 @
1a43d1e
...
...
@@ -34,6 +34,8 @@ static void PybindOfflineRecognitionResult(py::module *m) { // NOLINT
})
.
def_property_readonly
(
"tokens"
,
[](
const
PyClass
&
self
)
{
return
self
.
tokens
;
})
.
def_property_readonly
(
"words"
,
[](
const
PyClass
&
self
)
{
return
self
.
words
;
})
.
def_property_readonly
(
"timestamps"
,
[](
const
PyClass
&
self
)
{
return
self
.
timestamps
;
});
}
...
...
sherpa-onnx/python/csrc/online-recognizer.cc
查看文件 @
1a43d1e
...
...
@@ -41,6 +41,9 @@ static void PybindOnlineRecognizerResult(py::module *m) {
.
def_property_readonly
(
"segment"
,
[](
PyClass
&
self
)
->
int32_t
{
return
self
.
segment
;
})
.
def_property_readonly
(
"words"
,
[](
PyClass
&
self
)
->
std
::
vector
<
int32_t
>
{
return
self
.
words
;
})
.
def_property_readonly
(
"is_final"
,
[](
PyClass
&
self
)
->
bool
{
return
self
.
is_final
;
})
.
def
(
"__str__"
,
&
PyClass
::
AsJsonString
,
py
::
call_guard
<
py
::
gil_scoped_release
>
())
...
...
请
注册
或
登录
后发表评论