Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Lim Yao Chong
2024-09-09 10:26:53 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-09-09 10:26:53 +0800
Commit
3bffc24d64ead94ae1e4c156cc9d638febedecb9
3bffc24d
1 parent
857cb507
Add Python binding for online punctuation models (#1312)
显示空白字符变更
内嵌
并排对比
正在显示
8 个修改的文件
包含
133 行增加
和
0 行删除
.github/scripts/test-python.sh
.gitignore
python-api-examples/add-punctuation-online.py
sherpa-onnx/python/csrc/CMakeLists.txt
sherpa-onnx/python/csrc/online-punctuation.cc
sherpa-onnx/python/csrc/online-punctuation.h
sherpa-onnx/python/csrc/sherpa-onnx.cc
sherpa-onnx/python/sherpa_onnx/__init__.py
.github/scripts/test-python.sh
查看文件 @
3bffc24
...
...
@@ -91,6 +91,18 @@ python3 ./python-api-examples/add-punctuation.py
rm -rf
$repo
log
"test online punctuation"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
repo
=
sherpa-onnx-online-punct-en-2024-08-06
ls -lh
$repo
python3 ./python-api-examples/add-punctuation-online.py
rm -rf
$repo
log
"test audio tagging"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
...
...
.gitignore
查看文件 @
3bffc24
...
...
@@ -117,3 +117,4 @@ sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
vits-melo-tts-zh_en
*.o
*.ppu
sherpa-onnx-online-punct-en-2024-08-06
...
...
python-api-examples/add-punctuation-online.py
0 → 100755
查看文件 @
3bffc24
#!/usr/bin/env python3
"""
This script shows how to add punctuation to text using the sherpa-onnx Python API.
Please download the model from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
"""
from
pathlib
import
Path
import
sherpa_onnx
# Demo entry point.
#
# Builds an OnlinePunctuation object from the model files under
# ./sherpa-onnx-online-punct-en-2024-08-06/ and, for each sample sentence,
# prints the input text and the text returned by add_punctuation_with_case().
#
# Raises:
#   ValueError: if model.onnx or bpe.vocab is not an existing file.
def
main
():
model
=
"./sherpa-onnx-online-punct-en-2024-08-06/model.onnx"
bpe
=
"./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab"
# Fail early with a readable message if the model has not been downloaded.
if
not
Path
(
model
)
.
is_file
():
raise
ValueError
(
f
"{model} does not exist"
)
if
not
Path
(
bpe
)
.
is_file
():
raise
ValueError
(
f
"{bpe} does not exist"
)
# The config objects nest: model config -> punctuation config -> punctuator.
model_config
=
sherpa_onnx
.
OnlinePunctuationModelConfig
(
cnn_bilstm
=
model
,
bpe_vocab
=
bpe
)
config
=
sherpa_onnx
.
OnlinePunctuationConfig
(
model_config
=
model_config
)
punct
=
sherpa_onnx
.
OnlinePunctuation
(
config
)
# Sample inputs: one lowercase and unpunctuated, one unpunctuated sentence.
texts
=
[
"how are you i am fine thank you"
,
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry"
,
]
for
text
in
texts
:
text_with_punct
=
punct
.
add_punctuation_with_case
(
text
)
print
(
"----------"
)
print
(
f
"input : {text}"
)
print
(
f
"output: {text_with_punct}"
)
# NOTE(review): indentation was lost in this copy, so it is unclear whether
# this closing separator runs once per sentence or once after the loop --
# confirm against the original script.
print
(
"----------"
)
# Run the demo only when executed as a script, not when imported.
if
__name__
==
"__main__"
:
main
()
...
...
sherpa-onnx/python/csrc/CMakeLists.txt
查看文件 @
3bffc24
...
...
@@ -27,6 +27,7 @@ set(srcs
online-model-config.cc
online-nemo-ctc-model-config.cc
online-paraformer-model-config.cc
online-punctuation.cc
online-recognizer.cc
online-stream.cc
online-transducer-model-config.cc
...
...
sherpa-onnx/python/csrc/online-punctuation.cc
0 → 100644
查看文件 @
3bffc24
// sherpa-onnx/python/csrc/online-punctuation.cc
//
// Copyright (c) 2024
#include "sherpa-onnx/python/csrc/online-punctuation.h"
#include "sherpa-onnx/csrc/online-punctuation.h"
namespace
sherpa_onnx
{
// Registers the Python class `OnlinePunctuationModelConfig` on module `m`.
//
// The class can be constructed with no arguments, or with
// (cnn_bilstm, bpe_vocab, num_threads=1, debug=False, provider="cpu").
// Each constructor argument is also exposed as a read/write attribute of the
// same name. `validate()` and `__str__` forward to Validate() and ToString().
static void PybindOnlinePunctuationModelConfig(py::module *m) {
  using ModelConfig = OnlinePunctuationModelConfig;

  py::class_<ModelConfig> binding(*m, "OnlinePunctuationModelConfig");

  // Constructors. Registration order is kept: default first, then the
  // keyword-capable overload.
  binding.def(py::init<>());
  binding.def(py::init<const std::string &, const std::string &, int32_t, bool,
                       const std::string &>(),
              py::arg("cnn_bilstm"), py::arg("bpe_vocab"),
              py::arg("num_threads") = 1, py::arg("debug") = false,
              py::arg("provider") = "cpu");

  // Data members, readable and writable from Python.
  binding.def_readwrite("cnn_bilstm", &ModelConfig::cnn_bilstm);
  binding.def_readwrite("bpe_vocab", &ModelConfig::bpe_vocab);
  binding.def_readwrite("num_threads", &ModelConfig::num_threads);
  binding.def_readwrite("debug", &ModelConfig::debug);
  binding.def_readwrite("provider", &ModelConfig::provider);

  // Methods.
  binding.def("validate", &ModelConfig::Validate);
  binding.def("__str__", &ModelConfig::ToString);
}
// Registers the Python class `OnlinePunctuationConfig` on module `m`.
//
// `OnlinePunctuationModelConfig` is registered first, since this class takes
// it as a constructor argument and exposes it as an attribute. Note that the
// Python attribute is called `model_config` while the C++ member is `model`.
static void PybindOnlinePunctuationConfig(py::module *m) {
  PybindOnlinePunctuationModelConfig(m);

  using Config = OnlinePunctuationConfig;

  py::class_<Config> binding(*m, "OnlinePunctuationConfig");

  // Constructors: default, then one taking the model config.
  binding.def(py::init<>());
  binding.def(py::init<const OnlinePunctuationModelConfig &>(),
              py::arg("model_config"));

  binding.def_readwrite("model_config", &Config::model);

  binding.def("validate", &Config::Validate);
  binding.def("__str__", &Config::ToString);
}
// Registers the Python class `OnlinePunctuation` on module `m`, together with
// the config classes it depends on.
//
// Both the constructor and `add_punctuation_with_case(text)` are wrapped in a
// call guard that releases the Python GIL while the C++ code runs.
void PybindOnlinePunctuation(py::module *m) {
  PybindOnlinePunctuationConfig(m);

  using Punctuation = OnlinePunctuation;
  using ReleaseGil = py::call_guard<py::gil_scoped_release>;

  py::class_<Punctuation> binding(*m, "OnlinePunctuation");

  binding.def(py::init<const OnlinePunctuationConfig &>(), py::arg("config"),
              ReleaseGil());

  binding.def("add_punctuation_with_case",
              &Punctuation::AddPunctuationWithCase, py::arg("text"),
              ReleaseGil());
}
}
// namespace sherpa_onnx
...
...
sherpa-onnx/python/csrc/online-punctuation.h
0 → 100644
查看文件 @
3bffc24
// sherpa-onnx/python/csrc/online-punctuation.h
//
// Copyright (c) 2024
#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_PUNCTUATION_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_PUNCTUATION_H_
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
namespace
sherpa_onnx
{
// Registers the online punctuation Python bindings (OnlinePunctuation and its
// config classes) on the pybind11 module `m`.
void
PybindOnlinePunctuation
(
py
::
module
*
m
);
}
// namespace sherpa_onnx
#endif // SHERPA_ONNX_PYTHON_CSRC_ONLINE_PUNCTUATION_H_
...
...
sherpa-onnx/python/csrc/sherpa-onnx.cc
查看文件 @
3bffc24
...
...
@@ -20,6 +20,7 @@
#include "sherpa-onnx/python/csrc/online-ctc-fst-decoder-config.h"
#include "sherpa-onnx/python/csrc/online-lm-config.h"
#include "sherpa-onnx/python/csrc/online-model-config.h"
#include "sherpa-onnx/python/csrc/online-punctuation.h"
#include "sherpa-onnx/python/csrc/online-recognizer.h"
#include "sherpa-onnx/python/csrc/online-stream.h"
#include "sherpa-onnx/python/csrc/speaker-embedding-extractor.h"
...
...
@@ -42,6 +43,7 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
PybindWaveWriter
(
&
m
);
PybindAudioTagging
(
&
m
);
PybindOfflinePunctuation
(
&
m
);
PybindOnlinePunctuation
(
&
m
);
PybindFeatures
(
&
m
);
PybindOnlineCtcFstDecoderConfig
(
&
m
);
...
...
sherpa-onnx/python/sherpa_onnx/__init__.py
查看文件 @
3bffc24
...
...
@@ -15,6 +15,9 @@ from _sherpa_onnx import (
OfflineTtsModelConfig
,
OfflineTtsVitsModelConfig
,
OfflineZipformerAudioTaggingModelConfig
,
OnlinePunctuation
,
OnlinePunctuationConfig
,
OnlinePunctuationModelConfig
,
OnlineStream
,
SileroVadModelConfig
,
SpeakerEmbeddingExtractor
,
...
...
请
注册
或
登录
后发表评论