Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-04-13 13:28:17 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-04-13 13:28:17 +0800
Commit
68b8b88b5a01a8064f9fa04d3ecb67e675a01b61
68b8b88b
1 parent
329fe1aa
Add Python API for punctuation models. (#762)
隐藏空白字符变更
内嵌
并排对比
正在显示
14 个修改的文件
包含
136 行增加
和
6 行删除
.github/scripts/test-offline-punctuation.sh
.github/scripts/test-python.sh
.gitignore
go-api-examples/vad-asr-paraformer/run.sh
go-api-examples/vad-asr-whisper/run.sh
go-api-examples/vad-speaker-identification/run.sh
go-api-examples/vad-spoken-language-identification/run.sh
go-api-examples/vad/run.sh
python-api-examples/add-punctuation.py
sherpa-onnx/python/csrc/CMakeLists.txt
sherpa-onnx/python/csrc/offline-punctuation.cc
sherpa-onnx/python/csrc/offline-punctuation.h
sherpa-onnx/python/csrc/sherpa-onnx.cc
sherpa-onnx/python/sherpa_onnx/__init__.py
.github/scripts/test-offline-punctuation.sh
查看文件 @
68b8b88
...
...
@@ -14,7 +14,7 @@ echo "PATH: $PATH"
which
$EXE
log
"------------------------------------------------------------"
log
"Download model "
log
"Download
the punctuation
model "
log
"------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
...
...
.github/scripts/test-python.sh
查看文件 @
68b8b88
...
...
@@ -8,6 +8,18 @@ log() {
echo
-e
"
$(
date
'+%Y-%m-%d %H:%M:%S'
)
(
${
fname
}
:
${
BASH_LINENO
[0]
}
:
${
FUNCNAME
[1]
}
)
$*
"
}
log
"test offline punctuation"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
repo
=
sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
ls -lh
$repo
python3 ./python-api-examples/add-punctuation.py
rm -rf
$repo
log
"test audio tagging"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
...
...
.gitignore
查看文件 @
68b8b88
...
...
@@ -91,3 +91,4 @@ sr-data
*xcworkspace/xcuserdata/*
vits-icefall-*
sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
...
...
go-api-examples/vad-asr-paraformer/run.sh
查看文件 @
68b8b88
...
...
@@ -2,7 +2,7 @@
if
[
! -f ./silero_vad.onnx
]
;
then
curl -SL -O https://github.com/snakers4/silero-vad/
blob
/master/files/silero_vad.onnx
curl -SL -O https://github.com/snakers4/silero-vad/
raw
/master/files/silero_vad.onnx
fi
if
[
! -f ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.int8.onnx
]
;
then
...
...
go-api-examples/vad-asr-whisper/run.sh
查看文件 @
68b8b88
...
...
@@ -2,7 +2,7 @@
if
[
! -f ./silero_vad.onnx
]
;
then
curl -SL -O https://github.com/snakers4/silero-vad/
blob
/master/files/silero_vad.onnx
curl -SL -O https://github.com/snakers4/silero-vad/
raw
/master/files/silero_vad.onnx
fi
if
[
! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx
]
;
then
...
...
go-api-examples/vad-speaker-identification/run.sh
查看文件 @
68b8b88
...
...
@@ -9,7 +9,7 @@ if [ ! -f ./sr-data/enroll/fangjun-sr-1.wav ]; then
fi
if
[
! -f ./silero_vad.onnx
]
;
then
curl -SL -O https://github.com/snakers4/silero-vad/
blob
/master/files/silero_vad.onnx
curl -SL -O https://github.com/snakers4/silero-vad/
raw
/master/files/silero_vad.onnx
fi
go mod tidy
...
...
go-api-examples/vad-spoken-language-identification/run.sh
查看文件 @
68b8b88
...
...
@@ -2,7 +2,7 @@
if
[
! -f ./silero_vad.onnx
]
;
then
curl -SL -O https://github.com/snakers4/silero-vad/
blob
/master/files/silero_vad.onnx
curl -SL -O https://github.com/snakers4/silero-vad/
raw
/master/files/silero_vad.onnx
fi
if
[
! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx
]
;
then
...
...
go-api-examples/vad/run.sh
查看文件 @
68b8b88
...
...
@@ -2,7 +2,7 @@
if
[
! -f ./silero_vad.onnx
]
;
then
curl -SL -O https://github.com/snakers4/silero-vad/
blob
/master/files/silero_vad.onnx
curl -SL -O https://github.com/snakers4/silero-vad/
raw
/master/files/silero_vad.onnx
fi
go mod tidy
...
...
python-api-examples/add-punctuation.py
0 → 100755
查看文件 @
68b8b88
#!/usr/bin/env python3
"""
This script shows how to add punctuation to text using the sherpa-onnx Python API.
Please download the model from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
"""
from
pathlib
import
Path
import
sherpa_onnx
def main():
    """Add punctuation to a few sample sentences and print the results.

    The model file is looked up relative to the current working directory.

    Raises:
        ValueError: if the model file is not a regular file on disk.
    """
    model = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx"
    model_found = Path(model).is_file()
    if not model_found:
        raise ValueError(f"{model} does not exist")

    # Assemble the configuration inside-out: the model settings first, then
    # the top-level config that wraps them.
    model_config = sherpa_onnx.OfflinePunctuationModelConfig(ct_transformer=model)
    punct_config = sherpa_onnx.OfflinePunctuationConfig(model=model_config)
    punctuator = sherpa_onnx.OfflinePunctuation(punct_config)

    separator = "----------"
    samples = (
        "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
        "我们都是木头人不会说话不会动",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
    )

    for text in samples:
        text_with_punct = punctuator.add_punctuation(text)
        print(separator)
        print(f"input: {text}")
        print(f"output: {text_with_punct}")

    # Closing separator after the last result.
    print(separator)
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
...
...
sherpa-onnx/python/csrc/CMakeLists.txt
查看文件 @
68b8b88
...
...
@@ -12,6 +12,7 @@ set(srcs
offline-model-config.cc
offline-nemo-enc-dec-ctc-model-config.cc
offline-paraformer-model-config.cc
offline-punctuation.cc
offline-recognizer.cc
offline-stream.cc
offline-tdnn-model-config.cc
...
...
sherpa-onnx/python/csrc/offline-punctuation.cc
0 → 100644
查看文件 @
68b8b88
// sherpa-onnx/python/csrc/offline-punctuation.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include "sherpa-onnx/python/csrc/offline-punctuation.h"
#include "sherpa-onnx/csrc/offline-punctuation.h"
namespace
sherpa_onnx
{
// Exposes OfflinePunctuationModelConfig to Python under the same name.
//
// The Python class gets a no-argument constructor, a keyword constructor,
// read/write attributes for every field, and the validate() / __str__()
// methods.
//
// @param m  Module the class is registered on.
static void PybindOfflinePunctuationModelConfig(py::module *m) {
  using ModelConfig = OfflinePunctuationModelConfig;

  py::class_<ModelConfig> binding(*m, "OfflinePunctuationModelConfig");

  // Constructors. In the keyword form only ct_transformer is mandatory on
  // the Python side; the remaining arguments carry defaults.
  binding.def(py::init<>());
  binding.def(
      py::init<const std::string &, int32_t, bool, const std::string &>(),
      py::arg("ct_transformer"), py::arg("num_threads") = 1,
      py::arg("debug") = false, py::arg("provider") = "cpu");

  // Data members, exposed as mutable attributes.
  binding.def_readwrite("ct_transformer", &ModelConfig::ct_transformer);
  binding.def_readwrite("num_threads", &ModelConfig::num_threads);
  binding.def_readwrite("debug", &ModelConfig::debug);
  binding.def_readwrite("provider", &ModelConfig::provider);

  // Member functions.
  binding.def("validate", &ModelConfig::Validate);
  binding.def("__str__", &ModelConfig::ToString);
}
// Exposes OfflinePunctuationConfig to Python under the same name.
//
// The nested model-config class is registered first, before the class that
// uses it as a constructor argument and attribute type.
//
// @param m  Module the classes are registered on.
static void PybindOfflinePunctuationConfig(py::module *m) {
  PybindOfflinePunctuationModelConfig(m);

  using Config = OfflinePunctuationConfig;

  py::class_<Config> binding(*m, "OfflinePunctuationConfig");

  // Constructors: default, and one taking the model configuration.
  binding.def(py::init<>());
  binding.def(py::init<const OfflinePunctuationModelConfig &>(),
              py::arg("model"));

  // The single data member, exposed as a mutable attribute.
  binding.def_readwrite("model", &Config::model);

  // Member functions.
  binding.def("validate", &Config::Validate);
  binding.def("__str__", &Config::ToString);
}
// Registers the OfflinePunctuation Python class together with the config
// classes it depends on.
//
// Both the constructor and add_punctuation() are bound with a
// gil_scoped_release call guard.
//
// @param m  Module the classes are registered on.
void PybindOfflinePunctuation(py::module *m) {
  PybindOfflinePunctuationConfig(m);

  using Punctuation = OfflinePunctuation;

  py::class_<Punctuation> binding(*m, "OfflinePunctuation");

  binding.def(py::init<const OfflinePunctuationConfig &>(), py::arg("config"),
              py::call_guard<py::gil_scoped_release>());

  binding.def("add_punctuation", &Punctuation::AddPunctuation,
              py::arg("text"), py::call_guard<py::gil_scoped_release>());
}
}
// namespace sherpa_onnx
...
...
sherpa-onnx/python/csrc/offline-punctuation.h
0 → 100644
查看文件 @
68b8b88
// sherpa-onnx/python/csrc/offline-punctuation.h
//
// Copyright (c) 2024 Xiaomi Corporation
#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_PUNCTUATION_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_PUNCTUATION_H_
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
namespace
sherpa_onnx
{
// Registers the OfflinePunctuation class, and the OfflinePunctuationConfig /
// OfflinePunctuationModelConfig classes it relies on, on the module `m`.
void
PybindOfflinePunctuation
(
py
::
module
*
m
);
}
#endif // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_PUNCTUATION_H_
...
...
sherpa-onnx/python/csrc/sherpa-onnx.cc
查看文件 @
68b8b88
...
...
@@ -14,6 +14,7 @@
#include "sherpa-onnx/python/csrc/offline-ctc-fst-decoder-config.h"
#include "sherpa-onnx/python/csrc/offline-lm-config.h"
#include "sherpa-onnx/python/csrc/offline-model-config.h"
#include "sherpa-onnx/python/csrc/offline-punctuation.h"
#include "sherpa-onnx/python/csrc/offline-recognizer.h"
#include "sherpa-onnx/python/csrc/offline-stream.h"
#include "sherpa-onnx/python/csrc/online-ctc-fst-decoder-config.h"
...
...
@@ -40,6 +41,7 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
PybindWaveWriter
(
&
m
);
PybindAudioTagging
(
&
m
);
PybindOfflinePunctuation
(
&
m
);
PybindFeatures
(
&
m
);
PybindOnlineCtcFstDecoderConfig
(
&
m
);
...
...
sherpa-onnx/python/sherpa_onnx/__init__.py
查看文件 @
68b8b88
...
...
@@ -6,6 +6,9 @@ from _sherpa_onnx import (
AudioTaggingModelConfig
,
CircularBuffer
,
Display
,
OfflinePunctuation
,
OfflinePunctuationConfig
,
OfflinePunctuationModelConfig
,
OfflineStream
,
OfflineTts
,
OfflineTtsConfig
,
...
...
请
注册
或
登录
后发表评论