Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-10-27 09:39:09 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-10-27 09:39:09 +0800
Commit
3d3edabb5ff9fbd68f511d2fffea9353e539c2e7
3d3edabb
1 parent
052b8645
Add Go API for Moonshine models (#1479)
隐藏空白字符变更
内嵌
并排对比
正在显示
6 个修改的文件
包含
115 行增加
和
55 行删除
.github/workflows/test-go.yaml
go-api-examples/README.md
go-api-examples/non-streaming-decode-files/main.go
go-api-examples/non-streaming-decode-files/run-moonshine.sh
scripts/go/_internal/non-streaming-decode-files/run-moonshine.sh
scripts/go/sherpa_onnx.go
.github/workflows/test-go.yaml
查看文件 @
3d3edab
...
...
@@ -134,6 +134,53 @@ jobs:
name
:
${{ matrix.os }}-libs
path
:
to-upload/
-
name
:
Test non-streaming decoding files
shell
:
bash
run
:
|
cd scripts/go/_internal/non-streaming-decode-files/
ls -lh
go mod tidy
cat go.mod
go build
ls -lh
echo "Test Moonshine"
./run-moonshine.sh
rm -rf sherpa-onnx-*
echo "Test SenseVoice ctc"
./run-sense-voice-small.sh
rm -rf sherpa-onnx-sense-*
echo "Test telespeech ctc"
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-telespeech-ctc-*
echo "Test transducer"
./run-transducer.sh
rm -rf sherpa-onnx-zipformer-en-2023-06-26
echo "Test transducer"
./run-transducer.sh
rm -rf sherpa-onnx-zipformer-en-2023-06-26
echo "Test paraformer"
./run-paraformer.sh
./run-paraformer-itn.sh
rm -rf sherpa-onnx-paraformer-zh-2023-09-14
echo "Test NeMo CTC"
./run-nemo-ctc.sh
rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium
echo "Test Whisper tiny.en"
./run-whisper.sh
rm -rf sherpa-onnx-whisper-tiny.en
echo "Test Tdnn yesno"
./run-tdnn-yesno.sh
rm -rf sherpa-onnx-tdnn-yesno
-
name
:
Test adding punctuation
shell
:
bash
run
:
|
...
...
@@ -193,49 +240,6 @@ jobs:
name
:
tts-waves-${{ matrix.os }}
path
:
tts-waves
-
name
:
Test non-streaming decoding files
shell
:
bash
run
:
|
cd scripts/go/_internal/non-streaming-decode-files/
ls -lh
go mod tidy
cat go.mod
go build
ls -lh
echo "Test SenseVoice ctc"
./run-sense-voice-small.sh
rm -rf sherpa-onnx-sense-*
echo "Test telespeech ctc"
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-telespeech-ctc-*
echo "Test transducer"
./run-transducer.sh
rm -rf sherpa-onnx-zipformer-en-2023-06-26
echo "Test transducer"
./run-transducer.sh
rm -rf sherpa-onnx-zipformer-en-2023-06-26
echo "Test paraformer"
./run-paraformer.sh
./run-paraformer-itn.sh
rm -rf sherpa-onnx-paraformer-zh-2023-09-14
echo "Test NeMo CTC"
./run-nemo-ctc.sh
rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium
echo "Test Whisper tiny.en"
./run-whisper.sh
rm -rf sherpa-onnx-whisper-tiny.en
echo "Test Tdnn yesno"
./run-tdnn-yesno.sh
rm -rf sherpa-onnx-tdnn-yesno
-
name
:
Test streaming decoding files
shell
:
bash
run
:
|
...
...
go-api-examples/README.md
查看文件 @
3d3edab
...
...
@@ -6,28 +6,41 @@ Please refer to the documentation
https://k2-fsa.github.io/sherpa/onnx/go-api/index.html
for details.
-
[
./add-punctuation
](
./add-punctuation
)
It shows how to use
a punctuation model to add punctuations to text
-
[
./non-streaming-decode-files
](
./non-streaming-decode-files
)
It shows how to use
a non-streaming ASR model to decode files
-
[
./non-streaming-speaker-diarization
](
./non-streaming-speaker-diarization
)
It shows how to use
a speaker segmentation model and a speaker embedding model for speaker diarization.
-
[
./non-streaming-tts
](
./non-streaming-tts
)
It shows how to use a non-streaming TTS
model to convert text to speech
-
[
./real-time-speech-recognition-from-microphone
](
./real-time-speech-recognition-from-microphone
)
It shows how to use a streaming ASR model to recognize speech from a microphone in real-time
-
[
./speaker-identification
](
./speaker-identification
)
It shows how to use a speaker
embedding model for speaker identification.
-
[
./streaming-decode-files
](
./streaming-decode-files
)
It shows how to use a streaming
model for streaming speech recognition
-
[
./streaming-hlg-decoding
](
./streaming-hlg-decoding
)
It shows how to use a streaming
model for streaming speech recognition with HLG decoding
-
[
./vad
](
./vad
)
It shows how to use silero VAD with Golang.
-
[
./vad-asr-
whisper
](
./vad-asr-whisper
)
It shows how to use silero VAD + Whisp
er
-
[
./vad-asr-
paraformer
](
./vad-asr-paraformer
)
It shows how to use silero VAD + Paraform
er
for speech recognition.
-
[
./vad-asr-paraformer
](
./vad-asr-paraformer
)
It shows how to use silero VAD + Paraformer
-
[
./vad-asr-whisper
](
./vad-asr-whisper
)
It shows how to use silero VAD + Whisper
-
[
./vad-speaker-identification
](
./vad-speaker-identification
)
It shows how to use Go API for VAD + speaker identification.
for speech recognition.
-
[
./vad-spoken-language-identification
](
./vad-spoken-language-identification
)
It shows how to use silero VAD + Whisper
for spoken language identification.
-
[
./speaker-identification
](
./speaker-identification
)
It shows how to use Go API for speaker identification.
-
[
./vad-speaker-identification
](
./vad-speaker-identification
)
It shows how to use Go API for VAD + speaker identification.
[
sherpa-onnx
]:
https://github.com/k2-fsa/sherpa-onnx
...
...
go-api-examples/non-streaming-decode-files/main.go
查看文件 @
3d3edab
...
...
@@ -34,6 +34,11 @@ func main() {
flag
.
StringVar
(
&
config
.
ModelConfig
.
Whisper
.
Task
,
"whisper-task"
,
"transcribe"
,
"transcribe or translate"
)
flag
.
IntVar
(
&
config
.
ModelConfig
.
Whisper
.
TailPaddings
,
"whisper-tail-paddings"
,
-
1
,
"tail paddings for whisper"
)
flag
.
StringVar
(
&
config
.
ModelConfig
.
Moonshine
.
Preprocessor
,
"moonshine-preprocessor"
,
""
,
"Path to the moonshine preprocessor model"
)
flag
.
StringVar
(
&
config
.
ModelConfig
.
Moonshine
.
Encoder
,
"moonshine-encoder"
,
""
,
"Path to the moonshine encoder model"
)
flag
.
StringVar
(
&
config
.
ModelConfig
.
Moonshine
.
UncachedDecoder
,
"moonshine-uncached-decoder"
,
""
,
"Path to the moonshine uncached decoder model"
)
flag
.
StringVar
(
&
config
.
ModelConfig
.
Moonshine
.
CachedDecoder
,
"moonshine-cached-decoder"
,
""
,
"Path to the moonshine cached decoder model"
)
flag
.
StringVar
(
&
config
.
ModelConfig
.
Tdnn
.
Model
,
"tdnn-model"
,
""
,
"Path to the tdnn model"
)
flag
.
StringVar
(
&
config
.
ModelConfig
.
SenseVoice
.
Model
,
"sense-voice-model"
,
""
,
"Path to the SenseVoice model"
)
...
...
@@ -85,12 +90,8 @@ func main() {
log
.
Println
(
"Emotion: "
+
result
.
Emotion
)
log
.
Println
(
"Lang: "
+
result
.
Lang
)
log
.
Println
(
"Event: "
+
result
.
Event
)
for
_
,
v
:=
range
result
.
Timestamps
{
log
.
Printf
(
"Timestamp: %+v
\n
"
,
v
)
}
for
_
,
v
:=
range
result
.
Tokens
{
log
.
Println
(
"Token: "
+
v
)
}
log
.
Printf
(
"Timestamp: %v
\n
"
,
result
.
Timestamps
)
log
.
Printf
(
"Tokens: %v
\n
"
,
result
.
Tokens
)
log
.
Printf
(
"Wave duration: %v seconds"
,
float32
(
len
(
samples
))
/
float32
(
sampleRate
))
}
...
...
go-api-examples/non-streaming-decode-files/run-moonshine.sh
0 → 100755
查看文件 @
3d3edab
#!/usr/bin/env bash
set
-ex
if
[
! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt
]
;
then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi
go mod tidy
go build
./non-streaming-decode-files
\
--moonshine-preprocessor
=
./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx
\
--moonshine-encoder
=
./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx
\
--moonshine-uncached-decoder
=
./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx
\
--moonshine-cached-decoder
=
./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx
\
--tokens
=
./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt
\
./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
...
...
scripts/go/_internal/non-streaming-decode-files/run-moonshine.sh
0 → 120000
查看文件 @
3d3edab
../../../../go-api-examples/non-streaming-decode-files/run-moonshine.sh
\ No newline at end of file
...
...
scripts/go/sherpa_onnx.go
查看文件 @
3d3edab
...
...
@@ -382,6 +382,13 @@ type OfflineWhisperModelConfig struct {
TailPaddings
int
}
type
OfflineMoonshineModelConfig
struct
{
Preprocessor
string
Encoder
string
UncachedDecoder
string
CachedDecoder
string
}
type
OfflineTdnnModelConfig
struct
{
Model
string
}
...
...
@@ -405,6 +412,7 @@ type OfflineModelConfig struct {
Whisper
OfflineWhisperModelConfig
Tdnn
OfflineTdnnModelConfig
SenseVoice
OfflineSenseVoiceModelConfig
Moonshine
OfflineMoonshineModelConfig
Tokens
string
// Path to tokens.txt
// Number of threads to use for neural network computation
...
...
@@ -515,6 +523,18 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
c
.
model_config
.
sense_voice
.
use_itn
=
C
.
int
(
config
.
ModelConfig
.
SenseVoice
.
UseInverseTextNormalization
)
c
.
model_config
.
moonshine
.
preprocessor
=
C
.
CString
(
config
.
ModelConfig
.
Moonshine
.
Preprocessor
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
model_config
.
moonshine
.
preprocessor
))
c
.
model_config
.
moonshine
.
encoder
=
C
.
CString
(
config
.
ModelConfig
.
Moonshine
.
Encoder
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
model_config
.
moonshine
.
encoder
))
c
.
model_config
.
moonshine
.
uncached_decoder
=
C
.
CString
(
config
.
ModelConfig
.
Moonshine
.
UncachedDecoder
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
model_config
.
moonshine
.
uncached_decoder
))
c
.
model_config
.
moonshine
.
cached_decoder
=
C
.
CString
(
config
.
ModelConfig
.
Moonshine
.
CachedDecoder
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
model_config
.
moonshine
.
cached_decoder
))
c
.
model_config
.
tokens
=
C
.
CString
(
config
.
ModelConfig
.
Tokens
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
model_config
.
tokens
))
...
...
请
注册
或
登录
后发表评论