Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-03-27 19:40:25 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-03-27 19:40:25 +0800
Commit
a042f440767ef6bd78b696db68318e13da852aad
a042f440
1 parent
12efbf73
Add Golang API for spoken language identification. (#709)
隐藏空白字符变更
内嵌
并排对比
正在显示
10 个修改的文件
包含
242 行增加
和
1 行删除
go-api-examples/README.md
go-api-examples/vad-spoken-language-identification/go.mod
go-api-examples/vad-spoken-language-identification/main.go
go-api-examples/vad-spoken-language-identification/run.sh
scripts/go/_internal/vad-spoken-language-identification/.gitignore
scripts/go/_internal/vad-spoken-language-identification/go.mod
scripts/go/_internal/vad-spoken-language-identification/main.go
scripts/go/_internal/vad-spoken-language-identification/run.sh
scripts/go/sherpa_onnx.go
sherpa-onnx/csrc/spoken-language-identification.cc
go-api-examples/README.md
查看文件 @
a042f44
...
...
@@ -23,4 +23,7 @@ for details.
-
[
./vad-asr-paraformer
](
./vad-asr-paraformer
)
It shows how to use silero VAD + Paraformer
for speech recognition.
-
[
./vad-spoken-language-identification
](
./vad-spoken-language-identification
)
It shows how to use silero VAD + Whisper
for spoken language identification.
[
sherpa-onnx
]:
https://github.com/k2-fsa/sherpa-onnx
...
...
go-api-examples/vad-spoken-language-identification/go.mod
0 → 100644
查看文件 @
a042f44
module vad-spoken-language-identification
go 1.12
...
...
go-api-examples/vad-spoken-language-identification/main.go
0 → 100644
查看文件 @
a042f44
package
main
import
(
"fmt"
iso639
"github.com/barbashov/iso639-3"
"github.com/gordonklaus/portaudio"
sherpa
"github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
"log"
)
func
main
()
{
log
.
SetFlags
(
log
.
LstdFlags
|
log
.
Lmicroseconds
)
// 1. Create VAD
config
:=
sherpa
.
VadModelConfig
{}
// Please download silero_vad.onnx from
// https://github.com/snakers4/silero-vad/blob/master/files/silero_vad.onnx
config
.
SileroVad
.
Model
=
"./silero_vad.onnx"
config
.
SileroVad
.
Threshold
=
0.5
config
.
SileroVad
.
MinSilenceDuration
=
0.5
config
.
SileroVad
.
MinSpeechDuration
=
0.25
config
.
SileroVad
.
WindowSize
=
512
config
.
SampleRate
=
16000
config
.
NumThreads
=
1
config
.
Provider
=
"cpu"
config
.
Debug
=
1
var
bufferSizeInSeconds
float32
=
20
vad
:=
sherpa
.
NewVoiceActivityDetector
(
&
config
,
bufferSizeInSeconds
)
defer
sherpa
.
DeleteVoiceActivityDetector
(
vad
)
// 2. Create spoken language identifier
c
:=
sherpa
.
SpokenLanguageIdentificationConfig
{}
c
.
Whisper
.
Encoder
=
"./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"
c
.
Whisper
.
Decoder
=
"./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"
c
.
NumThreads
=
2
c
.
Debug
=
1
c
.
Provider
=
"cpu"
slid
:=
sherpa
.
NewSpokenLanguageIdentification
(
&
c
)
defer
sherpa
.
DeleteSpokenLanguageIdentification
(
slid
)
err
:=
portaudio
.
Initialize
()
if
err
!=
nil
{
log
.
Fatalf
(
"Unable to initialize portaudio: %v
\n
"
,
err
)
}
defer
portaudio
.
Terminate
()
default_device
,
err
:=
portaudio
.
DefaultInputDevice
()
if
err
!=
nil
{
log
.
Fatal
(
"Failed to get default input device: %v
\n
"
,
err
)
}
log
.
Printf
(
"Selected default input device: %s
\n
"
,
default_device
.
Name
)
param
:=
portaudio
.
StreamParameters
{}
param
.
Input
.
Device
=
default_device
param
.
Input
.
Channels
=
1
param
.
Input
.
Latency
=
default_device
.
DefaultHighInputLatency
param
.
SampleRate
=
float64
(
config
.
SampleRate
)
param
.
FramesPerBuffer
=
0
param
.
Flags
=
portaudio
.
ClipOff
// you can choose another value for 0.1 if you want
samplesPerCall
:=
int32
(
param
.
SampleRate
*
0.1
)
// 0.1 second
samples
:=
make
([]
float32
,
samplesPerCall
)
s
,
err
:=
portaudio
.
OpenStream
(
param
,
samples
)
if
err
!=
nil
{
log
.
Fatalf
(
"Failed to open the stream"
)
}
defer
s
.
Close
()
chk
(
s
.
Start
())
log
.
Print
(
"Started! Please speak"
)
printed
:=
false
k
:=
0
for
{
chk
(
s
.
Read
())
vad
.
AcceptWaveform
(
samples
)
if
vad
.
IsSpeech
()
&&
!
printed
{
printed
=
true
log
.
Print
(
"Detected speech
\n
"
)
}
if
!
vad
.
IsSpeech
()
{
printed
=
false
}
for
!
vad
.
IsEmpty
()
{
speechSegment
:=
vad
.
Front
()
vad
.
Pop
()
duration
:=
float32
(
len
(
speechSegment
.
Samples
))
/
float32
(
config
.
SampleRate
)
audio
:=
&
sherpa
.
GeneratedAudio
{}
audio
.
Samples
=
speechSegment
.
Samples
audio
.
SampleRate
=
config
.
SampleRate
// Now decode it
go
decode
(
slid
,
audio
,
k
)
k
+=
1
log
.
Printf
(
"Duration: %.2f seconds
\n
"
,
duration
)
}
}
chk
(
s
.
Stop
())
}
func
decode
(
slid
*
sherpa
.
SpokenLanguageIdentification
,
audio
*
sherpa
.
GeneratedAudio
,
id
int
)
{
stream
:=
slid
.
CreateStream
()
defer
sherpa
.
DeleteOfflineStream
(
stream
)
stream
.
AcceptWaveform
(
audio
.
SampleRate
,
audio
.
Samples
)
result
:=
slid
.
Compute
(
stream
)
lang
:=
iso639
.
FromPart1Code
(
result
.
Lang
)
.
Name
log
.
Printf
(
"Detected language: %v"
,
lang
)
duration
:=
float32
(
len
(
audio
.
Samples
))
/
float32
(
audio
.
SampleRate
)
filename
:=
fmt
.
Sprintf
(
"seg-%d-%.2f-seconds-%s.wav"
,
id
,
duration
,
lang
)
ok
:=
audio
.
Save
(
filename
)
if
ok
{
log
.
Printf
(
"Saved to %s"
,
filename
)
}
log
.
Print
(
"----------
\n
"
)
}
// chk panics with err when err is non-nil and does nothing otherwise.
func chk(err error) {
	if err == nil {
		return
	}
	panic(err)
}
...
...
go-api-examples/vad-spoken-language-identification/run.sh
0 → 100755
查看文件 @
a042f44
#!/usr/bin/env bash
#
# Downloads the silero VAD model and the Whisper tiny model if they are not
# present yet, then builds and runs the example.

# Stop at the first failing command so that a failed download or build is not
# followed by an attempt to run the example.
set -e

if [ ! -f ./silero_vad.onnx ]; then
  # A github.com/<repo>/blob/... address serves the HTML file viewer, not the
  # model itself, so fetch the model file from a direct download link.
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

go mod tidy
go build
./vad-spoken-language-identification
...
...
scripts/go/_internal/vad-spoken-language-identification/.gitignore
0 → 100644
查看文件 @
a042f44
vad-spoken-language-identification
...
...
scripts/go/_internal/vad-spoken-language-identification/go.mod
0 → 100644
查看文件 @
a042f44
module vad-spoken-language-identification
go 1.12
replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
...
...
scripts/go/_internal/vad-spoken-language-identification/main.go
0 → 120000
查看文件 @
a042f44
/
Users
/
fangjun
/
open
-
source
/
sherpa
-
onnx
/
go
-
api
-
examples
/
vad
-
spoken
-
language
-
identification
/
main
.
go
\ No newline at end of file
...
...
scripts/go/_internal/vad-spoken-language-identification/run.sh
0 → 120000
查看文件 @
a042f44
/Users/fangjun/open-source/sherpa-onnx/go-api-examples/vad-spoken-language-identification/run.sh
\ No newline at end of file
...
...
scripts/go/sherpa_onnx.go
查看文件 @
a042f44
...
...
@@ -783,3 +783,72 @@ func (vad *VoiceActivityDetector) Front() *SpeechSegment {
// Reset calls SherpaOnnxVoiceActivityDetectorReset on the underlying C
// detector handle.
func (vad *VoiceActivityDetector) Reset() {
	C.SherpaOnnxVoiceActivityDetectorReset(vad.impl)
}
// Spoken language identification

// SpokenLanguageIdentificationWhisperConfig holds the Whisper settings that
// NewSpokenLanguageIdentification copies into the C configuration.
type SpokenLanguageIdentificationWhisperConfig struct {
	// Encoder is passed to C as whisper.encoder. The Go example sets it to
	// the path of a Whisper encoder ONNX file.
	Encoder string
	// Decoder is passed to C as whisper.decoder. The Go example sets it to
	// the path of a Whisper decoder ONNX file.
	Decoder string
	// TailPaddings is passed to C as whisper.tail_paddings.
	// NOTE(review): its exact meaning is defined by the C library — confirm.
	TailPaddings int
}
// SpokenLanguageIdentificationConfig is the configuration accepted by
// NewSpokenLanguageIdentification.
type SpokenLanguageIdentificationConfig struct {
	// Whisper holds the Whisper model settings.
	Whisper SpokenLanguageIdentificationWhisperConfig
	// NumThreads is passed to C as num_threads.
	NumThreads int
	// Debug is passed to C as debug; the Go example sets it to 1.
	// NOTE(review): presumably non-zero enables debug output — confirm.
	Debug int
	// Provider is passed to C as provider; the Go example uses "cpu".
	Provider string
}
// SpokenLanguageIdentification wraps the handle of a C spoken language
// identifier. NewSpokenLanguageIdentification creates it and
// DeleteSpokenLanguageIdentification destroys the C object.
type SpokenLanguageIdentification struct {
	impl *C.struct_SherpaOnnxSpokenLanguageIdentification
}
// SpokenLanguageIdentificationResult is the value returned by
// SpokenLanguageIdentification.Compute.
type SpokenLanguageIdentificationResult struct {
	// Lang is the lang string of the C result copied into Go memory.
	// NOTE(review): the Go example looks it up as an ISO 639-1 code, so it is
	// presumably a short language code such as "en" — confirm.
	Lang string
}
func
NewSpokenLanguageIdentification
(
config
*
SpokenLanguageIdentificationConfig
)
*
SpokenLanguageIdentification
{
c
:=
C
.
struct_SherpaOnnxSpokenLanguageIdentificationConfig
{}
c
.
whisper
.
encoder
=
C
.
CString
(
config
.
Whisper
.
Encoder
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
whisper
.
encoder
))
c
.
whisper
.
decoder
=
C
.
CString
(
config
.
Whisper
.
Decoder
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
whisper
.
decoder
))
c
.
whisper
.
tail_paddings
=
C
.
int
(
config
.
Whisper
.
TailPaddings
)
c
.
num_threads
=
C
.
int
(
config
.
NumThreads
)
c
.
debug
=
C
.
int
(
config
.
Debug
)
c
.
provider
=
C
.
CString
(
config
.
Provider
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
provider
))
slid
:=
&
SpokenLanguageIdentification
{}
slid
.
impl
=
C
.
SherpaOnnxCreateSpokenLanguageIdentification
(
&
c
)
return
slid
}
// DeleteSpokenLanguageIdentification destroys the C object held by slid and
// then sets the handle to nil so that slid no longer points at the destroyed
// object.
func DeleteSpokenLanguageIdentification(slid *SpokenLanguageIdentification) {
	C.SherpaOnnxDestroySpokenLanguageIdentification(slid.impl)
	slid.impl = nil
}
// The user has to invoke DeleteOfflineStream() to free the returned value
// to avoid memory leak
func
(
slid
*
SpokenLanguageIdentification
)
CreateStream
()
*
OfflineStream
{
stream
:=
&
OfflineStream
{}
stream
.
impl
=
C
.
SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream
(
slid
.
impl
)
return
stream
}
func
(
slid
*
SpokenLanguageIdentification
)
Compute
(
stream
*
OfflineStream
)
*
SpokenLanguageIdentificationResult
{
r
:=
C
.
SherpaOnnxSpokenLanguageIdentificationCompute
(
slid
.
impl
,
stream
.
impl
)
// defer C.SherpaOnnxDestroySpokenLanguageIdentificationResult(r)
ans
:=
&
SpokenLanguageIdentificationResult
{}
ans
.
Lang
=
C
.
GoString
(
r
.
lang
)
return
ans
}
...
...
sherpa-onnx/csrc/spoken-language-identification.cc
查看文件 @
a042f44
...
...
@@ -91,7 +91,7 @@ std::string SpokenLanguageIdentificationConfig::ToString() const {
std
::
ostringstream
os
;
os
<<
"SpokenLanguageIdentificationConfig("
;
os
<<
"whisper=
\"
"
<<
whisper
.
ToString
()
<<
"
\
"
, "
;
os
<<
"whisper=
"
<<
whisper
.
ToString
()
<<
", "
;
os
<<
"num_threads="
<<
num_threads
<<
", "
;
os
<<
"debug="
<<
(
debug
?
"True"
:
"False"
)
<<
", "
;
os
<<
"provider=
\"
"
<<
provider
<<
"
\"
)"
;
...
...
请
注册
或
登录
后发表评论