Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-07-12 15:45:49 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-07-12 15:45:49 +0800
Commit
0b8ce73dbf0e436080bda9961b570f00f824d921
0b8ce73d
1 parent
27784987
Add Go API for ten-vad (#2384)
显示空白字符变更
内嵌
并排对比
正在显示
3 个修改的文件
包含
114 行增加
和
37 行删除
go-api-examples/vad/main.go
go-api-examples/vad/run.sh
scripts/go/sherpa_onnx.go
go-api-examples/vad/main.go
查看文件 @
0b8ce73
...
...
@@ -2,9 +2,10 @@ package main
import
(
"fmt"
portaudio
"github.com/csukuangfj/portaudio-
go"
"github.com/gen2brain/mal
go"
sherpa
"github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
"log"
"os"
)
func
main
()
{
...
...
@@ -13,62 +14,79 @@ func main() {
config
:=
sherpa
.
VadModelConfig
{}
// Please download silero_vad.onnx from
// https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
// or ten-vad.onnx from
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
if
FileExists
(
"./silero_vad.onnx"
)
{
fmt
.
Println
(
"Use silero-vad"
)
config
.
SileroVad
.
Model
=
"./silero_vad.onnx"
config
.
SileroVad
.
Threshold
=
0.5
config
.
SileroVad
.
MinSilenceDuration
=
0.5
config
.
SileroVad
.
MinSpeechDuration
=
0.25
config
.
SileroVad
.
MaxSpeechDuration
=
10
config
.
SileroVad
.
WindowSize
=
512
}
else
if
FileExists
(
"./ten-vad.onnx"
)
{
fmt
.
Println
(
"Use ten-vad"
)
config
.
TenVad
.
Model
=
"./ten-vad.onnx"
config
.
TenVad
.
Threshold
=
0.5
config
.
TenVad
.
MinSilenceDuration
=
0.5
config
.
TenVad
.
MinSpeechDuration
=
0.25
config
.
TenVad
.
MaxSpeechDuration
=
10
config
.
TenVad
.
WindowSize
=
256
}
else
{
fmt
.
Println
(
"Please download either ./silero_vad.onnx or ./ten-vad.onnx"
)
return
}
config
.
SampleRate
=
16000
config
.
NumThreads
=
1
config
.
Provider
=
"cpu"
config
.
Debug
=
1
windowSize
:=
config
.
SileroVad
.
WindowSize
if
config
.
TenVad
.
Model
!=
""
{
windowSize
=
config
.
TenVad
.
WindowSize
}
var
bufferSizeInSeconds
float32
=
5
vad
:=
sherpa
.
NewVoiceActivityDetector
(
&
config
,
bufferSizeInSeconds
)
defer
sherpa
.
DeleteVoiceActivityDetector
(
vad
)
err
:=
portaudio
.
Initialize
()
if
err
!=
nil
{
log
.
Fatalf
(
"Unable to initialize portaudio: %v
\n
"
,
err
)
}
defer
portaudio
.
Terminate
()
buffer
:=
sherpa
.
NewCircularBuffer
(
10
*
config
.
SampleRate
)
defer
sherpa
.
DeleteCircularBuffer
(
buffer
)
default_device
,
err
:=
portaudio
.
DefaultInputDevice
()
if
err
!=
nil
{
log
.
Fatal
(
"Failed to get default input device: %v
\n
"
,
err
)
}
log
.
Printf
(
"Selected default input device: %s
\n
"
,
default_device
.
Name
)
param
:=
portaudio
.
StreamParameters
{}
param
.
Input
.
Device
=
default_device
param
.
Input
.
Channels
=
1
param
.
Input
.
Latency
=
default_device
.
DefaultLowInputLatency
ctx
,
err
:=
malgo
.
InitContext
(
nil
,
malgo
.
ContextConfig
{},
func
(
message
string
)
{
fmt
.
Printf
(
"LOG <%v>"
,
message
)
})
chk
(
err
)
param
.
SampleRate
=
float64
(
config
.
SampleRate
)
param
.
FramesPerBuffer
=
0
param
.
Flags
=
portaudio
.
ClipOff
defer
func
()
{
_
=
ctx
.
Uninit
()
ctx
.
Free
()
}()
// you can choose another value for 0.1 if you want
samplesPerCall
:=
int32
(
param
.
SampleRate
*
0.1
)
// 0.1 second
samples
:=
make
([]
float32
,
samplesPerCall
)
deviceConfig
:=
malgo
.
DefaultDeviceConfig
(
malgo
.
Duplex
)
deviceConfig
.
Capture
.
Format
=
malgo
.
FormatS16
deviceConfig
.
Capture
.
Channels
=
1
deviceConfig
.
Playback
.
Format
=
malgo
.
FormatS16
deviceConfig
.
Playback
.
Channels
=
1
deviceConfig
.
SampleRate
=
16000
deviceConfig
.
Alsa
.
NoMMap
=
1
s
,
err
:=
portaudio
.
OpenStream
(
param
,
samples
)
if
err
!=
nil
{
log
.
Fatalf
(
"Failed to open the stream"
)
}
defer
s
.
Close
()
chk
(
s
.
Start
())
log
.
Print
(
"Started! Please speak"
)
printed
:=
false
k
:=
0
for
{
chk
(
s
.
Read
())
vad
.
AcceptWaveform
(
samples
)
onRecvFrames
:=
func
(
_
,
pSample
[]
byte
,
framecount
uint32
)
{
samples
:=
samplesInt16ToFloat
(
pSample
)
buffer
.
Push
(
samples
)
for
buffer
.
Size
()
>=
windowSize
{
head
:=
buffer
.
Head
()
s
:=
buffer
.
Get
(
head
,
windowSize
)
buffer
.
Pop
(
windowSize
)
vad
.
AcceptWaveform
(
s
)
if
vad
.
IsSpeech
()
&&
!
printed
{
printed
=
true
...
...
@@ -101,8 +119,22 @@ func main() {
log
.
Print
(
"----------
\n
"
)
}
}
}
captureCallbacks
:=
malgo
.
DeviceCallbacks
{
Data
:
onRecvFrames
,
}
device
,
err
:=
malgo
.
InitDevice
(
ctx
.
Context
,
deviceConfig
,
captureCallbacks
)
chk
(
err
)
err
=
device
.
Start
()
chk
(
err
)
fmt
.
Println
(
"Started. Please speak. Press ctrl + C to exit"
)
fmt
.
Scanln
()
device
.
Uninit
()
chk
(
s
.
Stop
())
}
func
chk
(
err
error
)
{
...
...
@@ -110,3 +142,25 @@ func chk(err error) {
panic
(
err
)
}
}
// samplesInt16ToFloat converts raw 16-bit PCM bytes into float32 samples.
//
// Each consecutive pair of bytes in inSamples is read as one signed 16-bit
// integer, low byte first, and divided by 32768. A trailing unpaired byte is
// ignored, so the returned slice always holds len(inSamples)/2 values.
func samplesInt16ToFloat(inSamples []byte) []float32 {
	converted := make([]float32, len(inSamples)/2)
	for idx := range converted {
		lo := int16(inSamples[2*idx])
		hi := int16(inSamples[2*idx+1])
		// Shifting the high byte left by 8 moves its top bit into the int16
		// sign bit, which is what yields negative sample values.
		converted[idx] = float32(lo|hi<<8) / 32768
	}
	return converted
}
// FileExists reports whether os.Stat succeeds for path.
//
// It returns true when os.Stat returns a nil error and false for any error.
// A path that cannot be inspected is therefore reported the same way as a
// path that does not exist.
func FileExists(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}
...
...
go-api-examples/vad/run.sh
查看文件 @
0b8ce73
...
...
@@ -3,7 +3,11 @@
set
-ex
if
[
! -f ./silero_vad.onnx
]
;
then
curl -SL -O https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi
if
[
! -f ./ten-vad.onnx
]
;
then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi
go mod tidy
...
...
scripts/go/sherpa_onnx.go
查看文件 @
0b8ce73
...
...
@@ -1142,8 +1142,18 @@ type SileroVadModelConfig struct {
MaxSpeechDuration
float32
}
// TenVadModelConfig holds the settings for the ten-vad voice activity
// detection model. NewVoiceActivityDetector copies every field into the
// matching ten_vad member of the C-side configuration.
type TenVadModelConfig struct {
	// Model is the path to the ten-vad model file. It is passed to C as a C string.
	Model string
	// Threshold is forwarded as ten_vad.threshold.
	// NOTE(review): presumably the speech-probability cutoff — confirm against the C API.
	Threshold float32
	// MinSilenceDuration is forwarded as ten_vad.min_silence_duration.
	// NOTE(review): presumably in seconds — confirm.
	MinSilenceDuration float32
	// MinSpeechDuration is forwarded as ten_vad.min_speech_duration.
	// NOTE(review): presumably in seconds — confirm.
	MinSpeechDuration float32
	// WindowSize is forwarded as ten_vad.window_size.
	// NOTE(review): presumably a number of samples per VAD step — confirm.
	WindowSize int
	// MaxSpeechDuration is forwarded as ten_vad.max_speech_duration.
	// NOTE(review): presumably in seconds — confirm.
	MaxSpeechDuration float32
}
type
VadModelConfig
struct
{
SileroVad
SileroVadModelConfig
TenVad
TenVadModelConfig
SampleRate
int
NumThreads
int
Provider
string
...
...
@@ -1220,6 +1230,15 @@ func NewVoiceActivityDetector(config *VadModelConfig, bufferSizeInSeconds float3
c
.
silero_vad
.
window_size
=
C
.
int
(
config
.
SileroVad
.
WindowSize
)
c
.
silero_vad
.
max_speech_duration
=
C
.
float
(
config
.
SileroVad
.
MaxSpeechDuration
)
c
.
ten_vad
.
model
=
C
.
CString
(
config
.
TenVad
.
Model
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
ten_vad
.
model
))
c
.
ten_vad
.
threshold
=
C
.
float
(
config
.
TenVad
.
Threshold
)
c
.
ten_vad
.
min_silence_duration
=
C
.
float
(
config
.
TenVad
.
MinSilenceDuration
)
c
.
ten_vad
.
min_speech_duration
=
C
.
float
(
config
.
TenVad
.
MinSpeechDuration
)
c
.
ten_vad
.
window_size
=
C
.
int
(
config
.
TenVad
.
WindowSize
)
c
.
ten_vad
.
max_speech_duration
=
C
.
float
(
config
.
TenVad
.
MaxSpeechDuration
)
c
.
sample_rate
=
C
.
int
(
config
.
SampleRate
)
c
.
num_threads
=
C
.
int
(
config
.
NumThreads
)
c
.
provider
=
C
.
CString
(
config
.
Provider
)
...
...
请
注册
或
登录
后发表评论