main.go
2.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package main
import (
sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
"log"
)
/*
Usage:
Step 1: Download a speaker segmentation model
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available models. The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
Step 2: Download a speaker embedding extractor model
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
for a list of available models. The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
Step 3: Download test wave files
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available test wave files. The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
Step 4: Run it
*/
// initSpeakerDiarization fills in an offline speaker diarization config with
// the segmentation and embedding model paths used by this example and passes
// it to sherpa.NewOfflineSpeakerDiarization, returning that call's result
// unchanged. The caller checks the result for nil.
func initSpeakerDiarization() *sherpa.OfflineSpeakerDiarization {
	var cfg sherpa.OfflineSpeakerDiarizationConfig

	// Model files are expected relative to the current working directory.
	cfg.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"
	cfg.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"

	// The test wave file contains 4 speakers, hence 4 clusters.
	//
	// If the number of speakers in the wave file is unknown, do not set
	// NumClusters; set a clustering threshold instead, e.g.
	//
	//	cfg.Clustering.Threshold = 0.5
	//
	// A larger Threshold leads to fewer clusters.
	// A smaller Threshold leads to more clusters.
	cfg.Clustering.NumClusters = 4

	return sherpa.NewOfflineSpeakerDiarization(&cfg)
}
// main reads the example wave file, runs offline speaker diarization on its
// samples, and logs one line per returned segment with its start, end and
// speaker index. On any setup failure it logs a message and returns.
func main() {
	const wavePath = "./0-four-speakers-zh.wav"

	audio := sherpa.ReadWave(wavePath)
	if audio == nil {
		log.Printf("Failed to read %v", wavePath)
		return
	}

	diarizer := initSpeakerDiarization()
	if diarizer == nil {
		log.Printf("Please check your config")
		return
	}
	// Release the diarizer when main returns.
	defer sherpa.DeleteOfflineSpeakerDiarization(diarizer)

	// The wave file's sample rate must match what the diarizer reports.
	if expected := diarizer.SampleRate(); audio.SampleRate != expected {
		log.Printf("Expected sample rate: %v, given: %d\n", expected, audio.SampleRate)
		return
	}

	log.Println("Started")

	for _, seg := range diarizer.Process(audio.Samples) {
		log.Printf("%.3f -- %.3f speaker_%02d\n", seg.Start, seg.End, seg.Speaker)
	}
}