Program.cs
3.0 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to use the sherpa-onnx C# API for speaker diarization
/*
Usage:
Step 1: Download a speaker segmentation model
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available models. The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
Step 2: Download a speaker embedding extractor model
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
for a list of available models. The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
Step 3: Download test wave files
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available test wave files. The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
Step 4: Run it
dotnet run
*/
using SherpaOnnx;
using System;
/// <summary>
/// Console demo that runs offline speaker diarization on a single wave file
/// and prints one line per returned segment.
/// </summary>
class OfflineSpeakerDiarizationDemo
{
    /// <summary>
    /// Builds the diarization config, verifies that the wave file's sample rate
    /// matches the one reported by the diarizer, runs diarization while
    /// reporting progress, and prints each segment's start, end and speaker.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var config = new OfflineSpeakerDiarizationConfig();
        config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
        config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

        // The test wave ./0-four-speakers-zh.wav has 4 speakers, hence 4 clusters.
        // If the number of speakers is unknown, set a threshold instead
        // (it needs tuning):
        //   config.Clustering.Threshold = 0.5;
        config.Clustering.NumClusters = 4;

        var diarizer = new OfflineSpeakerDiarization(config);

        var wavePath = "./0-four-speakers-zh.wav";
        var wave = new WaveReader(wavePath);

        // Bail out early when the input does not match the expected sample rate.
        if (diarizer.SampleRate != wave.SampleRate)
        {
            Console.WriteLine($"Expected sample rate: {diarizer.SampleRate}. Given: {wave.SampleRate}");
            return;
        }

        Console.WriteLine("Started");

        // diarizer.Process(wave.Samples) is the alternative without progress reporting.
        var progressCallback = new OfflineSpeakerDiarizationProgressCallback(ReportProgress);
        var segments = diarizer.ProcessWithCallback(wave.Samples, progressCallback, IntPtr.Zero);

        foreach (var segment in segments)
        {
            string startText = String.Format("{0:0.00}", segment.Start);
            string endText = String.Format("{0:0.00}", segment.End);
            Console.WriteLine("{0} -- {1} speaker_{2}", startText, endText, segment.Speaker);
        }

        // Prints the completed percentage with two decimals; always returns 0.
        static int ReportProgress(int numProcessedChunks, int numTotalChunks, IntPtr arg)
        {
            float percent = 100.0F * numProcessedChunks / numTotalChunks;
            string percentText = String.Format("{0:0.00}", percent);
            Console.WriteLine("Progress {0}%", percentText);
            return 0;
        }
    }
}