Program.cs
4.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do keyword spotting with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
//
// 2. Now run it
//
// dotnet run
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Threading;
using PortAudioSharp;
using SherpaOnnx;
/// <summary>
/// Console demo that captures audio from the default input device with
/// PortAudio and feeds it to a sherpa-onnx keyword spotter, printing every
/// keyword that is reported. Press Ctrl+C to stop.
/// </summary>
class KeywordSpotterDemo
{
    // Directory created by extracting the model archive named in the file header.
    private const string ModelDir = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01";

    // Pause between two decoding passes of the main loop, in milliseconds.
    private const int PollIntervalMs = 200;

    /// <summary>
    /// Program entry point: builds the spotter, opens the default microphone
    /// and polls for detected keywords until Ctrl+C is pressed.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var config = CreateConfig();
        var kws = new KeywordSpotter(config);

        Console.WriteLine("----------Use pre-defined keywords----------");
        OnlineStream s = kws.CreateStream();

        Console.WriteLine(PortAudio.VersionInfo.versionText);
        PortAudio.Initialize();
        try
        {
            PrintDevices();

            int deviceIndex = PortAudio.DefaultInputDevice;
            if (deviceIndex == PortAudio.NoDevice)
            {
                Console.WriteLine("No default input device found");
                // Set the exit code and return (instead of Environment.Exit)
                // so that the finally block below still terminates PortAudio.
                Environment.ExitCode = 1;
                return;
            }

            DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
            Console.WriteLine();
            Console.WriteLine($"Use default device {deviceIndex} ({info.name})");

            // Mono, 32-bit float capture from the default input device.
            StreamParameters param = new StreamParameters();
            param.device = deviceIndex;
            param.channelCount = 1;
            param.sampleFormat = SampleFormat.Float32;
            param.suggestedLatency = info.defaultLowInputLatency;
            param.hostApiSpecificStreamInfo = IntPtr.Zero;

            // Handed to PortAudio below; copies each captured buffer into a
            // managed array and pushes it into the spotter's stream.
            PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
                UInt32 frameCount,
                ref StreamCallbackTimeInfo timeInfo,
                StreamCallbackFlags statusFlags,
                IntPtr userData) =>
            {
                float[] samples = new float[frameCount];
                Marshal.Copy(input, samples, 0, (Int32)frameCount);
                s.AcceptWaveform(config.FeatConfig.SampleRate, samples);
                return StreamCallbackResult.Continue;
            };

            using (var cts = new CancellationTokenSource())
            {
                // Turn Ctrl+C into a cooperative stop request so the cleanup
                // code below runs instead of the process being killed.
                ConsoleCancelEventHandler onCancel = (sender, e) =>
                {
                    e.Cancel = true;
                    cts.Cancel();
                };
                Console.CancelKeyPress += onCancel;

                PortAudioSharp.Stream stream = new PortAudioSharp.Stream(
                    inParams: param,
                    outParams: null,
                    sampleRate: config.FeatConfig.SampleRate,
                    framesPerBuffer: 0,
                    streamFlags: StreamFlags.ClipOff,
                    callback: callback,
                    userData: IntPtr.Zero);
                try
                {
                    Console.WriteLine(param);
                    Console.WriteLine("Started! Please speak");
                    stream.Start();

                    while (!cts.IsCancellationRequested)
                    {
                        // Consume everything the callback has queued so far.
                        while (kws.IsReady(s))
                        {
                            kws.Decode(s);
                        }

                        // NOTE(review): the result is not cleared here, so a
                        // detected keyword may be reported again on later
                        // polls - confirm whether this SherpaOnnx version
                        // offers a way to reset the stream after a detection.
                        var result = kws.GetResult(s);
                        if (!string.IsNullOrEmpty(result.Keyword))
                        {
                            Console.WriteLine("Detected: {0}", result.Keyword);
                        }

                        // Sleeps for the poll interval but wakes immediately
                        // when Ctrl+C is pressed.
                        cts.Token.WaitHandle.WaitOne(PollIntervalMs);
                    }

                    stream.Stop();
                }
                finally
                {
                    // Unsubscribe first so a late Ctrl+C cannot touch the
                    // CancellationTokenSource after it has been disposed.
                    Console.CancelKeyPress -= onCancel;
                    stream.Dispose();
                }
            }
        }
        finally
        {
            PortAudio.Terminate();
        }
    }

    /// <summary>
    /// Builds the keyword spotter configuration pointing at the model,
    /// token and keyword files under <see cref="ModelDir"/>.
    /// </summary>
    /// <returns>The populated configuration.</returns>
    private static KeywordSpotterConfig CreateConfig()
    {
        var config = new KeywordSpotterConfig();

        config.FeatConfig.SampleRate = 16000;
        config.FeatConfig.FeatureDim = 80;

        config.ModelConfig.Transducer.Encoder = ModelDir + "/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Transducer.Decoder = ModelDir + "/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Transducer.Joiner = ModelDir + "/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Tokens = ModelDir + "/tokens.txt";
        config.ModelConfig.Provider = "cpu";
        config.ModelConfig.NumThreads = 1;
        config.ModelConfig.Debug = 1;

        config.KeywordsFile = ModelDir + "/test_wavs/test_keywords.txt";

        return config;
    }

    /// <summary>
    /// Prints the name, input channel count and default sample rate of every
    /// device PortAudio reports. PortAudio must already be initialized.
    /// </summary>
    private static void PrintDevices()
    {
        Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
        for (int i = 0; i != PortAudio.DeviceCount; ++i)
        {
            Console.WriteLine($" Device {i}");
            DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
            Console.WriteLine($" Name: {deviceInfo.name}");
            Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
            Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
        }
    }
}