sherpa-onnx-keyword-spotter-alsa.cc
3.0 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
// sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-alsa.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/alsa.h"
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/keyword-spotter.h"
#include "sherpa-onnx/csrc/parse-options.h"
// Shutdown flag: set to a non-zero value by Handler() when SIGINT arrives and
// polled by the capture loop in main().
//
// `volatile sig_atomic_t` is the object type the C and C++ standards guarantee
// can be written from a signal handler and read from the interrupted code
// without undefined behavior; a plain `bool` carries no such guarantee.
volatile sig_atomic_t stop = 0;

// SIGINT handler: requests shutdown and prints a short notice to stderr.
//
// Only async-signal-safe operations are used: a store to a
// `volatile sig_atomic_t` and write(2). stdio functions such as fprintf() are
// not async-signal-safe and can deadlock or corrupt the stream state when the
// signal interrupts another stdio call.
//
// @param sig  The delivered signal number (unused).
static void Handler(int sig) {
  (void)sig;

  // write() may modify errno; restore it so the interrupted code does not
  // observe a value that was set inside the handler.
  const int saved_errno = errno;

  stop = 1;

  static const char kMessage[] = "\nCaught Ctrl + C. Exiting...\n";
  // The result is deliberately ignored: nothing useful can be done about a
  // failed write from inside a signal handler.
  const ssize_t ignored = write(STDERR_FILENO, kMessage, sizeof(kMessage) - 1);
  (void)ignored;

  errno = saved_errno;
}
// Entry point: feeds audio captured from an ALSA device into a keyword spotter
// and prints every detected keyword until Ctrl + C is pressed.
//
// Command line: the options registered by KeywordSpotterConfig, followed by
// exactly one positional argument, the ALSA device name.
//
// Returns 0 after a Ctrl + C shutdown, EXIT_FAILURE when the number of
// positional arguments is wrong, and -1 when the config is invalid or the
// sample rates do not match.
//
// All failure paths leave via `return` rather than exit(): exit() does not
// destroy objects with automatic storage duration, so the destructors of the
// already constructed locals (e.g. `spotter`, `alsa`) would be skipped.
int main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
Usage:
./bin/sherpa-onnx-keyword-spotter-alsa \
--tokens=/path/to/tokens.txt \
--encoder=/path/to/encoder.onnx \
--decoder=/path/to/decoder.onnx \
--joiner=/path/to/joiner.onnx \
--provider=cpu \
--num-threads=2 \
--keywords-file=keywords.txt \
device_name
Please refer to
https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
for a list of pre-trained models to download.
The device name specifies which microphone to use in case there are several
on your system. You can use
arecord -l
to find all available microphones on your computer. For instance, if it outputs
**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
Subdevices: 1/1
Subdevice #0: subdevice #0
and if you want to select card 3 and device 0 on that card, please use:
plughw:3,0
as the device_name.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::KeywordSpotterConfig config;

  config.Register(&po);
  po.Read(argc, argv);

  // Exactly one positional argument is accepted: the device name.
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Please provide only 1 argument: the device name\n");
    po.PrintUsage();
    return EXIT_FAILURE;
  }

  // Echo the effective configuration before validating it.
  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  sherpa_onnx::KeywordSpotter spotter(config);

  const int32_t expected_sample_rate = config.feat_config.sampling_rate;

  const std::string device_name = po.GetArg(1);
  sherpa_onnx::Alsa alsa(device_name.c_str());
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  // The rate reported by the ALSA wrapper must equal the rate the feature
  // extractor is configured for.
  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    return -1;  // same exit status as the former exit(-1), but unwinds locals
  }

  // One tenth of the device's actual sample rate per read; presumably this
  // corresponds to about 0.1 seconds of audio per Read() call.
  const int32_t chunk =
      static_cast<int32_t>(0.1 * alsa.GetActualSampleRate());

  auto stream = spotter.CreateStream();

  sherpa_onnx::Display display;

  // Running count of detections, passed to Display::Print with each result.
  int32_t keyword_index = 0;

  // `stop` is set by the SIGINT handler.
  while (!stop) {
    const std::vector<float> &samples = alsa.Read(chunk);

    stream->AcceptWaveform(expected_sample_rate, samples.data(),
                           samples.size());

    // Decode for as long as the spotter reports the stream as ready.
    while (spotter.IsReady(stream.get())) {
      spotter.DecodeStream(stream.get());
    }

    const auto r = spotter.GetResult(stream.get());
    if (!r.keyword.empty()) {
      display.Print(keyword_index, r.AsJsonString());
      fflush(stderr);
      keyword_index++;
    }
  }

  return 0;
}