silero-vad-model-config.cc
2.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
// sherpa-onnx/csrc/silero-vad-model-config.cc
//
// Copyright (c) 2023 Xiaomi Corporation
#include "sherpa-onnx/csrc/silero-vad-model-config.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
namespace sherpa_onnx {
// Registers the Silero VAD command-line options with the given parser.
//
// Each option is registered under a "silero-vad-" prefixed name together with
// the address of the member of this config that it corresponds to and a help
// string.
//
// @param po  Option parser to register with. It is dereferenced
//            unconditionally, so it must not be null.
void SileroVadModelConfig::Register(ParseOptions *po) {
  po->Register("silero-vad-model", &model, "Path to silero VAD ONNX model.");

  po->Register("silero-vad-threshold", &threshold,
               "Speech threshold. Silero VAD outputs speech probabilities for "
               "each audio chunk, probabilities ABOVE this value are "
               "considered as SPEECH. It is better to tune this parameter for "
               "each dataset separately, but lazy "
               "0.5 is pretty good for most datasets.");

  po->Register(
      "silero-vad-min-silence-duration", &min_silence_duration,
      "In seconds.  In the end of each speech chunk wait for "
      "--silero-vad-min-silence-duration seconds before separating it");

  po->Register("silero-vad-min-speech-duration", &min_speech_duration,
               "In seconds.  In the end of each silence chunk wait for "
               "--silero-vad-min-speech-duration seconds before separating it");

  po->Register(
      "silero-vad-window-size", &window_size,
      "In samples. Audio chunks of --silero-vad-window-size samples are fed "
      "to the silero VAD model. WARNING! Silero VAD models were trained using "
      "512, 1024, 1536 samples for 16000 sample rate and 256, 512, 768 samples "
      "for 8000 sample rate. Values other than these may affect model "
      "performance!");
}
bool SileroVadModelConfig::Validate() const {
if (model.empty()) {
SHERPA_ONNX_LOGE("Please provide --silero-vad-model");
return false;
}
if (!FileExists(model)) {
SHERPA_ONNX_LOGE("Silero vad model file '%s' does not exist",
model.c_str());
return false;
}
if (threshold < 0.01) {
SHERPA_ONNX_LOGE(
"Please use a larger value for --silero-vad-threshold. Given: %f",
threshold);
return false;
}
if (threshold >= 1) {
SHERPA_ONNX_LOGE(
"Please use a smaller value for --silero-vad-threshold. Given: %f",
threshold);
return false;
}
return true;
}
// Returns a single-line, human-readable description of this config in the
// form
//
//   SileroVadModelConfig(model="<model>", threshold=<threshold>,
//   min_silence_duration=<...>, min_speech_duration=<...>, window_size=<...>)
//
// The model path is wrapped in double quotes; it is not escaped.
std::string SileroVadModelConfig::ToString() const {
  std::ostringstream os;

  os << "SileroVadModelConfig(";
  os << "model=\"" << model << "\", ";
  os << "threshold=" << threshold << ", ";
  os << "min_silence_duration=" << min_silence_duration << ", ";
  os << "min_speech_duration=" << min_speech_duration << ", ";
  os << "window_size=" << window_size << ")";

  return os.str();
}
} // namespace sherpa_onnx