Fangjun Kuang
Committed by GitHub

add alsa example for vad+offline asr (#2020)

@@ -71,10 +71,11 @@ def get_binaries(): @@ -71,10 +71,11 @@ def get_binaries():
71 binaries += [ 71 binaries += [
72 "sherpa-onnx-alsa", 72 "sherpa-onnx-alsa",
73 "sherpa-onnx-alsa-offline", 73 "sherpa-onnx-alsa-offline",
  74 + "sherpa-onnx-alsa-offline-audio-tagging",
74 "sherpa-onnx-alsa-offline-speaker-identification", 75 "sherpa-onnx-alsa-offline-speaker-identification",
75 "sherpa-onnx-offline-tts-play-alsa", 76 "sherpa-onnx-offline-tts-play-alsa",
76 "sherpa-onnx-vad-alsa", 77 "sherpa-onnx-vad-alsa",
77 - "sherpa-onnx-alsa-offline-audio-tagging", 78 + "sherpa-onnx-vad-alsa-offline-asr",
78 ] 79 ]
79 80
80 if is_windows(): 81 if is_windows():
@@ -380,6 +380,7 @@ if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY) @@ -380,6 +380,7 @@ if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY)
380 add_executable(sherpa-onnx-alsa-offline-speaker-identification sherpa-onnx-alsa-offline-speaker-identification.cc alsa.cc) 380 add_executable(sherpa-onnx-alsa-offline-speaker-identification sherpa-onnx-alsa-offline-speaker-identification.cc alsa.cc)
381 add_executable(sherpa-onnx-keyword-spotter-alsa sherpa-onnx-keyword-spotter-alsa.cc alsa.cc) 381 add_executable(sherpa-onnx-keyword-spotter-alsa sherpa-onnx-keyword-spotter-alsa.cc alsa.cc)
382 add_executable(sherpa-onnx-vad-alsa sherpa-onnx-vad-alsa.cc alsa.cc) 382 add_executable(sherpa-onnx-vad-alsa sherpa-onnx-vad-alsa.cc alsa.cc)
  383 + add_executable(sherpa-onnx-vad-alsa-offline-asr sherpa-onnx-vad-alsa-offline-asr.cc alsa.cc)
383 384
384 385
385 if(SHERPA_ONNX_ENABLE_TTS) 386 if(SHERPA_ONNX_ENABLE_TTS)
@@ -392,6 +393,7 @@ if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY) @@ -392,6 +393,7 @@ if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY)
392 sherpa-onnx-alsa-offline-speaker-identification 393 sherpa-onnx-alsa-offline-speaker-identification
393 sherpa-onnx-keyword-spotter-alsa 394 sherpa-onnx-keyword-spotter-alsa
394 sherpa-onnx-vad-alsa 395 sherpa-onnx-vad-alsa
  396 + sherpa-onnx-vad-alsa-offline-asr
395 sherpa-onnx-alsa-offline-audio-tagging 397 sherpa-onnx-alsa-offline-audio-tagging
396 ) 398 )
397 399
  1 +// sherpa-onnx/csrc/sherpa-onnx-vad-alsa-offline-asr.cc
  2 +//
  3 +// Copyright (c) 2022-2025 Xiaomi Corporation
  4 +
  5 +#include <signal.h>
  6 +#include <stdio.h>
  7 +#include <stdlib.h>
  8 +
  9 +#include <algorithm>
  10 +#include <mutex> // NOLINT
  11 +
  12 +#include "sherpa-onnx/csrc/alsa.h"
  13 +#include "sherpa-onnx/csrc/circular-buffer.h"
  14 +#include "sherpa-onnx/csrc/offline-recognizer.h"
  15 +#include "sherpa-onnx/csrc/resample.h"
  16 +#include "sherpa-onnx/csrc/voice-activity-detector.h"
  17 +
  18 +bool stop = false;
  19 +static void Handler(int32_t /*sig*/) {
  20 + stop = true;
  21 + fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
  22 +}
  23 +
  24 +int32_t main(int32_t argc, char *argv[]) {
  25 + signal(SIGINT, Handler);
  26 +
  27 + const char *kUsageMessage = R"usage(
  28 +This program shows how to use a streaming VAD with non-streaming ASR in
  29 +sherpa-onnx.
  30 +
  31 +Please download silero_vad.onnx from
  32 +https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
  33 +
  34 +For instance, use
  35 +wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
  36 +
  37 +Please refer to ./sherpa-onnx-microphone-offline.cc
  38 +to download models for offline ASR.
  39 +
  40 +(1) Transducer from icefall
  41 +
  42 + ./bin/sherpa-onnx-vad-microphone-offline-asr \
  43 + --silero-vad-model=/path/to/silero_vad.onnx \
  44 + --tokens=/path/to/tokens.txt \
  45 + --encoder=/path/to/encoder.onnx \
  46 + --decoder=/path/to/decoder.onnx \
  47 + --joiner=/path/to/joiner.onnx \
  48 + device_name
  49 +
  50 +(2) Paraformer from FunASR
  51 +
  52 + ./bin/sherpa-onnx-vad-microphone-offline-asr \
  53 + --silero-vad-model=/path/to/silero_vad.onnx \
  54 + --tokens=/path/to/tokens.txt \
  55 + --paraformer=/path/to/model.onnx \
  56 + device_name
  57 +
  58 +(3) Whisper models
  59 +
  60 + ./bin/sherpa-onnx-vad-microphone-offline-asr \
  61 + --silero-vad-model=/path/to/silero_vad.onnx \
  62 + --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
  63 + --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
  64 + --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
  65 + device_name
  66 +
  67 +The device name specifies which microphone to use in case there are several
  68 +on your system. You can use
  69 +
  70 + arecord -l
  71 +
  72 +to find all available microphones on your computer. For instance, if it outputs
  73 +
  74 +**** List of CAPTURE Hardware Devices ****
  75 +card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  76 + Subdevices: 1/1
  77 + Subdevice #0: subdevice #0
  78 +
  79 +and if you want to select card 3 and device 0 on that card, please use:
  80 +
  81 + plughw:3,0
  82 +
  83 +as the device_name.
  84 +)usage";
  85 +
  86 + sherpa_onnx::ParseOptions po(kUsageMessage);
  87 + sherpa_onnx::VadModelConfig vad_config;
  88 +
  89 + sherpa_onnx::OfflineRecognizerConfig asr_config;
  90 +
  91 + vad_config.Register(&po);
  92 + asr_config.Register(&po);
  93 +
  94 + po.Read(argc, argv);
  95 + if (po.NumArgs() != 1) {
  96 + fprintf(stderr, "Please provide only 1 argument: the device name\n");
  97 + po.PrintUsage();
  98 + exit(EXIT_FAILURE);
  99 + }
  100 +
  101 + fprintf(stderr, "%s\n", vad_config.ToString().c_str());
  102 + fprintf(stderr, "%s\n", asr_config.ToString().c_str());
  103 +
  104 + if (!vad_config.Validate()) {
  105 + fprintf(stderr, "Errors in vad_config!\n");
  106 + return -1;
  107 + }
  108 +
  109 + if (!asr_config.Validate()) {
  110 + fprintf(stderr, "Errors in asr_config!\n");
  111 + return -1;
  112 + }
  113 +
  114 + fprintf(stderr, "Creating recognizer ...\n");
  115 + sherpa_onnx::OfflineRecognizer recognizer(asr_config);
  116 + fprintf(stderr, "Recognizer created!\n");
  117 +
  118 + auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(vad_config);
  119 +
  120 + std::string device_name = po.GetArg(1);
  121 + sherpa_onnx::Alsa alsa(device_name.c_str());
  122 + fprintf(stderr, "Use recording device: %s\n", device_name.c_str());
  123 +
  124 + int32_t sample_rate = 16000;
  125 +
  126 + if (alsa.GetExpectedSampleRate() != sample_rate) {
  127 + fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
  128 + sample_rate);
  129 + exit(-1);
  130 + }
  131 +
  132 + int32_t chunk = 0.1 * alsa.GetActualSampleRate();
  133 +
  134 + fprintf(stderr, "Started. Please speak\n");
  135 +
  136 + int32_t window_size = vad_config.silero_vad.window_size;
  137 + int32_t index = 0;
  138 +
  139 + while (!stop) {
  140 + const std::vector<float> &samples = alsa.Read(chunk);
  141 + vad->AcceptWaveform(samples.data(), samples.size());
  142 +
  143 + while (!vad->Empty()) {
  144 + const auto &segment = vad->Front();
  145 + auto s = recognizer.CreateStream();
  146 + s->AcceptWaveform(sample_rate, segment.samples.data(),
  147 + segment.samples.size());
  148 + recognizer.DecodeStream(s.get());
  149 + const auto &result = s->GetResult();
  150 + if (!result.text.empty()) {
  151 + fprintf(stderr, "%2d: %s\n", index, result.text.c_str());
  152 + ++index;
  153 + }
  154 + vad->Pop();
  155 + }
  156 + }
  157 +
  158 + return 0;
  159 +}
@@ -115,11 +115,20 @@ to download models for offline ASR. @@ -115,11 +115,20 @@ to download models for offline ASR.
115 115
116 PaDeviceIndex num_devices = Pa_GetDeviceCount(); 116 PaDeviceIndex num_devices = Pa_GetDeviceCount();
117 fprintf(stderr, "Num devices: %d\n", num_devices); 117 fprintf(stderr, "Num devices: %d\n", num_devices);
  118 + if (num_devices == 0) {
  119 + fprintf(stderr,
  120 + " If you are using Linux, please try "
  121 + "./build/bin/sherpa-onnx-vad-alsa-offline-asr\n");
  122 + exit(-1);
  123 + }
118 124
119 int32_t device_index = Pa_GetDefaultInputDevice(); 125 int32_t device_index = Pa_GetDefaultInputDevice();
120 126
121 if (device_index == paNoDevice) { 127 if (device_index == paNoDevice) {
122 fprintf(stderr, "No default input device found\n"); 128 fprintf(stderr, "No default input device found\n");
  129 + fprintf(stderr,
  130 + " If you are using Linux, please try "
  131 + "./build/bin/sherpa-onnx-vad-alsa-offline-asr\n");
123 exit(EXIT_FAILURE); 132 exit(EXIT_FAILURE);
124 } 133 }
125 134
@@ -87,6 +87,8 @@ std::unique_ptr<Vocoder> Vocoder::Create(const OfflineTtsModelConfig &config) { @@ -87,6 +87,8 @@ std::unique_ptr<Vocoder> Vocoder::Create(const OfflineTtsModelConfig &config) {
87 SHERPA_ONNX_LOGE("Unknown model type in vocoder!"); 87 SHERPA_ONNX_LOGE("Unknown model type in vocoder!");
88 return nullptr; 88 return nullptr;
89 } 89 }
  90 +
  91 + return nullptr;
90 } 92 }
91 93
92 template <typename Manager> 94 template <typename Manager>