Fangjun Kuang
Committed by GitHub

Add microphone support for offline recognizer (#104)

@@ -107,6 +107,11 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) @@ -107,6 +107,11 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
107 microphone.cc 107 microphone.cc
108 ) 108 )
109 109
  110 + add_executable(sherpa-onnx-microphone-offline
  111 + sherpa-onnx-microphone-offline.cc
  112 + microphone.cc
  113 + )
  114 +
110 if(BUILD_SHARED_LIBS) 115 if(BUILD_SHARED_LIBS)
111 set(PA_LIB portaudio) 116 set(PA_LIB portaudio)
112 else() 117 else()
@@ -114,8 +119,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) @@ -114,8 +119,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
114 endif() 119 endif()
115 120
116 target_link_libraries(sherpa-onnx-microphone PRIVATE ${PA_LIB} sherpa-onnx-core) 121 target_link_libraries(sherpa-onnx-microphone PRIVATE ${PA_LIB} sherpa-onnx-core)
117 -  
118 - install(TARGETS sherpa-onnx-microphone DESTINATION bin) 122 + target_link_libraries(sherpa-onnx-microphone-offline PRIVATE ${PA_LIB} sherpa-onnx-core)
  123 +
  124 + install(
  125 + TARGETS
  126 + sherpa-onnx-microphone
  127 + sherpa-onnx-microphone-offline
  128 + DESTINATION
  129 + bin
  130 + )
119 endif() 131 endif()
120 132
121 if(SHERPA_ONNX_ENABLE_WEBSOCKET) 133 if(SHERPA_ONNX_ENABLE_WEBSOCKET)
  1 +// sherpa-onnx/csrc/sherpa-onnx-microphone-offline.cc
  2 +//
  3 +// Copyright (c) 2022-2023 Xiaomi Corporation
  4 +
  #include <signal.h>
  #include <stdio.h>
  #include <stdlib.h>

  #include <algorithm>
  #include <atomic>
  #include <cctype>  // std::tolower
  #include <cstdint>
  #include <mutex>   // NOLINT
  #include <thread>  // NOLINT
  #include <vector>

  #include "portaudio.h"  // NOLINT
  #include "sherpa-onnx/csrc/macros.h"
  #include "sherpa-onnx/csrc/microphone.h"
  #include "sherpa-onnx/csrc/offline-recognizer.h"
  17 +
  18 +enum class State {
  19 + kIdle,
  20 + kRecording,
  21 + kDecoding,
  22 +};
  23 +
  24 +State state = State::kIdle;
  25 +
  26 +// true to stop the program and exit
  27 +bool stop = false;
  28 +
  29 +std::vector<float> samples;
  30 +std::mutex samples_mutex;
  31 +
  32 +static void DetectKeyPress() {
  33 + SHERPA_ONNX_LOGE("Press Enter to start");
  34 + int32_t key;
  35 + while (!stop && (key = getchar())) {
  36 + if (key != 0x0a) {
  37 + continue;
  38 + }
  39 +
  40 + switch (state) {
  41 + case State::kIdle:
  42 + SHERPA_ONNX_LOGE("Start recording. Press Enter to stop recording");
  43 + state = State::kRecording;
  44 + {
  45 + std::lock_guard<std::mutex> lock(samples_mutex);
  46 + samples.clear();
  47 + }
  48 + break;
  49 + case State::kRecording:
  50 + SHERPA_ONNX_LOGE("Stop recording. Decoding ...");
  51 + state = State::kDecoding;
  52 + break;
  53 + case State::kDecoding:
  54 + break;
  55 + }
  56 + }
  57 +}
  58 +
  59 +static int32_t RecordCallback(const void *input_buffer,
  60 + void * /*output_buffer*/,
  61 + unsigned long frames_per_buffer, // NOLINT
  62 + const PaStreamCallbackTimeInfo * /*time_info*/,
  63 + PaStreamCallbackFlags /*status_flags*/,
  64 + void *user_data) {
  65 + std::lock_guard<std::mutex> lock(samples_mutex);
  66 +
  67 + auto p = reinterpret_cast<const float *>(input_buffer);
  68 + samples.insert(samples.end(), p, p + frames_per_buffer);
  69 +
  70 + return stop ? paComplete : paContinue;
  71 +}
  72 +
  73 +static void Handler(int32_t sig) {
  74 + stop = true;
  75 + fprintf(stderr, "\nCaught Ctrl + C. Press Enter to exit\n");
  76 +}
  77 +
  78 +int32_t main(int32_t argc, char *argv[]) {
  79 + signal(SIGINT, Handler);
  80 +
  81 + const char *kUsageMessage = R"usage(
  82 +This program uses non-streaming models with microphone for speech recognition.
  83 +Usage:
  84 +
  85 +(1) Transducer from icefall
  86 +
  87 + ./bin/sherpa-onnx-microphone-offline \
  88 + --tokens=/path/to/tokens.txt \
  89 + --encoder=/path/to/encoder.onnx \
  90 + --decoder=/path/to/decoder.onnx \
  91 + --joiner=/path/to/joiner.onnx \
  92 + --num-threads=2 \
  93 + --decoding-method=greedy_search
  94 +
  95 +(2) Paraformer from FunASR
  96 +
  97 + ./bin/sherpa-onnx-microphone-offline \
  98 + --tokens=/path/to/tokens.txt \
  99 + --paraformer=/path/to/model.onnx \
  100 + --num-threads=2 \
  101 + --decoding-method=greedy_search
  102 +
  103 +Default value for num_threads is 2.
  104 +Valid values for decoding_method: greedy_search.
  105 +
  106 +Please refer to
  107 +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  108 +for a list of pre-trained models to download.
  109 +)usage";
  110 +
  111 + sherpa_onnx::ParseOptions po(kUsageMessage);
  112 + sherpa_onnx::OfflineRecognizerConfig config;
  113 + config.Register(&po);
  114 +
  115 + po.Read(argc, argv);
  116 + if (po.NumArgs() != 0) {
  117 + po.PrintUsage();
  118 + exit(EXIT_FAILURE);
  119 + }
  120 +
  121 + fprintf(stderr, "%s\n", config.ToString().c_str());
  122 +
  123 + if (!config.Validate()) {
  124 + fprintf(stderr, "Errors in config!\n");
  125 + return -1;
  126 + }
  127 +
  128 + SHERPA_ONNX_LOGE("Creating recognizer ...");
  129 + sherpa_onnx::OfflineRecognizer recognizer(config);
  130 + SHERPA_ONNX_LOGE("Recognizer created!");
  131 +
  132 + sherpa_onnx::Microphone mic;
  133 +
  134 + PaDeviceIndex num_devices = Pa_GetDeviceCount();
  135 + fprintf(stderr, "Num devices: %d\n", num_devices);
  136 +
  137 + PaStreamParameters param;
  138 +
  139 + param.device = Pa_GetDefaultInputDevice();
  140 + if (param.device == paNoDevice) {
  141 + fprintf(stderr, "No default input device found\n");
  142 + exit(EXIT_FAILURE);
  143 + }
  144 + fprintf(stderr, "Use default device: %d\n", param.device);
  145 +
  146 + const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
  147 + fprintf(stderr, " Name: %s\n", info->name);
  148 + fprintf(stderr, " Max input channels: %d\n", info->maxInputChannels);
  149 +
  150 + param.channelCount = 1;
  151 + param.sampleFormat = paFloat32;
  152 +
  153 + param.suggestedLatency = info->defaultLowInputLatency;
  154 + param.hostApiSpecificStreamInfo = nullptr;
  155 + float sample_rate = 16000;
  156 +
  157 + PaStream *stream;
  158 + PaError err =
  159 + Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
  160 + sample_rate,
  161 + 0, // frames per buffer
  162 + paClipOff, // we won't output out of range samples
  163 + // so don't bother clipping them
  164 + RecordCallback, nullptr);
  165 + if (err != paNoError) {
  166 + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  167 + exit(EXIT_FAILURE);
  168 + }
  169 +
  170 + err = Pa_StartStream(stream);
  171 + fprintf(stderr, "Started\n");
  172 +
  173 + if (err != paNoError) {
  174 + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  175 + exit(EXIT_FAILURE);
  176 + }
  177 +
  178 + std::thread t(DetectKeyPress);
  179 + while (!stop) {
  180 + switch (state) {
  181 + case State::kIdle:
  182 + break;
  183 + case State::kRecording:
  184 + break;
  185 + case State::kDecoding: {
  186 + std::vector<float> buf;
  187 + {
  188 + std::lock_guard<std::mutex> lock(samples_mutex);
  189 + buf = std::move(samples);
  190 + }
  191 +
  192 + auto s = recognizer.CreateStream();
  193 + s->AcceptWaveform(sample_rate, buf.data(), buf.size());
  194 + recognizer.DecodeStream(s.get());
  195 + SHERPA_ONNX_LOGE("Decoding Done! Result is:");
  196 + SHERPA_ONNX_LOGE("%s", s->GetResult().text.c_str());
  197 +
  198 + state = State::kIdle;
  199 + SHERPA_ONNX_LOGE("Press Enter to start");
  200 + break;
  201 + }
  202 + }
  203 +
  204 + Pa_Sleep(20); // sleep for 20ms
  205 + }
  206 + t.join();
  207 +
  208 + err = Pa_CloseStream(stream);
  209 + if (err != paNoError) {
  210 + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  211 + exit(EXIT_FAILURE);
  212 + }
  213 +
  214 + return 0;
  215 +}
@@ -66,6 +66,7 @@ for a list of pre-trained models to download. @@ -66,6 +66,7 @@ for a list of pre-trained models to download.
66 return -1; 66 return -1;
67 } 67 }
68 68
  69 + fprintf(stderr, "Creating recognizer ...\n");
69 sherpa_onnx::OfflineRecognizer recognizer(config); 70 sherpa_onnx::OfflineRecognizer recognizer(config);
70 71
71 auto begin = std::chrono::steady_clock::now(); 72 auto begin = std::chrono::steady_clock::now();