Add microphone support for offline recognizer (#104)

Fangjun Kuang · GitHub
Commit b911915a32e752b28524a8ef5a292157de0ebc27 b911915a 1 parent 6707ec41
sherpa-onnx/csrc/CMakeLists.txt
sherpa-onnx/csrc/sherpa-onnx-microphone-offline.cc
sherpa-onnx/csrc/sherpa-onnx-offline.cc
--- a/sherpa-onnx/csrc/CMakeLists.txt
查看文件 @b911915
+++ b/sherpa-onnx/csrc/CMakeLists.txt
查看文件 @b911915
@@ -107,6 +107,11 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
     microphone.cc
   )
 
+   add_executable(sherpa-onnx-microphone-offline
+     sherpa-onnx-microphone-offline.cc
+     microphone.cc
+   )
+ 
   if(BUILD_SHARED_LIBS)
     set(PA_LIB portaudio)
   else()
@@ -114,8 +119,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
   endif()
 
   target_link_libraries(sherpa-onnx-microphone PRIVATE ${PA_LIB} sherpa-onnx-core)
+   target_link_libraries(sherpa-onnx-microphone-offline PRIVATE ${PA_LIB} sherpa-onnx-core)
 
-   install(TARGETS sherpa-onnx-microphone DESTINATION bin)
+   install(
+     TARGETS
+       sherpa-onnx-microphone
+       sherpa-onnx-microphone-offline
+     DESTINATION
+       bin
+   )
 endif()
 
 if(SHERPA_ONNX_ENABLE_WEBSOCKET)
--- a/sherpa-onnx/csrc/sherpa-onnx-microphone-offline.cc 0 → 100644
查看文件 @b911915
+++ b/sherpa-onnx/csrc/sherpa-onnx-microphone-offline.cc 0 → 100644
查看文件 @b911915
+ // sherpa-onnx/csrc/sherpa-onnx-microphone-offline.cc
+ //
+ // Copyright (c)  2022-2023  Xiaomi Corporation
+ 
+ #include <signal.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include <algorithm>
+ #include <atomic>
+ #include <cctype>  // std::tolower
+ #include <mutex>   // NOLINT
+ #include <thread>  // NOLINT
+ #include <vector>
+ 
+ #include "portaudio.h"  // NOLINT
+ #include "sherpa-onnx/csrc/macros.h"
+ #include "sherpa-onnx/csrc/microphone.h"
+ #include "sherpa-onnx/csrc/offline-recognizer.h"
+ 
+ // Recording state machine driven by the Enter key:
+ //   kIdle -> kRecording -> kDecoding -> kIdle
+ enum class State {
+   kIdle,       // waiting for Enter to start a recording
+   kRecording,  // a recording is in progress; Enter ends it
+   kDecoding,   // main() is recognizing the recorded samples
+ };
+ 
+ // Current state. It is written by the key-press thread and by main(), so it
+ // is atomic to avoid a data race between them.
+ std::atomic<State> state{State::kIdle};
+ 
+ // true to stop the program and exit.
+ // It is set from the SIGINT handler and read by the key-press thread, the
+ // audio callback and main(), hence atomic.
+ std::atomic<bool> stop{false};
+ 
+ // Audio delivered by the microphone callback. Guarded by samples_mutex.
+ std::vector<float> samples;
+ std::mutex samples_mutex;
+ 
+ // Runs on its own thread: reads stdin and advances the recording state
+ // machine each time Enter is pressed.
+ //
+ //   kIdle      --Enter--> kRecording (previously buffered samples are dropped)
+ //   kRecording --Enter--> kDecoding  (main() performs the decoding)
+ //   kDecoding  --Enter--> ignored
+ //
+ // Returns once `stop` is observed as true or stdin reaches end-of-file.
+ static void DetectKeyPress() {
+   SHERPA_ONNX_LOGE("Press Enter to start");
+   while (!stop) {
+     // getchar() returns an int so that EOF can be told apart from real bytes.
+     const int key = getchar();
+     if (key == EOF) {
+       // stdin is closed or failed. getchar() would keep returning EOF right
+       // away, turning this loop into a busy spin, and no further key press
+       // can arrive, so ask the whole program to exit.
+       stop = true;
+       break;
+     }
+ 
+     if (key != 0x0a) {  // 0x0a is line feed, i.e. the Enter key
+       continue;
+     }
+ 
+     const State current = state;
+     switch (current) {
+       case State::kIdle:
+         SHERPA_ONNX_LOGE("Start recording. Press Enter to stop recording");
+         state = State::kRecording;
+         {
+           // Drop whatever was buffered before this recording started.
+           std::lock_guard<std::mutex> lock(samples_mutex);
+           samples.clear();
+         }
+         break;
+       case State::kRecording:
+         SHERPA_ONNX_LOGE("Stop recording. Decoding ...");
+         state = State::kDecoding;
+         break;
+       case State::kDecoding:
+         // main() is still decoding; ignore the key press.
+         break;
+     }
+   }
+ }
+ 
+ // PortAudio stream callback that stores captured audio in `samples`.
+ //
+ // main() opens the stream with a single paFloat32 channel, so
+ // `input_buffer` is read as `frames_per_buffer` float samples.
+ //
+ // Audio is buffered only while the state is kRecording. Anything captured
+ // in other states was thrown away by DetectKeyPress() when the next
+ // recording started, so keeping it only made `samples` grow without bound
+ // while the program sat idle.
+ //
+ // Returns paComplete once `stop` is set, paContinue otherwise.
+ static int32_t RecordCallback(const void *input_buffer,
+                               void * /*output_buffer*/,
+                               unsigned long frames_per_buffer,  // NOLINT
+                               const PaStreamCallbackTimeInfo * /*time_info*/,
+                               PaStreamCallbackFlags /*status_flags*/,
+                               void * /*user_data*/) {
+   const State current = state;
+ 
+   // NOTE(review): the null check is defensive; PortAudio's own recording
+   // example also guards against a null input buffer.
+   if (current == State::kRecording && input_buffer != nullptr) {
+     auto p = reinterpret_cast<const float *>(input_buffer);
+ 
+     std::lock_guard<std::mutex> lock(samples_mutex);
+     samples.insert(samples.end(), p, p + frames_per_buffer);
+   }
+ 
+   return stop ? paComplete : paContinue;
+ }
+ 
+ // SIGINT (Ctrl + C) handler installed by main(): requests shutdown via
+ // `stop` and prints a hint that Enter must be pressed to exit.
+ // NOTE(review): stdio calls are not async-signal-safe -- confirm this is
+ // acceptable for a command-line demo.
+ static void Handler(int32_t /*sig*/) {
+   stop = true;
+   fputs("\nCaught Ctrl + C. Press Enter to exit\n", stderr);
+ }
+ 
+ // Entry point: records audio from the default input device and decodes each
+ // recording with a non-streaming (offline) recognizer.
+ //
+ // Flow:
+ //   1. Parse the command line into an OfflineRecognizerConfig and validate it.
+ //   2. Create the recognizer and open/start a 16 kHz mono float32 input
+ //      stream on the default input device.
+ //   3. Start the key-press thread, then poll `state` every 20 ms. When the
+ //      state is kDecoding, take the buffered samples, decode them, print
+ //      the text and go back to kIdle.
+ //   4. Once `stop` is set, join the key-press thread and close the stream.
+ //
+ // Returns 0 on success and -1 on an invalid config; other failures
+ // terminate the process with EXIT_FAILURE.
+ int32_t main(int32_t argc, char *argv[]) {
+   signal(SIGINT, Handler);
+ 
+   const char *kUsageMessage = R"usage(
+ This program uses non-streaming models with microphone for speech recognition.
+ Usage:
+ 
+ (1) Transducer from icefall
+ 
+   ./bin/sherpa-onnx-microphone-offline \
+     --tokens=/path/to/tokens.txt \
+     --encoder=/path/to/encoder.onnx \
+     --decoder=/path/to/decoder.onnx \
+     --joiner=/path/to/joiner.onnx \
+     --num-threads=2 \
+     --decoding-method=greedy_search
+ 
+ (2) Paraformer from FunASR
+ 
+   ./bin/sherpa-onnx-microphone-offline \
+     --tokens=/path/to/tokens.txt \
+     --paraformer=/path/to/model.onnx \
+     --num-threads=2 \
+     --decoding-method=greedy_search
+ 
+ Default value for num_threads is 2.
+ Valid values for decoding_method: greedy_search.
+ 
+ Please refer to
+ https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+ for a list of pre-trained models to download.
+ )usage";
+ 
+   sherpa_onnx::ParseOptions po(kUsageMessage);
+   sherpa_onnx::OfflineRecognizerConfig config;
+   config.Register(&po);
+ 
+   po.Read(argc, argv);
+   if (po.NumArgs() != 0) {
+     // This program takes options only, no positional arguments.
+     po.PrintUsage();
+     exit(EXIT_FAILURE);
+   }
+ 
+   fprintf(stderr, "%s\n", config.ToString().c_str());
+ 
+   if (!config.Validate()) {
+     fprintf(stderr, "Errors in config!\n");
+     return -1;
+   }
+ 
+   SHERPA_ONNX_LOGE("Creating recognizer ...");
+   sherpa_onnx::OfflineRecognizer recognizer(config);
+   SHERPA_ONNX_LOGE("Recognizer created!");
+ 
+   // NOTE(review): presumably initializes PortAudio for the lifetime of this
+   // object -- confirm against microphone.h.
+   sherpa_onnx::Microphone mic;
+ 
+   PaDeviceIndex num_devices = Pa_GetDeviceCount();
+   fprintf(stderr, "Num devices: %d\n", num_devices);
+ 
+   PaStreamParameters param;
+ 
+   param.device = Pa_GetDefaultInputDevice();
+   if (param.device == paNoDevice) {
+     fprintf(stderr, "No default input device found\n");
+     exit(EXIT_FAILURE);
+   }
+   fprintf(stderr, "Use default device: %d\n", param.device);
+ 
+   const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
+   if (info == nullptr) {
+     // Do not dereference a missing device description below.
+     fprintf(stderr, "Failed to get info for device: %d\n", param.device);
+     exit(EXIT_FAILURE);
+   }
+   fprintf(stderr, "  Name: %s\n", info->name);
+   fprintf(stderr, "  Max input channels: %d\n", info->maxInputChannels);
+ 
+   param.channelCount = 1;
+   param.sampleFormat = paFloat32;
+ 
+   param.suggestedLatency = info->defaultLowInputLatency;
+   param.hostApiSpecificStreamInfo = nullptr;
+   float sample_rate = 16000;
+ 
+   PaStream *stream = nullptr;
+   PaError err =
+       Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
+                     sample_rate,
+                     0,          // frames per buffer
+                     paClipOff,  // we won't output out of range samples
+                                 // so don't bother clipping them
+                     RecordCallback, nullptr);
+   if (err != paNoError) {
+     fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
+     exit(EXIT_FAILURE);
+   }
+ 
+   err = Pa_StartStream(stream);
+   if (err != paNoError) {
+     fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
+     // The stream was opened successfully, so release it before bailing out.
+     Pa_CloseStream(stream);
+     exit(EXIT_FAILURE);
+   }
+   // Only report success after the start result has been checked.
+   fprintf(stderr, "Started\n");
+ 
+   std::thread t(DetectKeyPress);
+   while (!stop) {
+     const State current = state;
+     if (current == State::kDecoding) {
+       // Take ownership of the recording under the lock. swap() leaves
+       // `samples` in a well-defined (empty) state for the audio callback.
+       std::vector<float> buf;
+       {
+         std::lock_guard<std::mutex> lock(samples_mutex);
+         buf.swap(samples);
+       }
+ 
+       if (buf.empty()) {
+         // Enter was pressed twice before any audio arrived.
+         SHERPA_ONNX_LOGE("No audio was recorded. Nothing to decode.");
+       } else {
+         auto s = recognizer.CreateStream();
+         s->AcceptWaveform(sample_rate, buf.data(), buf.size());
+         recognizer.DecodeStream(s.get());
+         SHERPA_ONNX_LOGE("Decoding Done! Result is:");
+         SHERPA_ONNX_LOGE("%s", s->GetResult().text.c_str());
+       }
+ 
+       state = State::kIdle;
+       SHERPA_ONNX_LOGE("Press Enter to start");
+     }
+     // kIdle and kRecording need no work here; the key-press thread and the
+     // audio callback handle them.
+ 
+     Pa_Sleep(20);  // sleep for 20ms
+   }
+   // DetectKeyPress() only re-checks `stop` after its blocking getchar()
+   // returns, which is why the SIGINT message asks the user to press Enter.
+   t.join();
+ 
+   err = Pa_CloseStream(stream);
+   if (err != paNoError) {
+     fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
+     exit(EXIT_FAILURE);
+   }
+ 
+   return 0;
+ }
--- a/sherpa-onnx/csrc/sherpa-onnx-offline.cc
查看文件 @b911915
+++ b/sherpa-onnx/csrc/sherpa-onnx-offline.cc
查看文件 @b911915
@@ -66,6 +66,7 @@ for a list of pre-trained models to download.
     return -1;
   }
 
+   fprintf(stderr, "Creating recognizer ...\n");
   sherpa_onnx::OfflineRecognizer recognizer(config);
 
   auto begin = std::chrono::steady_clock::now();