add portaudio for reading microphones (#55)

Fangjun Kuang · GitHub
Commit a65dcf77b3306015f644dc677af29477dbafcb95 a65dcf77 1 parent 12438436
CMakeLists.txt
cmake/portaudio.cmake
sherpa-onnx/csrc/CMakeLists.txt
sherpa-onnx/csrc/microphone.cc
sherpa-onnx/csrc/microphone.h
sherpa-onnx/csrc/sherpa-onnx-microphone.cc
--- a/CMakeLists.txt
查看文件 @a65dcf7
+++ b/CMakeLists.txt
查看文件 @a65dcf7
@@ -15,6 +15,7 @@ option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF)
 option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF)
 option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" ON)
 option(BUILD_SHARED_LIBS "Whether to build shared libraries" OFF)
+ option(SHERPA_ONNX_ENABLE_PORTAUDIO "Whether to build with portaudio" ON)
 
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
@@ -49,6 +50,7 @@ message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}")
 message(STATUS "SHERPA_ONNX_ENABLE_PYTHON ${SHERPA_ONNX_ENABLE_PYTHON}")
 message(STATUS "SHERPA_ONNX_ENABLE_TESTS ${SHERPA_ONNX_ENABLE_TESTS}")
 message(STATUS "SHERPA_ONNX_ENABLE_CHECK ${SHERPA_ONNX_ENABLE_CHECK}")
+ message(STATUS "SHERPA_ONNX_ENABLE_PORTAUDIO ${SHERPA_ONNX_ENABLE_PORTAUDIO}")
 
 set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
 set(CMAKE_CXX_EXTENSIONS OFF)
@@ -68,6 +70,10 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
 include(kaldi-native-fbank)
 include(onnxruntime)
 
+ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
+   include(portaudio)
+ endif()
+ 
 if(SHERPA_ONNX_ENABLE_PYTHON)
   include(pybind11)
 endif()
--- a/cmake/portaudio.cmake 0 → 100644
查看文件 @a65dcf7
+++ b/cmake/portaudio.cmake 0 → 100644
查看文件 @a65dcf7
+ # Fetch the PortAudio sources and add them to the build.
+ #
+ # The archive is downloaded from portaudio_URL unless a file with the same
+ # name already exists in one of the local directories listed below, in which
+ # case that local copy is used instead. Either way the archive is checked
+ # against portaudio_HASH.
+ #
+ # PortAudio is configured as a shared or a static library according to
+ # BUILD_SHARED_LIBS.
+ function(download_portaudio)
+   include(FetchContent)
+ 
+   set(portaudio_URL  "http://files.portaudio.com/archives/pa_stable_v190700_20210406.tgz")
+   set(portaudio_HASH "SHA256=47efbf42c77c19a05d22e627d42873e991ec0c1357219c0d74ce6a2948cb2def")
+ 
+   # If you don't have access to the Internet, please download it to your
+   # local drive and modify the following line according to your needs.
+   set(possible_file_locations
+     "$ENV{HOME}/Downloads/pa_stable_v190700_20210406.tgz"
+     "$ENV{HOME}/asr/pa_stable_v190700_20210406.tgz"
+     "${PROJECT_SOURCE_DIR}/pa_stable_v190700_20210406.tgz"
+     "${PROJECT_BINARY_DIR}/pa_stable_v190700_20210406.tgz"
+     "/tmp/pa_stable_v190700_20210406.tgz"
+   )
+ 
+   foreach(f IN LISTS possible_file_locations)
+     if(EXISTS "${f}")
+       # The URL option also accepts a plain local path, so pass the path
+       # itself (normalized to forward slashes) instead of gluing "file://"
+       # in front of it, which yields a malformed URL for drive-letter paths.
+       file(TO_CMAKE_PATH "${f}" portaudio_URL)
+       break()
+     endif()
+   endforeach()
+ 
+   # Make PortAudio's library type follow BUILD_SHARED_LIBS. FORCE is used so
+   # that a stale cached value cannot disagree with BUILD_SHARED_LIBS.
+   if(BUILD_SHARED_LIBS)
+     set(PA_BUILD_SHARED ON CACHE BOOL "" FORCE)
+     set(PA_BUILD_STATIC OFF CACHE BOOL "" FORCE)
+   else()
+     set(PA_BUILD_SHARED OFF CACHE BOOL "" FORCE)
+     set(PA_BUILD_STATIC ON CACHE BOOL "" FORCE)
+   endif()
+ 
+   FetchContent_Declare(portaudio
+     URL               "${portaudio_URL}"
+     URL_HASH          "${portaudio_HASH}"
+   )
+ 
+   FetchContent_GetProperties(portaudio)
+   if(NOT portaudio_POPULATED)
+     message(STATUS "Downloading portaudio from ${portaudio_URL}")
+     FetchContent_Populate(portaudio)
+   endif()
+   message(STATUS "portaudio is downloaded to ${portaudio_SOURCE_DIR}")
+   message(STATUS "portaudio's binary dir is ${portaudio_BINARY_DIR}")
+ 
+   if(APPLE)
+     # Initializes the MACOSX_RPATH property of the targets created below.
+     # NOTE(review): the original comment said this silences a CMake warning
+     # on macOS but did not quote the warning.
+     set(CMAKE_MACOSX_RPATH ON)
+   endif()
+ 
+   add_subdirectory("${portaudio_SOURCE_DIR}" "${portaudio_BINARY_DIR}" EXCLUDE_FROM_ALL)
+ endfunction()
+ 
+ download_portaudio()
+ 
+ # Note
+ # See http://portaudio.com/docs/v19-doxydocs/tutorial_start.html
+ # for how to use portaudio
--- a/sherpa-onnx/csrc/CMakeLists.txt
查看文件 @a65dcf7
+++ b/sherpa-onnx/csrc/CMakeLists.txt
查看文件 @a65dcf7
@@ -65,6 +65,24 @@ if(SHERPA_ONNX_HAS_ALSA)
   install(TARGETS sherpa-onnx-alsa DESTINATION bin)
 endif()
 
+ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
+   # Pick the portaudio target whose library type (shared or static)
+   # follows BUILD_SHARED_LIBS, then build and install the executable.
+   if(NOT BUILD_SHARED_LIBS)
+     set(PA_LIB portaudio_static)
+   else()
+     set(PA_LIB portaudio)
+   endif()
+ 
+   add_executable(sherpa-onnx-microphone sherpa-onnx-microphone.cc microphone.cc)
+   target_link_libraries(sherpa-onnx-microphone PRIVATE
+     ${PA_LIB}
+     sherpa-onnx-core
+   )
+   install(TARGETS sherpa-onnx-microphone DESTINATION bin)
+ endif()
+ 
+ 
 if(SHERPA_ONNX_ENABLE_TESTS)
   set(sherpa_onnx_test_srcs
     cat-test.cc
--- a/sherpa-onnx/csrc/microphone.cc 0 → 100644
查看文件 @a65dcf7
+++ b/sherpa-onnx/csrc/microphone.cc 0 → 100644
查看文件 @a65dcf7
+ // sherpa-onnx/csrc/microphone.cc
+ //
+ // Copyright (c)  2022-2023  Xiaomi Corporation
+ 
+ #include "sherpa-onnx/csrc/microphone.h"
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include "portaudio.h"  // NOLINT
+ 
+ namespace sherpa_onnx {
+ 
+ // Initializes the PortAudio library.
+ //
+ // If Pa_Initialize() fails, the PortAudio error text is printed to stderr and
+ // the process exits with EXIT_FAILURE.
+ Microphone::Microphone() {
+   PaError err = Pa_Initialize();
+   if (err != paNoError) {
+     fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
+     // EXIT_FAILURE rather than -1: a negative status is not portable.
+     exit(EXIT_FAILURE);
+   }
+ }
+ 
+ // Shuts down the PortAudio library.
+ //
+ // A failing Pa_Terminate() is only reported on stderr. The destructor does
+ // not call exit(): doing so would skip the destructors of other automatic
+ // objects, and is undefined behaviour if this object is itself being
+ // destroyed as part of exit() processing.
+ Microphone::~Microphone() {
+   PaError err = Pa_Terminate();
+   if (err != paNoError) {
+     fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
+   }
+ }
+ 
+ }  // namespace sherpa_onnx
--- a/sherpa-onnx/csrc/microphone.h 0 → 100644
查看文件 @a65dcf7
+++ b/sherpa-onnx/csrc/microphone.h 0 → 100644
查看文件 @a65dcf7
+ // sherpa-onnx/csrc/microphone.h
+ //
+ // Copyright (c)  2022-2023  Xiaomi Corporation
+ 
+ #ifndef SHERPA_ONNX_CSRC_MICROPHONE_H_
+ #define SHERPA_ONNX_CSRC_MICROPHONE_H_
+ 
+ namespace sherpa_onnx {
+ 
+ // Ties the lifetime of the PortAudio library to an object: the constructor
+ // initializes PortAudio and the destructor terminates it.
+ class Microphone {
+  public:
+   Microphone();
+   ~Microphone();
+ 
+   // Not copyable: the destructor of every copy would call Pa_Terminate()
+   // again without a matching Pa_Initialize().
+   Microphone(const Microphone &) = delete;
+   Microphone &operator=(const Microphone &) = delete;
+ };
+ 
+ }  // namespace sherpa_onnx
+ 
+ #endif  // SHERPA_ONNX_CSRC_MICROPHONE_H_
--- a/sherpa-onnx/csrc/sherpa-onnx-microphone.cc 0 → 100644
查看文件 @a65dcf7
+++ b/sherpa-onnx/csrc/sherpa-onnx-microphone.cc 0 → 100644
查看文件 @a65dcf7
+ // sherpa-onnx/csrc/sherpa-onnx-microphone.cc
+ //
+ // Copyright (c)  2022-2023  Xiaomi Corporation
+ 
+ #include <signal.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include <algorithm>
+ #include <atomic>
+ #include <cctype>  // std::tolower
+ #include <cstdint>
+ #include <string>
+ 
+ #include "portaudio.h"  // NOLINT
+ #include "sherpa-onnx/csrc/display.h"
+ #include "sherpa-onnx/csrc/microphone.h"
+ #include "sherpa-onnx/csrc/online-recognizer.h"
+ 
+ // Set to true by the SIGINT handler; polled by the audio callback and by the
+ // main loop. Atomic because it is written from a signal handler and read from
+ // the PortAudio callback, which PortAudio may run outside the main thread.
+ std::atomic<bool> stop{false};
+ 
+ // PortAudio stream callback: hands each captured buffer to the OnlineStream
+ // passed in through `user_data`.
+ //
+ // The buffer is read as float samples and forwarded with a sample rate of
+ // 16000. Returns paComplete once `stop` is set and paContinue otherwise.
+ static int32_t RecordCallback(const void *input_buffer,
+                               void * /*output_buffer*/,
+                               unsigned long frames_per_buffer,  // NOLINT
+                               const PaStreamCallbackTimeInfo * /*time_info*/,
+                               PaStreamCallbackFlags /*status_flags*/,
+                               void *user_data) {
+   const auto *samples = reinterpret_cast<const float *>(input_buffer);
+   auto *online_stream =
+       reinterpret_cast<sherpa_onnx::OnlineStream *>(user_data);
+ 
+   online_stream->AcceptWaveform(16000, samples, frames_per_buffer);
+ 
+   if (stop) {
+     return paComplete;
+   }
+   return paContinue;
+ }
+ 
+ // Signal handler installed for SIGINT by main(): raises the `stop` flag and
+ // prints a notice to stderr.
+ //
+ // NOTE(review): stdio output is not async-signal-safe; consider printing the
+ // notice from the main loop after it observes `stop` instead.
+ static void Handler(int32_t /*sig*/) {
+   stop = true;
+   fputs("\nCaught Ctrl + C. Exiting...\n", stderr);
+ }
+ 
+ // Streams audio from the default input device into an online recognizer and
+ // prints the recognized text until Ctrl + C is pressed.
+ //
+ // Command line: tokens.txt, encoder.onnx, decoder.onnx, joiner.onnx and an
+ // optional positive number of threads (2 is used when it is omitted or not
+ // positive). Prints the usage text and returns 0 on a wrong argument count;
+ // exits with EXIT_FAILURE on a PortAudio error.
+ int32_t main(int32_t argc, char *argv[]) {
+   if (argc < 5 || argc > 6) {
+     const char *usage = R"usage(
+ Usage:
+   ./bin/sherpa-onnx-microphone \
+     /path/to/tokens.txt \
+     /path/to/encoder.onnx\
+     /path/to/decoder.onnx\
+     /path/to/joiner.onnx\
+     [num_threads]
+ 
+ Please refer to
+ https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+ for a list of pre-trained models to download.
+ )usage";
+     fprintf(stderr, "%s\n", usage);
+     fprintf(stderr, "argc, %d\n", argc);
+ 
+     return 0;
+   }
+   signal(SIGINT, Handler);
+ 
+   sherpa_onnx::OnlineRecognizerConfig config;
+   config.tokens = argv[1];
+ 
+   config.model_config.debug = false;
+   config.model_config.encoder_filename = argv[2];
+   config.model_config.decoder_filename = argv[3];
+   config.model_config.joiner_filename = argv[4];
+ 
+   config.model_config.num_threads = 2;
+   if (argc == 6) {
+     // Parse the argument once; a non-positive or non-numeric value keeps the
+     // default set above.
+     const int32_t num_threads = atoi(argv[5]);
+     if (num_threads > 0) {
+       config.model_config.num_threads = num_threads;
+     }
+   }
+ 
+   config.enable_endpoint = true;
+ 
+   config.endpoint_config.rule1.min_trailing_silence = 2.4;
+   config.endpoint_config.rule2.min_trailing_silence = 1.2;
+   config.endpoint_config.rule3.min_utterance_length = 300;
+ 
+   fprintf(stderr, "%s\n", config.ToString().c_str());
+ 
+   sherpa_onnx::OnlineRecognizer recognizer(config);
+   auto s = recognizer.CreateStream();
+ 
+   // Keeps PortAudio initialized for the rest of main().
+   sherpa_onnx::Microphone mic;
+ 
+   PaDeviceIndex num_devices = Pa_GetDeviceCount();
+   fprintf(stderr, "Num devices: %d\n", num_devices);
+ 
+   PaStreamParameters param;
+ 
+   param.device = Pa_GetDefaultInputDevice();
+   if (param.device == paNoDevice) {
+     fprintf(stderr, "No default input device found\n");
+     exit(EXIT_FAILURE);
+   }
+   fprintf(stderr, "Use default device: %d\n", param.device);
+ 
+   const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
+   if (info == nullptr) {
+     // Pa_GetDeviceInfo() returns NULL for an invalid device index; do not
+     // dereference it below.
+     fprintf(stderr, "Failed to get info for device: %d\n", param.device);
+     exit(EXIT_FAILURE);
+   }
+   fprintf(stderr, "  Name: %s\n", info->name);
+   fprintf(stderr, "  Max input channels: %d\n", info->maxInputChannels);
+ 
+   param.channelCount = 1;
+   param.sampleFormat = paFloat32;
+ 
+   param.suggestedLatency = info->defaultLowInputLatency;
+   param.hostApiSpecificStreamInfo = nullptr;
+   const float sample_rate = 16000;
+ 
+   PaStream *stream;
+   PaError err =
+       Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
+                     sample_rate,
+                     0,          // frames per buffer
+                     paClipOff,  // we won't output out of range samples
+                                 // so don't bother clipping them
+                     RecordCallback, s.get());
+   if (err != paNoError) {
+     fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
+     exit(EXIT_FAILURE);
+   }
+ 
+   err = Pa_StartStream(stream);
+   if (err != paNoError) {
+     fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
+     exit(EXIT_FAILURE);
+   }
+   // Report success only after the start result has been checked.
+   fprintf(stderr, "Started\n");
+ 
+   std::string last_text;
+   int32_t segment_index = 0;
+   sherpa_onnx::Display display;
+   while (!stop) {
+     // Decode everything the audio callback has fed into the stream so far.
+     while (recognizer.IsReady(s.get())) {
+       recognizer.DecodeStream(s.get());
+     }
+ 
+     auto text = recognizer.GetResult(s.get()).text;
+     bool is_endpoint = recognizer.IsEndpoint(s.get());
+ 
+     if (!text.empty() && last_text != text) {
+       last_text = text;
+ 
+       // std::tolower() requires a value representable as unsigned char;
+       // passing a negative char (any non-ASCII byte) is undefined behaviour.
+       std::transform(text.begin(), text.end(), text.begin(),
+                      [](unsigned char c) {
+                        return static_cast<char>(std::tolower(c));
+                      });
+ 
+       display.Print(segment_index, text);
+     }
+ 
+     if (!text.empty() && is_endpoint) {
+       ++segment_index;
+       recognizer.Reset(s.get());
+     }
+ 
+     Pa_Sleep(20);  // sleep for 20ms
+   }
+ 
+   err = Pa_CloseStream(stream);
+   if (err != paNoError) {
+     fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
+     exit(EXIT_FAILURE);
+   }
+ 
+   return 0;
+ }