Fangjun Kuang
Committed by GitHub

add portaudio for reading microphones (#55)

@@ -15,6 +15,7 @@ option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF) @@ -15,6 +15,7 @@ option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF)
15 option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF) 15 option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF)
16 option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" ON) 16 option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" ON)
17 option(BUILD_SHARED_LIBS "Whether to build shared libraries" OFF) 17 option(BUILD_SHARED_LIBS "Whether to build shared libraries" OFF)
  18 +option(SHERPA_ONNX_ENABLE_PORTAUDIO "Whether to build with portaudio" ON)
18 19
19 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") 20 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
20 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") 21 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
@@ -49,6 +50,7 @@ message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}") @@ -49,6 +50,7 @@ message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}")
49 message(STATUS "SHERPA_ONNX_ENABLE_PYTHON ${SHERPA_ONNX_ENABLE_PYTHON}") 50 message(STATUS "SHERPA_ONNX_ENABLE_PYTHON ${SHERPA_ONNX_ENABLE_PYTHON}")
50 message(STATUS "SHERPA_ONNX_ENABLE_TESTS ${SHERPA_ONNX_ENABLE_TESTS}") 51 message(STATUS "SHERPA_ONNX_ENABLE_TESTS ${SHERPA_ONNX_ENABLE_TESTS}")
51 message(STATUS "SHERPA_ONNX_ENABLE_CHECK ${SHERPA_ONNX_ENABLE_CHECK}") 52 message(STATUS "SHERPA_ONNX_ENABLE_CHECK ${SHERPA_ONNX_ENABLE_CHECK}")
  53 +message(STATUS "SHERPA_ONNX_ENABLE_PORTAUDIO ${SHERPA_ONNX_ENABLE_PORTAUDIO}")
52 54
53 set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") 55 set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
54 set(CMAKE_CXX_EXTENSIONS OFF) 56 set(CMAKE_CXX_EXTENSIONS OFF)
@@ -68,6 +70,10 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) @@ -68,6 +70,10 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
68 include(kaldi-native-fbank) 70 include(kaldi-native-fbank)
69 include(onnxruntime) 71 include(onnxruntime)
70 72
  73 +if(SHERPA_ONNX_ENABLE_PORTAUDIO)
  74 + include(portaudio)
  75 +endif()
  76 +
71 if(SHERPA_ONNX_ENABLE_PYTHON) 77 if(SHERPA_ONNX_ENABLE_PYTHON)
72 include(pybind11) 78 include(pybind11)
73 endif() 79 endif()
  1 +function(download_portaudio)
  2 + include(FetchContent)
  3 +
  4 + set(portaudio_URL "http://files.portaudio.com/archives/pa_stable_v190700_20210406.tgz")
  5 + set(portaudio_HASH "SHA256=47efbf42c77c19a05d22e627d42873e991ec0c1357219c0d74ce6a2948cb2def")
  6 +
  7 + # If you don't have access to the Internet, please download it to your
  8 + # local drive and modify the following line according to your needs.
  9 + set(possible_file_locations
  10 + $ENV{HOME}/Downloads/pa_stable_v190700_20210406.tgz
  11 + $ENV{HOME}/asr/pa_stable_v190700_20210406.tgz
  12 + ${PROJECT_SOURCE_DIR}/pa_stable_v190700_20210406.tgz
  13 + ${PROJECT_BINARY_DIR}/pa_stable_v190700_20210406.tgz
  14 + /tmp/pa_stable_v190700_20210406.tgz
  15 + )
  16 +
  17 + foreach(f IN LISTS possible_file_locations)
  18 + if(EXISTS ${f})
  19 + set(portaudio_URL "file://${f}")
  20 + break()
  21 + endif()
  22 + endforeach()
  23 +
  24 + if(BUILD_SHARED_LIBS)
  25 + set(PA_BUILD_SHARED ON CACHE BOOL "" FORCE)
  26 + set(PA_BUILD_STATIC OFF CACHE BOOL "" FORCE)
  27 + else()
  28 + set(PA_BUILD_SHARED OFF CACHE BOOL "" FORCE)
  29 + set(PA_BUILD_STATIC ON CACHE BOOL "" FORCE)
  30 + endif()
  31 +
  32 + FetchContent_Declare(portaudio
  33 + URL ${portaudio_URL}
  34 + URL_HASH ${portaudio_HASH}
  35 + )
  36 +
  37 + FetchContent_GetProperties(portaudio)
  38 + if(NOT portaudio_POPULATED)
  39 + message(STATUS "Downloading portaudio from ${portaudio_URL}")
  40 + FetchContent_Populate(portaudio)
  41 + endif()
  42 + message(STATUS "portaudio is downloaded to ${portaudio_SOURCE_DIR}")
  43 + message(STATUS "portaudio's binary dir is ${portaudio_BINARY_DIR}")
  44 +
  45 + if(APPLE)
  46 + set(CMAKE_MACOSX_RPATH ON) # to solve the following warning on macOS
  47 + endif()
  48 +
  49 + add_subdirectory(${portaudio_SOURCE_DIR} ${portaudio_BINARY_DIR} EXCLUDE_FROM_ALL)
  50 +endfunction()
  51 +
  52 +download_portaudio()
  53 +
  54 +# Note
  55 +# See http://portaudio.com/docs/v19-doxydocs/tutorial_start.html
  56 +# for how to use portaudio
@@ -65,6 +65,24 @@ if(SHERPA_ONNX_HAS_ALSA) @@ -65,6 +65,24 @@ if(SHERPA_ONNX_HAS_ALSA)
65 install(TARGETS sherpa-onnx-alsa DESTINATION bin) 65 install(TARGETS sherpa-onnx-alsa DESTINATION bin)
66 endif() 66 endif()
67 67
  68 +if(SHERPA_ONNX_ENABLE_PORTAUDIO)
  69 + add_executable(sherpa-onnx-microphone
  70 + sherpa-onnx-microphone.cc
  71 + microphone.cc
  72 + )
  73 +
  74 + if(BUILD_SHARED_LIBS)
  75 + set(PA_LIB portaudio)
  76 + else()
  77 + set(PA_LIB portaudio_static)
  78 + endif()
  79 +
  80 + target_link_libraries(sherpa-onnx-microphone PRIVATE ${PA_LIB} sherpa-onnx-core)
  81 +
  82 + install(TARGETS sherpa-onnx-microphone DESTINATION bin)
  83 +endif()
  84 +
  85 +
68 if(SHERPA_ONNX_ENABLE_TESTS) 86 if(SHERPA_ONNX_ENABLE_TESTS)
69 set(sherpa_onnx_test_srcs 87 set(sherpa_onnx_test_srcs
70 cat-test.cc 88 cat-test.cc
  1 +// sherpa-onnx/csrc/microphone.cc
  2 +//
  3 +// Copyright (c) 2022-2023 Xiaomi Corporation
  4 +
  5 +#include "sherpa-onnx/csrc/microphone.h"
  6 +
  7 +#include <stdio.h>
  8 +#include <stdlib.h>
  9 +
  10 +#include "portaudio.h" // NOLINT
  11 +
  12 +namespace sherpa_onnx {
  13 +
  14 +Microphone::Microphone() {
  15 + PaError err = Pa_Initialize();
  16 + if (err != paNoError) {
  17 + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  18 + exit(-1);
  19 + }
  20 +}
  21 +
  22 +Microphone::~Microphone() {
  23 + PaError err = Pa_Terminate();
  24 + if (err != paNoError) {
  25 + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  26 + exit(-1);
  27 + }
  28 +}
  29 +
  30 +} // namespace sherpa_onnx
  1 +// sherpa-onnx/csrc/microphone.h
  2 +//
  3 +// Copyright (c) 2022-2023 Xiaomi Corporation
  4 +
  5 +#ifndef SHERPA_ONNX_CSRC_MICROPHONE_H_
  6 +#define SHERPA_ONNX_CSRC_MICROPHONE_H_
  7 +
  8 +namespace sherpa_onnx {
  9 +
  10 +class Microphone {
  11 + public:
  12 + Microphone();
  13 + ~Microphone();
  14 +};
  15 +
  16 +} // namespace sherpa_onnx
  17 +
  18 +#endif // SHERPA_ONNX_CSRC_MICROPHONE_H_
  1 +// sherpa-onnx/csrc/sherpa-onnx-microphone.cc
  2 +//
  3 +// Copyright (c) 2022-2023 Xiaomi Corporation
  4 +
  #include <signal.h>
  #include <stdio.h>
  #include <stdlib.h>

  #include <algorithm>
  #include <atomic>
  #include <cctype>  // std::tolower
  #include <string>

  #include "portaudio.h"  // NOLINT
  #include "sherpa-onnx/csrc/display.h"
  #include "sherpa-onnx/csrc/microphone.h"
  #include "sherpa-onnx/csrc/online-recognizer.h"
  16 +
  17 +bool stop = false;
  18 +
  19 +static int32_t RecordCallback(const void *input_buffer,
  20 + void * /*output_buffer*/,
  21 + unsigned long frames_per_buffer, // NOLINT
  22 + const PaStreamCallbackTimeInfo * /*time_info*/,
  23 + PaStreamCallbackFlags /*status_flags*/,
  24 + void *user_data) {
  25 + auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(user_data);
  26 +
  27 + stream->AcceptWaveform(16000, reinterpret_cast<const float *>(input_buffer),
  28 + frames_per_buffer);
  29 +
  30 + return stop ? paComplete : paContinue;
  31 +}
  32 +
  33 +static void Handler(int32_t sig) {
  34 + stop = true;
  35 + fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
  36 +}
  37 +
  38 +int32_t main(int32_t argc, char *argv[]) {
  39 + if (argc < 5 || argc > 6) {
  40 + const char *usage = R"usage(
  41 +Usage:
  42 + ./bin/sherpa-onnx-microphone \
  43 + /path/to/tokens.txt \
  44 + /path/to/encoder.onnx\
  45 + /path/to/decoder.onnx\
  46 + /path/to/joiner.onnx\
  47 + [num_threads]
  48 +
  49 +Please refer to
  50 +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  51 +for a list of pre-trained models to download.
  52 +)usage";
  53 + fprintf(stderr, "%s\n", usage);
  54 + fprintf(stderr, "argc, %d\n", argc);
  55 +
  56 + return 0;
  57 + }
  58 + signal(SIGINT, Handler);
  59 +
  60 + sherpa_onnx::OnlineRecognizerConfig config;
  61 + config.tokens = argv[1];
  62 +
  63 + config.model_config.debug = false;
  64 + config.model_config.encoder_filename = argv[2];
  65 + config.model_config.decoder_filename = argv[3];
  66 + config.model_config.joiner_filename = argv[4];
  67 +
  68 + config.model_config.num_threads = 2;
  69 + if (argc == 6 && atoi(argv[5]) > 0) {
  70 + config.model_config.num_threads = atoi(argv[5]);
  71 + }
  72 +
  73 + config.enable_endpoint = true;
  74 +
  75 + config.endpoint_config.rule1.min_trailing_silence = 2.4;
  76 + config.endpoint_config.rule2.min_trailing_silence = 1.2;
  77 + config.endpoint_config.rule3.min_utterance_length = 300;
  78 +
  79 + fprintf(stderr, "%s\n", config.ToString().c_str());
  80 +
  81 + sherpa_onnx::OnlineRecognizer recognizer(config);
  82 + auto s = recognizer.CreateStream();
  83 +
  84 + sherpa_onnx::Microphone mic;
  85 +
  86 + PaDeviceIndex num_devices = Pa_GetDeviceCount();
  87 + fprintf(stderr, "Num devices: %d\n", num_devices);
  88 +
  89 + PaStreamParameters param;
  90 +
  91 + param.device = Pa_GetDefaultInputDevice();
  92 + if (param.device == paNoDevice) {
  93 + fprintf(stderr, "No default input device found\n");
  94 + exit(EXIT_FAILURE);
  95 + }
  96 + fprintf(stderr, "Use default device: %d\n", param.device);
  97 +
  98 + const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
  99 + fprintf(stderr, " Name: %s\n", info->name);
  100 + fprintf(stderr, " Max input channels: %d\n", info->maxInputChannels);
  101 +
  102 + param.channelCount = 1;
  103 + param.sampleFormat = paFloat32;
  104 +
  105 + param.suggestedLatency = info->defaultLowInputLatency;
  106 + param.hostApiSpecificStreamInfo = nullptr;
  107 + const float sample_rate = 16000;
  108 +
  109 + PaStream *stream;
  110 + PaError err =
  111 + Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
  112 + sample_rate,
  113 + 0, // frames per buffer
  114 + paClipOff, // we won't output out of range samples
  115 + // so don't bother clipping them
  116 + RecordCallback, s.get());
  117 + if (err != paNoError) {
  118 + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  119 + exit(EXIT_FAILURE);
  120 + }
  121 +
  122 + err = Pa_StartStream(stream);
  123 + fprintf(stderr, "Started\n");
  124 +
  125 + if (err != paNoError) {
  126 + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  127 + exit(EXIT_FAILURE);
  128 + }
  129 +
  130 + std::string last_text;
  131 + int32_t segment_index = 0;
  132 + sherpa_onnx::Display display;
  133 + while (!stop) {
  134 + while (recognizer.IsReady(s.get())) {
  135 + recognizer.DecodeStream(s.get());
  136 + }
  137 +
  138 + auto text = recognizer.GetResult(s.get()).text;
  139 + bool is_endpoint = recognizer.IsEndpoint(s.get());
  140 +
  141 + if (!text.empty() && last_text != text) {
  142 + last_text = text;
  143 +
  144 + std::transform(text.begin(), text.end(), text.begin(),
  145 + [](auto c) { return std::tolower(c); });
  146 +
  147 + display.Print(segment_index, text);
  148 + }
  149 +
  150 + if (!text.empty() && is_endpoint) {
  151 + ++segment_index;
  152 + recognizer.Reset(s.get());
  153 + }
  154 +
  155 + Pa_Sleep(20); // sleep for 20ms
  156 + }
  157 +
  158 + err = Pa_CloseStream(stream);
  159 + if (err != paNoError) {
  160 + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  161 + exit(EXIT_FAILURE);
  162 + }
  163 +
  164 + return 0;
  165 +}