Committed by
GitHub
add portaudio for reading microphones (#55)
正在显示
6 个修改的文件
包含
293 行增加
和
0 行删除
| @@ -15,6 +15,7 @@ option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF) | @@ -15,6 +15,7 @@ option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF) | ||
| 15 | option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF) | 15 | option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF) |
| 16 | option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" ON) | 16 | option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" ON) |
| 17 | option(BUILD_SHARED_LIBS "Whether to build shared libraries" OFF) | 17 | option(BUILD_SHARED_LIBS "Whether to build shared libraries" OFF) |
| 18 | +option(SHERPA_ONNX_ENABLE_PORTAUDIO "Whether to build with portaudio" ON) | ||
| 18 | 19 | ||
| 19 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") | 20 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") |
| 20 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") | 21 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") |
| @@ -49,6 +50,7 @@ message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}") | @@ -49,6 +50,7 @@ message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}") | ||
| 49 | message(STATUS "SHERPA_ONNX_ENABLE_PYTHON ${SHERPA_ONNX_ENABLE_PYTHON}") | 50 | message(STATUS "SHERPA_ONNX_ENABLE_PYTHON ${SHERPA_ONNX_ENABLE_PYTHON}") |
| 50 | message(STATUS "SHERPA_ONNX_ENABLE_TESTS ${SHERPA_ONNX_ENABLE_TESTS}") | 51 | message(STATUS "SHERPA_ONNX_ENABLE_TESTS ${SHERPA_ONNX_ENABLE_TESTS}") |
| 51 | message(STATUS "SHERPA_ONNX_ENABLE_CHECK ${SHERPA_ONNX_ENABLE_CHECK}") | 52 | message(STATUS "SHERPA_ONNX_ENABLE_CHECK ${SHERPA_ONNX_ENABLE_CHECK}") |
| 53 | +message(STATUS "SHERPA_ONNX_ENABLE_PORTAUDIO ${SHERPA_ONNX_ENABLE_PORTAUDIO}") | ||
| 52 | 54 | ||
| 53 | set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") | 55 | set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") |
| 54 | set(CMAKE_CXX_EXTENSIONS OFF) | 56 | set(CMAKE_CXX_EXTENSIONS OFF) |
| @@ -68,6 +70,10 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) | @@ -68,6 +70,10 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) | ||
| 68 | include(kaldi-native-fbank) | 70 | include(kaldi-native-fbank) |
| 69 | include(onnxruntime) | 71 | include(onnxruntime) |
| 70 | 72 | ||
| 73 | +if(SHERPA_ONNX_ENABLE_PORTAUDIO) | ||
| 74 | + include(portaudio) | ||
| 75 | +endif() | ||
| 76 | + | ||
| 71 | if(SHERPA_ONNX_ENABLE_PYTHON) | 77 | if(SHERPA_ONNX_ENABLE_PYTHON) |
| 72 | include(pybind11) | 78 | include(pybind11) |
| 73 | endif() | 79 | endif() |
cmake/portaudio.cmake
0 → 100644
# Fetches the portaudio sources and adds them to the build as a subdirectory.
#
# The archive pa_stable_v190700_20210406.tgz is taken from the first existing
# entry in possible_file_locations; if none exists it is downloaded from
# portaudio_URL. In both cases the archive is verified against portaudio_HASH.
#
# PA_BUILD_SHARED / PA_BUILD_STATIC are forced to follow BUILD_SHARED_LIBS
# (presumably these are the options read by portaudio's own CMakeLists.txt --
# confirm when upgrading portaudio).
function(download_portaudio)
  include(FetchContent)

  set(portaudio_URL  "http://files.portaudio.com/archives/pa_stable_v190700_20210406.tgz")
  set(portaudio_HASH "SHA256=47efbf42c77c19a05d22e627d42873e991ec0c1357219c0d74ce6a2948cb2def")

  # If you don't have access to the Internet, please download it to your
  # local drive and modify the following line according to your needs.
  set(possible_file_locations
    $ENV{HOME}/Downloads/pa_stable_v190700_20210406.tgz
    $ENV{HOME}/asr/pa_stable_v190700_20210406.tgz
    ${PROJECT_SOURCE_DIR}/pa_stable_v190700_20210406.tgz
    ${PROJECT_BINARY_DIR}/pa_stable_v190700_20210406.tgz
    /tmp/pa_stable_v190700_20210406.tgz
  )

  # Prefer the first local copy that exists over the network download.
  foreach(f IN LISTS possible_file_locations)
    # Quote the path so that it is always passed to EXISTS as one argument.
    if(EXISTS "${f}")
      set(portaudio_URL "file://${f}")
      break()
    endif()
  endforeach()

  # Build exactly one flavor of portaudio, matching the rest of the project.
  if(BUILD_SHARED_LIBS)
    set(PA_BUILD_SHARED ON CACHE BOOL "" FORCE)
    set(PA_BUILD_STATIC OFF CACHE BOOL "" FORCE)
  else()
    set(PA_BUILD_SHARED OFF CACHE BOOL "" FORCE)
    set(PA_BUILD_STATIC ON CACHE BOOL "" FORCE)
  endif()

  FetchContent_Declare(portaudio
    URL               ${portaudio_URL}
    URL_HASH          ${portaudio_HASH}
  )

  FetchContent_GetProperties(portaudio)
  if(NOT portaudio_POPULATED)
    message(STATUS "Downloading portaudio from ${portaudio_URL}")
    FetchContent_Populate(portaudio)
  endif()
  message(STATUS "portaudio is downloaded to ${portaudio_SOURCE_DIR}")
  message(STATUS "portaudio's binary dir is ${portaudio_BINARY_DIR}")

  if(APPLE)
    # Enable MACOSX_RPATH explicitly before adding portaudio; the original
    # note says this avoids a CMake warning on macOS (likely the
    # MACOSX_RPATH policy warning -- confirm).
    set(CMAKE_MACOSX_RPATH ON)
  endif()

  # EXCLUDE_FROM_ALL: only build the portaudio targets that we link against.
  add_subdirectory(${portaudio_SOURCE_DIR} ${portaudio_BINARY_DIR} EXCLUDE_FROM_ALL)
endfunction()
| 51 | + | ||
| 52 | +download_portaudio() | ||
| 53 | + | ||
| 54 | +# Note | ||
| 55 | +# See http://portaudio.com/docs/v19-doxydocs/tutorial_start.html | ||
| 56 | +# for how to use portaudio |
| @@ -65,6 +65,24 @@ if(SHERPA_ONNX_HAS_ALSA) | @@ -65,6 +65,24 @@ if(SHERPA_ONNX_HAS_ALSA) | ||
| 65 | install(TARGETS sherpa-onnx-alsa DESTINATION bin) | 65 | install(TARGETS sherpa-onnx-alsa DESTINATION bin) |
| 66 | endif() | 66 | endif() |
| 67 | 67 | ||
if(SHERPA_ONNX_ENABLE_PORTAUDIO)
  # Pick the portaudio target that matches the library flavor being built:
  # start from the static target and switch to the shared one when needed.
  set(PA_LIB portaudio_static)
  if(BUILD_SHARED_LIBS)
    set(PA_LIB portaudio)
  endif()

  # Command line tool that recognizes speech read from a microphone.
  add_executable(sherpa-onnx-microphone
    sherpa-onnx-microphone.cc
    microphone.cc
  )
  target_link_libraries(sherpa-onnx-microphone
    PRIVATE
      ${PA_LIB}
      sherpa-onnx-core
  )

  install(TARGETS sherpa-onnx-microphone DESTINATION bin)
endif()
| 84 | + | ||
| 85 | + | ||
| 68 | if(SHERPA_ONNX_ENABLE_TESTS) | 86 | if(SHERPA_ONNX_ENABLE_TESTS) |
| 69 | set(sherpa_onnx_test_srcs | 87 | set(sherpa_onnx_test_srcs |
| 70 | cat-test.cc | 88 | cat-test.cc |
sherpa-onnx/csrc/microphone.cc
0 → 100644
| 1 | +// sherpa-onnx/csrc/microphone.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2022-2023 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#include "sherpa-onnx/csrc/microphone.h" | ||
| 6 | + | ||
| 7 | +#include <stdio.h> | ||
| 8 | +#include <stdlib.h> | ||
| 9 | + | ||
| 10 | +#include "portaudio.h" // NOLINT | ||
| 11 | + | ||
| 12 | +namespace sherpa_onnx { | ||
| 13 | + | ||
| 14 | +Microphone::Microphone() { | ||
| 15 | + PaError err = Pa_Initialize(); | ||
| 16 | + if (err != paNoError) { | ||
| 17 | + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err)); | ||
| 18 | + exit(-1); | ||
| 19 | + } | ||
| 20 | +} | ||
| 21 | + | ||
| 22 | +Microphone::~Microphone() { | ||
| 23 | + PaError err = Pa_Terminate(); | ||
| 24 | + if (err != paNoError) { | ||
| 25 | + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err)); | ||
| 26 | + exit(-1); | ||
| 27 | + } | ||
| 28 | +} | ||
| 29 | + | ||
| 30 | +} // namespace sherpa_onnx |
sherpa-onnx/csrc/microphone.h
0 → 100644
| 1 | +// sherpa-onnx/csrc/microphone.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2022-2023 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#ifndef SHERPA_ONNX_CSRC_MICROPHONE_H_ | ||
| 6 | +#define SHERPA_ONNX_CSRC_MICROPHONE_H_ | ||
| 7 | + | ||
| 8 | +namespace sherpa_onnx { | ||
| 9 | + | ||
// Scope guard for the PortAudio library: the constructor initializes
// PortAudio and the destructor terminates it, so PortAudio can be used for
// as long as a Microphone object is alive.
class Microphone {
 public:
  // Initializes PortAudio; exits the process if initialization fails.
  Microphone();

  // Terminates PortAudio.
  ~Microphone();

  // Not copyable: every copy would terminate PortAudio once more in its
  // destructor without a matching initialization.
  Microphone(const Microphone &) = delete;
  Microphone &operator=(const Microphone &) = delete;
};
| 15 | + | ||
| 16 | +} // namespace sherpa_onnx | ||
| 17 | + | ||
| 18 | +#endif // SHERPA_ONNX_CSRC_MICROPHONE_H_ |
sherpa-onnx/csrc/sherpa-onnx-microphone.cc
0 → 100644
| 1 | +// sherpa-onnx/csrc/sherpa-onnx-microphone.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2022-2023 Xiaomi Corporation | ||
| 4 | + | ||
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <atomic>
#include <cctype>  // std::tolower
#include <cstdint>
#include <string>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
| 16 | + | ||
// Set to true by the SIGINT handler to request shutdown. It is read by both
// the PortAudio record callback and the main loop, so it is atomic to avoid
// a data race between the writer and those readers.
std::atomic<bool> stop{false};
| 18 | + | ||
| 19 | +static int32_t RecordCallback(const void *input_buffer, | ||
| 20 | + void * /*output_buffer*/, | ||
| 21 | + unsigned long frames_per_buffer, // NOLINT | ||
| 22 | + const PaStreamCallbackTimeInfo * /*time_info*/, | ||
| 23 | + PaStreamCallbackFlags /*status_flags*/, | ||
| 24 | + void *user_data) { | ||
| 25 | + auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(user_data); | ||
| 26 | + | ||
| 27 | + stream->AcceptWaveform(16000, reinterpret_cast<const float *>(input_buffer), | ||
| 28 | + frames_per_buffer); | ||
| 29 | + | ||
| 30 | + return stop ? paComplete : paContinue; | ||
| 31 | +} | ||
| 32 | + | ||
| 33 | +static void Handler(int32_t sig) { | ||
| 34 | + stop = true; | ||
| 35 | + fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n"); | ||
| 36 | +} | ||
| 37 | + | ||
| 38 | +int32_t main(int32_t argc, char *argv[]) { | ||
| 39 | + if (argc < 5 || argc > 6) { | ||
| 40 | + const char *usage = R"usage( | ||
| 41 | +Usage: | ||
| 42 | + ./bin/sherpa-onnx-microphone \ | ||
| 43 | + /path/to/tokens.txt \ | ||
| 44 | + /path/to/encoder.onnx\ | ||
| 45 | + /path/to/decoder.onnx\ | ||
| 46 | + /path/to/joiner.onnx\ | ||
| 47 | + [num_threads] | ||
| 48 | + | ||
| 49 | +Please refer to | ||
| 50 | +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 51 | +for a list of pre-trained models to download. | ||
| 52 | +)usage"; | ||
| 53 | + fprintf(stderr, "%s\n", usage); | ||
| 54 | + fprintf(stderr, "argc, %d\n", argc); | ||
| 55 | + | ||
| 56 | + return 0; | ||
| 57 | + } | ||
| 58 | + signal(SIGINT, Handler); | ||
| 59 | + | ||
| 60 | + sherpa_onnx::OnlineRecognizerConfig config; | ||
| 61 | + config.tokens = argv[1]; | ||
| 62 | + | ||
| 63 | + config.model_config.debug = false; | ||
| 64 | + config.model_config.encoder_filename = argv[2]; | ||
| 65 | + config.model_config.decoder_filename = argv[3]; | ||
| 66 | + config.model_config.joiner_filename = argv[4]; | ||
| 67 | + | ||
| 68 | + config.model_config.num_threads = 2; | ||
| 69 | + if (argc == 6 && atoi(argv[5]) > 0) { | ||
| 70 | + config.model_config.num_threads = atoi(argv[5]); | ||
| 71 | + } | ||
| 72 | + | ||
| 73 | + config.enable_endpoint = true; | ||
| 74 | + | ||
| 75 | + config.endpoint_config.rule1.min_trailing_silence = 2.4; | ||
| 76 | + config.endpoint_config.rule2.min_trailing_silence = 1.2; | ||
| 77 | + config.endpoint_config.rule3.min_utterance_length = 300; | ||
| 78 | + | ||
| 79 | + fprintf(stderr, "%s\n", config.ToString().c_str()); | ||
| 80 | + | ||
| 81 | + sherpa_onnx::OnlineRecognizer recognizer(config); | ||
| 82 | + auto s = recognizer.CreateStream(); | ||
| 83 | + | ||
| 84 | + sherpa_onnx::Microphone mic; | ||
| 85 | + | ||
| 86 | + PaDeviceIndex num_devices = Pa_GetDeviceCount(); | ||
| 87 | + fprintf(stderr, "Num devices: %d\n", num_devices); | ||
| 88 | + | ||
| 89 | + PaStreamParameters param; | ||
| 90 | + | ||
| 91 | + param.device = Pa_GetDefaultInputDevice(); | ||
| 92 | + if (param.device == paNoDevice) { | ||
| 93 | + fprintf(stderr, "No default input device found\n"); | ||
| 94 | + exit(EXIT_FAILURE); | ||
| 95 | + } | ||
| 96 | + fprintf(stderr, "Use default device: %d\n", param.device); | ||
| 97 | + | ||
| 98 | + const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device); | ||
| 99 | + fprintf(stderr, " Name: %s\n", info->name); | ||
| 100 | + fprintf(stderr, " Max input channels: %d\n", info->maxInputChannels); | ||
| 101 | + | ||
| 102 | + param.channelCount = 1; | ||
| 103 | + param.sampleFormat = paFloat32; | ||
| 104 | + | ||
| 105 | + param.suggestedLatency = info->defaultLowInputLatency; | ||
| 106 | + param.hostApiSpecificStreamInfo = nullptr; | ||
| 107 | + const float sample_rate = 16000; | ||
| 108 | + | ||
| 109 | + PaStream *stream; | ||
| 110 | + PaError err = | ||
| 111 | + Pa_OpenStream(&stream, ¶m, nullptr, /* &outputParameters, */ | ||
| 112 | + sample_rate, | ||
| 113 | + 0, // frames per buffer | ||
| 114 | + paClipOff, // we won't output out of range samples | ||
| 115 | + // so don't bother clipping them | ||
| 116 | + RecordCallback, s.get()); | ||
| 117 | + if (err != paNoError) { | ||
| 118 | + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err)); | ||
| 119 | + exit(EXIT_FAILURE); | ||
| 120 | + } | ||
| 121 | + | ||
| 122 | + err = Pa_StartStream(stream); | ||
| 123 | + fprintf(stderr, "Started\n"); | ||
| 124 | + | ||
| 125 | + if (err != paNoError) { | ||
| 126 | + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err)); | ||
| 127 | + exit(EXIT_FAILURE); | ||
| 128 | + } | ||
| 129 | + | ||
| 130 | + std::string last_text; | ||
| 131 | + int32_t segment_index = 0; | ||
| 132 | + sherpa_onnx::Display display; | ||
| 133 | + while (!stop) { | ||
| 134 | + while (recognizer.IsReady(s.get())) { | ||
| 135 | + recognizer.DecodeStream(s.get()); | ||
| 136 | + } | ||
| 137 | + | ||
| 138 | + auto text = recognizer.GetResult(s.get()).text; | ||
| 139 | + bool is_endpoint = recognizer.IsEndpoint(s.get()); | ||
| 140 | + | ||
| 141 | + if (!text.empty() && last_text != text) { | ||
| 142 | + last_text = text; | ||
| 143 | + | ||
| 144 | + std::transform(text.begin(), text.end(), text.begin(), | ||
| 145 | + [](auto c) { return std::tolower(c); }); | ||
| 146 | + | ||
| 147 | + display.Print(segment_index, text); | ||
| 148 | + } | ||
| 149 | + | ||
| 150 | + if (!text.empty() && is_endpoint) { | ||
| 151 | + ++segment_index; | ||
| 152 | + recognizer.Reset(s.get()); | ||
| 153 | + } | ||
| 154 | + | ||
| 155 | + Pa_Sleep(20); // sleep for 20ms | ||
| 156 | + } | ||
| 157 | + | ||
| 158 | + err = Pa_CloseStream(stream); | ||
| 159 | + if (err != paNoError) { | ||
| 160 | + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err)); | ||
| 161 | + exit(EXIT_FAILURE); | ||
| 162 | + } | ||
| 163 | + | ||
| 164 | + return 0; | ||
| 165 | +} |
-
请 注册 或 登录 后发表评论