Committed by
GitHub
Add microphone support for offline recognizer (#104)
正在显示
3 个修改的文件
包含
230 行增加
和
2 行删除
| @@ -107,6 +107,11 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) | @@ -107,6 +107,11 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) | ||
| 107 | microphone.cc | 107 | microphone.cc |
| 108 | ) | 108 | ) |
| 109 | 109 | ||
| 110 | + add_executable(sherpa-onnx-microphone-offline | ||
| 111 | + sherpa-onnx-microphone-offline.cc | ||
| 112 | + microphone.cc | ||
| 113 | + ) | ||
| 114 | + | ||
| 110 | if(BUILD_SHARED_LIBS) | 115 | if(BUILD_SHARED_LIBS) |
| 111 | set(PA_LIB portaudio) | 116 | set(PA_LIB portaudio) |
| 112 | else() | 117 | else() |
| @@ -114,8 +119,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) | @@ -114,8 +119,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) | ||
| 114 | endif() | 119 | endif() |
| 115 | 120 | ||
| 116 | target_link_libraries(sherpa-onnx-microphone PRIVATE ${PA_LIB} sherpa-onnx-core) | 121 | target_link_libraries(sherpa-onnx-microphone PRIVATE ${PA_LIB} sherpa-onnx-core) |
| 117 | - | ||
| 118 | - install(TARGETS sherpa-onnx-microphone DESTINATION bin) | 122 | + target_link_libraries(sherpa-onnx-microphone-offline PRIVATE ${PA_LIB} sherpa-onnx-core) |
| 123 | + | ||
| 124 | + install( | ||
| 125 | + TARGETS | ||
| 126 | + sherpa-onnx-microphone | ||
| 127 | + sherpa-onnx-microphone-offline | ||
| 128 | + DESTINATION | ||
| 129 | + bin | ||
| 130 | + ) | ||
| 119 | endif() | 131 | endif() |
| 120 | 132 | ||
| 121 | if(SHERPA_ONNX_ENABLE_WEBSOCKET) | 133 | if(SHERPA_ONNX_ENABLE_WEBSOCKET) |
| 1 | +// sherpa-onnx/csrc/sherpa-onnx-microphone-offline.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2022-2023 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#include <signal.h> | ||
| 6 | +#include <stdio.h> | ||
| 7 | +#include <stdlib.h> | ||
| 8 | + | ||
| 9 | +#include <algorithm> | ||
| 10 | +#include <cctype> // std::tolower | ||
| 11 | +#include <thread> // NOLINT | ||
| 12 | + | ||
| 13 | +#include "portaudio.h" // NOLINT | ||
| 14 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 15 | +#include "sherpa-onnx/csrc/microphone.h" | ||
| 16 | +#include "sherpa-onnx/csrc/offline-recognizer.h" | ||
| 17 | + | ||
| 18 | +enum class State { | ||
| 19 | + kIdle, | ||
| 20 | + kRecording, | ||
| 21 | + kDecoding, | ||
| 22 | +}; | ||
| 23 | + | ||
| 24 | +State state = State::kIdle; | ||
| 25 | + | ||
| 26 | +// true to stop the program and exit | ||
| 27 | +bool stop = false; | ||
| 28 | + | ||
| 29 | +std::vector<float> samples; | ||
| 30 | +std::mutex samples_mutex; | ||
| 31 | + | ||
| 32 | +static void DetectKeyPress() { | ||
| 33 | + SHERPA_ONNX_LOGE("Press Enter to start"); | ||
| 34 | + int32_t key; | ||
| 35 | + while (!stop && (key = getchar())) { | ||
| 36 | + if (key != 0x0a) { | ||
| 37 | + continue; | ||
| 38 | + } | ||
| 39 | + | ||
| 40 | + switch (state) { | ||
| 41 | + case State::kIdle: | ||
| 42 | + SHERPA_ONNX_LOGE("Start recording. Press Enter to stop recording"); | ||
| 43 | + state = State::kRecording; | ||
| 44 | + { | ||
| 45 | + std::lock_guard<std::mutex> lock(samples_mutex); | ||
| 46 | + samples.clear(); | ||
| 47 | + } | ||
| 48 | + break; | ||
| 49 | + case State::kRecording: | ||
| 50 | + SHERPA_ONNX_LOGE("Stop recording. Decoding ..."); | ||
| 51 | + state = State::kDecoding; | ||
| 52 | + break; | ||
| 53 | + case State::kDecoding: | ||
| 54 | + break; | ||
| 55 | + } | ||
| 56 | + } | ||
| 57 | +} | ||
| 58 | + | ||
| 59 | +static int32_t RecordCallback(const void *input_buffer, | ||
| 60 | + void * /*output_buffer*/, | ||
| 61 | + unsigned long frames_per_buffer, // NOLINT | ||
| 62 | + const PaStreamCallbackTimeInfo * /*time_info*/, | ||
| 63 | + PaStreamCallbackFlags /*status_flags*/, | ||
| 64 | + void *user_data) { | ||
| 65 | + std::lock_guard<std::mutex> lock(samples_mutex); | ||
| 66 | + | ||
| 67 | + auto p = reinterpret_cast<const float *>(input_buffer); | ||
| 68 | + samples.insert(samples.end(), p, p + frames_per_buffer); | ||
| 69 | + | ||
| 70 | + return stop ? paComplete : paContinue; | ||
| 71 | +} | ||
| 72 | + | ||
| 73 | +static void Handler(int32_t sig) { | ||
| 74 | + stop = true; | ||
| 75 | + fprintf(stderr, "\nCaught Ctrl + C. Press Enter to exit\n"); | ||
| 76 | +} | ||
| 77 | + | ||
| 78 | +int32_t main(int32_t argc, char *argv[]) { | ||
| 79 | + signal(SIGINT, Handler); | ||
| 80 | + | ||
| 81 | + const char *kUsageMessage = R"usage( | ||
| 82 | +This program uses non-streaming models with microphone for speech recognition. | ||
| 83 | +Usage: | ||
| 84 | + | ||
| 85 | +(1) Transducer from icefall | ||
| 86 | + | ||
| 87 | + ./bin/sherpa-onnx-microphone-offline \ | ||
| 88 | + --tokens=/path/to/tokens.txt \ | ||
| 89 | + --encoder=/path/to/encoder.onnx \ | ||
| 90 | + --decoder=/path/to/decoder.onnx \ | ||
| 91 | + --joiner=/path/to/joiner.onnx \ | ||
| 92 | + --num-threads=2 \ | ||
| 93 | + --decoding-method=greedy_search | ||
| 94 | + | ||
| 95 | +(2) Paraformer from FunASR | ||
| 96 | + | ||
| 97 | + ./bin/sherpa-onnx-microphone-offline \ | ||
| 98 | + --tokens=/path/to/tokens.txt \ | ||
| 99 | + --paraformer=/path/to/model.onnx \ | ||
| 100 | + --num-threads=2 \ | ||
| 101 | + --decoding-method=greedy_search | ||
| 102 | + | ||
| 103 | +Default value for num_threads is 2. | ||
| 104 | +Valid values for decoding_method: greedy_search. | ||
| 105 | + | ||
| 106 | +Please refer to | ||
| 107 | +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 108 | +for a list of pre-trained models to download. | ||
| 109 | +)usage"; | ||
| 110 | + | ||
| 111 | + sherpa_onnx::ParseOptions po(kUsageMessage); | ||
| 112 | + sherpa_onnx::OfflineRecognizerConfig config; | ||
| 113 | + config.Register(&po); | ||
| 114 | + | ||
| 115 | + po.Read(argc, argv); | ||
| 116 | + if (po.NumArgs() != 0) { | ||
| 117 | + po.PrintUsage(); | ||
| 118 | + exit(EXIT_FAILURE); | ||
| 119 | + } | ||
| 120 | + | ||
| 121 | + fprintf(stderr, "%s\n", config.ToString().c_str()); | ||
| 122 | + | ||
| 123 | + if (!config.Validate()) { | ||
| 124 | + fprintf(stderr, "Errors in config!\n"); | ||
| 125 | + return -1; | ||
| 126 | + } | ||
| 127 | + | ||
| 128 | + SHERPA_ONNX_LOGE("Creating recognizer ..."); | ||
| 129 | + sherpa_onnx::OfflineRecognizer recognizer(config); | ||
| 130 | + SHERPA_ONNX_LOGE("Recognizer created!"); | ||
| 131 | + | ||
| 132 | + sherpa_onnx::Microphone mic; | ||
| 133 | + | ||
| 134 | + PaDeviceIndex num_devices = Pa_GetDeviceCount(); | ||
| 135 | + fprintf(stderr, "Num devices: %d\n", num_devices); | ||
| 136 | + | ||
| 137 | + PaStreamParameters param; | ||
| 138 | + | ||
| 139 | + param.device = Pa_GetDefaultInputDevice(); | ||
| 140 | + if (param.device == paNoDevice) { | ||
| 141 | + fprintf(stderr, "No default input device found\n"); | ||
| 142 | + exit(EXIT_FAILURE); | ||
| 143 | + } | ||
| 144 | + fprintf(stderr, "Use default device: %d\n", param.device); | ||
| 145 | + | ||
| 146 | + const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device); | ||
| 147 | + fprintf(stderr, " Name: %s\n", info->name); | ||
| 148 | + fprintf(stderr, " Max input channels: %d\n", info->maxInputChannels); | ||
| 149 | + | ||
| 150 | + param.channelCount = 1; | ||
| 151 | + param.sampleFormat = paFloat32; | ||
| 152 | + | ||
| 153 | + param.suggestedLatency = info->defaultLowInputLatency; | ||
| 154 | + param.hostApiSpecificStreamInfo = nullptr; | ||
| 155 | + float sample_rate = 16000; | ||
| 156 | + | ||
| 157 | + PaStream *stream; | ||
| 158 | + PaError err = | ||
| 159 | + Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */ | ||
| 160 | + sample_rate, | ||
| 161 | + 0, // frames per buffer | ||
| 162 | + paClipOff, // we won't output out of range samples | ||
| 163 | + // so don't bother clipping them | ||
| 164 | + RecordCallback, nullptr); | ||
| 165 | + if (err != paNoError) { | ||
| 166 | + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err)); | ||
| 167 | + exit(EXIT_FAILURE); | ||
| 168 | + } | ||
| 169 | + | ||
| 170 | + err = Pa_StartStream(stream); | ||
| 171 | + fprintf(stderr, "Started\n"); | ||
| 172 | + | ||
| 173 | + if (err != paNoError) { | ||
| 174 | + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err)); | ||
| 175 | + exit(EXIT_FAILURE); | ||
| 176 | + } | ||
| 177 | + | ||
| 178 | + std::thread t(DetectKeyPress); | ||
| 179 | + while (!stop) { | ||
| 180 | + switch (state) { | ||
| 181 | + case State::kIdle: | ||
| 182 | + break; | ||
| 183 | + case State::kRecording: | ||
| 184 | + break; | ||
| 185 | + case State::kDecoding: { | ||
| 186 | + std::vector<float> buf; | ||
| 187 | + { | ||
| 188 | + std::lock_guard<std::mutex> lock(samples_mutex); | ||
| 189 | + buf = std::move(samples); | ||
| 190 | + } | ||
| 191 | + | ||
| 192 | + auto s = recognizer.CreateStream(); | ||
| 193 | + s->AcceptWaveform(sample_rate, buf.data(), buf.size()); | ||
| 194 | + recognizer.DecodeStream(s.get()); | ||
| 195 | + SHERPA_ONNX_LOGE("Decoding Done! Result is:"); | ||
| 196 | + SHERPA_ONNX_LOGE("%s", s->GetResult().text.c_str()); | ||
| 197 | + | ||
| 198 | + state = State::kIdle; | ||
| 199 | + SHERPA_ONNX_LOGE("Press Enter to start"); | ||
| 200 | + break; | ||
| 201 | + } | ||
| 202 | + } | ||
| 203 | + | ||
| 204 | + Pa_Sleep(20); // sleep for 20ms | ||
| 205 | + } | ||
| 206 | + t.join(); | ||
| 207 | + | ||
| 208 | + err = Pa_CloseStream(stream); | ||
| 209 | + if (err != paNoError) { | ||
| 210 | + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err)); | ||
| 211 | + exit(EXIT_FAILURE); | ||
| 212 | + } | ||
| 213 | + | ||
| 214 | + return 0; | ||
| 215 | +} |
| @@ -66,6 +66,7 @@ for a list of pre-trained models to download. | @@ -66,6 +66,7 @@ for a list of pre-trained models to download. | ||
| 66 | return -1; | 66 | return -1; |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | + fprintf(stderr, "Creating recognizer ...\n"); | ||
| 69 | sherpa_onnx::OfflineRecognizer recognizer(config); | 70 | sherpa_onnx::OfflineRecognizer recognizer(config); |
| 70 | 71 | ||
| 71 | auto begin = std::chrono::steady_clock::now(); | 72 | auto begin = std::chrono::steady_clock::now(); |
-
请 注册 或 登录 后发表评论