Daniel Doña
Committed by GitHub

Add function 'tolowerUnicode' in sherpa-onnx-microphone (fix #791) (#812)

... ... @@ -7,7 +7,8 @@
#include <stdlib.h>
#include <algorithm>
#include <cctype> // std::tolower
#include <clocale>
#include <cwctype>
#include "portaudio.h" // NOLINT
#include "sherpa-onnx/csrc/display.h"
... ... @@ -37,6 +38,31 @@ static void Handler(int32_t sig) {
fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}
// Returns a lowercase copy of `input_str`, interpreting its bytes as a
// multibyte string in the encoding of the user's locale (e.g. UTF-8).
//
// The text is decoded to wide characters, each wide character is lowercased
// with std::towlower, and the result is encoded back to a multibyte string.
// The returned string contains exactly the converted bytes, with no trailing
// '\0' padding.
//
// If the text cannot be decoded or re-encoded in the current locale, the
// function falls back to lowercasing only the ASCII letters 'A'-'Z' and
// leaves every other byte untouched, so the caller always gets usable text.
//
// Notes:
//  - Calls std::setlocale(LC_ALL, ""), which changes the process-wide locale.
//  - On the locale-aware path the input is read as a C string, so conversion
//    stops at the first embedded '\0'.
static std::string tolowerUnicode(const std::string& input_str) {
  // Use system locale
  std::setlocale(LC_ALL, "");

  // Locale-independent fallback: only touches ASCII uppercase letters, so it
  // can never corrupt a multibyte sequence.
  const auto ascii_lower = [&input_str]() {
    std::string result = input_str;
    for (char& c : result) {
      if (c >= 'A' && c <= 'Z') {
        c = static_cast<char>(c - 'A' + 'a');
      }
    }
    return result;
  };

  const size_t kConversionError = static_cast<size_t>(-1);

  // From char string to wchar string. A multibyte string never decodes to
  // more wide characters than it has bytes; the extra slot is for the
  // terminator.
  std::wstring wide_str(input_str.size() + 1, L'\0');
  const size_t wide_len =
      std::mbstowcs(&wide_str[0], input_str.c_str(), wide_str.size());
  if (wide_len == kConversionError) {
    return ascii_lower();
  }
  wide_str.resize(wide_len);  // drop the padding so it is not processed

  // std::towlower returns its argument unchanged when there is no lowercase
  // mapping, so no separate std::iswupper check is needed.
  for (wchar_t& wc : wide_str) {
    wc = static_cast<wchar_t>(std::towlower(static_cast<wint_t>(wc)));
  }

  // Back to char string. A lowercase form may need more bytes than its
  // uppercase form, so reserve the worst case of MB_CUR_MAX bytes per wide
  // character plus the terminator.
  std::string lowercase_str(wide_str.size() * MB_CUR_MAX + 1, '\0');
  const size_t byte_len = std::wcstombs(&lowercase_str[0], wide_str.c_str(),
                                        lowercase_str.size());
  if (byte_len == kConversionError) {
    return ascii_lower();
  }
  lowercase_str.resize(byte_len);  // trim to the bytes actually written
  return lowercase_str;
}
int32_t main(int32_t argc, char *argv[]) {
signal(SIGINT, Handler);
... ... @@ -172,11 +198,7 @@ for a list of pre-trained models to download.
if (!text.empty() && last_text != text) {
last_text = text;
std::transform(text.begin(), text.end(), text.begin(),
[](auto c) { return std::tolower(c); });
display.Print(segment_index, text);
display.Print(segment_index, tolowerUnicode(text));
fflush(stderr);
}
... ...