Daniel Doña
Committed by GitHub

Add function 'tolowerUnicode' in sherpa-onnx-microphone (fix #791) (#812)

@@ -7,7 +7,8 @@ @@ -7,7 +7,8 @@
7 #include <stdlib.h> 7 #include <stdlib.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 -#include <cctype> // std::tolower 10 +#include <clocale>
  11 +#include <cwctype>
11 12
12 #include "portaudio.h" // NOLINT 13 #include "portaudio.h" // NOLINT
13 #include "sherpa-onnx/csrc/display.h" 14 #include "sherpa-onnx/csrc/display.h"
@@ -37,6 +38,31 @@ static void Handler(int32_t sig) { @@ -37,6 +38,31 @@ static void Handler(int32_t sig) {
37 fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n"); 38 fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
38 } 39 }
39 40
// Returns a lowercase copy of `input_str`, interpreting its bytes as a
// multibyte string in the user's system locale (e.g. UTF-8), so that
// non-ASCII letters are lowered as well.
//
// @param input_str Text to convert. Conversion stops at the first embedded
//                  '\0', because the C conversion functions work on
//                  null-terminated strings.
// @return The lowercase text, with no trailing '\0' padding. If the bytes
//         cannot be decoded (or re-encoded) in the current locale, only
//         ASCII letters 'A'-'Z' are lowered and all other bytes are kept.
//
// Side effect: calls std::setlocale(LC_ALL, ""), which switches the whole
// process to the environment's locale.
static std::string tolowerUnicode(const std::string &input_str) {
  if (input_str.empty()) {
    return std::string();
  }

  // Used whenever the locale-aware conversion is not possible, so the caller
  // still gets usable text instead of an empty or truncated string.
  const auto ascii_fallback = [&input_str]() {
    std::string lowered(input_str);
    for (char &c : lowered) {
      if (c >= 'A' && c <= 'Z') {
        c = static_cast<char>(c - 'A' + 'a');
      }
    }
    return lowered;
  };

  // Use system locale
  std::setlocale(LC_ALL, "");

  // Both conversion functions report failure as (size_t)-1.
  const size_t kConversionFailed = static_cast<size_t>(-1);

  // From char string to wchar string. Every wide character consumes at least
  // one input byte, so input_str.size() wide characters is always enough.
  std::wstring wide_str(input_str.size(), L'\0');
  const size_t num_wide =
      std::mbstowcs(&wide_str[0], input_str.c_str(), wide_str.size());
  if (num_wide == kConversionFailed) {
    return ascii_fallback();
  }
  wide_str.resize(num_wide);

  // towlower() returns its argument unchanged when there is no lowercase
  // form, so no separate iswupper() check is needed.
  for (wchar_t &wc : wide_str) {
    wc = static_cast<wchar_t>(std::towlower(wc));
  }

  // Back to char string. A lowercase character may need more bytes than its
  // uppercase form, so size the buffer for the worst case in this locale
  // rather than reusing the input length.
  std::string lowercase_str(wide_str.size() * static_cast<size_t>(MB_CUR_MAX),
                            '\0');
  if (lowercase_str.empty()) {
    return lowercase_str;
  }
  const size_t num_bytes = std::wcstombs(&lowercase_str[0], wide_str.c_str(),
                                         lowercase_str.size());
  if (num_bytes == kConversionFailed) {
    return ascii_fallback();
  }

  // Drop the unused tail so the result compares equal to the expected text.
  lowercase_str.resize(num_bytes);
  return lowercase_str;
}
  65 +
40 int32_t main(int32_t argc, char *argv[]) { 66 int32_t main(int32_t argc, char *argv[]) {
41 signal(SIGINT, Handler); 67 signal(SIGINT, Handler);
42 68
@@ -172,11 +198,7 @@ for a list of pre-trained models to download. @@ -172,11 +198,7 @@ for a list of pre-trained models to download.
172 198
173 if (!text.empty() && last_text != text) { 199 if (!text.empty() && last_text != text) {
174 last_text = text; 200 last_text = text;
175 -  
176 - std::transform(text.begin(), text.end(), text.begin(),  
177 - [](auto c) { return std::tolower(c); });  
178 -  
179 - display.Print(segment_index, text); 201 + display.Print(segment_index, tolowerUnicode(text));
180 fflush(stderr); 202 fflush(stderr);
181 } 203 }
182 204