Fangjun Kuang
Committed by GitHub

Support passing utf-8 strings from JavaScript to C++. (#1355)

We first encode JavaScript's UTF-16 strings as UTF-8 bytes in a Uint8Array
(e.g., with TextEncoder) and then pass that array to C++.
1 { 1 {
2 "dependencies": { 2 "dependencies": {
3 - "sherpa-onnx-node": "^1.10.26" 3 + "sherpa-onnx-node": "^1.10.27"
4 } 4 }
5 } 5 }
@@ -3,6 +3,19 @@ const sherpa_onnx = require('sherpa-onnx-node'); @@ -3,6 +3,19 @@ const sherpa_onnx = require('sherpa-onnx-node');
3 3
4 // Please download test files from 4 // Please download test files from
5 // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models 5 // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  6 +
  7 +
  8 +// If your path contains non-ASCII characters, e.g., Chinese, you can use
  9 +// the following code to pass it as a UTF-8 encoded Uint8Array instead of a string
  10 +//
  11 +
  12 +// let encoder = new TextEncoder();
  13 +// let tokens = encoder.encode(
  14 +// './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.txt');
  15 +// let model = encoder.encode(
  16 +// './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.int8.onnx');
  17 +
  18 +
6 const config = { 19 const config = {
7 'featConfig': { 20 'featConfig': {
8 'sampleRate': 16000, 21 'sampleRate': 16000,
@@ -12,9 +25,11 @@ const config = { @@ -12,9 +25,11 @@ const config = {
12 'senseVoice': { 25 'senseVoice': {
13 'model': 26 'model':
14 './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx', 27 './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
  28 + // 'model': model,
15 'useInverseTextNormalization': 1, 29 'useInverseTextNormalization': 1,
16 }, 30 },
17 'tokens': './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt', 31 'tokens': './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
  32 + // 'tokens': tokens,
18 'numThreads': 2, 33 'numThreads': 2,
19 'provider': 'cpu', 34 'provider': 'cpu',
20 'debug': 1, 35 'debug': 1,
@@ -7,17 +7,24 @@ @@ -7,17 +7,24 @@
7 #include <algorithm> 7 #include <algorithm>
8 #include <string> 8 #include <string>
9 9
10 -#define SHERPA_ONNX_ASSIGN_ATTR_STR(c_name, js_name) \  
11 - do { \  
12 - if (o.Has(#js_name) && o.Get(#js_name).IsString()) { \  
13 - Napi::String _str = o.Get(#js_name).As<Napi::String>(); \  
14 - std::string s = _str.Utf8Value(); \  
15 - char *p = new char[s.size() + 1]; \  
16 - std::copy(s.begin(), s.end(), p); \  
17 - p[s.size()] = 0; \  
18 - \  
19 - c.c_name = p; \  
20 - } \ 10 +#define SHERPA_ONNX_ASSIGN_ATTR_STR(c_name, js_name) \
  11 + do { \
  12 + if (o.Has(#js_name) && o.Get(#js_name).IsString()) { \
  13 + Napi::String _str = o.Get(#js_name).As<Napi::String>(); \
  14 + std::string s = _str.Utf8Value(); \
  15 + char *p = new char[s.size() + 1]; \
  16 + std::copy(s.begin(), s.end(), p); \
  17 + p[s.size()] = 0; \
  18 + \
  19 + c.c_name = p; \
  20 + } else if (o.Has(#js_name) && o.Get(#js_name).IsTypedArray()) { \
  21 + Napi::Uint8Array _array = o.Get(#js_name).As<Napi::Uint8Array>(); \
  22 + char *p = new char[_array.ElementLength() + 1]; \
  23 + std::copy(_array.Data(), _array.Data() + _array.ElementLength(), p); \
  24 + p[_array.ElementLength()] = '\0'; \
  25 + \
  26 + c.c_name = p; \
  27 + } \
21 } while (0) 28 } while (0)
22 29
23 #define SHERPA_ONNX_ASSIGN_ATTR_INT32(c_name, js_name) \ 30 #define SHERPA_ONNX_ASSIGN_ATTR_INT32(c_name, js_name) \