Fangjun Kuang
Committed by GitHub

Support passing utf-8 strings from JavaScript to C++. (#1355)

JavaScript strings are UTF-16, so we first encode them as UTF-8 bytes in a
Uint8Array and then pass that array to C++.
1 { 1 {
2 "dependencies": { 2 "dependencies": {
3 - "sherpa-onnx-node": "^1.10.26" 3 + "sherpa-onnx-node": "^1.10.27"
4 } 4 }
5 } 5 }
@@ -3,6 +3,19 @@ const sherpa_onnx = require('sherpa-onnx-node'); @@ -3,6 +3,19 @@ const sherpa_onnx = require('sherpa-onnx-node');
3 3
4 // Please download test files from 4 // Please download test files from
5 // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models 5 // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  6 +
  7 +
  8 +// If your path contains non-ASCII characters (e.g., Chinese), you can use
  9 +// the following code to encode it as a UTF-8 Uint8Array with TextEncoder.
  10 +//
  11 +
  12 +// let encoder = new TextEncoder();
  13 +// let tokens = encoder.encode(
  14 +// './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.txt');
  15 +// let model = encoder.encode(
  16 +// './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.int8.onnx');
  17 +
  18 +
6 const config = { 19 const config = {
7 'featConfig': { 20 'featConfig': {
8 'sampleRate': 16000, 21 'sampleRate': 16000,
@@ -12,9 +25,11 @@ const config = { @@ -12,9 +25,11 @@ const config = {
12 'senseVoice': { 25 'senseVoice': {
13 'model': 26 'model':
14 './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx', 27 './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
  28 + // 'model': model,
15 'useInverseTextNormalization': 1, 29 'useInverseTextNormalization': 1,
16 }, 30 },
17 'tokens': './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt', 31 'tokens': './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
  32 + // 'tokens': tokens,
18 'numThreads': 2, 33 'numThreads': 2,
19 'provider': 'cpu', 34 'provider': 'cpu',
20 'debug': 1, 35 'debug': 1,
@@ -17,6 +17,13 @@ @@ -17,6 +17,13 @@
17 p[s.size()] = 0; \ 17 p[s.size()] = 0; \
18 \ 18 \
19 c.c_name = p; \ 19 c.c_name = p; \
  20 + } else if (o.Has(#js_name) && o.Get(#js_name).IsTypedArray()) { \
  21 + Napi::Uint8Array _array = o.Get(#js_name).As<Napi::Uint8Array>(); \
  22 + char *p = new char[_array.ElementLength() + 1]; \
  23 + std::copy(_array.Data(), _array.Data() + _array.ElementLength(), p); \
  24 + p[_array.ElementLength()] = '\0'; \
  25 + \
  26 + c.c_name = p; \
20 } \ 27 } \
21 } while (0) 28 } while (0)
22 29