Fangjun Kuang
Committed by GitHub

Add on-device tex-to-speech (TTS) demo for HarmonyOS (#1590)

Showing 61 changed files with 1,930 additions and 117 deletions
1 !build-profile.json5 1 !build-profile.json5
  2 +*.har
1 -export { readWave, readWaveFromBinary } from "libsherpa_onnx.so"; 1 +export {
  2 + listRawfileDir,
  3 + readWave,
  4 + readWaveFromBinary,
  5 +} from "libsherpa_onnx.so";
2 6
3 export { 7 export {
4 CircularBuffer, 8 CircularBuffer,
@@ -4,7 +4,7 @@ @@ -4,7 +4,7 @@
4 "externalNativeOptions": { 4 "externalNativeOptions": {
5 "path": "./src/main/cpp/CMakeLists.txt", 5 "path": "./src/main/cpp/CMakeLists.txt",
6 "arguments": "", 6 "arguments": "",
7 - "cppFlags": "", 7 + "cppFlags": "-std=c++17",
8 "abiFilters": [ 8 "abiFilters": [
9 "arm64-v8a", 9 "arm64-v8a",
10 "x86_64", 10 "x86_64",
@@ -2,6 +2,10 @@ @@ -2,6 +2,10 @@
2 cmake_minimum_required(VERSION 3.13.0) 2 cmake_minimum_required(VERSION 3.13.0)
3 project(myNpmLib) 3 project(myNpmLib)
4 4
  5 +if (NOT CMAKE_CXX_STANDARD)
  6 + set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to use")
  7 +endif()
  8 +
5 # Disable warning about 9 # Disable warning about
6 # 10 #
7 # "The DOWNLOAD_EXTRACT_TIMESTAMP option was not given and policy CMP0135 is 11 # "The DOWNLOAD_EXTRACT_TIMESTAMP option was not given and policy CMP0135 is
@@ -46,6 +50,7 @@ add_library(sherpa_onnx SHARED @@ -46,6 +50,7 @@ add_library(sherpa_onnx SHARED
46 speaker-identification.cc 50 speaker-identification.cc
47 spoken-language-identification.cc 51 spoken-language-identification.cc
48 streaming-asr.cc 52 streaming-asr.cc
  53 + utils.cc
49 vad.cc 54 vad.cc
50 wave-reader.cc 55 wave-reader.cc
51 wave-writer.cc 56 wave-writer.cc
@@ -213,12 +213,13 @@ static Napi::Number OfflineTtsNumSpeakersWrapper( @@ -213,12 +213,13 @@ static Napi::Number OfflineTtsNumSpeakersWrapper(
213 return Napi::Number::New(env, num_speakers); 213 return Napi::Number::New(env, num_speakers);
214 } 214 }
215 215
  216 +// synchronous version
216 static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) { 217 static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
217 Napi::Env env = info.Env(); 218 Napi::Env env = info.Env();
218 219
219 if (info.Length() != 2) { 220 if (info.Length() != 2) {
220 std::ostringstream os; 221 std::ostringstream os;
221 - os << "Expect only 1 argument. Given: " << info.Length(); 222 + os << "Expect only 2 arguments. Given: " << info.Length();
222 223
223 Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); 224 Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
224 225
@@ -298,8 +299,8 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) { @@ -298,8 +299,8 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
298 int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value(); 299 int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value();
299 float speed = obj.Get("speed").As<Napi::Number>().FloatValue(); 300 float speed = obj.Get("speed").As<Napi::Number>().FloatValue();
300 301
301 - const SherpaOnnxGeneratedAudio *audio =  
302 - SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed); 302 + const SherpaOnnxGeneratedAudio *audio;
  303 + audio = SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed);
303 304
304 if (enable_external_buffer) { 305 if (enable_external_buffer) {
305 Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( 306 Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
@@ -334,6 +335,256 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) { @@ -334,6 +335,256 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
334 } 335 }
335 } 336 }
336 337
// Payload passed from the TTS generation thread to the JS progress callback
// via the thread-safe function. Instances are owned by TtsGenerateWorker,
// which frees them in its destructor.
struct TtsCallbackData {
  std::vector<float> samples;  // audio samples produced for this chunk
  float progress = 0.0f;       // progress value reported by the engine
  bool processed = false;      // set on the JS thread once the callback ran
  bool cancelled = false;      // set when the JS callback asked to cancel
};
  344 +
  345 +// see
  346 +// https://github.com/nodejs/node-addon-examples/blob/main/src/6-threadsafe-function/typed_threadsafe_function/node-addon-api/clock.cc
  347 +void InvokeJsCallback(Napi::Env env, Napi::Function callback,
  348 + Napi::Reference<Napi::Value> *context,
  349 + TtsCallbackData *data) {
  350 + if (env != nullptr) {
  351 + if (callback != nullptr) {
  352 + Napi::ArrayBuffer arrayBuffer =
  353 + Napi::ArrayBuffer::New(env, sizeof(float) * data->samples.size());
  354 +
  355 + Napi::Float32Array float32Array =
  356 + Napi::Float32Array::New(env, data->samples.size(), arrayBuffer, 0);
  357 +
  358 + std::copy(data->samples.begin(), data->samples.end(),
  359 + float32Array.Data());
  360 +
  361 + Napi::Object arg = Napi::Object::New(env);
  362 + arg.Set(Napi::String::New(env, "samples"), float32Array);
  363 + arg.Set(Napi::String::New(env, "progress"), data->progress);
  364 +
  365 + auto v = callback.Call(context->Value(), {arg});
  366 + data->processed = true;
  367 + if (v.IsNumber() && v.As<Napi::Number>().Int32Value()) {
  368 + data->cancelled = false;
  369 + } else {
  370 + data->cancelled = true;
  371 + }
  372 + }
  373 + }
  374 +}
  375 +
  376 +using TSFN = Napi::TypedThreadSafeFunction<Napi::Reference<Napi::Value>,
  377 + TtsCallbackData, InvokeJsCallback>;
  378 +
  379 +class TtsGenerateWorker : public Napi::AsyncWorker {
  380 + public:
  381 + TtsGenerateWorker(const Napi::Env &env, TSFN tsfn, SherpaOnnxOfflineTts *tts,
  382 + const std::string &text, float speed, int32_t sid,
  383 + bool use_external_buffer)
  384 + : tsfn_(tsfn),
  385 + Napi::AsyncWorker{env, "TtsGenerateWorker"},
  386 + deferred_(env),
  387 + tts_(tts),
  388 + text_(text),
  389 + speed_(speed),
  390 + sid_(sid),
  391 + use_external_buffer_(use_external_buffer) {}
  392 +
  393 + Napi::Promise Promise() { return deferred_.Promise(); }
  394 +
  395 + ~TtsGenerateWorker() {
  396 + for (auto d : data_list_) {
  397 + delete d;
  398 + }
  399 + }
  400 +
  401 + protected:
  402 + void Execute() override {
  403 + auto callback = [](const float *samples, int32_t n, float progress,
  404 + void *arg) -> int32_t {
  405 + TtsGenerateWorker *_this = reinterpret_cast<TtsGenerateWorker *>(arg);
  406 +
  407 + for (auto d : _this->data_list_) {
  408 + if (d->cancelled) {
  409 + OH_LOG_INFO(LOG_APP, "TtsGenerate is cancelled");
  410 + return 0;
  411 + }
  412 + }
  413 +
  414 + auto data = new TtsCallbackData;
  415 + data->samples = std::vector<float>{samples, samples + n};
  416 + data->progress = progress;
  417 + _this->data_list_.push_back(data);
  418 +
  419 + _this->tsfn_.NonBlockingCall(data);
  420 +
  421 + return 1;
  422 + };
  423 + audio_ = SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
  424 + tts_, text_.c_str(), sid_, speed_, callback, this);
  425 +
  426 + tsfn_.Release();
  427 + }
  428 +
  429 + void OnOK() override {
  430 + Napi::Env env = deferred_.Env();
  431 + Napi::Object ans = Napi::Object::New(env);
  432 + if (use_external_buffer_) {
  433 + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
  434 + env, const_cast<float *>(audio_->samples), sizeof(float) * audio_->n,
  435 + [](Napi::Env /*env*/, void * /*data*/,
  436 + const SherpaOnnxGeneratedAudio *hint) {
  437 + SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint);
  438 + },
  439 + audio_);
  440 + Napi::Float32Array float32Array =
  441 + Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0);
  442 +
  443 + ans.Set(Napi::String::New(env, "samples"), float32Array);
  444 + ans.Set(Napi::String::New(env, "sampleRate"), audio_->sample_rate);
  445 + } else {
  446 + // don't use external buffer
  447 + Napi::ArrayBuffer arrayBuffer =
  448 + Napi::ArrayBuffer::New(env, sizeof(float) * audio_->n);
  449 +
  450 + Napi::Float32Array float32Array =
  451 + Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0);
  452 +
  453 + std::copy(audio_->samples, audio_->samples + audio_->n,
  454 + float32Array.Data());
  455 +
  456 + ans.Set(Napi::String::New(env, "samples"), float32Array);
  457 + ans.Set(Napi::String::New(env, "sampleRate"), audio_->sample_rate);
  458 + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio_);
  459 + }
  460 +
  461 + deferred_.Resolve(ans);
  462 + }
  463 +
  464 + private:
  465 + TSFN tsfn_;
  466 + Napi::Promise::Deferred deferred_;
  467 + SherpaOnnxOfflineTts *tts_;
  468 + std::string text_;
  469 + float speed_;
  470 + int32_t sid_;
  471 + bool use_external_buffer_;
  472 +
  473 + const SherpaOnnxGeneratedAudio *audio_;
  474 +
  475 + std::vector<TtsCallbackData *> data_list_;
  476 +};
  477 +
  478 +static Napi::Object OfflineTtsGenerateAsyncWrapper(
  479 + const Napi::CallbackInfo &info) {
  480 + Napi::Env env = info.Env();
  481 +
  482 + if (info.Length() != 2) {
  483 + std::ostringstream os;
  484 + os << "Expect only 2 arguments. Given: " << info.Length();
  485 +
  486 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  487 +
  488 + return {};
  489 + }
  490 +
  491 + if (!info[0].IsExternal()) {
  492 + Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.")
  493 + .ThrowAsJavaScriptException();
  494 +
  495 + return {};
  496 + }
  497 +
  498 + SherpaOnnxOfflineTts *tts =
  499 + info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();
  500 +
  501 + if (!info[1].IsObject()) {
  502 + Napi::TypeError::New(env, "Argument 1 should be an object")
  503 + .ThrowAsJavaScriptException();
  504 +
  505 + return {};
  506 + }
  507 +
  508 + Napi::Object obj = info[1].As<Napi::Object>();
  509 +
  510 + if (!obj.Has("text")) {
  511 + Napi::TypeError::New(env, "The argument object should have a field text")
  512 + .ThrowAsJavaScriptException();
  513 +
  514 + return {};
  515 + }
  516 +
  517 + if (!obj.Get("text").IsString()) {
  518 + Napi::TypeError::New(env, "The object['text'] should be a string")
  519 + .ThrowAsJavaScriptException();
  520 +
  521 + return {};
  522 + }
  523 +
  524 + if (!obj.Has("sid")) {
  525 + Napi::TypeError::New(env, "The argument object should have a field sid")
  526 + .ThrowAsJavaScriptException();
  527 +
  528 + return {};
  529 + }
  530 +
  531 + if (!obj.Get("sid").IsNumber()) {
  532 + Napi::TypeError::New(env, "The object['sid'] should be a number")
  533 + .ThrowAsJavaScriptException();
  534 +
  535 + return {};
  536 + }
  537 +
  538 + if (!obj.Has("speed")) {
  539 + Napi::TypeError::New(env, "The argument object should have a field speed")
  540 + .ThrowAsJavaScriptException();
  541 +
  542 + return {};
  543 + }
  544 +
  545 + if (!obj.Get("speed").IsNumber()) {
  546 + Napi::TypeError::New(env, "The object['speed'] should be a number")
  547 + .ThrowAsJavaScriptException();
  548 +
  549 + return {};
  550 + }
  551 +
  552 + bool enable_external_buffer = true;
  553 + if (obj.Has("enableExternalBuffer") &&
  554 + obj.Get("enableExternalBuffer").IsBoolean()) {
  555 + enable_external_buffer =
  556 + obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  557 + }
  558 +
  559 + Napi::String _text = obj.Get("text").As<Napi::String>();
  560 + std::string text = _text.Utf8Value();
  561 + int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value();
  562 + float speed = obj.Get("speed").As<Napi::Number>().FloatValue();
  563 +
  564 + Napi::Function cb;
  565 + if (obj.Has("callback") && obj.Get("callback").IsFunction()) {
  566 + cb = obj.Get("callback").As<Napi::Function>();
  567 + }
  568 +
  569 + auto context =
  570 + new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
  571 +
  572 + TSFN tsfn = TSFN::New(
  573 + env,
  574 + cb, // JavaScript function called asynchronously
  575 + "TtsGenerateFunc", // Name
  576 + 0, // Unlimited queue
  577 + 1, // Only one thread will use this initially
  578 + context,
  579 + [](Napi::Env, void *, Napi::Reference<Napi::Value> *ctx) { delete ctx; });
  580 +
  581 + const SherpaOnnxGeneratedAudio *audio;
  582 + TtsGenerateWorker *worker = new TtsGenerateWorker(
  583 + env, tsfn, tts, text, speed, sid, enable_external_buffer);
  584 + worker->Queue();
  585 + return worker->Promise();
  586 +}
  587 +
337 void InitNonStreamingTts(Napi::Env env, Napi::Object exports) { 588 void InitNonStreamingTts(Napi::Env env, Napi::Object exports) {
338 exports.Set(Napi::String::New(env, "createOfflineTts"), 589 exports.Set(Napi::String::New(env, "createOfflineTts"),
339 Napi::Function::New(env, CreateOfflineTtsWrapper)); 590 Napi::Function::New(env, CreateOfflineTtsWrapper));
@@ -346,4 +597,7 @@ void InitNonStreamingTts(Napi::Env env, Napi::Object exports) { @@ -346,4 +597,7 @@ void InitNonStreamingTts(Napi::Env env, Napi::Object exports) {
346 597
347 exports.Set(Napi::String::New(env, "offlineTtsGenerate"), 598 exports.Set(Napi::String::New(env, "offlineTtsGenerate"),
348 Napi::Function::New(env, OfflineTtsGenerateWrapper)); 599 Napi::Function::New(env, OfflineTtsGenerateWrapper));
  600 +
  601 + exports.Set(Napi::String::New(env, "offlineTtsGenerateAsync"),
  602 + Napi::Function::New(env, OfflineTtsGenerateAsyncWrapper));
349 } 603 }
@@ -27,6 +27,10 @@ void InitKeywordSpotting(Napi::Env env, Napi::Object exports); @@ -27,6 +27,10 @@ void InitKeywordSpotting(Napi::Env env, Napi::Object exports);
27 27
28 void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports); 28 void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports);
29 29
  30 +#if __OHOS__
  31 +void InitUtils(Napi::Env env, Napi::Object exports);
  32 +#endif
  33 +
30 Napi::Object Init(Napi::Env env, Napi::Object exports) { 34 Napi::Object Init(Napi::Env env, Napi::Object exports) {
31 InitStreamingAsr(env, exports); 35 InitStreamingAsr(env, exports);
32 InitNonStreamingAsr(env, exports); 36 InitNonStreamingAsr(env, exports);
@@ -41,7 +45,15 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) { @@ -41,7 +45,15 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
41 InitKeywordSpotting(env, exports); 45 InitKeywordSpotting(env, exports);
42 InitNonStreamingSpeakerDiarization(env, exports); 46 InitNonStreamingSpeakerDiarization(env, exports);
43 47
  48 +#if __OHOS__
  49 + InitUtils(env, exports);
  50 +#endif
  51 +
44 return exports; 52 return exports;
45 } 53 }
46 54
  55 +#if __OHOS__
  56 +NODE_API_MODULE(sherpa_onnx, Init)
  57 +#else
47 NODE_API_MODULE(addon, Init) 58 NODE_API_MODULE(addon, Init)
  59 +#endif
  1 +export const listRawfileDir: (mgr: object, dir: string) => Array<string>;
  2 +
1 export const readWave: (filename: string, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number}; 3 export const readWave: (filename: string, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number};
2 export const readWaveFromBinary: (data: Uint8Array, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number}; 4 export const readWaveFromBinary: (data: Uint8Array, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number};
3 export const createCircularBuffer: (capacity: number) => object; 5 export const createCircularBuffer: (capacity: number) => object;
@@ -37,4 +39,11 @@ export const getOnlineStreamResultAsJson: (handle: object, streamHandle: object) @@ -37,4 +39,11 @@ export const getOnlineStreamResultAsJson: (handle: object, streamHandle: object)
37 export const createOfflineTts: (config: object, mgr?: object) => object; 39 export const createOfflineTts: (config: object, mgr?: object) => object;
38 export const getOfflineTtsNumSpeakers: (handle: object) => number; 40 export const getOfflineTtsNumSpeakers: (handle: object) => number;
39 export const getOfflineTtsSampleRate: (handle: object) => number; 41 export const getOfflineTtsSampleRate: (handle: object) => number;
40 -export const offlineTtsGenerate: (handle: object, input: object) => object; 42 +
  43 +export type TtsOutput = {
  44 + samples: Float32Array;
  45 + sampleRate: number;
  46 +};
  47 +
  48 +export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput;
  49 +export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise<TtsOutput>;
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +
  3 +#include <memory>
  4 +#include <sstream>
  5 +#include <string>
  6 +#include <vector>
  7 +
  8 +#include "macros.h" // NOLINT
  9 +#include "napi.h" // NOLINT
  10 +
  11 +static std::vector<std::string> GetFilenames(NativeResourceManager *mgr,
  12 + const std::string &d) {
  13 + std::unique_ptr<RawDir, decltype(&OH_ResourceManager_CloseRawDir)> raw_dir(
  14 + OH_ResourceManager_OpenRawDir(mgr, d.c_str()),
  15 + &OH_ResourceManager_CloseRawDir);
  16 + int count = OH_ResourceManager_GetRawFileCount(raw_dir.get());
  17 + std::vector<std::string> ans;
  18 + ans.reserve(count);
  19 + for (int32_t i = 0; i < count; ++i) {
  20 + std::string filename = OH_ResourceManager_GetRawFileName(raw_dir.get(), i);
  21 + bool is_dir = OH_ResourceManager_IsRawDir(
  22 + mgr, d.empty() ? filename.c_str() : (d + "/" + filename).c_str());
  23 + if (is_dir) {
  24 + auto files = GetFilenames(mgr, d.empty() ? filename : d + "/" + filename);
  25 + for (auto &f : files) {
  26 + ans.push_back(std::move(f));
  27 + }
  28 + } else {
  29 + if (d.empty()) {
  30 + ans.push_back(std::move(filename));
  31 + } else {
  32 + ans.push_back(d + "/" + filename);
  33 + }
  34 + }
  35 + }
  36 +
  37 + return ans;
  38 +}
  39 +
  40 +static Napi::Array ListRawFileDir(const Napi::CallbackInfo &info) {
  41 + Napi::Env env = info.Env();
  42 +
  43 + if (info.Length() != 2) {
  44 + std::ostringstream os;
  45 + os << "Expect only 2 arguments. Given: " << info.Length();
  46 +
  47 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  48 +
  49 + return {};
  50 + }
  51 +
  52 + std::unique_ptr<NativeResourceManager,
  53 + decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
  54 + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[0]),
  55 + &OH_ResourceManager_ReleaseNativeResourceManager);
  56 +
  57 + if (!info[1].IsString()) {
  58 + Napi::TypeError::New(env, "Argument 1 should be a string")
  59 + .ThrowAsJavaScriptException();
  60 +
  61 + return {};
  62 + }
  63 +
  64 + std::string dir = info[1].As<Napi::String>().Utf8Value();
  65 +
  66 + auto files = GetFilenames(mgr.get(), dir);
  67 + Napi::Array ans = Napi::Array::New(env, files.size());
  68 + for (int32_t i = 0; i != files.size(); ++i) {
  69 + ans[i] = Napi::String::New(env, files[i]);
  70 + }
  71 + return ans;
  72 +}
  73 +void InitUtils(Napi::Env env, Napi::Object exports) {
  74 + exports.Set(Napi::String::New(env, "listRawfileDir"),
  75 + Napi::Function::New(env, ListRawFileDir));
  76 +}
@@ -3,6 +3,7 @@ import { @@ -3,6 +3,7 @@ import {
3 getOfflineTtsNumSpeakers, 3 getOfflineTtsNumSpeakers,
4 getOfflineTtsSampleRate, 4 getOfflineTtsSampleRate,
5 offlineTtsGenerate, 5 offlineTtsGenerate,
  6 + offlineTtsGenerateAsync,
6 } from "libsherpa_onnx.so"; 7 } from "libsherpa_onnx.so";
7 8
8 export class OfflineTtsVitsModelConfig { 9 export class OfflineTtsVitsModelConfig {
@@ -16,14 +17,14 @@ export class OfflineTtsVitsModelConfig { @@ -16,14 +17,14 @@ export class OfflineTtsVitsModelConfig {
16 public lengthScale: number = 1.0; 17 public lengthScale: number = 1.0;
17 } 18 }
18 19
// Model-level configuration for offline TTS; wraps the VITS model config
// plus generic inference options.
export class OfflineTtsModelConfig {
  public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
  public numThreads: number = 1;    // number of threads used for inference
  public debug: boolean = false;    // print debug information when true
  public provider: string = 'cpu';  // inference provider name
}
25 26
26 -export class OfflineTtsConfig{ 27 +export class OfflineTtsConfig {
27 public model: OfflineTtsModelConfig = new OfflineTtsModelConfig(); 28 public model: OfflineTtsModelConfig = new OfflineTtsModelConfig();
28 public ruleFsts: string = ''; 29 public ruleFsts: string = '';
29 public ruleFars: string = ''; 30 public ruleFars: string = '';
@@ -35,17 +36,24 @@ export class TtsOutput { @@ -35,17 +36,24 @@ export class TtsOutput {
35 public sampleRate: number = 0; 36 public sampleRate: number = 0;
36 } 37 }
37 38
// Argument passed to TtsInput.callback for each generated audio chunk.
interface TtsCallbackData {
  samples: Float32Array;  // samples generated for this chunk
  progress: number;       // progress value reported by the native engine
}

// Input to OfflineTts.generate / generateAsync.
export class TtsInput {
  public text: string = '';   // text to synthesize
  public sid: number = 0;     // speaker id
  public speed: number = 1.0; // speech speed scale
  // Optional per-chunk callback. Per the native implementation, return a
  // nonzero number to continue generation; anything else cancels it.
  public callback?: (data: TtsCallbackData) => number;
}
43 50
44 export class OfflineTts { 51 export class OfflineTts {
45 - private handle: object;  
46 public config: OfflineTtsConfig; 52 public config: OfflineTtsConfig;
47 public numSpeakers: number; 53 public numSpeakers: number;
48 public sampleRate: number; 54 public sampleRate: number;
  55 + private handle: object;
  56 +
49 constructor(config: OfflineTtsConfig, mgr?: object) { 57 constructor(config: OfflineTtsConfig, mgr?: object) {
50 this.handle = createOfflineTts(config, mgr); 58 this.handle = createOfflineTts(config, mgr);
51 this.config = config; 59 this.config = config;
@@ -63,4 +71,8 @@ export class OfflineTts { @@ -63,4 +71,8 @@ export class OfflineTts {
63 generate(input: TtsInput): TtsOutput { 71 generate(input: TtsInput): TtsOutput {
64 return offlineTtsGenerate(this.handle, input) as TtsOutput; 72 return offlineTtsGenerate(this.handle, input) as TtsOutput;
65 } 73 }
  74 +
  75 + generateAsync(input: TtsInput): Promise<TtsOutput> {
  76 + return offlineTtsGenerateAsync(this.handle, input);
  77 + }
66 } 78 }
@@ -57,7 +57,6 @@ export class CircularBuffer { @@ -57,7 +57,6 @@ export class CircularBuffer {
57 57
  // Appends samples (a Float32Array) to the circular buffer.
  push(samples: Float32Array) {
    circularBufferPush(this.handle, samples);
  }
63 62
  1 +/node_modules
  2 +/oh_modules
  3 +/local.properties
  4 +/.idea
  5 +**/build
  6 +/.hvigor
  7 +.cxx
  8 +/.clangd
  9 +/.clang-format
  10 +/.clang-tidy
  11 +**/.test
  12 +/.appanalyzer
  1 +{
  2 + "app": {
  3 + "bundleName": "com.k2fsa.sherpa.onnx.tts",
  4 + "vendor": "next-gen Kaldi",
  5 + "versionCode": 1000000,
  6 + "versionName": "1.0.0",
  7 + "icon": "$media:app_icon",
  8 + "label": "$string:app_name"
  9 + }
  10 +}
  1 +{
  2 + "string": [
  3 + {
  4 + "name": "app_name",
  5 + "value": "SherpaOnnxTts"
  6 + }
  7 + ]
  8 +}
  1 +{
  2 + "app": {
  3 + "signingConfigs": [],
  4 + "products": [
  5 + {
  6 + "name": "default",
  7 + "signingConfig": "default",
  8 + "compatibleSdkVersion": "4.0.0(10)",
  9 + "runtimeOS": "HarmonyOS",
  10 + "buildOption": {
  11 + "strictMode": {
  12 + "caseSensitiveCheck": true,
  13 + }
  14 + }
  15 + }
  16 + ],
  17 + "buildModeSet": [
  18 + {
  19 + "name": "debug",
  20 + },
  21 + {
  22 + "name": "release"
  23 + }
  24 + ]
  25 + },
  26 + "modules": [
  27 + {
  28 + "name": "entry",
  29 + "srcPath": "./entry",
  30 + "targets": [
  31 + {
  32 + "name": "default",
  33 + "applyToProducts": [
  34 + "default"
  35 + ]
  36 + }
  37 + ]
  38 + }
  39 + ]
  40 +}
  1 +{
  2 + "files": [
  3 + "**/*.ets"
  4 + ],
  5 + "ignore": [
  6 + "**/src/ohosTest/**/*",
  7 + "**/src/test/**/*",
  8 + "**/src/mock/**/*",
  9 + "**/node_modules/**/*",
  10 + "**/oh_modules/**/*",
  11 + "**/build/**/*",
  12 + "**/.preview/**/*"
  13 + ],
  14 + "ruleSet": [
  15 + "plugin:@performance/recommended",
  16 + "plugin:@typescript-eslint/recommended"
  17 + ],
  18 + "rules": {
  19 + }
  20 +}
  1 +/node_modules
  2 +/oh_modules
  3 +/.preview
  4 +/build
  5 +/.cxx
  6 +/.test
  1 +{
  2 + "apiType": "stageMode",
  3 + "buildOption": {
  4 + "sourceOption": {
  5 + "workers": [
  6 + "./src/main/ets/workers/NonStreamingTtsWorker.ets"
  7 + ]
  8 + }
  9 + },
  10 + "buildOptionSet": [
  11 + {
  12 + "name": "release",
  13 + "arkOptions": {
  14 + "obfuscation": {
  15 + "ruleOptions": {
  16 + "enable": false,
  17 + "files": [
  18 + "./obfuscation-rules.txt"
  19 + ]
  20 + }
  21 + }
  22 + }
  23 + },
  24 + ],
  25 + "targets": [
  26 + {
  27 + "name": "default"
  28 + },
  29 + {
  30 + "name": "ohosTest",
  31 + }
  32 + ]
  33 +}
  1 +import { hapTasks } from '@ohos/hvigor-ohos-plugin';
  2 +
  3 +export default {
  4 + system: hapTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
  5 + plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
  6 +}
  1 +# Define project specific obfuscation rules here.
  2 +# You can include the obfuscation configuration files in the current module's build-profile.json5.
  3 +#
  4 +# For more details, see
  5 +# https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5
  6 +
  7 +# Obfuscation options:
  8 +# -disable-obfuscation: disable all obfuscations
  9 +# -enable-property-obfuscation: obfuscate the property names
  10 +# -enable-toplevel-obfuscation: obfuscate the names in the global scope
  11 +# -compact: remove unnecessary blank spaces and all line feeds
  12 +# -remove-log: remove all console.* statements
  13 +# -print-namecache: print the name cache that contains the mapping from the old names to new names
  14 +# -apply-namecache: reuse the given cache file
  15 +
  16 +# Keep options:
  17 +# -keep-property-name: specifies property names that you want to keep
  18 +# -keep-global-name: specifies names that you want to keep in the global scope
  19 +
  20 +-enable-property-obfuscation
  21 +-enable-toplevel-obfuscation
  22 +-enable-filename-obfuscation
  23 +-enable-export-obfuscation
  1 +{
  2 + "meta": {
  3 + "stableOrder": true
  4 + },
  5 + "lockfileVersion": 3,
  6 + "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  7 + "specifiers": {
  8 + "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
  9 + "sherpa_onnx@1.10.32": "sherpa_onnx@1.10.32"
  10 + },
  11 + "packages": {
  12 + "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
  13 + "name": "libsherpa_onnx.so",
  14 + "version": "1.0.0",
  15 + "resolved": "../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
  16 + "registryType": "local"
  17 + },
  18 + "sherpa_onnx@1.10.32": {
  19 + "name": "sherpa_onnx",
  20 + "version": "1.10.32",
  21 + "integrity": "sha512-yHYmWoeqhrunOqGr9gxPJJH/8+rdwcKFOW6onYByVObQVpbqypslg301IjGm9xpnc5bJEkO3S9sra2zQTpPA/w==",
  22 + "resolved": "https://ohpm.openharmony.cn/ohpm/sherpa_onnx/-/sherpa_onnx-1.10.32.har",
  23 + "registryType": "ohpm",
  24 + "dependencies": {
  25 + "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
  26 + }
  27 + }
  28 + }
  29 +}
  1 +{
  2 + "name": "entry",
  3 + "version": "1.0.0",
  4 + "description": "Please describe the basic information.",
  5 + "main": "",
  6 + "author": "",
  7 + "license": "",
  8 + "dependencies": {
  9 + "sherpa_onnx": "1.10.32",
  10 + }
  11 +}
  12 +
  1 +import AbilityConstant from '@ohos.app.ability.AbilityConstant';
  2 +import hilog from '@ohos.hilog';
  3 +import UIAbility from '@ohos.app.ability.UIAbility';
  4 +import Want from '@ohos.app.ability.Want';
  5 +import window from '@ohos.window';
  6 +
// Main UI ability of the TTS demo. Logs lifecycle events and loads the
// 'pages/Index' page when the window stage is created.
export default class EntryAbility extends UIAbility {
  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate');
  }

  onDestroy(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy');
  }

  onWindowStageCreate(windowStage: window.WindowStage): void {
    // Main window is created, set main page for this ability
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (err.code) {
        hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
        return;
      }
      hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
    });
  }

  onWindowStageDestroy(): void {
    // Main window is destroyed, release UI related resources
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy');
  }

  onForeground(): void {
    // Ability has brought to foreground
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground');
  }

  onBackground(): void {
    // Ability has back to background
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground');
  }
}
  1 +import hilog from '@ohos.hilog';
  2 +import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';
  3 +
// Backup/restore extension ability; only logs that the hooks were invoked.
export default class EntryBackupAbility extends BackupExtensionAbility {
  async onBackup() {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  async onRestore(bundleVersion: BundleVersion) {
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', JSON.stringify(bundleVersion));
  }
}
  1 +import { CircularBuffer } from 'sherpa_onnx';
  2 +import worker, { MessageEvents } from '@ohos.worker';
  3 +import { audio } from '@kit.AudioKit';
  4 +import picker from '@ohos.file.picker';
  5 +import fs from '@ohos.file.fs';
  6 +import systemTime from '@ohos.systemTime';
  7 +
  8 +
  9 +function savePcmToWav(filename: string, samples: Int16Array, sampleRate: number) {
  10 + const fp = fs.openSync(filename, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE);
  11 +
  12 + const header = new ArrayBuffer(44);
  13 + const view = new DataView(header);
  14 +
  15 + // http://soundfile.sapp.org/doc/WaveFormat/
  16 + // F F I R
  17 + view.setUint32(0, 0x46464952, true); // chunkID
  18 + view.setUint32(4, 36 + samples.length * 2, true); // chunkSize // E V A W
  19 + view.setUint32(8, 0x45564157, true); // format // // t m f
  20 + view.setUint32(12, 0x20746d66, true); // subchunk1ID
  21 + view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  22 + view.setUint32(20, 1, true); // audioFormat, 1 for PCM
  23 + view.setUint16(22, 1, true); // numChannels: 1 channel
  24 + view.setUint32(24, sampleRate, true); // sampleRate
  25 + view.setUint32(28, sampleRate * 2, true); // byteRate
  26 + view.setUint16(32, 2, true); // blockAlign
  27 + view.setUint16(34, 16, true); // bitsPerSample
  28 + view.setUint32(36, 0x61746164, true); // Subchunk2ID
  29 + view.setUint32(40, samples.length * 2, true); // subchunk2Size
  30 +
  31 + fs.writeSync(fp.fd, new Uint8Array(header).buffer, { length: header.byteLength });
  32 + fs.writeSync(fp.fd, samples.buffer, { length: samples.buffer.byteLength });
  33 +
  34 + fs.closeSync(fp.fd);
  35 +}
  36 +
  37 +function toInt16Samples(samples: Float32Array): Int16Array {
  38 + const int16Samples = new Int16Array(samples.length);
  39 + for (let i = 0; i < samples.length; ++i) {
  40 + let s = samples[i] * 32767;
  41 + s = s > 32767 ? 32767 : s;
  42 + s = s < -32768 ? -32768 : s;
  43 + int16Samples[i] = s;
  44 + }
  45 +
  46 + return int16Samples;
  47 +}
  48 +
  49 +
/**
 * Main page of the on-device text-to-speech demo.
 *
 * The heavy TTS work runs in a worker thread (NonStreamingTtsWorker.ets);
 * this component drives the UI, streams partial samples into a circular
 * buffer, and plays them through an AudioRenderer as they arrive.
 */
@Entry
@Component
struct Index {
  @State currentIndex: number = 0;
  @State title: string = 'Next-gen Kaldi: Text-to-speech';
  @State info: string = '';
  @State btnStartCaption: string = 'Start';
  @State btnStartEnabled: boolean = false;
  @State btnStopCaption: string = 'Stop';
  @State btnStopEnabled: boolean = false;
  @State btnSaveCaption: string = 'Save';
  @State btnSaveEnabled: boolean = false;
  @State progress: number = 0;
  @State sid: string = '0';
  @State speechSpeed: string = '1.0';
  @State isGenerating: boolean = false;
  @State initTtsDone: boolean = false;
  @State ttsGeneratedDone: boolean = true;
  @State numSpeakers: number = 1;
  @State initAudioDone: boolean = false;
  private controller: TabsController = new TabsController();
  private cancelled: boolean = false;
  private sampleRate: number = 0;     // reported by the worker after model init
  private startTime: number = 0;      // wall-clock ms, for RTF computation
  private stopTime: number = 0;
  private inputText: string = '';
  private workerInstance?: worker.ThreadWorker
  private readonly scriptURL: string = 'entry/ets/workers/NonStreamingTtsWorker.ets'
  // Note that the circular buffer can automatically resize;
  // the constructor argument specifies only the initial capacity.
  private sampleBuffer: CircularBuffer = new CircularBuffer(16000 * 5);
  private finalSamples: Float32Array | null = null;
  private audioRenderer: audio.AudioRenderer | null = null;

  // Create the audio renderer used for playback. Safe to call repeatedly;
  // it is a no-op once a renderer exists.
  initAudioRenderer() {
    if (this.audioRenderer) {
      console.log(`Audio renderer has already been created. Skip creating`);
      return;
    }
    // see
    // https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/using-audiorenderer-for-playback-V5
    console.log('Initializing audio renderer');
    const audioStreamInfo: audio.AudioStreamInfo = {
      samplingRate: this.sampleRate,
      channels: audio.AudioChannel.CHANNEL_1, // mono
      sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
      encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW
    };

    const audioRendererInfo: audio.AudioRendererInfo = {
      usage: audio.StreamUsage.STREAM_USAGE_MUSIC, rendererFlags: 0
    };

    const audioRendererOptions: audio.AudioRendererOptions = {
      streamInfo: audioStreamInfo, rendererInfo: audioRendererInfo
    };

    audio.createAudioRenderer(audioRendererOptions, (err, renderer) => {
      if (!err) {
        console.log('audio renderer initialized successfully');
        this.initAudioDone = true;
        if (renderer) {
          this.audioRenderer = renderer;
          // audioPlayCallback pulls PCM from sampleBuffer on demand
          this.audioRenderer.on("writeData", this.audioPlayCallback);
          if (this.sampleBuffer.size()) {
            // samples arrived before the renderer was ready; start playing
            this.audioRenderer.start();
          }
        } else {
          console.log(`returned audio renderer is ${renderer}`);
        }
      } else {
        console.log(`Failed to initialize audio renderer. error message: ${err.message}, error code: ${err.code}`);
      }
    });
  }

  // Page lifecycle: create the worker, wire up its messages, and kick off
  // TTS model initialization.
  async aboutToAppear() {
    this.initAudioRenderer();

    this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
      name: 'NonStreaming TTS worker'
    });
    this.workerInstance.onmessage = (e: MessageEvents) => {
      const msgType = e.data['msgType'] as string;
      console.log(`received msg from worker: ${msgType}`);

      if (msgType == 'init-tts-done') {
        this.info = 'Model initialized!\nPlease enter text and press start.';
        this.sampleRate = e.data['sampleRate'] as number;
        this.numSpeakers = e.data['numSpeakers'] as number;

        this.initTtsDone = true;
      }

      if (msgType == 'tts-generate-partial') {
        if (this.cancelled) {
          return;
        }

        const samples: Float32Array = e.data['samples'] as Float32Array;
        const progress: number = e.data['progress'] as number;
        this.progress = progress;

        this.sampleBuffer.push(samples);

        if (!this.initAudioDone) {
          this.initAudioRenderer();
        }

        if (this.audioRenderer && this.audioRenderer?.state != audio.AudioState.STATE_RUNNING) {
          this.audioRenderer.start();
        }
      }

      if (msgType == 'tts-generate-done') {
        this.isGenerating = false;
        const samples: Float32Array = e.data['samples'] as Float32Array;

        systemTime.getRealTime((err, data) => {
          if (err) {
            console.log(`Failed to get stop time`);
          } else {
            this.stopTime = data;

            const audioDuration = samples.length / this.sampleRate;
            const elapsedSeconds = (this.stopTime - this.startTime) / 1000;
            const RTF = elapsedSeconds / audioDuration;

            this.info = `Audio duration: ${audioDuration} s
Elapsed: ${elapsedSeconds} s
RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
`;
            if (this.cancelled) {
              this.info += '\nCancelled.';
            }
          }
        });

        this.finalSamples = samples;
        this.ttsGeneratedDone = true;
        this.btnSaveEnabled = true;

        // If playback never started (e.g. generation finished before any
        // partial chunk was played), queue the full result now.
        if (this.audioRenderer && this.audioRenderer?.state != audio.AudioState.STATE_RUNNING &&
          this.sampleBuffer.size() == 0) {
          this.sampleBuffer.push(samples);
          this.progress = 1;
          this.audioRenderer.start();
        }

        if (!this.initAudioDone) {
          this.btnStartEnabled = true;
          this.btnStopEnabled = false;
          this.info += '\nAudio renderer is not initialized. Disable playing audio.';
        }
      }
    }

    this.info = 'Initializing TTS model ...';
    this.workerInstance.postMessage({ msgType: 'init-tts', context: getContext() });
  }

  // Builds one entry of the bottom tab bar.
  @Builder
  TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
    Column() {
      Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
      Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
    }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
      this.currentIndex = targetIndex;
      this.controller.changeIndex(this.currentIndex);
    })
  }

  build() {
    Column() {
      Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
            if (this.numSpeakers > 1) {
              Row({ space: 10 }) {
                Text(`Speaker ID (0-${this.numSpeakers - 1})`).width('60%')

                TextInput({ text: this.sid }).onChange((text) => {
                  this.sid = text.trim();
                }).width('20%')
              }.justifyContent(FlexAlign.Center)
            }

            Row() {
              Text('Speech speed').width('60%');

              TextInput({ text: this.speechSpeed }).onChange((text) => {
                this.speechSpeed = text.trim();
              }).width('20%')
            }

            Row({ space: 10 }) {
              Button(this.btnStartCaption).enabled(this.btnStartEnabled).onClick(async () => {
                // Validate speaker ID and speed before contacting the worker.
                let sid = parseInt(this.sid);
                if (sid.toString() != this.sid) {
                  this.info = 'Please input a valid speaker ID';
                  return;
                }

                let speed = parseFloat(this.speechSpeed);
                if (isNaN(speed)) {
                  this.info = 'Please enter a valid speech speed';
                  return;
                }

                if (speed <= 0) {
                  this.info = 'Please enter a positive speech speed';
                  return;
                }

                if (this.workerInstance && this.initTtsDone) {
                  this.info = 'Generating...';
                  this.cancelled = false;
                  this.finalSamples = null;
                  this.sampleBuffer.reset();
                  this.ttsGeneratedDone = false;
                  this.progress = 0;

                  this.btnStartEnabled = false;
                  this.btnStopEnabled = true;
                  this.btnSaveEnabled = false;
                  console.log(`sending ${this.inputText}`);
                  this.startTime = await systemTime.getRealTime();
                  this.workerInstance?.postMessage({
                    msgType: 'tts-generate',
                    text: this.inputText,
                    sid: sid,
                    speed: speed,
                  });
                  this.isGenerating = true;
                  this.info = '';
                } else {
                  this.info = 'Failed to initialize tts model';
                  this.btnStartEnabled = false;
                }
              });

              Button(this.btnStopCaption).enabled(this.btnStopEnabled).onClick(() => {
                this.ttsGeneratedDone = true;
                this.btnStartEnabled = true;
                this.btnStopEnabled = false;
                this.sampleBuffer.reset();
                this.cancelled = true;
                this.isGenerating = false;

                if (this.workerInstance && this.initTtsDone) {
                  this.workerInstance.postMessage({ msgType: 'tts-generate-cancel' });
                }
                this.audioRenderer?.stop();
              })

              Button(this.btnSaveCaption).enabled(this.btnSaveEnabled).onClick(() => {
                if (!this.finalSamples || this.finalSamples.length == 0) {
                  this.btnSaveEnabled = false;
                  return;
                }

                let uri: string = '';

                const audioOptions = new picker.AudioSaveOptions();
                // audioOptions.newFileNames = ['o.wav'];

                const audioViewPicker = new picker.AudioViewPicker();

                audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
                  uri = audioSelectResult[0];
                  if (this.finalSamples) {
                    savePcmToWav(uri, toInt16Samples(this.finalSamples), this.sampleRate);
                    console.log(`Saved to ${uri}`);
                    this.info += `\nSaved to ${uri}`;
                  }
                });
              });
            }

            if (this.info != '') {
              TextArea({ text: this.info }).focusable(false);
            }
            if (this.progress > 0) {
              Row() {
                Progress({ value: 0, total: 100, type: ProgressType.Capsule })
                  .width('80%')
                  .height(20)
                  .value(this.progress * 100);

                Text(`${(this.progress * 100).toFixed(2)}%`).width('15%')
              }.width('100%').justifyContent(FlexAlign.Center)
            }

            TextArea({ placeholder: 'Input text for TTS and click the start button' })
              .width('100%')
              .height('100%')
              .focusable(this.isGenerating == false && this.initTtsDone)
              .onChange((text) => {
                this.inputText = text;
                if (text.trim() == '') {
                  this.btnStartEnabled = false;
                  return;
                }
                this.btnStartEnabled = true;
              })
          }.width('100%')

          // see https://composeicons.com/
        }.tabBar(this.TabBuilder('TTS', 0, $r('app.media.home'), $r('app.media.home')))

        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
            TextArea({ text: `
Everything is open-sourced.

It runs locally, without accessing the network

See also https://github.com/k2-fsa/sherpa-onnx

新一代 Kaldi QQ 和微信交流群: 请看

https://k2-fsa.github.io/sherpa/social-groups.html

微信公众号: 新一代 Kaldi
 `}).width('100%')
              .height('100%')
              .focusable(false)
          }.justifyContent(FlexAlign.Start)
        }.tabBar(this.TabBuilder('Help', 1, $r('app.media.info'), $r('app.media.info')))
      }.scrollable(false)
    }
  }

  // Called by the audio renderer whenever it needs more PCM data.
  // Converts queued float samples to S16LE in place; when the buffer runs
  // dry after generation has finished, playback is stopped.
  private audioPlayCallback = (buffer: ArrayBuffer) => {
    const numSamples = buffer.byteLength / 2;
    if (this.sampleBuffer.size() >= numSamples) {
      const samples: Float32Array = this.sampleBuffer.get(this.sampleBuffer.head(), numSamples);

      const int16Samples = new Int16Array(buffer);
      for (let i = 0; i < numSamples; ++i) {
        let s = samples[i] * 32767;
        s = s > 32767 ? 32767 : s;
        s = s < -32768 ? -32768 : s;
        int16Samples[i] = s;
      }
      this.sampleBuffer.pop(numSamples);
    } else {
      // not enough data: output silence
      (new Int16Array(buffer)).fill(0);
      if (this.ttsGeneratedDone) {
        this.audioRenderer?.stop();
        this.btnStartEnabled = true;
        this.btnStopEnabled = false;
      }
    }
  };
}
  1 +import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker';
  2 +
  3 +import { fileIo as fs } from '@kit.CoreFileKit';
  4 +
  5 +import {OfflineTtsConfig, OfflineTts, listRawfileDir, TtsInput, TtsOutput} from 'sherpa_onnx';
  6 +import { buffer } from '@kit.ArkTS';
  7 +
  8 +const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
  9 +
  10 +let tts: OfflineTts;
  11 +let cancelled = false;
  12 +
  13 +function mkdir(context: Context, parts: string[]) {
  14 + const path = parts.join('/');
  15 + if (fs.accessSync(path)) {
  16 + return;
  17 + }
  18 +
  19 + const sandboxPath: string = context.getApplicationContext().filesDir;
  20 + let d = sandboxPath
  21 + for (const p of parts) {
  22 + d = d + '/' + p;
  23 +
  24 + if (fs.accessSync(d)) {
  25 + continue;
  26 + }
  27 +
  28 + fs.mkdirSync(d);
  29 + }
  30 +}
  31 +
  32 +function copyRawFileDirToSandbox(context: Context, srcDir: string) {
  33 + let mgr = context.resourceManager;
  34 + const allFiles: string[] = listRawfileDir(mgr, srcDir);
  35 + for (const src of allFiles) {
  36 + const parts: string[] = src.split('/');
  37 + if (parts.length != 1) {
  38 + mkdir(context, parts.slice(0, -1));
  39 + }
  40 +
  41 + copyRawFileToSandbox(context, src, src);
  42 + }
  43 +}
  44 +
  45 +function copyRawFileToSandbox(context: Context, src: string, dst: string) {
  46 + // see https://blog.csdn.net/weixin_44640245/article/details/142634846
  47 + // https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
  48 + let uint8Array: Uint8Array = context.resourceManager.getRawFileContentSync(src);
  49 +
  50 + // https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir
  51 + let sandboxPath: string = context.getApplicationContext().filesDir;
  52 + let filepath = sandboxPath + '/' + dst;
  53 +
  54 + if (fs.accessSync(filepath)) {
  55 + // if the destination exists and has the expected file size,
  56 + // then we skip copying it
  57 + let stat = fs.statSync(filepath);
  58 + if (stat.size == uint8Array.length) {
  59 + return;
  60 + }
  61 + }
  62 +
  63 + const fp = fs.openSync(filepath, fs.OpenMode.WRITE_ONLY | fs.OpenMode.CREATE | fs.OpenMode.TRUNC);
  64 + fs.writeSync(fp.fd, buffer.from(uint8Array).buffer)
  65 + fs.close(fp.fd);
  66 +}
  67 +
/**
 * Create and configure the offline TTS engine from model files bundled
 * in this module's rawfile directory.
 *
 * You must edit the "Your change starts here" section to select a model
 * before running the app; otherwise this throws.
 *
 * @param context application context (used for resource access and the
 *                sandbox files directory)
 * @returns a ready-to-use OfflineTts instance
 * @throws Error if no model has been selected below
 */
function initTts(context: Context): OfflineTts {
  // Such a design is to make it easier to build flutter APPs with
  // github actions for a variety of tts models
  //
  // See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
  // for details

  let modelDir = '';
  let modelName = '';
  let ruleFsts = '';
  let ruleFars = '';
  let lexicon = '';
  let dataDir = '';
  let dictDir = '';
  // You can select an example below and change it according to match your
  // selected tts model

  // ============================================================
  // Your change starts here
  // ============================================================

  // Example 1:
  // modelDir = 'vits-vctk';
  // modelName = 'vits-vctk.onnx';
  // lexicon = 'lexicon.txt';

  // Example 2:
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  // modelDir = 'vits-piper-en_US-amy-low';
  // modelName = 'en_US-amy-low.onnx';
  // dataDir = 'espeak-ng-data';

  // Example 3:
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  // modelDir = 'vits-icefall-zh-aishell3';
  // modelName = 'model.onnx';
  // ruleFsts = 'phone.fst,date.fst,number.fst,new_heteronym.fst';
  // ruleFars = 'rule.far';
  // lexicon = 'lexicon.txt';

  // Example 4:
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
  // modelDir = 'vits-zh-hf-fanchen-C';
  // modelName = 'vits-zh-hf-fanchen-C.onnx';
  // lexicon = 'lexicon.txt';
  // dictDir = 'dict';

  // Example 5:
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  // modelDir = 'vits-coqui-de-css10';
  // modelName = 'model.onnx';

  // Example 6
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  // modelDir = 'vits-piper-en_US-libritts_r-medium';
  // modelName = 'en_US-libritts_r-medium.onnx';
  // dataDir = 'espeak-ng-data';

  // Example 7
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2
  // modelDir = 'vits-melo-tts-zh_en';
  // modelName = 'model.onnx';
  // lexicon = 'lexicon.txt';
  // dictDir = 'dict';
  // ruleFsts = `date.fst,phone.fst,number.fst`;

  // ============================================================
  // Please don't change the remaining part of this function
  // ============================================================

  if (modelName == '') {
    throw new Error('You are supposed to select a model by changing the code before you run the app');
  }

  modelName = modelDir + '/' + modelName;

  // Prefix each comma-separated rule fst with the model directory.
  if (ruleFsts != '') {
    let fsts = ruleFsts.split(',')
    let tmp: string[] = [];
    for (const f of fsts) {
      tmp.push(modelDir + '/' + f);
    }
    ruleFsts = tmp.join(',');
  }

  // Same treatment for rule far archives.
  if (ruleFars != '') {
    let fars = ruleFars.split(',')
    let tmp: string[] = [];
    for (const f of fars) {
      tmp.push(modelDir + '/' + f);
    }
    ruleFars = tmp.join(',');
  }

  if (lexicon != '') {
    lexicon = modelDir + '/' + lexicon;
  }

  // dataDir/dictDir must be real directories on the file system (the native
  // code reads them directly), so copy them out of the rawfile bundle into
  // the sandbox and point the config at the sandbox copies.
  if (dataDir != '') {
    copyRawFileDirToSandbox(context, modelDir + '/' + dataDir)
    let sandboxPath: string = context.getApplicationContext().filesDir;
    dataDir = sandboxPath + '/' + modelDir + '/' + dataDir;
  }

  if (dictDir != '') {
    copyRawFileDirToSandbox(context, modelDir + '/' + dictDir)
    let sandboxPath: string = context.getApplicationContext().filesDir;
    dictDir = sandboxPath + '/' + modelDir + '/' + dictDir;
  }

  const tokens = modelDir + '/tokens.txt';

  const config: OfflineTtsConfig = new OfflineTtsConfig();
  config.model.vits.model = modelName;
  config.model.vits.lexicon = lexicon;
  config.model.vits.tokens = tokens;
  config.model.vits.dataDir = dataDir;
  config.model.vits.dictDir = dictDir;
  config.model.numThreads = 2;
  config.model.debug = true;
  config.ruleFsts = ruleFsts;
  config.ruleFars = ruleFars;

  return new OfflineTts(config, context.resourceManager);
}
  196 +
  197 +interface TtsCallbackData {
  198 + samples: Float32Array;
  199 + progress: number;
  200 +}
  201 +
  202 +function callback(data: TtsCallbackData): number {
  203 + workerPort.postMessage({
  204 + 'msgType': 'tts-generate-partial',
  205 + samples: Float32Array.from(data.samples),
  206 + progress: data.progress,
  207 + });
  208 +
  209 + // 0 means to stop generating in C++
  210 + // 1 means to continue generating in C++
  211 + return cancelled? 0 : 1;
  212 +}
  213 +
  214 +/**
  215 + * Defines the event handler to be called when the worker thread receives a message sent by the host thread.
  216 + * The event handler is executed in the worker thread.
  217 + *
  218 + * @param e message data
  219 + */
  220 +workerPort.onmessage = (e: MessageEvents) => {
  221 + const msgType = e.data['msgType'] as string;
  222 + console.log(`msg-type: ${msgType}`);
  223 + if (msgType == 'init-tts' && !tts) {
  224 + const context = e.data['context'] as Context;
  225 + tts = initTts(context);
  226 + workerPort.postMessage({ 'msgType': 'init-tts-done',
  227 + sampleRate: tts.sampleRate,
  228 + numSpeakers: tts.numSpeakers,
  229 + });
  230 + }
  231 +
  232 + if (msgType == 'tts-generate-cancel') {
  233 + cancelled = true;
  234 + }
  235 +
  236 + if (msgType == 'tts-generate') {
  237 + const text = e.data['text'] as string;
  238 + console.log(`recevied text ${text}`);
  239 + const input: TtsInput = new TtsInput();
  240 + input.text = text;
  241 + input.sid = e.data['sid'] as number;
  242 + input.speed = e.data['speed'] as number;
  243 + input.callback = callback;
  244 +
  245 + cancelled = false;
  246 + if (true) {
  247 + tts.generateAsync(input).then((ttsOutput: TtsOutput) => {
  248 + console.log(`sampleRate: ${ttsOutput.sampleRate}`);
  249 +
  250 + workerPort.postMessage({
  251 + 'msgType': 'tts-generate-done',
  252 + samples: Float32Array.from(ttsOutput.samples),
  253 + });
  254 +
  255 + });
  256 + } else {
  257 + const ttsOutput: TtsOutput = tts.generate(input);
  258 + workerPort.postMessage({
  259 + 'msgType': 'tts-generate-done',
  260 + samples: Float32Array.from(ttsOutput.samples),
  261 + });
  262 + }
  263 +
  264 +
  265 + }
  266 +}
  267 +
/**
 * Defines the event handler to be called when the worker receives a message that cannot be deserialized.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessageerror = (e: MessageEvents) => {
  // Intentionally a no-op: malformed messages are silently dropped.
  // NOTE(review): consider logging `e` here to make such failures visible.
}

/**
 * Defines the event handler to be called when an exception occurs during worker execution.
 * The event handler is executed in the worker thread.
 *
 * @param e error message
 */
workerPort.onerror = (e: ErrorEvent) => {
  // Intentionally a no-op: worker-side exceptions are swallowed.
  // NOTE(review): consider logging `e.message` to aid debugging.
}
  1 +{
  2 + "module": {
  3 + "name": "entry",
  4 + "type": "entry",
  5 + "description": "$string:module_desc",
  6 + "mainElement": "EntryAbility",
  7 + "deviceTypes": [
  8 + "phone",
  9 + "tablet",
  10 + "2in1"
  11 + ],
  12 + "deliveryWithInstall": true,
  13 + "installationFree": false,
  14 + "pages": "$profile:main_pages",
  15 + "abilities": [
  16 + {
  17 + "name": "EntryAbility",
  18 + "srcEntry": "./ets/entryability/EntryAbility.ets",
  19 + "description": "$string:EntryAbility_desc",
  20 + "icon": "$media:layered_image",
  21 + "label": "$string:EntryAbility_label",
  22 + "startWindowIcon": "$media:startIcon",
  23 + "startWindowBackground": "$color:start_window_background",
  24 + "exported": true,
  25 + "skills": [
  26 + {
  27 + "entities": [
  28 + "entity.system.home"
  29 + ],
  30 + "actions": [
  31 + "action.system.home"
  32 + ]
  33 + }
  34 + ]
  35 + }
  36 + ],
  37 + "extensionAbilities": [
  38 + {
  39 + "name": "EntryBackupAbility",
  40 + "srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
  41 + "type": "backup",
  42 + "exported": false,
  43 + "metadata": [
  44 + {
  45 + "name": "ohos.extension.backup",
  46 + "resource": "$profile:backup_config"
  47 + }
  48 + ],
  49 + }
  50 + ]
  51 + }
  52 +}
  1 +{
  2 + "color": [
  3 + {
  4 + "name": "start_window_background",
  5 + "value": "#FFFFFF"
  6 + }
  7 + ]
  8 +}
  1 +{
  2 + "string": [
  3 + {
  4 + "name": "module_desc",
  5 + "value": "On-device text-to-speech with Next-gen Kaldi"
  6 + },
  7 + {
  8 + "name": "EntryAbility_desc",
  9 + "value": "On-device text-to-speech with Next-gen Kaldi"
  10 + },
  11 + {
  12 + "name": "EntryAbility_label",
  13 + "value": "TTS"
  14 + }
  15 + ]
  16 +}
  1 +<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 -960 960 960"><path d="m480-840 440 330-48 64-72-54v380H160v-380l-72 54-48-64zM294-478q0 53 57 113t129 125q72-65 129-125t57-113q0-44-30-73t-72-29q-26 0-47.5 10.5T480-542q-15-17-37.5-27.5T396-580q-42 0-72 29t-30 73m426 278v-360L480-740 240-560v360zm0 0H240z"/></svg>
  1 +<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 -960 960 960"><path d="M440-280h80v-240h-80zm40-320q17 0 28.5-11.5T520-640t-11.5-28.5T480-680t-28.5 11.5T440-640t11.5 28.5T480-600m0 520q-83 0-156-31.5T197-197t-85.5-127T80-480t31.5-156T197-763t127-85.5T480-880t156 31.5T763-763t85.5 127T880-480t-31.5 156T763-197t-127 85.5T480-80m0-80q134 0 227-93t93-227-93-227-227-93-227 93-93 227 93 227 227 93m0-320"/></svg>
  1 +{
  2 + "layered-image":
  3 + {
  4 + "background" : "$media:background",
  5 + "foreground" : "$media:foreground"
  6 + }
  7 +}
  1 +{
  2 + "string": [
  3 + {
  4 + "name": "module_desc",
  5 + "value": "On-device text-to-speech with Next-gen Kaldi"
  6 + },
  7 + {
  8 + "name": "EntryAbility_desc",
  9 + "value": "On-device text-to-speech with Next-gen Kaldi"
  10 + },
  11 + {
  12 + "name": "EntryAbility_label",
  13 + "value": "TTS"
  14 + }
  15 + ]
  16 +}
  1 +{
  2 + "string": [
  3 + {
  4 + "name": "module_desc",
  5 + "value": "使用新一代Kaldi进行本地离线语音合成"
  6 + },
  7 + {
  8 + "name": "EntryAbility_desc",
  9 + "value": "使用新一代Kaldi进行本地离线语音合成"
  10 + },
  11 + {
  12 + "name": "EntryAbility_label",
  13 + "value": "本地语音合成"
  14 + }
  15 + ]
  16 +}
  1 +import hilog from '@ohos.hilog';
  2 +import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
  3 +
// Template instrumented-test suite generated by DevEco Studio; kept as a
// placeholder so the test target builds. Replace with real ability tests.
export default function abilityTest() {
  describe('ActsAbilityTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    })
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    })
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    })
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    })
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    })
  })
}
  1 +import abilityTest from './Ability.test';
  2 +
// Entry point for the instrumented test bundle; registers all test suites.
export default function testsuite() {
  abilityTest();
}
  1 +{
  2 + "module": {
  3 + "name": "entry_test",
  4 + "type": "feature",
  5 + "deviceTypes": [
  6 + "phone",
  7 + "tablet",
  8 + "2in1"
  9 + ],
  10 + "deliveryWithInstall": true,
  11 + "installationFree": false
  12 + }
  13 +}
  1 +import localUnitTest from './LocalUnit.test';
  2 +
// Entry point for the local (host-side) unit-test bundle; registers all suites.
export default function testsuite() {
  localUnitTest();
}
  1 +import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
  2 +
// Template local unit-test suite generated by DevEco Studio; kept as a
// placeholder so the test target builds. Replace with real unit tests.
export default function localUnitTest() {
  describe('localUnitTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    });
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    });
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    });
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    });
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    });
  });
}
  1 +{
  2 + "modelVersion": "5.0.0",
  3 + "dependencies": {
  4 + },
  5 + "execution": {
  6 + // "analyze": "normal", /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
  7 + // "daemon": true, /* Enable daemon compilation. Value: [ true | false ]. Default: true */
  8 + // "incremental": true, /* Enable incremental compilation. Value: [ true | false ]. Default: true */
  9 + // "parallel": true, /* Enable parallel compilation. Value: [ true | false ]. Default: true */
  10 + // "typeCheck": false, /* Enable typeCheck. Value: [ true | false ]. Default: false */
  11 + },
  12 + "logging": {
  13 + // "level": "info" /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
  14 + },
  15 + "debugging": {
  16 + // "stacktrace": false /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
  17 + },
  18 + "nodeOptions": {
  19 + // "maxOldSpaceSize": 8192 /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
  20 + // "exposeGC": true /* Enable to trigger garbage collection explicitly. Default: true*/
  21 + }
  22 +}
  1 +import { appTasks } from '@ohos/hvigor-ohos-plugin';
  2 +
// Hvigor build script for the application-level module: wires in the
// standard OpenHarmony app tasks and leaves room for custom plugins.
export default {
  system: appTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
  plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
}
  1 +{
  2 + "meta": {
  3 + "stableOrder": true
  4 + },
  5 + "lockfileVersion": 3,
  6 + "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  7 + "specifiers": {
  8 + "@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
  9 + },
  10 + "packages": {
  11 + "@ohos/hypium@1.0.19": {
  12 + "name": "@ohos/hypium",
  13 + "version": "1.0.19",
  14 + "integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
  15 + "resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
  16 + "registryType": "ohpm"
  17 + }
  18 + }
  19 +}
  1 +{
  2 + "modelVersion": "5.0.0",
  3 + "description": "Please describe the basic information.",
  4 + "dependencies": {
  5 + },
  6 + "devDependencies": {
  7 + "@ohos/hypium": "1.0.19"
  8 + }
  9 +}
@@ -11,6 +11,7 @@ import { audio } from '@kit.AudioKit'; @@ -11,6 +11,7 @@ import { audio } from '@kit.AudioKit';
11 @Entry 11 @Entry
12 @Component 12 @Component
13 struct Index { 13 struct Index {
  14 + @State title: string = 'Next-gen Kaldi: VAD + ASR';
14 @State currentIndex: number = 0; 15 @State currentIndex: number = 0;
15 @State resultForFile: string = ''; 16 @State resultForFile: string = '';
16 @State progressForFile: number = 0; 17 @State progressForFile: number = 0;
@@ -73,13 +74,11 @@ struct Index { @@ -73,13 +74,11 @@ struct Index {
73 }; 74 };
74 75
75 const audioCapturerInfo: audio.AudioCapturerInfo = { 76 const audioCapturerInfo: audio.AudioCapturerInfo = {
76 - source: audio.SourceType.SOURCE_TYPE_MIC,  
77 - capturerFlags: 0 77 + source: audio.SourceType.SOURCE_TYPE_MIC, capturerFlags: 0
78 }; 78 };
79 79
80 const audioCapturerOptions: audio.AudioCapturerOptions = { 80 const audioCapturerOptions: audio.AudioCapturerOptions = {
81 - streamInfo: audioStreamInfo,  
82 - capturerInfo: audioCapturerInfo 81 + streamInfo: audioStreamInfo, capturerInfo: audioCapturerInfo
83 82
84 }; 83 };
85 audio.createAudioCapturer(audioCapturerOptions, (err, data) => { 84 audio.createAudioCapturer(audioCapturerOptions, (err, data) => {
@@ -162,15 +161,9 @@ struct Index { @@ -162,15 +161,9 @@ struct Index {
162 @Builder 161 @Builder
163 TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) { 162 TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
164 Column() { 163 Column() {
165 - Image(this.currentIndex == targetIndex ? selectedImg : normalImg)  
166 - .size({ width: 25, height: 25 })  
167 - Text(title)  
168 - .fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')  
169 - }  
170 - .width('100%')  
171 - .height(50)  
172 - .justifyContent(FlexAlign.Center)  
173 - .onClick(() => { 164 + Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
  165 + Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
  166 + }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
174 this.currentIndex = targetIndex; 167 this.currentIndex = targetIndex;
175 this.controller.changeIndex(this.currentIndex); 168 this.controller.changeIndex(this.currentIndex);
176 }) 169 })
@@ -181,11 +174,7 @@ struct Index { @@ -181,11 +174,7 @@ struct Index {
181 Tabs({ barPosition: BarPosition.End, controller: this.controller }) { 174 Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
182 TabContent() { 175 TabContent() {
183 Column({ space: 10 }) { 176 Column({ space: 10 }) {
184 - Text('Next-gen Kaldi: VAD + ASR')  
185 - .fontColor('#182431')  
186 - .fontSize(25)  
187 - .lineHeight(41)  
188 - .fontWeight(500) 177 + Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
189 178
190 Button('Select .wav file (16kHz) ') 179 Button('Select .wav file (16kHz) ')
191 .enabled(this.selectFileBtnEnabled) 180 .enabled(this.selectFileBtnEnabled)
@@ -211,8 +200,7 @@ struct Index { @@ -211,8 +200,7 @@ struct Index {
211 200
212 if (this.workerInstance) { 201 if (this.workerInstance) {
213 this.workerInstance.postMessage({ 202 this.workerInstance.postMessage({
214 - msgType: 'non-streaming-asr-vad-decode',  
215 - filename: result[0], 203 + msgType: 'non-streaming-asr-vad-decode', filename: result[0],
216 }); 204 });
217 } else { 205 } else {
218 console.log(`this worker instance is undefined ${this.workerInstance}`); 206 console.log(`this worker instance is undefined ${this.workerInstance}`);
@@ -236,80 +224,86 @@ struct Index { @@ -236,80 +224,86 @@ struct Index {
236 }.width('100%').justifyContent(FlexAlign.Center) 224 }.width('100%').justifyContent(FlexAlign.Center)
237 } 225 }
238 226
239 - TextArea({ text: this.resultForFile }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });  
240 -  
241 - }  
242 - .alignItems(HorizontalAlign.Center)  
243 - .justifyContent(FlexAlign.Start) 227 + TextArea({ text: this.resultForFile })
  228 + .width('100%')
  229 + .lineSpacing({ value: 10, unit: LengthUnit.VP })
  230 + .height('100%');
  231 + }.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start)
244 }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc_default'))) 232 }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc_default')))
245 233
246 TabContent() { 234 TabContent() {
247 - Column() {  
248 - Button(this.message)  
249 - .enabled(this.micInitDone)  
250 - .onClick(() => {  
251 - console.log('clicked mic button');  
252 - this.resultForMic = '';  
253 - if (this.mic) {  
254 - if (this.micStarted) {  
255 - this.mic.stop();  
256 - this.message = "Start recording";  
257 - this.micStarted = false;  
258 - console.log('mic stopped');  
259 -  
260 - const samples = this.flatten(this.sampleList);  
261 - let s = 0;  
262 - for (let i = 0; i < samples.length; ++i) {  
263 - s += samples[i];  
264 - }  
265 - console.log(`samples ${samples.length}, sum: ${s}`);  
266 -  
267 - if (this.workerInstance) {  
268 - console.log('decode mic');  
269 - this.workerInstance.postMessage({  
270 - msgType: 'non-streaming-asr-vad-mic',  
271 - samples,  
272 - });  
273 - } else {  
274 - console.log(`this worker instance is undefined ${this.workerInstance}`);  
275 - } 235 + Column({ space: 10 }) {
  236 + Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
  237 + Button(this.message).enabled(this.micInitDone).onClick(() => {
  238 + console.log('clicked mic button');
  239 + this.resultForMic = '';
  240 + if (this.mic) {
  241 + if (this.micStarted) {
  242 + this.mic.stop();
  243 + this.message = "Start recording";
  244 + this.micStarted = false;
  245 + console.log('mic stopped');
  246 +
  247 + const samples = this.flatten(this.sampleList);
  248 + let s = 0;
  249 + for (let i = 0; i < samples.length; ++i) {
  250 + s += samples[i];
  251 + }
  252 + console.log(`samples ${samples.length}, sum: ${s}`);
  253 +
  254 + if (this.workerInstance) {
  255 + console.log('decode mic');
  256 + this.workerInstance.postMessage({
  257 + msgType: 'non-streaming-asr-vad-mic', samples,
  258 + });
276 } else { 259 } else {
277 - this.sampleList = [];  
278 - this.mic.start();  
279 - this.message = "Stop recording";  
280 - this.micStarted = true;  
281 - console.log('mic started'); 260 + console.log(`this worker instance is undefined ${this.workerInstance}`);
282 } 261 }
  262 + } else {
  263 + this.sampleList = [];
  264 + this.mic.start();
  265 + this.message = "Stop recording";
  266 + this.micStarted = true;
  267 + console.log('mic started');
283 } 268 }
284 - }); 269 + }
  270 + });
285 271
286 Text(`Supported languages: ${this.lang}`) 272 Text(`Supported languages: ${this.lang}`)
287 273
288 - TextArea({ text: this.resultForMic }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });  
289 - }  
290 - .alignItems(HorizontalAlign.Center)  
291 - .justifyContent(FlexAlign.Start) 274 + TextArea({ text: this.resultForMic })
  275 + .width('100%')
  276 + .lineSpacing({ value: 10, unit: LengthUnit.VP })
  277 + .width('100%')
  278 + .height('100%');
  279 + }.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start)
292 } 280 }
293 .tabBar(this.TabBuilder('From mic', 1, $r('app.media.ic_public_input_voice'), 281 .tabBar(this.TabBuilder('From mic', 1, $r('app.media.ic_public_input_voice'),
294 $r('app.media.ic_public_input_voice_default'))) 282 $r('app.media.ic_public_input_voice_default')))
295 283
296 TabContent() { 284 TabContent() {
297 - Column() {  
298 - Text("Everything is open-sourced");  
299 - Divider();  
300 - Text("It runs locally, without accessing the network");  
301 - Divider();  
302 - Text("See also https://github.com/k2-fsa/sherpa-onnx");  
303 - Divider();  
304 - Text("and https://k2-fsa.github.io/sherpa/social-groups.html"); 285 + Column({ space: 10 }) {
  286 + Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
  287 + TextArea({
  288 + text: `
  289 +Everything is open-sourced.
  290 +
  291 +It runs locally, without accessing the network
  292 +
  293 +See also https://github.com/k2-fsa/sherpa-onnx
  294 +
  295 +新一代 Kaldi QQ 和微信交流群: 请看
  296 +
  297 +https://k2-fsa.github.io/sherpa/social-groups.html
  298 +
  299 +微信公众号: 新一代 Kaldi
  300 + `
  301 + }).width('100%').height('100%').focusable(false)
305 }.justifyContent(FlexAlign.Start) 302 }.justifyContent(FlexAlign.Start)
306 - }.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'),  
307 - $r('app.media.info_circle_default'))) 303 + }.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'), $r('app.media.info_circle_default')))
308 304
309 }.scrollable(false) 305 }.scrollable(false)
310 - }  
311 - .width('100%')  
312 - .justifyContent(FlexAlign.Start) 306 + }.width('100%').justifyContent(FlexAlign.Start)
313 } 307 }
314 308
315 private micCallback = (buffer: ArrayBuffer) => { 309 private micCallback = (buffer: ArrayBuffer) => {
@@ -2,19 +2,19 @@ @@ -2,19 +2,19 @@
2 "string": [ 2 "string": [
3 { 3 {
4 "name": "module_desc", 4 "name": "module_desc",
5 - "value": "VAD+ASR with Next-gen Kaldi" 5 + "value": "On-device VAD+ASR with Next-gen Kaldi"
6 }, 6 },
7 { 7 {
8 "name": "EntryAbility_desc", 8 "name": "EntryAbility_desc",
9 - "value": "VAD+ASR" 9 + "value": "On-device VAD+ASR with Next-gen Kaldi"
10 }, 10 },
11 { 11 {
12 "name": "EntryAbility_label", 12 "name": "EntryAbility_label",
13 - "value": "VAD_ASR" 13 + "value": "On-device speech recognition"
14 }, 14 },
15 { 15 {
16 "name": "mic_reason", 16 "name": "mic_reason",
 17 - "value": "access the microhone for speech recognition" 17 + "value": "access the microphone for on-device speech recognition with Next-gen Kaldi"
18 } 18 }
19 ] 19 ]
20 } 20 }
@@ -2,15 +2,19 @@ @@ -2,15 +2,19 @@
2 "string": [ 2 "string": [
3 { 3 {
4 "name": "module_desc", 4 "name": "module_desc",
5 - "value": "module description" 5 + "value": "On-device VAD+ASR with Next-gen Kaldi"
6 }, 6 },
7 { 7 {
8 "name": "EntryAbility_desc", 8 "name": "EntryAbility_desc",
9 - "value": "description" 9 + "value": "On-device VAD+ASR with Next-gen Kaldi"
10 }, 10 },
11 { 11 {
12 "name": "EntryAbility_label", 12 "name": "EntryAbility_label",
13 - "value": "label" 13 + "value": "On-device speech recognition"
  14 + },
  15 + {
  16 + "name": "mic_reason",
  17 + "value": "access the microphone for on-device speech recognition with Next-gen Kaldi"
14 } 18 }
15 ] 19 ]
16 } 20 }
@@ -2,15 +2,19 @@ @@ -2,15 +2,19 @@
2 "string": [ 2 "string": [
3 { 3 {
4 "name": "module_desc", 4 "name": "module_desc",
5 - "value": "模块描述" 5 + "value": "基于新一代Kaldi的本地语音识别"
6 }, 6 },
7 { 7 {
8 "name": "EntryAbility_desc", 8 "name": "EntryAbility_desc",
9 - "value": "description" 9 + "value": "基于新一代Kaldi的本地语音识别"
10 }, 10 },
11 { 11 {
12 "name": "EntryAbility_label", 12 "name": "EntryAbility_label",
13 - "value": "label" 13 + "value": "本地语音识别"
  14 + },
  15 + {
  16 + "name": "mic_reason",
  17 + "value": "使用新一代Kaldi, 访问麦克风进行本地语音识别 (不需要联网)"
14 } 18 }
15 ] 19 ]
16 } 20 }
@@ -1169,6 +1169,17 @@ SherpaOnnxOfflineTtsGenerateWithProgressCallback( @@ -1169,6 +1169,17 @@ SherpaOnnxOfflineTtsGenerateWithProgressCallback(
1169 return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper); 1169 return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
1170 } 1170 }
1171 1171
  1172 +const SherpaOnnxGeneratedAudio *
  1173 +SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
  1174 + const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
  1175 + SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) {
  1176 + auto wrapper = [callback, arg](const float *samples, int32_t n,
  1177 + float progress) {
  1178 + return callback(samples, n, progress, arg);
  1179 + };
  1180 + return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
  1181 +}
  1182 +
1172 const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg( 1183 const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
1173 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, 1184 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
1174 SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) { 1185 SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) {
@@ -930,6 +930,9 @@ typedef int32_t (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples, @@ -930,6 +930,9 @@ typedef int32_t (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
930 typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallback)( 930 typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallback)(
931 const float *samples, int32_t n, float p); 931 const float *samples, int32_t n, float p);
932 932
  933 +typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallbackWithArg)(
  934 + const float *samples, int32_t n, float p, void *arg);
  935 +
933 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts; 936 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;
934 937
935 // Create an instance of offline TTS. The user has to use DestroyOfflineTts() 938 // Create an instance of offline TTS. The user has to use DestroyOfflineTts()
@@ -964,11 +967,19 @@ SherpaOnnxOfflineTtsGenerateWithCallback( @@ -964,11 +967,19 @@ SherpaOnnxOfflineTtsGenerateWithCallback(
964 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, 967 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
965 SherpaOnnxGeneratedAudioCallback callback); 968 SherpaOnnxGeneratedAudioCallback callback);
966 969
  970 +SHERPA_ONNX_API
967 const SherpaOnnxGeneratedAudio * 971 const SherpaOnnxGeneratedAudio *
968 SherpaOnnxOfflineTtsGenerateWithProgressCallback( 972 SherpaOnnxOfflineTtsGenerateWithProgressCallback(
969 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, 973 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
  974 +
970 SherpaOnnxGeneratedAudioProgressCallback callback); 975 SherpaOnnxGeneratedAudioProgressCallback callback);
971 976
  977 +SHERPA_ONNX_API
  978 +const SherpaOnnxGeneratedAudio *
  979 +SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
  980 + const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
  981 + SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg);
  982 +
972 // Same as SherpaOnnxGeneratedAudioCallback but you can pass an additional 983 // Same as SherpaOnnxGeneratedAudioCallback but you can pass an additional
973 // `void* arg` to the callback. 984 // `void* arg` to the callback.
974 SHERPA_ONNX_API const SherpaOnnxGeneratedAudio * 985 SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *
@@ -22,8 +22,14 @@ CircularBuffer::CircularBuffer(int32_t capacity) { @@ -22,8 +22,14 @@ CircularBuffer::CircularBuffer(int32_t capacity) {
22 void CircularBuffer::Resize(int32_t new_capacity) { 22 void CircularBuffer::Resize(int32_t new_capacity) {
23 int32_t capacity = static_cast<int32_t>(buffer_.size()); 23 int32_t capacity = static_cast<int32_t>(buffer_.size());
24 if (new_capacity <= capacity) { 24 if (new_capacity <= capacity) {
  25 +#if __OHOS__
  26 + SHERPA_ONNX_LOGE(
  27 + "new_capacity (%{public}d) <= original capacity (%{public}d). Skip it.",
  28 + new_capacity, capacity);
  29 +#else
25 SHERPA_ONNX_LOGE("new_capacity (%d) <= original capacity (%d). Skip it.", 30 SHERPA_ONNX_LOGE("new_capacity (%d) <= original capacity (%d). Skip it.",
26 new_capacity, capacity); 31 new_capacity, capacity);
  32 +#endif
27 return; 33 return;
28 } 34 }
29 35
@@ -90,10 +96,18 @@ void CircularBuffer::Push(const float *p, int32_t n) { @@ -90,10 +96,18 @@ void CircularBuffer::Push(const float *p, int32_t n) {
90 int32_t size = Size(); 96 int32_t size = Size();
91 if (n + size > capacity) { 97 if (n + size > capacity) {
92 int32_t new_capacity = std::max(capacity * 2, n + size); 98 int32_t new_capacity = std::max(capacity * 2, n + size);
  99 +#if __OHOS__
  100 + SHERPA_ONNX_LOGE(
  101 + "Overflow! n: %{public}d, size: %{public}d, n+size: %{public}d, "
  102 + "capacity: %{public}d. Increase "
  103 + "capacity to: %{public}d. (Original data is copied. No data loss!)",
  104 + n, size, n + size, capacity, new_capacity);
  105 +#else
93 SHERPA_ONNX_LOGE( 106 SHERPA_ONNX_LOGE(
94 "Overflow! n: %d, size: %d, n+size: %d, capacity: %d. Increase " 107 "Overflow! n: %d, size: %d, n+size: %d, capacity: %d. Increase "
95 - "capacity to: %d", 108 + "capacity to: %d. (Original data is copied. No data loss!)",
96 n, size, n + size, capacity, new_capacity); 109 n, size, n + size, capacity, new_capacity);
  110 +#endif
97 Resize(new_capacity); 111 Resize(new_capacity);
98 112
99 capacity = new_capacity; 113 capacity = new_capacity;
@@ -7,6 +7,7 @@ @@ -7,6 +7,7 @@
7 #include <algorithm> 7 #include <algorithm>
8 #include <cctype> 8 #include <cctype>
9 #include <fstream> 9 #include <fstream>
  10 +#include <iomanip>
10 #include <memory> 11 #include <memory>
11 #include <sstream> 12 #include <sstream>
12 #include <strstream> 13 #include <strstream>
@@ -159,17 +160,26 @@ std::vector<TokenIDs> Lexicon::ConvertTextToTokenIdsChinese( @@ -159,17 +160,26 @@ std::vector<TokenIDs> Lexicon::ConvertTextToTokenIdsChinese(
159 words = ProcessHeteronyms(words); 160 words = ProcessHeteronyms(words);
160 161
161 if (debug_) { 162 if (debug_) {
162 - fprintf(stderr, "Input text in string: %s\n", text.c_str());  
163 - fprintf(stderr, "Input text in bytes:"); 163 + std::ostringstream os;
  164 +
  165 + os << "Input text in string: " << text << "\n";
  166 + os << "Input text in bytes:";
164 for (uint8_t c : text) { 167 for (uint8_t c : text) {
165 - fprintf(stderr, " %02x", c); 168 + os << " 0x" << std::setfill('0') << std::setw(2) << std::right << std::hex
  169 + << c;
166 } 170 }
167 - fprintf(stderr, "\n");  
168 - fprintf(stderr, "After splitting to words:"); 171 + os << "\n";
  172 + os << "After splitting to words:";
169 for (const auto &w : words) { 173 for (const auto &w : words) {
170 - fprintf(stderr, " %s", w.c_str()); 174 + os << " " << w;
171 } 175 }
172 - fprintf(stderr, "\n"); 176 + os << "\n";
  177 +
  178 +#if __OHOS__
  179 + SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
  180 +#else
  181 + SHERPA_ONNX_LOGE("%s", os.str().c_str());
  182 +#endif
173 } 183 }
174 184
175 std::vector<TokenIDs> ans; 185 std::vector<TokenIDs> ans;
@@ -259,17 +269,26 @@ std::vector<TokenIDs> Lexicon::ConvertTextToTokenIdsNotChinese( @@ -259,17 +269,26 @@ std::vector<TokenIDs> Lexicon::ConvertTextToTokenIdsNotChinese(
259 std::vector<std::string> words = SplitUtf8(text); 269 std::vector<std::string> words = SplitUtf8(text);
260 270
261 if (debug_) { 271 if (debug_) {
262 - fprintf(stderr, "Input text (lowercase) in string: %s\n", text.c_str());  
263 - fprintf(stderr, "Input text in bytes:"); 272 + std::ostringstream os;
  273 +
  274 + os << "Input text (lowercase) in string: " << text << "\n";
  275 + os << "Input text in bytes:";
264 for (uint8_t c : text) { 276 for (uint8_t c : text) {
265 - fprintf(stderr, " %02x", c); 277 + os << " 0x" << std::setfill('0') << std::setw(2) << std::right << std::hex
  278 + << c;
266 } 279 }
267 - fprintf(stderr, "\n");  
268 - fprintf(stderr, "After splitting to words:"); 280 + os << "\n";
  281 + os << "After splitting to words:";
269 for (const auto &w : words) { 282 for (const auto &w : words) {
270 - fprintf(stderr, " %s", w.c_str()); 283 + os << " " << w;
271 } 284 }
272 - fprintf(stderr, "\n"); 285 + os << "\n";
  286 +
  287 +#if __OHOS__
  288 + SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
  289 +#else
  290 + SHERPA_ONNX_LOGE("%s", os.str().c_str());
  291 +#endif
273 } 292 }
274 293
275 int32_t blank = token2id_.at(" "); 294 int32_t blank = token2id_.at(" ");
@@ -6,11 +6,21 @@ @@ -6,11 +6,21 @@
6 6
7 #include <fstream> 7 #include <fstream>
8 #include <regex> // NOLINT 8 #include <regex> // NOLINT
  9 +#include <strstream>
9 #include <utility> 10 #include <utility>
  11 +#if __ANDROID_API__ >= 9
  12 +#include "android/asset_manager.h"
  13 +#include "android/asset_manager_jni.h"
  14 +#endif
  15 +
  16 +#if __OHOS__
  17 +#include "rawfile/raw_file_manager.h"
  18 +#endif
10 19
11 #include "cppjieba/Jieba.hpp" 20 #include "cppjieba/Jieba.hpp"
12 #include "sherpa-onnx/csrc/file-utils.h" 21 #include "sherpa-onnx/csrc/file-utils.h"
13 #include "sherpa-onnx/csrc/macros.h" 22 #include "sherpa-onnx/csrc/macros.h"
  23 +#include "sherpa-onnx/csrc/onnx-utils.h"
14 #include "sherpa-onnx/csrc/symbol-table.h" 24 #include "sherpa-onnx/csrc/symbol-table.h"
15 #include "sherpa-onnx/csrc/text-utils.h" 25 #include "sherpa-onnx/csrc/text-utils.h"
16 26
@@ -62,6 +72,60 @@ class MeloTtsLexicon::Impl { @@ -62,6 +72,60 @@ class MeloTtsLexicon::Impl {
62 } 72 }
63 } 73 }
64 74
  75 + template <typename Manager>
  76 + Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
  77 + const std::string &dict_dir,
  78 + const OfflineTtsVitsModelMetaData &meta_data, bool debug)
  79 + : meta_data_(meta_data), debug_(debug) {
  80 + std::string dict = dict_dir + "/jieba.dict.utf8";
  81 + std::string hmm = dict_dir + "/hmm_model.utf8";
  82 + std::string user_dict = dict_dir + "/user.dict.utf8";
  83 + std::string idf = dict_dir + "/idf.utf8";
  84 + std::string stop_word = dict_dir + "/stop_words.utf8";
  85 +
  86 + AssertFileExists(dict);
  87 + AssertFileExists(hmm);
  88 + AssertFileExists(user_dict);
  89 + AssertFileExists(idf);
  90 + AssertFileExists(stop_word);
  91 +
  92 + jieba_ =
  93 + std::make_unique<cppjieba::Jieba>(dict, hmm, user_dict, idf, stop_word);
  94 +
  95 + {
  96 + auto buf = ReadFile(mgr, tokens);
  97 +
  98 + std::istrstream is(buf.data(), buf.size());
  99 + InitTokens(is);
  100 + }
  101 +
  102 + {
  103 + auto buf = ReadFile(mgr, lexicon);
  104 +
  105 + std::istrstream is(buf.data(), buf.size());
  106 + InitLexicon(is);
  107 + }
  108 + }
  109 +
  110 + template <typename Manager>
  111 + Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
  112 + const OfflineTtsVitsModelMetaData &meta_data, bool debug)
  113 + : meta_data_(meta_data), debug_(debug) {
  114 + {
  115 + auto buf = ReadFile(mgr, tokens);
  116 +
  117 + std::istrstream is(buf.data(), buf.size());
  118 + InitTokens(is);
  119 + }
  120 +
  121 + {
  122 + auto buf = ReadFile(mgr, lexicon);
  123 +
  124 + std::istrstream is(buf.data(), buf.size());
  125 + InitLexicon(is);
  126 + }
  127 + }
  128 +
65 std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &_text) const { 129 std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &_text) const {
66 std::string text = ToLowerCase(_text); 130 std::string text = ToLowerCase(_text);
67 // see 131 // see
@@ -84,17 +148,24 @@ class MeloTtsLexicon::Impl { @@ -84,17 +148,24 @@ class MeloTtsLexicon::Impl {
84 jieba_->Cut(text, words, is_hmm); 148 jieba_->Cut(text, words, is_hmm);
85 149
86 if (debug_) { 150 if (debug_) {
87 - SHERPA_ONNX_LOGE("input text: %s", text.c_str());  
88 - SHERPA_ONNX_LOGE("after replacing punctuations: %s", s.c_str());  
89 -  
90 std::ostringstream os; 151 std::ostringstream os;
91 std::string sep = ""; 152 std::string sep = "";
92 for (const auto &w : words) { 153 for (const auto &w : words) {
93 os << sep << w; 154 os << sep << w;
94 sep = "_"; 155 sep = "_";
95 } 156 }
  157 +#if __OHOS__
  158 + SHERPA_ONNX_LOGE("input text: %{public}s", text.c_str());
  159 + SHERPA_ONNX_LOGE("after replacing punctuations: %{public}s", s.c_str());
  160 +
  161 + SHERPA_ONNX_LOGE("after jieba processing: %{public}s",
  162 + os.str().c_str());
  163 +#else
  164 + SHERPA_ONNX_LOGE("input text: %s", text.c_str());
  165 + SHERPA_ONNX_LOGE("after replacing punctuations: %s", s.c_str());
96 166
97 SHERPA_ONNX_LOGE("after jieba processing: %s", os.str().c_str()); 167 SHERPA_ONNX_LOGE("after jieba processing: %s", os.str().c_str());
  168 +#endif
98 } 169 }
99 } else { 170 } else {
100 words = SplitUtf8(text); 171 words = SplitUtf8(text);
@@ -102,7 +173,7 @@ class MeloTtsLexicon::Impl { @@ -102,7 +173,7 @@ class MeloTtsLexicon::Impl {
102 if (debug_) { 173 if (debug_) {
103 fprintf(stderr, "Input text in string (lowercase): %s\n", text.c_str()); 174 fprintf(stderr, "Input text in string (lowercase): %s\n", text.c_str());
104 fprintf(stderr, "Input text in bytes (lowercase):"); 175 fprintf(stderr, "Input text in bytes (lowercase):");
 105 - for (uint8_t c : text) { 176 + for (uint8_t c : text) {
106 fprintf(stderr, " %02x", c); 177 fprintf(stderr, " %02x", c);
107 } 178 }
108 fprintf(stderr, "\n"); 179 fprintf(stderr, "\n");
@@ -307,9 +378,48 @@ MeloTtsLexicon::MeloTtsLexicon(const std::string &lexicon, @@ -307,9 +378,48 @@ MeloTtsLexicon::MeloTtsLexicon(const std::string &lexicon,
307 bool debug) 378 bool debug)
308 : impl_(std::make_unique<Impl>(lexicon, tokens, meta_data, debug)) {} 379 : impl_(std::make_unique<Impl>(lexicon, tokens, meta_data, debug)) {}
309 380
  381 +template <typename Manager>
  382 +MeloTtsLexicon::MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
  383 + const std::string &tokens,
  384 + const std::string &dict_dir,
  385 + const OfflineTtsVitsModelMetaData &meta_data,
  386 + bool debug)
  387 + : impl_(std::make_unique<Impl>(mgr, lexicon, tokens, dict_dir, meta_data,
  388 + debug)) {}
  389 +
  390 +template <typename Manager>
  391 +MeloTtsLexicon::MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
  392 + const std::string &tokens,
  393 + const OfflineTtsVitsModelMetaData &meta_data,
  394 + bool debug)
  395 + : impl_(std::make_unique<Impl>(mgr, lexicon, tokens, meta_data, debug)) {}
  396 +
310 std::vector<TokenIDs> MeloTtsLexicon::ConvertTextToTokenIds( 397 std::vector<TokenIDs> MeloTtsLexicon::ConvertTextToTokenIds(
311 const std::string &text, const std::string & /*unused_voice = ""*/) const { 398 const std::string &text, const std::string & /*unused_voice = ""*/) const {
312 return impl_->ConvertTextToTokenIds(text); 399 return impl_->ConvertTextToTokenIds(text);
313 } 400 }
314 401
  402 +#if __ANDROID_API__ >= 9
  403 +template MeloTtsLexicon::MeloTtsLexicon(
  404 + AAssetManager *mgr, const std::string &lexicon, const std::string &tokens,
  405 + const std::string &dict_dir, const OfflineTtsVitsModelMetaData &meta_data,
  406 + bool debug);
  407 +
  408 +template MeloTtsLexicon::MeloTtsLexicon(
  409 + AAssetManager *mgr, const std::string &lexicon, const std::string &tokens,
  410 + const OfflineTtsVitsModelMetaData &meta_data, bool debug);
  411 +#endif
  412 +
  413 +#if __OHOS__
  414 +template MeloTtsLexicon::MeloTtsLexicon(
  415 + NativeResourceManager *mgr, const std::string &lexicon,
  416 + const std::string &tokens, const std::string &dict_dir,
  417 + const OfflineTtsVitsModelMetaData &meta_data, bool debug);
  418 +
  419 +template MeloTtsLexicon::MeloTtsLexicon(
  420 + NativeResourceManager *mgr, const std::string &lexicon,
  421 + const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data,
  422 + bool debug);
  423 +#endif
  424 +
315 } // namespace sherpa_onnx 425 } // namespace sherpa_onnx
@@ -25,6 +25,16 @@ class MeloTtsLexicon : public OfflineTtsFrontend { @@ -25,6 +25,16 @@ class MeloTtsLexicon : public OfflineTtsFrontend {
25 MeloTtsLexicon(const std::string &lexicon, const std::string &tokens, 25 MeloTtsLexicon(const std::string &lexicon, const std::string &tokens,
26 const OfflineTtsVitsModelMetaData &meta_data, bool debug); 26 const OfflineTtsVitsModelMetaData &meta_data, bool debug);
27 27
  28 + template <typename Manager>
  29 + MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
  30 + const std::string &tokens, const std::string &dict_dir,
  31 + const OfflineTtsVitsModelMetaData &meta_data, bool debug);
  32 +
  33 + template <typename Manager>
  34 + MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
  35 + const std::string &tokens,
  36 + const OfflineTtsVitsModelMetaData &meta_data, bool debug);
  37 +
28 std::vector<TokenIDs> ConvertTextToTokenIds( 38 std::vector<TokenIDs> ConvertTextToTokenIds(
29 const std::string &text, 39 const std::string &text,
30 const std::string &unused_voice = "") const override; 40 const std::string &unused_voice = "") const override;
@@ -40,7 +40,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -40,7 +40,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
40 tn_list_.reserve(files.size()); 40 tn_list_.reserve(files.size());
41 for (const auto &f : files) { 41 for (const auto &f : files) {
42 if (config.model.debug) { 42 if (config.model.debug) {
  43 +#if __OHOS__
  44 + SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
  45 +#else
43 SHERPA_ONNX_LOGE("rule fst: %s", f.c_str()); 46 SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
  47 +#endif
44 } 48 }
45 tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f)); 49 tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
46 } 50 }
@@ -57,7 +61,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -57,7 +61,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
57 61
58 for (const auto &f : files) { 62 for (const auto &f : files) {
59 if (config.model.debug) { 63 if (config.model.debug) {
  64 +#if __OHOS__
  65 + SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
  66 +#else
 60 SHERPA_ONNX_LOGE("rule far: %s", f.c_str()); 67 SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
  68 +#endif
61 } 69 }
62 std::unique_ptr<fst::FarReader<fst::StdArc>> reader( 70 std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
63 fst::FarReader<fst::StdArc>::Open(f)); 71 fst::FarReader<fst::StdArc>::Open(f));
@@ -88,7 +96,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -88,7 +96,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
88 tn_list_.reserve(files.size()); 96 tn_list_.reserve(files.size());
89 for (const auto &f : files) { 97 for (const auto &f : files) {
90 if (config.model.debug) { 98 if (config.model.debug) {
  99 +#if __OHOS__
  100 + SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
  101 +#else
91 SHERPA_ONNX_LOGE("rule fst: %s", f.c_str()); 102 SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
  103 +#endif
92 } 104 }
93 auto buf = ReadFile(mgr, f); 105 auto buf = ReadFile(mgr, f);
94 std::istrstream is(buf.data(), buf.size()); 106 std::istrstream is(buf.data(), buf.size());
@@ -103,7 +115,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -103,7 +115,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
103 115
104 for (const auto &f : files) { 116 for (const auto &f : files) {
105 if (config.model.debug) { 117 if (config.model.debug) {
  118 +#if __OHOS__
  119 + SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
  120 +#else
106 SHERPA_ONNX_LOGE("rule far: %s", f.c_str()); 121 SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
  122 +#endif
107 } 123 }
108 124
109 auto buf = ReadFile(mgr, f); 125 auto buf = ReadFile(mgr, f);
@@ -156,14 +172,22 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -156,14 +172,22 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
156 172
157 std::string text = _text; 173 std::string text = _text;
158 if (config_.model.debug) { 174 if (config_.model.debug) {
  175 +#if __OHOS__
  176 + SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
  177 +#else
159 SHERPA_ONNX_LOGE("Raw text: %s", text.c_str()); 178 SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
  179 +#endif
160 } 180 }
161 181
162 if (!tn_list_.empty()) { 182 if (!tn_list_.empty()) {
163 for (const auto &tn : tn_list_) { 183 for (const auto &tn : tn_list_) {
164 text = tn->Normalize(text); 184 text = tn->Normalize(text);
165 if (config_.model.debug) { 185 if (config_.model.debug) {
  186 +#if __OHOS__
  187 + SHERPA_ONNX_LOGE("After normalizing: %{public}s", text.c_str());
  188 +#else
166 SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str()); 189 SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str());
  190 +#endif
167 } 191 }
168 } 192 }
169 } 193 }
@@ -226,10 +250,17 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -226,10 +250,17 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
226 int32_t num_batches = x_size / batch_size; 250 int32_t num_batches = x_size / batch_size;
227 251
228 if (config_.model.debug) { 252 if (config_.model.debug) {
  253 +#if __OHOS__
  254 + SHERPA_ONNX_LOGE(
  255 + "Text is too long. Split it into %{public}d batches. batch size: "
  256 + "%{public}d. Number of sentences: %{public}d",
  257 + num_batches, batch_size, x_size);
  258 +#else
229 SHERPA_ONNX_LOGE( 259 SHERPA_ONNX_LOGE(
230 "Text is too long. Split it into %d batches. batch size: %d. Number " 260 "Text is too long. Split it into %d batches. batch size: %d. Number "
231 "of sentences: %d", 261 "of sentences: %d",
232 num_batches, batch_size, x_size); 262 num_batches, batch_size, x_size);
  263 +#endif
233 } 264 }
234 265
235 GeneratedAudio ans; 266 GeneratedAudio ans;
@@ -255,7 +286,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -255,7 +286,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
255 audio.samples.end()); 286 audio.samples.end());
256 if (callback) { 287 if (callback) {
257 should_continue = callback(audio.samples.data(), audio.samples.size(), 288 should_continue = callback(audio.samples.data(), audio.samples.size(),
258 - b * 1.0 / num_batches); 289 + (b + 1) * 1.0 / num_batches);
259 // Caution(fangjun): audio is freed when the callback returns, so users 290 // Caution(fangjun): audio is freed when the callback returns, so users
260 // should copy the data if they want to access the data after 291 // should copy the data if they want to access the data after
261 // the callback returns to avoid segmentation fault. 292 // the callback returns to avoid segmentation fault.
@@ -297,6 +328,16 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -297,6 +328,16 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
297 if (meta_data.frontend == "characters") { 328 if (meta_data.frontend == "characters") {
298 frontend_ = std::make_unique<OfflineTtsCharacterFrontend>( 329 frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
299 mgr, config_.model.vits.tokens, meta_data); 330 mgr, config_.model.vits.tokens, meta_data);
  331 + } else if (meta_data.jieba && !config_.model.vits.dict_dir.empty() &&
  332 + meta_data.is_melo_tts) {
  333 + frontend_ = std::make_unique<MeloTtsLexicon>(
  334 + mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
  335 + config_.model.vits.dict_dir, model_->GetMetaData(),
  336 + config_.model.debug);
  337 + } else if (meta_data.is_melo_tts && meta_data.language == "English") {
  338 + frontend_ = std::make_unique<MeloTtsLexicon>(
  339 + mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
  340 + model_->GetMetaData(), config_.model.debug);
300 } else if ((meta_data.is_piper || meta_data.is_coqui || 341 } else if ((meta_data.is_piper || meta_data.is_coqui ||
301 meta_data.is_icefall) && 342 meta_data.is_icefall) &&
302 !config_.model.vits.data_dir.empty()) { 343 !config_.model.vits.data_dir.empty()) {
@@ -144,7 +144,11 @@ class OfflineTtsVitsModel::Impl { @@ -144,7 +144,11 @@ class OfflineTtsVitsModel::Impl {
144 ++i; 144 ++i;
145 } 145 }
146 146
  147 +#if __OHOS__
  148 + SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
  149 +#else
147 SHERPA_ONNX_LOGE("%s\n", os.str().c_str()); 150 SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
  151 +#endif
148 } 152 }
149 153
150 Ort::AllocatorWithDefaultOptions allocator; // used in the macro below 154 Ort::AllocatorWithDefaultOptions allocator; // used in the macro below