Fangjun Kuang
Committed by GitHub

Add on-device text-to-speech (TTS) demo for HarmonyOS (#1590)

正在显示 61 个修改的文件 包含 1901 行增加88 行删除
!build-profile.json5
*.har
... ...
export { readWave, readWaveFromBinary } from "libsherpa_onnx.so";
export {
listRawfileDir,
readWave,
readWaveFromBinary,
} from "libsherpa_onnx.so";
export {
CircularBuffer,
... ...
... ... @@ -4,7 +4,7 @@
"externalNativeOptions": {
"path": "./src/main/cpp/CMakeLists.txt",
"arguments": "",
"cppFlags": "",
"cppFlags": "-std=c++17",
"abiFilters": [
"arm64-v8a",
"x86_64",
... ...
... ... @@ -2,6 +2,10 @@
cmake_minimum_required(VERSION 3.13.0)
project(myNpmLib)
if (NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to use")
endif()
# Disable warning about
#
# "The DOWNLOAD_EXTRACT_TIMESTAMP option was not given and policy CMP0135 is
... ... @@ -46,6 +50,7 @@ add_library(sherpa_onnx SHARED
speaker-identification.cc
spoken-language-identification.cc
streaming-asr.cc
utils.cc
vad.cc
wave-reader.cc
wave-writer.cc
... ...
... ... @@ -213,12 +213,13 @@ static Napi::Number OfflineTtsNumSpeakersWrapper(
return Napi::Number::New(env, num_speakers);
}
// synchronous version
static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
if (info.Length() != 2) {
std::ostringstream os;
os << "Expect only 1 argument. Given: " << info.Length();
os << "Expect only 2 arguments. Given: " << info.Length();
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
... ... @@ -298,8 +299,8 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value();
float speed = obj.Get("speed").As<Napi::Number>().FloatValue();
const SherpaOnnxGeneratedAudio *audio =
SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed);
const SherpaOnnxGeneratedAudio *audio;
audio = SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed);
if (enable_external_buffer) {
Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
... ... @@ -334,6 +335,256 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
}
}
// One chunk of generated audio handed to the JavaScript progress callback,
// plus the outcome of that callback.
//
// NOTE(review): `processed` and `cancelled` are written by InvokeJsCallback
// and `cancelled` is read by the generation callback in
// TtsGenerateWorker::Execute. If those run on different threads the flags are
// accessed without synchronization -- consider std::atomic<bool>; TODO confirm.
struct TtsCallbackData {
  // Copy of the samples reported by the generator for this chunk.
  std::vector<float> samples;
  // Progress value reported together with the chunk. Value-initialized so
  // that it is never read while indeterminate.
  float progress = 0;
  // Set to true once the JavaScript callback has been invoked for this chunk.
  bool processed = false;
  // Set to true when the JavaScript callback did not return a non-zero number.
  bool cancelled = false;
};
// Delivers one chunk of generated audio to the JavaScript progress callback.
//
// see
// https://github.com/nodejs/node-addon-examples/blob/main/src/6-threadsafe-function/typed_threadsafe_function/node-addon-api/clock.cc
//
// @param env      JavaScript environment. Nothing is done when it is null.
// @param callback JavaScript function to call. Nothing is done when it is
//                 null.
// @param context  Reference whose value is used as the receiver of the call.
// @param data     Chunk to deliver. The callback receives an object
//                 `{samples: Float32Array, progress: number}`. Afterwards
//                 `data->processed` is true and `data->cancelled` is false
//                 only if the callback returned a non-zero number.
void InvokeJsCallback(Napi::Env env, Napi::Function callback,
                      Napi::Reference<Napi::Value> *context,
                      TtsCallbackData *data) {
  const bool can_call = (env != nullptr) && (callback != nullptr);
  if (!can_call) {
    return;
  }

  // Copy the chunk into a JavaScript-owned Float32Array.
  const auto num_samples = data->samples.size();
  Napi::ArrayBuffer storage =
      Napi::ArrayBuffer::New(env, sizeof(float) * num_samples);
  Napi::Float32Array chunk =
      Napi::Float32Array::New(env, num_samples, storage, 0);
  std::copy(data->samples.begin(), data->samples.end(), chunk.Data());

  Napi::Object payload = Napi::Object::New(env);
  payload.Set(Napi::String::New(env, "samples"), chunk);
  payload.Set(Napi::String::New(env, "progress"), data->progress);

  auto ret = callback.Call(context->Value(), {payload});
  data->processed = true;

  // Anything other than a non-zero number is treated as a request to stop.
  const bool keep_going =
      ret.IsNumber() && ret.As<Napi::Number>().Int32Value();
  data->cancelled = !keep_going;
}
// Thread-safe function type used to hand TtsCallbackData chunks to
// InvokeJsCallback. Its context is the Napi::Reference that InvokeJsCallback
// uses as the receiver when calling the JavaScript callback.
using TSFN = Napi::TypedThreadSafeFunction<Napi::Reference<Napi::Value>,
TtsCallbackData, InvokeJsCallback>;
class TtsGenerateWorker : public Napi::AsyncWorker {
public:
TtsGenerateWorker(const Napi::Env &env, TSFN tsfn, SherpaOnnxOfflineTts *tts,
const std::string &text, float speed, int32_t sid,
bool use_external_buffer)
: tsfn_(tsfn),
Napi::AsyncWorker{env, "TtsGenerateWorker"},
deferred_(env),
tts_(tts),
text_(text),
speed_(speed),
sid_(sid),
use_external_buffer_(use_external_buffer) {}
Napi::Promise Promise() { return deferred_.Promise(); }
~TtsGenerateWorker() {
for (auto d : data_list_) {
delete d;
}
}
protected:
void Execute() override {
auto callback = [](const float *samples, int32_t n, float progress,
void *arg) -> int32_t {
TtsGenerateWorker *_this = reinterpret_cast<TtsGenerateWorker *>(arg);
for (auto d : _this->data_list_) {
if (d->cancelled) {
OH_LOG_INFO(LOG_APP, "TtsGenerate is cancelled");
return 0;
}
}
auto data = new TtsCallbackData;
data->samples = std::vector<float>{samples, samples + n};
data->progress = progress;
_this->data_list_.push_back(data);
_this->tsfn_.NonBlockingCall(data);
return 1;
};
audio_ = SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
tts_, text_.c_str(), sid_, speed_, callback, this);
tsfn_.Release();
}
void OnOK() override {
Napi::Env env = deferred_.Env();
Napi::Object ans = Napi::Object::New(env);
if (use_external_buffer_) {
Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
env, const_cast<float *>(audio_->samples), sizeof(float) * audio_->n,
[](Napi::Env /*env*/, void * /*data*/,
const SherpaOnnxGeneratedAudio *hint) {
SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint);
},
audio_);
Napi::Float32Array float32Array =
Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0);
ans.Set(Napi::String::New(env, "samples"), float32Array);
ans.Set(Napi::String::New(env, "sampleRate"), audio_->sample_rate);
} else {
// don't use external buffer
Napi::ArrayBuffer arrayBuffer =
Napi::ArrayBuffer::New(env, sizeof(float) * audio_->n);
Napi::Float32Array float32Array =
Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0);
std::copy(audio_->samples, audio_->samples + audio_->n,
float32Array.Data());
ans.Set(Napi::String::New(env, "samples"), float32Array);
ans.Set(Napi::String::New(env, "sampleRate"), audio_->sample_rate);
SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio_);
}
deferred_.Resolve(ans);
}
private:
TSFN tsfn_;
Napi::Promise::Deferred deferred_;
SherpaOnnxOfflineTts *tts_;
std::string text_;
float speed_;
int32_t sid_;
bool use_external_buffer_;
const SherpaOnnxGeneratedAudio *audio_;
std::vector<TtsCallbackData *> data_list_;
};
// Asynchronous TTS generation entry point exposed to JavaScript.
//
// Expected arguments:
//   info[0]  External wrapping a SherpaOnnxOfflineTts pointer.
//   info[1]  Object with the fields
//              text: string                    (required)
//              sid: number                     (required)
//              speed: number                   (required)
//              enableExternalBuffer: boolean   (optional, defaults to true)
//              callback: function              (optional) progress callback
//
// Throws a JavaScript TypeError and returns an empty value when the arguments
// do not match the above. Otherwise queues a TtsGenerateWorker and returns its
// promise.
static Napi::Object OfflineTtsGenerateAsyncWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxOfflineTts *tts =
      info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  // Returns true if obj[name] exists and satisfies `is_type`. Otherwise
  // throws a TypeError naming the field and returns false. `type_name` is the
  // description used in the message, e.g. "a string".
  const auto has_field_of_type =
      [&env, &obj](const char *name, bool (Napi::Value::*is_type)() const,
                   const char *type_name) -> bool {
    if (!obj.Has(name)) {
      Napi::TypeError::New(
          env, std::string("The argument object should have a field ") + name)
          .ThrowAsJavaScriptException();
      return false;
    }

    if (!(obj.Get(name).*is_type)()) {
      Napi::TypeError::New(env, std::string("The object['") + name +
                                    "'] should be " + type_name)
          .ThrowAsJavaScriptException();
      return false;
    }

    return true;
  };

  // Short-circuiting keeps the original check order: text, sid, speed.
  if (!has_field_of_type("text", &Napi::Value::IsString, "a string") ||
      !has_field_of_type("sid", &Napi::Value::IsNumber, "a number") ||
      !has_field_of_type("speed", &Napi::Value::IsNumber, "a number")) {
    return {};
  }

  bool enable_external_buffer = true;
  if (obj.Has("enableExternalBuffer") &&
      obj.Get("enableExternalBuffer").IsBoolean()) {
    enable_external_buffer =
        obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  }

  Napi::String _text = obj.Get("text").As<Napi::String>();
  std::string text = _text.Utf8Value();
  int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value();
  float speed = obj.Get("speed").As<Napi::Number>().FloatValue();

  // Stays empty when no callback is given; InvokeJsCallback skips the call
  // when the function it receives is null.
  Napi::Function cb;
  if (obj.Has("callback") && obj.Get("callback").IsFunction()) {
    cb = obj.Get("callback").As<Napi::Function>();
  }

  // Deleted by the finalizer passed to TSFN::New below.
  auto context =
      new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));

  TSFN tsfn = TSFN::New(
      env,
      cb,                 // JavaScript function called asynchronously
      "TtsGenerateFunc",  // Name
      0,                  // Unlimited queue
      1,                  // Only one thread will use this initially
      context,
      [](Napi::Env, void *, Napi::Reference<Napi::Value> *ctx) { delete ctx; });

  TtsGenerateWorker *worker = new TtsGenerateWorker(
      env, tsfn, tts, text, speed, sid, enable_external_buffer);
  worker->Queue();

  return worker->Promise();
}
void InitNonStreamingTts(Napi::Env env, Napi::Object exports) {
exports.Set(Napi::String::New(env, "createOfflineTts"),
Napi::Function::New(env, CreateOfflineTtsWrapper));
... ... @@ -346,4 +597,7 @@ void InitNonStreamingTts(Napi::Env env, Napi::Object exports) {
exports.Set(Napi::String::New(env, "offlineTtsGenerate"),
Napi::Function::New(env, OfflineTtsGenerateWrapper));
exports.Set(Napi::String::New(env, "offlineTtsGenerateAsync"),
Napi::Function::New(env, OfflineTtsGenerateAsyncWrapper));
}
... ...
... ... @@ -27,6 +27,10 @@ void InitKeywordSpotting(Napi::Env env, Napi::Object exports);
void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports);
#if __OHOS__
void InitUtils(Napi::Env env, Napi::Object exports);
#endif
Napi::Object Init(Napi::Env env, Napi::Object exports) {
InitStreamingAsr(env, exports);
InitNonStreamingAsr(env, exports);
... ... @@ -41,7 +45,15 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
InitKeywordSpotting(env, exports);
InitNonStreamingSpeakerDiarization(env, exports);
#if __OHOS__
InitUtils(env, exports);
#endif
return exports;
}
#if __OHOS__
NODE_API_MODULE(sherpa_onnx, Init)
#else
NODE_API_MODULE(addon, Init)
#endif
... ...
export const listRawfileDir: (mgr: object, dir: string) => Array<string>;
export const readWave: (filename: string, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number};
export const readWaveFromBinary: (data: Uint8Array, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number};
export const createCircularBuffer: (capacity: number) => object;
... ... @@ -37,4 +39,11 @@ export const getOnlineStreamResultAsJson: (handle: object, streamHandle: object)
export const createOfflineTts: (config: object, mgr?: object) => object;
export const getOfflineTtsNumSpeakers: (handle: object) => number;
export const getOfflineTtsSampleRate: (handle: object) => number;
export const offlineTtsGenerate: (handle: object, input: object) => object;
export type TtsOutput = {
samples: Float32Array;
sampleRate: number;
};
export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput;
export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise<TtsOutput>;
... ...
// Copyright (c) 2024 Xiaomi Corporation
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include "macros.h" // NOLINT
#include "napi.h" // NOLINT
// Recursively collects the paths of all files below the rawfile directory
// `d`.
//
// @param mgr Native resource manager used for all rawfile queries.
// @param d   Directory to list. An empty string means that returned paths
//            carry no directory prefix.
// @return Paths of the form "<d>/<name>" (or "<name>" when `d` is empty).
//         Entries for which OH_ResourceManager_IsRawDir() is true are
//         descended into and not listed themselves. Returns an empty vector
//         when the directory cannot be opened.
static std::vector<std::string> GetFilenames(NativeResourceManager *mgr,
                                             const std::string &d) {
  std::unique_ptr<RawDir, decltype(&OH_ResourceManager_CloseRawDir)> raw_dir(
      OH_ResourceManager_OpenRawDir(mgr, d.c_str()),
      &OH_ResourceManager_CloseRawDir);

  std::vector<std::string> ans;
  if (!raw_dir) {
    return ans;
  }

  const int32_t count = OH_ResourceManager_GetRawFileCount(raw_dir.get());
  if (count <= 0) {
    // Also keeps a negative count from reaching reserve().
    return ans;
  }

  ans.reserve(count);
  for (int32_t i = 0; i < count; ++i) {
    const char *name = OH_ResourceManager_GetRawFileName(raw_dir.get(), i);
    if (name == nullptr) {
      // Constructing std::string from a null pointer is undefined behavior.
      continue;
    }

    // Build the joined path once; it serves the directory test, the
    // recursion and the result.
    std::string path = d.empty() ? std::string(name) : d + "/" + name;

    if (OH_ResourceManager_IsRawDir(mgr, path.c_str())) {
      auto files = GetFilenames(mgr, path);
      for (auto &f : files) {
        ans.push_back(std::move(f));
      }
    } else {
      ans.push_back(std::move(path));
    }
  }

  return ans;
}
// Lists all files below a rawfile directory.
//
// Expected arguments:
//   info[0]  Resource manager object, converted with
//            OH_ResourceManager_InitNativeResourceManager().
//   info[1]  Directory name (string).
//
// Returns an array of strings as produced by GetFilenames(). Throws a
// JavaScript TypeError and returns an empty value on invalid arguments.
static Napi::Array ListRawFileDir(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  // Validate the cheap argument first so that no native resource manager is
  // acquired for a call that is going to be rejected anyway.
  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "Argument 1 should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  std::unique_ptr<NativeResourceManager,
                  decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
      mgr(OH_ResourceManager_InitNativeResourceManager(env, info[0]),
          &OH_ResourceManager_ReleaseNativeResourceManager);

  if (!mgr) {
    Napi::TypeError::New(env, "Argument 0 should be a resource manager")
        .ThrowAsJavaScriptException();

    return {};
  }

  const std::string dir = info[1].As<Napi::String>().Utf8Value();

  const auto files = GetFilenames(mgr.get(), dir);

  Napi::Array ans = Napi::Array::New(env, files.size());
  const uint32_t num_files = static_cast<uint32_t>(files.size());
  for (uint32_t i = 0; i < num_files; ++i) {
    ans[i] = Napi::String::New(env, files[i]);
  }

  return ans;
}
// Registers the utility functions of this file on `exports`.
// Currently this is only `listRawfileDir`.
void InitUtils(Napi::Env env, Napi::Object exports) {
  Napi::String name = Napi::String::New(env, "listRawfileDir");
  Napi::Function fn = Napi::Function::New(env, ListRawFileDir);
  exports.Set(name, fn);
}
... ...
... ... @@ -3,6 +3,7 @@ import {
getOfflineTtsNumSpeakers,
getOfflineTtsSampleRate,
offlineTtsGenerate,
offlineTtsGenerateAsync,
} from "libsherpa_onnx.so";
export class OfflineTtsVitsModelConfig {
... ... @@ -16,14 +17,14 @@ export class OfflineTtsVitsModelConfig {
public lengthScale: number = 1.0;
}
export class OfflineTtsModelConfig{
export class OfflineTtsModelConfig {
public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
public numThreads: number = 1;
public debug: boolean = false;
public provider: string = 'cpu';
}
export class OfflineTtsConfig{
export class OfflineTtsConfig {
public model: OfflineTtsModelConfig = new OfflineTtsModelConfig();
public ruleFsts: string = '';
public ruleFars: string = '';
... ... @@ -35,17 +36,24 @@ export class TtsOutput {
public sampleRate: number = 0;
}
interface TtsCallbackData {
samples: Float32Array;
progress: number;
}
export class TtsInput {
public text: string = '';
public sid: number = 0;
public speed: number = 1.0;
public callback?: (data: TtsCallbackData) => number;
}
export class OfflineTts {
private handle: object;
public config: OfflineTtsConfig;
public numSpeakers: number;
public sampleRate: number;
private handle: object;
constructor(config: OfflineTtsConfig, mgr?: object) {
this.handle = createOfflineTts(config, mgr);
this.config = config;
... ... @@ -63,4 +71,8 @@ export class OfflineTts {
generate(input: TtsInput): TtsOutput {
return offlineTtsGenerate(this.handle, input) as TtsOutput;
}
generateAsync(input: TtsInput): Promise<TtsOutput> {
return offlineTtsGenerateAsync(this.handle, input);
}
}
\ No newline at end of file
... ...
... ... @@ -57,7 +57,6 @@ export class CircularBuffer {
// samples is a float32 array
push(samples: Float32Array) {
console.log(`here samples: ${samples}`);
circularBufferPush(this.handle, samples);
}
... ...
/node_modules
/oh_modules
/local.properties
/.idea
**/build
/.hvigor
.cxx
/.clangd
/.clang-format
/.clang-tidy
**/.test
/.appanalyzer
\ No newline at end of file
... ...
{
"app": {
"bundleName": "com.k2fsa.sherpa.onnx.tts",
"vendor": "next-gen Kaldi",
"versionCode": 1000000,
"versionName": "1.0.0",
"icon": "$media:app_icon",
"label": "$string:app_name"
}
}
... ...
{
"string": [
{
"name": "app_name",
"value": "SherpaOnnxTts"
}
]
}
... ...
{
"app": {
"signingConfigs": [],
"products": [
{
"name": "default",
"signingConfig": "default",
"compatibleSdkVersion": "4.0.0(10)",
"runtimeOS": "HarmonyOS",
"buildOption": {
"strictMode": {
"caseSensitiveCheck": true,
}
}
}
],
"buildModeSet": [
{
"name": "debug",
},
{
"name": "release"
}
]
},
"modules": [
{
"name": "entry",
"srcPath": "./entry",
"targets": [
{
"name": "default",
"applyToProducts": [
"default"
]
}
]
}
]
}
\ No newline at end of file
... ...
{
"files": [
"**/*.ets"
],
"ignore": [
"**/src/ohosTest/**/*",
"**/src/test/**/*",
"**/src/mock/**/*",
"**/node_modules/**/*",
"**/oh_modules/**/*",
"**/build/**/*",
"**/.preview/**/*"
],
"ruleSet": [
"plugin:@performance/recommended",
"plugin:@typescript-eslint/recommended"
],
"rules": {
}
}
\ No newline at end of file
... ...
/node_modules
/oh_modules
/.preview
/build
/.cxx
/.test
\ No newline at end of file
... ...
{
"apiType": "stageMode",
"buildOption": {
"sourceOption": {
"workers": [
"./src/main/ets/workers/NonStreamingTtsWorker.ets"
]
}
},
"buildOptionSet": [
{
"name": "release",
"arkOptions": {
"obfuscation": {
"ruleOptions": {
"enable": false,
"files": [
"./obfuscation-rules.txt"
]
}
}
}
},
],
"targets": [
{
"name": "default"
},
{
"name": "ohosTest",
}
]
}
\ No newline at end of file
... ...
import { hapTasks } from '@ohos/hvigor-ohos-plugin';
export default {
system: hapTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
}
... ...
# Define project specific obfuscation rules here.
# You can include the obfuscation configuration files in the current module's build-profile.json5.
#
# For more details, see
# https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5
# Obfuscation options:
# -disable-obfuscation: disable all obfuscations
# -enable-property-obfuscation: obfuscate the property names
# -enable-toplevel-obfuscation: obfuscate the names in the global scope
# -compact: remove unnecessary blank spaces and all line feeds
# -remove-log: remove all console.* statements
# -print-namecache: print the name cache that contains the mapping from the old names to new names
# -apply-namecache: reuse the given cache file
# Keep options:
# -keep-property-name: specifies property names that you want to keep
# -keep-global-name: specifies names that you want to keep in the global scope
-enable-property-obfuscation
-enable-toplevel-obfuscation
-enable-filename-obfuscation
-enable-export-obfuscation
\ No newline at end of file
... ...
{
"meta": {
"stableOrder": true
},
"lockfileVersion": 3,
"ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
"specifiers": {
"libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
"sherpa_onnx@1.10.32": "sherpa_onnx@1.10.32"
},
"packages": {
"libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
"name": "libsherpa_onnx.so",
"version": "1.0.0",
"resolved": "../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
"registryType": "local"
},
"sherpa_onnx@1.10.32": {
"name": "sherpa_onnx",
"version": "1.10.32",
"integrity": "sha512-yHYmWoeqhrunOqGr9gxPJJH/8+rdwcKFOW6onYByVObQVpbqypslg301IjGm9xpnc5bJEkO3S9sra2zQTpPA/w==",
"resolved": "https://ohpm.openharmony.cn/ohpm/sherpa_onnx/-/sherpa_onnx-1.10.32.har",
"registryType": "ohpm",
"dependencies": {
"libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
}
}
}
}
\ No newline at end of file
... ...
{
"name": "entry",
"version": "1.0.0",
"description": "Please describe the basic information.",
"main": "",
"author": "",
"license": "",
"dependencies": {
"sherpa_onnx": "1.10.32",
}
}
... ...
import AbilityConstant from '@ohos.app.ability.AbilityConstant';
import hilog from '@ohos.hilog';
import UIAbility from '@ohos.app.ability.UIAbility';
import Want from '@ohos.app.ability.Want';
import window from '@ohos.window';
/**
 * Entry ability of the demo. Each lifecycle hook only logs that it was
 * called; onWindowStageCreate additionally loads the page 'pages/Index'.
 */
export default class EntryAbility extends UIAbility {
  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    this.logLifecycle('Ability onCreate');
  }

  onDestroy(): void {
    this.logLifecycle('Ability onDestroy');
  }

  onWindowStageCreate(windowStage: window.WindowStage): void {
    // Main window is created, set main page for this ability
    this.logLifecycle('Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (!err.code) {
        hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
        return;
      }
      hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
    });
  }

  onWindowStageDestroy(): void {
    // Main window is destroyed, release UI related resources
    this.logLifecycle('Ability onWindowStageDestroy');
  }

  onForeground(): void {
    // Ability has been brought to the foreground
    this.logLifecycle('Ability onForeground');
  }

  onBackground(): void {
    // Ability has gone back to the background
    this.logLifecycle('Ability onBackground');
  }

  /** Writes `message` to hilog at info level with the tag used by this class. */
  private logLifecycle(message: string): void {
    hilog.info(0x0000, 'testTag', '%{public}s', message);
  }
}
... ...
import hilog from '@ohos.hilog';
import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';
/**
 * Backup extension of the demo. Both hooks only log that they were called.
 */
export default class EntryBackupAbility extends BackupExtensionAbility {
  /** Logs that a backup was requested. */
  async onBackup(): Promise<void> {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  /** Logs that a restore was requested, together with the bundle version. */
  async onRestore(bundleVersion: BundleVersion): Promise<void> {
    const versionJson = JSON.stringify(bundleVersion);
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', versionJson);
  }
}
\ No newline at end of file
... ...
import { CircularBuffer } from 'sherpa_onnx';
import worker, { MessageEvents } from '@ohos.worker';
import { audio } from '@kit.AudioKit';
import picker from '@ohos.file.picker';
import fs from '@ohos.file.fs';
import systemTime from '@ohos.systemTime';
/**
 * Writes 16-bit mono PCM samples to `filename` as a WAV file.
 *
 * The file is created if it does not exist and truncated if it does, so that
 * overwriting a longer file leaves no stale bytes after the new data.
 *
 * @param filename Destination passed to fs.openSync().
 * @param samples 16-bit PCM samples; only the bytes covered by this view are
 *   written.
 * @param sampleRate Sample rate stored in the header.
 */
function savePcmToWav(filename: string, samples: Int16Array, sampleRate: number) {
  const numChannels = 1;
  const bytesPerSample = 2;
  const dataSize = samples.length * bytesPerSample;

  const header = new ArrayBuffer(44);
  const view = new DataView(header);

  // http://soundfile.sapp.org/doc/WaveFormat/
  // The four-character tags are written as little-endian 32-bit integers,
  // hence the reversed byte order in the constants.
  view.setUint32(0, 0x46464952, true); // chunkID: "RIFF"
  view.setUint32(4, 36 + dataSize, true); // chunkSize
  view.setUint32(8, 0x45564157, true); // format: "WAVE"
  view.setUint32(12, 0x20746d66, true); // subchunk1ID: "fmt "
  view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  view.setUint16(20, 1, true); // audioFormat, 1 for PCM (a 16-bit field)
  view.setUint16(22, numChannels, true); // numChannels: 1 channel
  view.setUint32(24, sampleRate, true); // sampleRate
  view.setUint32(28, sampleRate * numChannels * bytesPerSample, true); // byteRate
  view.setUint16(32, numChannels * bytesPerSample, true); // blockAlign
  view.setUint16(34, 16, true); // bitsPerSample
  view.setUint32(36, 0x61746164, true); // subchunk2ID: "data"
  view.setUint32(40, dataSize, true); // subchunk2Size

  // `samples` may be a view onto part of a larger buffer; write only the
  // bytes that belong to the view.
  const coversWholeBuffer = samples.byteOffset == 0 && samples.byteLength == samples.buffer.byteLength;
  const pcm = coversWholeBuffer ? samples.buffer :
    samples.buffer.slice(samples.byteOffset, samples.byteOffset + samples.byteLength);

  const fp = fs.openSync(filename, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE | fs.OpenMode.TRUNC);
  try {
    fs.writeSync(fp.fd, header, { length: header.byteLength });
    fs.writeSync(fp.fd, pcm, { length: pcm.byteLength });
  } finally {
    // Close the descriptor even if a write fails.
    fs.closeSync(fp.fd);
  }
}
/**
 * Converts float samples to 16-bit PCM.
 *
 * Each sample is multiplied by 32767 and limited to [-32768, 32767] before
 * being stored in the Int16Array.
 *
 * @param samples Float samples.
 * @returns A new Int16Array of the same length.
 */
function toInt16Samples(samples: Float32Array): Int16Array {
  const count = samples.length;
  const pcm = new Int16Array(count);

  let i = 0;
  while (i < count) {
    const scaled = samples[i] * 32767;
    pcm[i] = Math.max(-32768, Math.min(32767, scaled));
    i += 1;
  }

  return pcm;
}
@Entry
@Component
struct Index {
@State currentIndex: number = 0;
@State title: string = 'Next-gen Kaldi: Text-to-speech';
@State info: string = '';
@State btnStartCaption: string = 'Start';
@State btnStartEnabled: boolean = false;
@State btnStopCaption: string = 'Stop';
@State btnStopEnabled: boolean = false;
@State btnSaveCaption: string = 'Save';
@State btnSaveEnabled: boolean = false;
@State progress: number = 0;
@State sid: string = '0';
@State speechSpeed: string = '1.0';
@State isGenerating: boolean = false;
@State initTtsDone: boolean = false;
@State ttsGeneratedDone: boolean = true;
@State numSpeakers: number = 1;
@State initAudioDone: boolean = false;
private controller: TabsController = new TabsController();
private cancelled: boolean = false;
private sampleRate: number = 0;
private startTime: number = 0;
private stopTime: number = 0;
private inputText: string = '';
// it specifies only the initial capacity.
private workerInstance?: worker.ThreadWorker
private readonly scriptURL: string = 'entry/ets/workers/NonStreamingTtsWorker.ets'
// note that circular buffer can automatically resize.
private sampleBuffer: CircularBuffer = new CircularBuffer(16000 * 5);
private finalSamples: Float32Array | null = null;
private audioRenderer: audio.AudioRenderer | null = null;
/**
 * Creates the audio renderer used for playback unless one already exists.
 *
 * The renderer is configured for one channel of signed 16-bit little-endian
 * raw PCM at `this.sampleRate`. On success `initAudioDone` is set, the
 * renderer is stored, `audioPlayCallback` is registered for "writeData", and
 * playback is started if samples are already buffered.
 *
 * see
 * https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/using-audiorenderer-for-playback-V5
 */
initAudioRenderer() {
  if (this.audioRenderer) {
    console.log(`Audio renderer has already been created. Skip creating`);
    return;
  }

  console.log('Initializing audio renderer');

  const rendererInfo: audio.AudioRendererInfo = {
    usage: audio.StreamUsage.STREAM_USAGE_MUSIC,
    rendererFlags: 0
  };

  const streamInfo: audio.AudioStreamInfo = {
    samplingRate: this.sampleRate,
    channels: audio.AudioChannel.CHANNEL_1, // number of channels
    sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
    encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW
  };

  const options: audio.AudioRendererOptions = {
    streamInfo: streamInfo,
    rendererInfo: rendererInfo
  };

  audio.createAudioRenderer(options, (err, renderer) => {
    if (err) {
      console.log(`Failed to initialize audio renderer. error message: ${err.message}, error code: ${err.code}`);
      return;
    }

    console.log('audio renderer initialized successfully');
    this.initAudioDone = true;

    if (!renderer) {
      console.log(`returned audio renderer is ${renderer}`);
      return;
    }

    this.audioRenderer = renderer;
    this.audioRenderer.on("writeData", this.audioPlayCallback);

    // Samples may have arrived while the renderer was being created.
    if (this.sampleBuffer.size()) {
      this.audioRenderer.start();
    }
  });
}
/**
 * Starts audio renderer creation, creates the TTS worker, installs the
 * handler for its messages and asks it to initialize the TTS model.
 *
 * Messages handled (selected by `msgType`):
 *  - 'init-tts-done': stores `sampleRate` and `numSpeakers`.
 *  - 'tts-generate-partial': buffers a chunk of samples for playback and
 *    updates the progress; ignored after cancellation.
 *  - 'tts-generate-done': stores the final samples, enables saving and shows
 *    timing statistics.
 */
async aboutToAppear() {
  this.initAudioRenderer();

  this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
    name: 'NonStreaming TTS worker'
  });

  this.workerInstance.onmessage = (e: MessageEvents) => {
    const msgType = e.data['msgType'] as string;
    console.log(`received msg from worker: ${msgType}`);

    if (msgType == 'init-tts-done') {
      this.info = 'Model initialized!\nPlease enter text and press start.';
      this.sampleRate = e.data['sampleRate'] as number;
      this.numSpeakers = e.data['numSpeakers'] as number;
      this.initTtsDone = true;
    }

    if (msgType == 'tts-generate-partial') {
      if (this.cancelled) {
        return;
      }

      const samples: Float32Array = e.data['samples'] as Float32Array;
      const progress: number = e.data['progress'] as number;
      this.progress = progress;
      this.sampleBuffer.push(samples);

      // Retry renderer creation if it has not succeeded so far.
      if (!this.initAudioDone) {
        this.initAudioRenderer();
      }

      if (this.audioRenderer && this.audioRenderer?.state != audio.AudioState.STATE_RUNNING) {
        this.audioRenderer.start();
      }
    }

    if (msgType == 'tts-generate-done') {
      this.isGenerating = false;
      const samples: Float32Array = e.data['samples'] as Float32Array;

      // NOTE(review): if this callback runs after the synchronous code below,
      // the assignment to `this.info` replaces the text appended there.
      systemTime.getRealTime((err, data) => {
        if (err) {
          console.log(`Failed to get stop time`)
        } else {
          this.stopTime = data;

          const audioDuration = samples.length / this.sampleRate;
          const elapsedSeconds = (this.stopTime - this.startTime) / 1000;
          const RTF = elapsedSeconds / audioDuration;

          this.info = `Audio duration: ${audioDuration} s
Elapsed: ${elapsedSeconds} s
RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
`;
          if (this.cancelled) {
            this.info += '\nCancelled.';
          }
        }
      });

      this.finalSamples = samples;
      this.ttsGeneratedDone = true;
      this.btnSaveEnabled = true;

      // Play the complete result if nothing was streamed. Skip this after the
      // user pressed Stop: the buffer is empty and the renderer is stopped
      // then as well, and playback must not start again.
      if (!this.cancelled && this.audioRenderer &&
        this.audioRenderer?.state != audio.AudioState.STATE_RUNNING &&
        this.sampleBuffer.size() == 0) {
        this.sampleBuffer.push(samples);
        this.progress = 1;
        this.audioRenderer.start();
      }

      if (!this.initAudioDone) {
        this.btnStartEnabled = true;
        this.btnStopEnabled = false;
        this.info += '\nAudio renderer is not initialized. Disable playing audio.';
      }
    }
  }

  this.info = 'Initializing TTS model ...';
  this.workerInstance.postMessage({ msgType: 'init-tts', context: getContext() });
}
// Builds one entry of the tab bar: an image above a text label.
// title: label text.
// targetIndex: index of the tab this entry selects.
// selectedImg / normalImg: image shown when the tab is / is not the current one.
@Builder
TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
Column() {
Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
}.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
// Clicking the entry records the new index and switches the Tabs controller to it.
this.currentIndex = targetIndex;
this.controller.changeIndex(this.currentIndex);
})
}
// Declares the UI: a Tabs container with a 'TTS' tab (speaker ID and speed
// inputs, Start/Stop/Save buttons, status text, progress bar, text input) and
// a 'Help' tab with static text.
build() {
Column() {
Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
TabContent() {
Column({ space: 10 }) {
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
// The speaker ID input is only shown for models with more than one speaker.
if (this.numSpeakers > 1) {
Row({ space: 10 }) {
Text(`Speaker ID (0-${this.numSpeakers - 1})`).width('60%')
TextInput({ text: this.sid }).onChange((text) => {
this.sid = text.trim();
}).width('20%')
}.justifyContent(FlexAlign.Center)
}
Row() {
Text('Speech speed').width('60%');
TextInput({ text: this.speechSpeed }).onChange((text) => {
this.speechSpeed = text.trim();
}).width('20%')
}
Row({ space: 10 }) {
// Start: validate the inputs, reset the playback state and ask the worker
// to generate audio for the current text.
Button(this.btnStartCaption).enabled(this.btnStartEnabled).onClick(async () => {
// The round trip through parseInt rejects anything that is not a plain integer.
// NOTE(review): the value is not checked against the displayed range
// 0..numSpeakers-1 -- confirm whether the worker validates it.
let sid = parseInt(this.sid);
if (sid.toString() != this.sid) {
this.info = 'Please input a valid speaker ID';
return;
}
let speed = parseFloat(this.speechSpeed);
if (isNaN(speed)) {
this.info = 'Please enter a valid speech speed';
return;
}
if (speed <= 0) {
this.info = 'Please enter a positive speech speed';
return;
}
if (this.workerInstance && this.initTtsDone) {
this.info = 'Generating...';
this.cancelled = false;
this.finalSamples = null;
this.sampleBuffer.reset();
this.ttsGeneratedDone = false;
this.progress = 0;
this.btnStartEnabled = false;
this.btnStopEnabled = true;
this.btnSaveEnabled = false;
console.log(`sending ${this.inputText}`)
this.ttsGeneratedDone = false;
// Start time for the real-time-factor statistics shown when generation is done.
this.startTime = await systemTime.getRealTime();
this.workerInstance?.postMessage({
msgType: 'tts-generate',
text: this.inputText,
sid: sid,
speed: speed,
});
this.isGenerating = true;
this.info = '';
} else {
this.info = 'Failed to initialize tts model';
this.btnStartEnabled = false;
}
});
// Stop: drop buffered samples, mark the run as cancelled, notify the worker
// and stop the renderer.
Button(this.btnStopCaption).enabled(this.btnStopEnabled).onClick(() => {
this.ttsGeneratedDone = true;
this.btnStartEnabled = true;
this.btnStopEnabled = false;
this.sampleBuffer.reset();
this.cancelled = true;
this.isGenerating = false;
if (this.workerInstance && this.initTtsDone) {
this.workerInstance.postMessage({ msgType: 'tts-generate-cancel' });
}
this.audioRenderer?.stop();
})
// Save: let the user pick a destination and write the final samples as WAV.
Button(this.btnSaveCaption).enabled(this.btnSaveEnabled).onClick(() => {
if (!this.finalSamples || this.finalSamples.length == 0) {
this.btnSaveEnabled = false;
return;
}
let uri: string = '';
const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];
const audioViewPicker = new picker.AudioViewPicker();
// NOTE(review): the result array is indexed without checking that it is
// non-empty, and the promise has no rejection handler -- confirm what the
// picker returns when the user cancels.
audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
uri = audioSelectResult[0];
if (this.finalSamples) {
savePcmToWav(uri, toInt16Samples(this.finalSamples), this.sampleRate);
console.log(`Saved to ${uri}`);
this.info += `\nSaved to ${uri}`;
}
});
});
}
// Status text and progress bar are only shown when there is something to show.
if (this.info != '') {
TextArea({ text: this.info }).focusable(false);
}
if (this.progress > 0) {
Row() {
Progress({ value: 0, total: 100, type: ProgressType.Capsule })
.width('80%')
.height(20)
.value(this.progress * 100);
Text(`${(this.progress * 100).toFixed(2)}%`).width('15%')
}.width('100%').justifyContent(FlexAlign.Center)
}
// Text to synthesize. The Start button is enabled only while the text is
// not blank.
TextArea({ placeholder: 'Input text for TTS and click the start button' })
.width('100%')
.height('100%')
.focusable(this.isGenerating == false && this.initTtsDone)
.onChange((text) => {
this.inputText = text;
if (text.trim() == '') {
this.btnStartEnabled = false;
return;
}
this.btnStartEnabled = true;
})
}.width('100%')
// see https://composeicons.com/
}.tabBar(this.TabBuilder('TTS', 0, $r('app.media.home'), $r('app.media.home')))
TabContent() {
Column({space: 10}) {
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
TextArea({text: `
Everyting is open-sourced.
It runs locally, without accessing the network
See also https://github.com/k2-fsa/sherpa-onnx
新一代 Kaldi QQ 和微信交流群: 请看
https://k2-fsa.github.io/sherpa/social-groups.html
微信公众号: 新一代 Kaldi
`}).width('100%')
.height('100%')
.focusable(false)
}.justifyContent(FlexAlign.Start)
}.tabBar(this.TabBuilder('Help', 1, $r('app.media.info'), $r('app.media.info')))
}.scrollable(false)
}
}
/**
 * "writeData" handler of the audio renderer: fills `buffer` with 16-bit PCM
 * taken from `sampleBuffer`.
 *
 *  - Enough samples buffered: a full buffer is written.
 *  - Too few samples and generation still running: silence is written and
 *    the buffered samples are kept for a later call.
 *  - Too few samples and generation finished: the remaining samples are
 *    written, padded with silence, so that the end of the audio is not
 *    dropped. Once nothing is left, silence is written, the renderer is
 *    stopped and the Start/Stop buttons are reset.
 */
private audioPlayCallback = (buffer: ArrayBuffer) => {
  const numSamples = buffer.byteLength / 2;
  const int16Samples = new Int16Array(buffer);
  const available = this.sampleBuffer.size();

  const canFillBuffer = available >= numSamples;
  // After generation has finished no more samples will arrive, so a final
  // partial buffer has to be played as it is.
  const numToPlay = canFillBuffer ? numSamples : (this.ttsGeneratedDone ? available : 0);

  if (canFillBuffer || numToPlay > 0) {
    const samples: Float32Array = this.sampleBuffer.get(this.sampleBuffer.head(), numToPlay);

    for (let i = 0; i < numToPlay; ++i) {
      let s = samples[i] * 32767;
      s = s > 32767 ? 32767 : s;
      s = s < -32768 ? -32768 : s;
      int16Samples[i] = s;
    }

    this.sampleBuffer.pop(numToPlay);

    // Zero-pad the rest; a no-op when the buffer was filled completely.
    int16Samples.fill(0, numToPlay);
    return;
  }

  int16Samples.fill(0);

  if (this.ttsGeneratedDone) {
    this.audioRenderer?.stop();
    this.btnStartEnabled = true;
    this.btnStopEnabled = false;
  }
};
}
\ No newline at end of file
... ...
import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker';
import { fileIo as fs } from '@kit.CoreFileKit';
import {OfflineTtsConfig, OfflineTts, listRawfileDir, TtsInput, TtsOutput} from 'sherpa_onnx';
import { buffer } from '@kit.ArkTS';
const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
let tts: OfflineTts;
let cancelled = false;
/**
 * Creates the directory `parts.join('/')`, including all missing parent
 * directories, inside the application sandbox (`filesDir`).
 *
 * @param context Context used to look up the sandbox `filesDir`.
 * @param parts Path components of the directory, relative to `filesDir`.
 */
function mkdir(context: Context, parts: string[]) {
  const sandboxPath: string = context.getApplicationContext().filesDir;

  // Fast path: the whole directory chain already exists. The check has to
  // use the path inside the sandbox, since that is where the directories
  // are created below; checking the bare relative path tests a different
  // location.
  if (fs.accessSync(sandboxPath + '/' + parts.join('/'))) {
    return;
  }

  let d = sandboxPath;
  for (const p of parts) {
    d = d + '/' + p;
    if (!fs.accessSync(d)) {
      fs.mkdirSync(d);
    }
  }
}
/**
 * Copies every file that `listRawfileDir` reports for `srcDir` into the
 * application sandbox, keeping the same relative path and creating parent
 * directories as needed.
 *
 * @param context Context providing the resource manager and the sandbox dir.
 * @param srcDir Directory inside rawfile to copy.
 */
function copyRawFileDirToSandbox(context: Context, srcDir: string) {
  const rawFiles: string[] = listRawfileDir(context.resourceManager, srcDir);

  rawFiles.forEach((rawFile: string) => {
    const components: string[] = rawFile.split('/');

    // A file located in a sub-directory needs its parent directories first.
    if (components.length != 1) {
      const parentDirs: string[] = components.slice(0, -1);
      mkdir(context, parentDirs);
    }

    // Source (rawfile) and destination (sandbox) use the same relative path.
    copyRawFileToSandbox(context, rawFile, rawFile);
  });
}
/**
 * Copies the rawfile `src` to `<filesDir>/<dst>` inside the sandbox.
 *
 * The copy is skipped if the destination already exists and has the same
 * size as the source.
 *
 * @param context Context providing the resource manager and the sandbox dir.
 * @param src Path of the file inside rawfile.
 * @param dst Destination path, relative to the sandbox `filesDir`. Its
 *            parent directory must already exist.
 */
function copyRawFileToSandbox(context: Context, src: string, dst: string) {
  // see https://blog.csdn.net/weixin_44640245/article/details/142634846
  // https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
  const content: Uint8Array = context.resourceManager.getRawFileContentSync(src);

  // https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir
  const sandboxPath: string = context.getApplicationContext().filesDir;
  const filepath = sandboxPath + '/' + dst;

  if (fs.accessSync(filepath)) {
    // if the destination exists and has the expected file size,
    // then we skip copying it
    const stat = fs.statSync(filepath);
    if (stat.size == content.length) {
      return;
    }
  }

  const fp = fs.openSync(filepath, fs.OpenMode.WRITE_ONLY | fs.OpenMode.CREATE | fs.OpenMode.TRUNC);
  try {
    fs.writeSync(fp.fd, buffer.from(content).buffer);
  } finally {
    // Close synchronously, and also when writing fails, so that the file
    // descriptor is never leaked and the data is flushed before returning.
    fs.closeSync(fp);
  }
}
/**
 * Builds the configuration for the selected VITS tts model and creates the
 * OfflineTts instance from it.
 *
 * All model files are addressed relative to `modelDir`. The optional
 * `dataDir` and `dictDir` directories are first copied from rawfile into the
 * application sandbox and are passed to the engine as sandbox paths.
 *
 * @param context Context providing the resource manager and the sandbox dir.
 * @returns The created OfflineTts instance.
 * @throws Error if no model has been selected in the section below.
 */
function initTts(context: Context): OfflineTts {
  // The model is selected by assigning the variables below, which makes it
  // easier to build APPs with github actions for a variety of tts models
  //
  // See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
  // for details
  let modelDir = '';
  let modelName = '';
  let ruleFsts = '';
  let ruleFars = '';
  let lexicon = '';
  let dataDir = '';
  let dictDir = '';

  // You can select an example below and change it to match your
  // selected tts model

  // ============================================================
  // Your change starts here
  // ============================================================

  // Example 1:
  // modelDir = 'vits-vctk';
  // modelName = 'vits-vctk.onnx';
  // lexicon = 'lexicon.txt';

  // Example 2:
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  // modelDir = 'vits-piper-en_US-amy-low';
  // modelName = 'en_US-amy-low.onnx';
  // dataDir = 'espeak-ng-data';

  // Example 3:
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  // modelDir = 'vits-icefall-zh-aishell3';
  // modelName = 'model.onnx';
  // ruleFsts = 'phone.fst,date.fst,number.fst,new_heteronym.fst';
  // ruleFars = 'rule.far';
  // lexicon = 'lexicon.txt';

  // Example 4:
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
  // modelDir = 'vits-zh-hf-fanchen-C';
  // modelName = 'vits-zh-hf-fanchen-C.onnx';
  // lexicon = 'lexicon.txt';
  // dictDir = 'dict';

  // Example 5:
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  // modelDir = 'vits-coqui-de-css10';
  // modelName = 'model.onnx';

  // Example 6
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  // modelDir = 'vits-piper-en_US-libritts_r-medium';
  // modelName = 'en_US-libritts_r-medium.onnx';
  // dataDir = 'espeak-ng-data';

  // Example 7
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2
  // modelDir = 'vits-melo-tts-zh_en';
  // modelName = 'model.onnx';
  // lexicon = 'lexicon.txt';
  // dictDir = 'dict';
  // ruleFsts = `date.fst,phone.fst,number.fst`;

  // ============================================================
  // Please don't change the remaining part of this function
  // ============================================================
  if (modelName == '') {
    throw new Error('You are supposed to select a model by changing the code before you run the app');
  }

  // Prefixes every entry of a comma-separated list with the model directory.
  const prefixWithModelDir = (commaSeparated: string): string => {
    return commaSeparated.split(',').map((name: string): string => modelDir + '/' + name).join(',');
  };

  // Copies a sub-directory of the model from rawfile into the sandbox and
  // returns its path inside the sandbox.
  const copyToSandbox = (subDir: string): string => {
    copyRawFileDirToSandbox(context, modelDir + '/' + subDir);
    const sandboxPath: string = context.getApplicationContext().filesDir;
    return sandboxPath + '/' + modelDir + '/' + subDir;
  };

  modelName = modelDir + '/' + modelName;

  if (ruleFsts != '') {
    ruleFsts = prefixWithModelDir(ruleFsts);
  }

  if (ruleFars != '') {
    ruleFars = prefixWithModelDir(ruleFars);
  }

  if (lexicon != '') {
    lexicon = modelDir + '/' + lexicon;
  }

  if (dataDir != '') {
    dataDir = copyToSandbox(dataDir);
  }

  if (dictDir != '') {
    dictDir = copyToSandbox(dictDir);
  }

  const tokens = modelDir + '/tokens.txt';

  const config: OfflineTtsConfig = new OfflineTtsConfig();

  config.model.vits.model = modelName;
  config.model.vits.lexicon = lexicon;
  config.model.vits.tokens = tokens;
  config.model.vits.dataDir = dataDir;
  config.model.vits.dictDir = dictDir;

  config.model.numThreads = 2;
  config.model.debug = true;

  config.ruleFsts = ruleFsts;
  config.ruleFars = ruleFars;

  return new OfflineTts(config, context.resourceManager);
}
// Data passed to the tts generation callback (see `callback` in this file).
interface TtsCallbackData {
// Audio samples delivered with this invocation of the callback.
samples: Float32Array;
// Generation progress. NOTE(review): presumably a fraction in [0, 1],
// since the UI multiplies it by 100 to show a percentage; confirm.
progress: number;
}
/**
 * Invoked during generation with partial results. It forwards a copy of the
 * samples and the progress to the host thread.
 *
 * @param data Partial samples and the current progress.
 * @returns 0 to stop generating in C++; 1 to continue generating in C++.
 */
function callback(data: TtsCallbackData): number {
  const partialSamples = Float32Array.from(data.samples);

  workerPort.postMessage({
    'msgType': 'tts-generate-partial',
    samples: partialSamples,
    progress: data.progress,
  });

  // `cancelled` is set when the host thread sends 'tts-generate-cancel'.
  if (cancelled) {
    return 0;
  }

  return 1;
}
/**
 * Defines the event handler to be called when the worker thread receives a message sent by the host thread.
 * The event handler is executed in the worker thread.
 *
 * Handled message types (`e.data['msgType']`):
 *  - 'init-tts': creates the tts engine once from `e.data['context']` and
 *    replies with 'init-tts-done' (sampleRate, numSpeakers).
 *  - 'tts-generate-cancel': asks the running generation to stop.
 *  - 'tts-generate': generates audio for `text`, `sid` and `speed`. Partial
 *    results are posted by `callback`; the final result is posted as
 *    'tts-generate-done'.
 *
 * @param e message data
 */
workerPort.onmessage = (e: MessageEvents) => {
  const msgType = e.data['msgType'] as string;
  console.log(`msg-type: ${msgType}`);

  if (msgType == 'init-tts' && !tts) {
    const context = e.data['context'] as Context;
    tts = initTts(context);
    workerPort.postMessage({ 'msgType': 'init-tts-done',
      sampleRate: tts.sampleRate,
      numSpeakers: tts.numSpeakers,
    });
  }

  if (msgType == 'tts-generate-cancel') {
    cancelled = true;
  }

  if (msgType == 'tts-generate') {
    if (!tts) {
      // The engine is created only by 'init-tts'; without it there is
      // nothing to generate with.
      console.error('Received tts-generate before the tts engine is initialized. Ignore it.');
      return;
    }

    const text = e.data['text'] as string;
    console.log(`recevied text ${text}`);

    const input: TtsInput = new TtsInput();
    input.text = text;
    input.sid = e.data['sid'] as number;
    input.speed = e.data['speed'] as number;
    input.callback = callback;

    // A new request clears a cancellation left over from a previous one.
    cancelled = false;

    // Change it to false to use the synchronous API instead.
    const useAsync: boolean = true;
    if (useAsync) {
      tts.generateAsync(input).then((ttsOutput: TtsOutput) => {
        console.log(`sampleRate: ${ttsOutput.sampleRate}`);
        workerPort.postMessage({
          'msgType': 'tts-generate-done',
          samples: Float32Array.from(ttsOutput.samples),
        });
      }).catch((err: Error) => {
        // Don't let a failed generation disappear as an unhandled rejection.
        console.error(`Failed to generate audio: ${err.message}`);
      });
    } else {
      const ttsOutput: TtsOutput = tts.generate(input);
      workerPort.postMessage({
        'msgType': 'tts-generate-done',
        samples: Float32Array.from(ttsOutput.samples),
      });
    }
  }
}
/**
* Defines the event handler to be called when the worker receives a message that cannot be deserialized.
* The event handler is executed in the worker thread.
*
* @param e message data
*/
workerPort.onmessageerror = (e: MessageEvents) => {
  // Log the failure so that a message that cannot be deserialized does not
  // get lost silently.
  console.error('The tts worker received a message that cannot be deserialized');
}
/**
* Defines the event handler to be called when an exception occurs during worker execution.
* The event handler is executed in the worker thread.
*
* @param e error message
*/
workerPort.onerror = (e: ErrorEvent) => {
  // Log the exception (e.g., one thrown while initializing the tts engine)
  // instead of swallowing it silently.
  console.error(`Error in the tts worker: ${e.message} (${e.filename}:${e.lineno}:${e.colno})`);
}
... ...
{
"module": {
"name": "entry",
"type": "entry",
"description": "$string:module_desc",
"mainElement": "EntryAbility",
"deviceTypes": [
"phone",
"tablet",
"2in1"
],
"deliveryWithInstall": true,
"installationFree": false,
"pages": "$profile:main_pages",
"abilities": [
{
"name": "EntryAbility",
"srcEntry": "./ets/entryability/EntryAbility.ets",
"description": "$string:EntryAbility_desc",
"icon": "$media:layered_image",
"label": "$string:EntryAbility_label",
"startWindowIcon": "$media:startIcon",
"startWindowBackground": "$color:start_window_background",
"exported": true,
"skills": [
{
"entities": [
"entity.system.home"
],
"actions": [
"action.system.home"
]
}
]
}
],
"extensionAbilities": [
{
"name": "EntryBackupAbility",
"srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
"type": "backup",
"exported": false,
"metadata": [
{
"name": "ohos.extension.backup",
"resource": "$profile:backup_config"
}
],
}
]
}
}
\ No newline at end of file
... ...
{
"color": [
{
"name": "start_window_background",
"value": "#FFFFFF"
}
]
}
\ No newline at end of file
... ...
{
"string": [
{
"name": "module_desc",
"value": "On-device text-to-speech with Next-gen Kaldi"
},
{
"name": "EntryAbility_desc",
"value": "On-device text-to-speech with Next-gen Kaldi"
},
{
"name": "EntryAbility_label",
"value": "TTS"
}
]
}
\ No newline at end of file
... ...
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 -960 960 960"><path d="m480-840 440 330-48 64-72-54v380H160v-380l-72 54-48-64zM294-478q0 53 57 113t129 125q72-65 129-125t57-113q0-44-30-73t-72-29q-26 0-47.5 10.5T480-542q-15-17-37.5-27.5T396-580q-42 0-72 29t-30 73m426 278v-360L480-740 240-560v360zm0 0H240z"/></svg>
\ No newline at end of file
... ...
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 -960 960 960"><path d="M440-280h80v-240h-80zm40-320q17 0 28.5-11.5T520-640t-11.5-28.5T480-680t-28.5 11.5T440-640t11.5 28.5T480-600m0 520q-83 0-156-31.5T197-197t-85.5-127T80-480t31.5-156T197-763t127-85.5T480-880t156 31.5T763-763t85.5 127T880-480t-31.5 156T763-197t-127 85.5T480-80m0-80q134 0 227-93t93-227-93-227-227-93-227 93-93 227 93 227 227 93m0-320"/></svg>
\ No newline at end of file
... ...
{
"layered-image":
{
"background" : "$media:background",
"foreground" : "$media:foreground"
}
}
\ No newline at end of file
... ...
{
"allowToBackupRestore": true
}
\ No newline at end of file
... ...
{
"string": [
{
"name": "module_desc",
"value": "On-device text-to-speech with Next-gen Kaldi"
},
{
"name": "EntryAbility_desc",
"value": "On-device text-to-speech with Next-gen Kaldi"
},
{
"name": "EntryAbility_label",
"value": "TTS"
}
]
}
\ No newline at end of file
... ...
{
"string": [
{
"name": "module_desc",
"value": "使用新一代Kaldi进行本地离线语音合成"
},
{
"name": "EntryAbility_desc",
"value": "使用新一代Kaldi进行本地离线语音合成"
},
{
"name": "EntryAbility_label",
"value": "本地语音合成"
}
]
}
\ No newline at end of file
... ...
import hilog from '@ohos.hilog';
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
/**
 * Registers the 'ActsAbilityTest' suite: empty setup/teardown hooks and one
 * sample test case that checks string assertions.
 */
export default function abilityTest() {
  // describe() takes the suite name and the function that defines the suite.
  describe('ActsAbilityTest', () => {
    beforeAll(() => {
      // Runs once before all test cases of this suite. Nothing to prepare.
    })

    beforeEach(() => {
      // Runs before every test case, i.e., as many times as there are
      // test cases defined by it(). Nothing to prepare.
    })

    afterEach(() => {
      // Runs after every test case, i.e., as many times as there are
      // test cases defined by it(). Nothing to clean up.
    })

    afterAll(() => {
      // Runs once after all test cases of this suite. Nothing to clean up.
    })

    // it() takes the test case name, a filter parameter and the test function.
    it('assertContain', 0, () => {
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');

      const haystack = 'abc';
      const needle = 'b';

      // The string contains the sub-string and equals itself.
      expect(haystack).assertContain(needle);
      expect(haystack).assertEqual(haystack);
    })
  })
}
\ No newline at end of file
... ...
import abilityTest from './Ability.test';
// Entry point of the test list: runs every registered test suite.
// Currently there is only abilityTest().
export default function testsuite() {
abilityTest();
}
\ No newline at end of file
... ...
{
"module": {
"name": "entry_test",
"type": "feature",
"deviceTypes": [
"phone",
"tablet",
"2in1"
],
"deliveryWithInstall": true,
"installationFree": false
}
}
... ...
import localUnitTest from './LocalUnit.test';
// Entry point of the local test list: runs every registered test suite.
// Currently there is only localUnitTest().
export default function testsuite() {
localUnitTest();
}
\ No newline at end of file
... ...
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
/**
 * Registers the 'localUnitTest' suite: empty setup/teardown hooks and one
 * sample test case that checks string assertions.
 */
export default function localUnitTest() {
  // describe() takes the suite name and the function that defines the suite.
  describe('localUnitTest', () => {
    beforeAll(() => {
      // Runs once before all test cases of this suite. Nothing to prepare.
    });

    beforeEach(() => {
      // Runs before every test case, i.e., as many times as there are
      // test cases defined by it(). Nothing to prepare.
    });

    afterEach(() => {
      // Runs after every test case, i.e., as many times as there are
      // test cases defined by it(). Nothing to clean up.
    });

    afterAll(() => {
      // Runs once after all test cases of this suite. Nothing to clean up.
    });

    // it() takes the test case name, a filter parameter and the test function.
    it('assertContain', 0, () => {
      const haystack = 'abc';
      const needle = 'b';

      // The string contains the sub-string and equals itself.
      expect(haystack).assertContain(needle);
      expect(haystack).assertEqual(haystack);
    });
  });
}
\ No newline at end of file
... ...
{
"modelVersion": "5.0.0",
"dependencies": {
},
"execution": {
// "analyze": "normal", /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
// "daemon": true, /* Enable daemon compilation. Value: [ true | false ]. Default: true */
// "incremental": true, /* Enable incremental compilation. Value: [ true | false ]. Default: true */
// "parallel": true, /* Enable parallel compilation. Value: [ true | false ]. Default: true */
// "typeCheck": false, /* Enable typeCheck. Value: [ true | false ]. Default: false */
},
"logging": {
// "level": "info" /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
},
"debugging": {
// "stacktrace": false /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
},
"nodeOptions": {
// "maxOldSpaceSize": 8192 /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
// "exposeGC": true /* Enable to trigger garbage collection explicitly. Default: true*/
}
}
... ...
import { appTasks } from '@ohos/hvigor-ohos-plugin';
// Default export of the hvigor build script: the built-in app-level tasks
// and an empty list of custom plugins.
export default {
system: appTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
}
... ...
{
"meta": {
"stableOrder": true
},
"lockfileVersion": 3,
"ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
"specifiers": {
"@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
},
"packages": {
"@ohos/hypium@1.0.19": {
"name": "@ohos/hypium",
"version": "1.0.19",
"integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
"resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
"registryType": "ohpm"
}
}
}
\ No newline at end of file
... ...
{
"modelVersion": "5.0.0",
"description": "Please describe the basic information.",
"dependencies": {
},
"devDependencies": {
"@ohos/hypium": "1.0.19"
}
}
... ...
... ... @@ -11,6 +11,7 @@ import { audio } from '@kit.AudioKit';
@Entry
@Component
struct Index {
@State title: string = 'Next-gen Kaldi: VAD + ASR';
@State currentIndex: number = 0;
@State resultForFile: string = '';
@State progressForFile: number = 0;
... ... @@ -73,13 +74,11 @@ struct Index {
};
const audioCapturerInfo: audio.AudioCapturerInfo = {
source: audio.SourceType.SOURCE_TYPE_MIC,
capturerFlags: 0
source: audio.SourceType.SOURCE_TYPE_MIC, capturerFlags: 0
};
const audioCapturerOptions: audio.AudioCapturerOptions = {
streamInfo: audioStreamInfo,
capturerInfo: audioCapturerInfo
streamInfo: audioStreamInfo, capturerInfo: audioCapturerInfo
};
audio.createAudioCapturer(audioCapturerOptions, (err, data) => {
... ... @@ -162,15 +161,9 @@ struct Index {
@Builder
TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
Column() {
Image(this.currentIndex == targetIndex ? selectedImg : normalImg)
.size({ width: 25, height: 25 })
Text(title)
.fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
}
.width('100%')
.height(50)
.justifyContent(FlexAlign.Center)
.onClick(() => {
Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
}.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
this.currentIndex = targetIndex;
this.controller.changeIndex(this.currentIndex);
})
... ... @@ -181,11 +174,7 @@ struct Index {
Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
TabContent() {
Column({ space: 10 }) {
Text('Next-gen Kaldi: VAD + ASR')
.fontColor('#182431')
.fontSize(25)
.lineHeight(41)
.fontWeight(500)
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
Button('Select .wav file (16kHz) ')
.enabled(this.selectFileBtnEnabled)
... ... @@ -211,8 +200,7 @@ struct Index {
if (this.workerInstance) {
this.workerInstance.postMessage({
msgType: 'non-streaming-asr-vad-decode',
filename: result[0],
msgType: 'non-streaming-asr-vad-decode', filename: result[0],
});
} else {
console.log(`this worker instance is undefined ${this.workerInstance}`);
... ... @@ -236,18 +224,17 @@ struct Index {
}.width('100%').justifyContent(FlexAlign.Center)
}
TextArea({ text: this.resultForFile }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });
}
.alignItems(HorizontalAlign.Center)
.justifyContent(FlexAlign.Start)
TextArea({ text: this.resultForFile })
.width('100%')
.lineSpacing({ value: 10, unit: LengthUnit.VP })
.height('100%');
}.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start)
}.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc_default')))
TabContent() {
Column() {
Button(this.message)
.enabled(this.micInitDone)
.onClick(() => {
Column({ space: 10 }) {
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
Button(this.message).enabled(this.micInitDone).onClick(() => {
console.log('clicked mic button');
this.resultForMic = '';
if (this.mic) {
... ... @@ -267,8 +254,7 @@ struct Index {
if (this.workerInstance) {
console.log('decode mic');
this.workerInstance.postMessage({
msgType: 'non-streaming-asr-vad-mic',
samples,
msgType: 'non-streaming-asr-vad-mic', samples,
});
} else {
console.log(`this worker instance is undefined ${this.workerInstance}`);
... ... @@ -285,31 +271,39 @@ struct Index {
Text(`Supported languages: ${this.lang}`)
TextArea({ text: this.resultForMic }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });
}
.alignItems(HorizontalAlign.Center)
.justifyContent(FlexAlign.Start)
TextArea({ text: this.resultForMic })
.width('100%')
.lineSpacing({ value: 10, unit: LengthUnit.VP })
.width('100%')
.height('100%');
}.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start)
}
.tabBar(this.TabBuilder('From mic', 1, $r('app.media.ic_public_input_voice'),
$r('app.media.ic_public_input_voice_default')))
TabContent() {
Column() {
Text("Everything is open-sourced");
Divider();
Text("It runs locally, without accessing the network");
Divider();
Text("See also https://github.com/k2-fsa/sherpa-onnx");
Divider();
Text("and https://k2-fsa.github.io/sherpa/social-groups.html");
Column({ space: 10 }) {
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
TextArea({
text: `
Everyting is open-sourced.
It runs locally, without accessing the network
See also https://github.com/k2-fsa/sherpa-onnx
新一代 Kaldi QQ 和微信交流群: 请看
https://k2-fsa.github.io/sherpa/social-groups.html
微信公众号: 新一代 Kaldi
`
}).width('100%').height('100%').focusable(false)
}.justifyContent(FlexAlign.Start)
}.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'),
$r('app.media.info_circle_default')))
}.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'), $r('app.media.info_circle_default')))
}.scrollable(false)
}
.width('100%')
.justifyContent(FlexAlign.Start)
}.width('100%').justifyContent(FlexAlign.Start)
}
private micCallback = (buffer: ArrayBuffer) => {
... ...
... ... @@ -2,19 +2,19 @@
"string": [
{
"name": "module_desc",
"value": "VAD+ASR with Next-gen Kaldi"
"value": "On-device VAD+ASR with Next-gen Kaldi"
},
{
"name": "EntryAbility_desc",
"value": "VAD+ASR"
"value": "On-device VAD+ASR with Next-gen Kaldi"
},
{
"name": "EntryAbility_label",
"value": "VAD_ASR"
"value": "On-device speech recognition"
},
{
"name": "mic_reason",
"value": "access the microhone for speech recognition"
"value": "access the microphone for on-device speech recognition with Next-gen Kaldi"
}
]
}
\ No newline at end of file
... ...
... ... @@ -2,15 +2,19 @@
"string": [
{
"name": "module_desc",
"value": "module description"
"value": "On-device VAD+ASR with Next-gen Kaldi"
},
{
"name": "EntryAbility_desc",
"value": "description"
"value": "On-device VAD+ASR with Next-gen Kaldi"
},
{
"name": "EntryAbility_label",
"value": "label"
"value": "On-device speech recognition"
},
{
"name": "mic_reason",
"value": "access the microphone for on-device speech recognition with Next-gen Kaldi"
}
]
}
\ No newline at end of file
... ...
... ... @@ -2,15 +2,19 @@
"string": [
{
"name": "module_desc",
"value": "模块描述"
"value": "基于新一代Kaldi的本地语音识别"
},
{
"name": "EntryAbility_desc",
"value": "description"
"value": "基于新一代Kaldi的本地语音识别"
},
{
"name": "EntryAbility_label",
"value": "label"
"value": "本地语音识别"
},
{
"name": "mic_reason",
"value": "使用新一代Kaldi, 访问麦克风进行本地语音识别 (不需要联网)"
}
]
}
\ No newline at end of file
... ...
... ... @@ -1169,6 +1169,17 @@ SherpaOnnxOfflineTtsGenerateWithProgressCallback(
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
}
// Generates audio for `text` and reports progress through `callback`.
//
// `arg` is not interpreted here; it is handed back unchanged as the last
// argument of every invocation of `callback`.
const SherpaOnnxGeneratedAudio *
SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) {
  // Bind `arg` so that the 4-argument C callback can be invoked with the
  // 3 arguments (samples, n, progress) supplied by the implementation.
  auto forward_with_arg = [callback, arg](const float *chunk,
                                          int32_t num_samples,
                                          float fraction_done) {
    return callback(chunk, num_samples, fraction_done, arg);
  };

  return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed,
                                              forward_with_arg);
}
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) {
... ...
... ... @@ -930,6 +930,9 @@ typedef int32_t (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallback)(
const float *samples, int32_t n, float p);
// Same as SherpaOnnxGeneratedAudioProgressCallback, but with an additional
// `void *arg` as the last parameter. It is invoked with `n` samples, the
// progress `p`, and the `arg` given to the generate function.
// NOTE(review): the return value presumably tells the generator whether to
// continue (1) or stop (0); confirm against the implementation.
typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallbackWithArg)(
const float *samples, int32_t n, float p, void *arg);
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;
// Create an instance of offline TTS. The user has to use DestroyOfflineTts()
... ... @@ -964,11 +967,19 @@ SherpaOnnxOfflineTtsGenerateWithCallback(
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
SherpaOnnxGeneratedAudioCallback callback);
SHERPA_ONNX_API
const SherpaOnnxGeneratedAudio *
SherpaOnnxOfflineTtsGenerateWithProgressCallback(
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
SherpaOnnxGeneratedAudioProgressCallback callback);
// Same as SherpaOnnxOfflineTtsGenerateWithProgressCallback, but `arg` is
// passed unchanged as the last argument to every invocation of `callback`.
SHERPA_ONNX_API
const SherpaOnnxGeneratedAudio *
SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg);
// Same as SherpaOnnxGeneratedAudioCallback but you can pass an additional
// `void* arg` to the callback.
SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *
... ...
... ... @@ -22,8 +22,14 @@ CircularBuffer::CircularBuffer(int32_t capacity) {
void CircularBuffer::Resize(int32_t new_capacity) {
int32_t capacity = static_cast<int32_t>(buffer_.size());
if (new_capacity <= capacity) {
#if __OHOS__
SHERPA_ONNX_LOGE(
"new_capacity (%{public}d) <= original capacity (%{public}d). Skip it.",
new_capacity, capacity);
#else
SHERPA_ONNX_LOGE("new_capacity (%d) <= original capacity (%d). Skip it.",
new_capacity, capacity);
#endif
return;
}
... ... @@ -90,10 +96,18 @@ void CircularBuffer::Push(const float *p, int32_t n) {
int32_t size = Size();
if (n + size > capacity) {
int32_t new_capacity = std::max(capacity * 2, n + size);
#if __OHOS__
SHERPA_ONNX_LOGE(
"Overflow! n: %{public}d, size: %{public}d, n+size: %{public}d, "
"capacity: %{public}d. Increase "
"capacity to: %{public}d. (Original data is copied. No data loss!)",
n, size, n + size, capacity, new_capacity);
#else
SHERPA_ONNX_LOGE(
"Overflow! n: %d, size: %d, n+size: %d, capacity: %d. Increase "
"capacity to: %d",
"capacity to: %d. (Original data is copied. No data loss!)",
n, size, n + size, capacity, new_capacity);
#endif
Resize(new_capacity);
capacity = new_capacity;
... ...
... ... @@ -7,6 +7,7 @@
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iomanip>
#include <memory>
#include <sstream>
#include <strstream>
... ... @@ -159,17 +160,26 @@ std::vector<TokenIDs> Lexicon::ConvertTextToTokenIdsChinese(
words = ProcessHeteronyms(words);
if (debug_) {
fprintf(stderr, "Input text in string: %s\n", text.c_str());
fprintf(stderr, "Input text in bytes:");
std::ostringstream os;
os << "Input text in string: " << text << "\n";
os << "Input text in bytes:";
for (uint8_t c : text) {
fprintf(stderr, " %02x", c);
os << " 0x" << std::setfill('0') << std::setw(2) << std::right << std::hex
<< c;
}
fprintf(stderr, "\n");
fprintf(stderr, "After splitting to words:");
os << "\n";
os << "After splitting to words:";
for (const auto &w : words) {
fprintf(stderr, " %s", w.c_str());
os << " " << w;
}
fprintf(stderr, "\n");
os << "\n";
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
}
std::vector<TokenIDs> ans;
... ... @@ -259,17 +269,26 @@ std::vector<TokenIDs> Lexicon::ConvertTextToTokenIdsNotChinese(
std::vector<std::string> words = SplitUtf8(text);
if (debug_) {
fprintf(stderr, "Input text (lowercase) in string: %s\n", text.c_str());
fprintf(stderr, "Input text in bytes:");
std::ostringstream os;
os << "Input text (lowercase) in string: " << text << "\n";
os << "Input text in bytes:";
for (uint8_t c : text) {
fprintf(stderr, " %02x", c);
os << " 0x" << std::setfill('0') << std::setw(2) << std::right << std::hex
<< c;
}
fprintf(stderr, "\n");
fprintf(stderr, "After splitting to words:");
os << "\n";
os << "After splitting to words:";
for (const auto &w : words) {
fprintf(stderr, " %s", w.c_str());
os << " " << w;
}
fprintf(stderr, "\n");
os << "\n";
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
}
int32_t blank = token2id_.at(" ");
... ...
... ... @@ -6,11 +6,21 @@
#include <fstream>
#include <regex> // NOLINT
#include <strstream>
#include <utility>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "cppjieba/Jieba.hpp"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/text-utils.h"
... ... @@ -62,6 +72,60 @@ class MeloTtsLexicon::Impl {
}
}
template <typename Manager>
Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
const std::string &dict_dir,
const OfflineTtsVitsModelMetaData &meta_data, bool debug)
: meta_data_(meta_data), debug_(debug) {
std::string dict = dict_dir + "/jieba.dict.utf8";
std::string hmm = dict_dir + "/hmm_model.utf8";
std::string user_dict = dict_dir + "/user.dict.utf8";
std::string idf = dict_dir + "/idf.utf8";
std::string stop_word = dict_dir + "/stop_words.utf8";
AssertFileExists(dict);
AssertFileExists(hmm);
AssertFileExists(user_dict);
AssertFileExists(idf);
AssertFileExists(stop_word);
jieba_ =
std::make_unique<cppjieba::Jieba>(dict, hmm, user_dict, idf, stop_word);
{
auto buf = ReadFile(mgr, tokens);
std::istrstream is(buf.data(), buf.size());
InitTokens(is);
}
{
auto buf = ReadFile(mgr, lexicon);
std::istrstream is(buf.data(), buf.size());
InitLexicon(is);
}
}
// Constructs the lexicon without jieba.
//
// `tokens` and `lexicon` are read through the resource manager `mgr`.
template <typename Manager>
Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
     const OfflineTtsVitsModelMetaData &meta_data, bool debug)
    : meta_data_(meta_data), debug_(debug) {
  {
    // The buffer is released as soon as the tokens have been parsed.
    auto token_buf = ReadFile(mgr, tokens);
    std::istrstream token_stream(token_buf.data(), token_buf.size());
    InitTokens(token_stream);
  }

  {
    // The buffer is released as soon as the lexicon has been parsed.
    auto lexicon_buf = ReadFile(mgr, lexicon);
    std::istrstream lexicon_stream(lexicon_buf.data(), lexicon_buf.size());
    InitLexicon(lexicon_stream);
  }
}
std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &_text) const {
std::string text = ToLowerCase(_text);
// see
... ... @@ -84,17 +148,24 @@ class MeloTtsLexicon::Impl {
jieba_->Cut(text, words, is_hmm);
if (debug_) {
SHERPA_ONNX_LOGE("input text: %s", text.c_str());
SHERPA_ONNX_LOGE("after replacing punctuations: %s", s.c_str());
std::ostringstream os;
std::string sep = "";
for (const auto &w : words) {
os << sep << w;
sep = "_";
}
#if __OHOS__
SHERPA_ONNX_LOGE("input text: %{public}s", text.c_str());
SHERPA_ONNX_LOGE("after replacing punctuations: %{public}s", s.c_str());
SHERPA_ONNX_LOGE("after jieba processing: %{public}s",
os.str().c_str());
#else
SHERPA_ONNX_LOGE("input text: %s", text.c_str());
SHERPA_ONNX_LOGE("after replacing punctuations: %s", s.c_str());
SHERPA_ONNX_LOGE("after jieba processing: %s", os.str().c_str());
#endif
}
} else {
words = SplitUtf8(text);
... ... @@ -102,7 +173,7 @@ class MeloTtsLexicon::Impl {
if (debug_) {
fprintf(stderr, "Input text in string (lowercase): %s\n", text.c_str());
fprintf(stderr, "Input text in bytes (lowercase):");
for (uint8_t c : text) {
for (int8_t c : text) {
fprintf(stderr, " %02x", c);
}
fprintf(stderr, "\n");
... ... @@ -307,9 +378,48 @@ MeloTtsLexicon::MeloTtsLexicon(const std::string &lexicon,
bool debug)
: impl_(std::make_unique<Impl>(lexicon, tokens, meta_data, debug)) {}
// Constructs the lexicon from files accessed through the resource manager
// `mgr`, with a jieba dictionary directory `dict_dir`. All arguments are
// forwarded to the corresponding Impl constructor.
template <typename Manager>
MeloTtsLexicon::MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
const std::string &tokens,
const std::string &dict_dir,
const OfflineTtsVitsModelMetaData &meta_data,
bool debug)
: impl_(std::make_unique<Impl>(mgr, lexicon, tokens, dict_dir, meta_data,
debug)) {}
template <typename Manager>
MeloTtsLexicon::MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
const std::string &tokens,
const OfflineTtsVitsModelMetaData &meta_data,
bool debug)
: impl_(std::make_unique<Impl>(mgr, lexicon, tokens, meta_data, debug)) {}
std::vector<TokenIDs> MeloTtsLexicon::ConvertTextToTokenIds(
const std::string &text, const std::string & /*unused_voice = ""*/) const {
return impl_->ConvertTextToTokenIds(text);
}
// Explicit instantiations of the two templated constructors for Android's
// AAssetManager. The constructor templates are defined in this source file,
// so the Manager types that callers use are instantiated here.
#if __ANDROID_API__ >= 9
// Overload taking a dict_dir.
template MeloTtsLexicon::MeloTtsLexicon(
AAssetManager *mgr, const std::string &lexicon, const std::string &tokens,
const std::string &dict_dir, const OfflineTtsVitsModelMetaData &meta_data,
bool debug);
// Overload without a dict_dir.
template MeloTtsLexicon::MeloTtsLexicon(
AAssetManager *mgr, const std::string &lexicon, const std::string &tokens,
const OfflineTtsVitsModelMetaData &meta_data, bool debug);
#endif
// Explicit instantiations of the two templated constructors for HarmonyOS'
// NativeResourceManager. The constructor templates are defined in this
// source file, so the Manager types that callers use are instantiated here.
#if __OHOS__
// Overload taking a dict_dir.
template MeloTtsLexicon::MeloTtsLexicon(
NativeResourceManager *mgr, const std::string &lexicon,
const std::string &tokens, const std::string &dict_dir,
const OfflineTtsVitsModelMetaData &meta_data, bool debug);
// Overload without a dict_dir.
template MeloTtsLexicon::MeloTtsLexicon(
NativeResourceManager *mgr, const std::string &lexicon,
const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data,
bool debug);
#endif
} // namespace sherpa_onnx
... ...
... ... @@ -25,6 +25,16 @@ class MeloTtsLexicon : public OfflineTtsFrontend {
// Constructs the frontend without a resource manager and without a
// dict_dir. The arguments are forwarded to the implementation object.
// NOTE(review): `lexicon` and `tokens` are presumably file system paths --
// confirm against the Impl constructor.
MeloTtsLexicon(const std::string &lexicon, const std::string &tokens,
const OfflineTtsVitsModelMetaData &meta_data, bool debug);
// Constructs the frontend from resources accessed through a platform
// resource manager, with a dictionary directory.
//
// @param mgr        Resource manager; the source file instantiates this
//                   template for AAssetManager (Android) and
//                   NativeResourceManager (HarmonyOS).
// @param lexicon    Name of the lexicon resource.
// @param tokens     Name of the tokens resource.
// @param dict_dir   Dictionary directory forwarded to the implementation.
//                   NOTE(review): presumably the jieba dictionary directory
//                   -- confirm.
// @param meta_data  Meta data of the VITS model.
// @param debug      Forwarded to the implementation, where it enables
//                   debug logging.
template <typename Manager>
MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
const std::string &tokens, const std::string &dict_dir,
const OfflineTtsVitsModelMetaData &meta_data, bool debug);
// Constructs the frontend from resources accessed through a platform
// resource manager, without a dictionary directory.
//
// @param mgr        Resource manager; the source file instantiates this
//                   template for AAssetManager (Android) and
//                   NativeResourceManager (HarmonyOS).
// @param lexicon    Name of the lexicon resource.
// @param tokens     Name of the tokens resource.
// @param meta_data  Meta data of the VITS model.
// @param debug      Forwarded to the implementation, where it enables
//                   debug logging.
template <typename Manager>
MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
const std::string &tokens,
const OfflineTtsVitsModelMetaData &meta_data, bool debug);
// Converts `text` into token IDs (overrides the OfflineTtsFrontend method).
//
// @param text          The input text.
// @param unused_voice  Ignored by this frontend; its definition does not
//                      read the argument.
// @return The token ID sequences produced by the implementation object.
std::vector<TokenIDs> ConvertTextToTokenIds(
const std::string &text,
const std::string &unused_voice = "") const override;
... ...
... ... @@ -40,7 +40,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
tn_list_.reserve(files.size());
for (const auto &f : files) {
if (config.model.debug) {
#if __OHOS__
SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
}
tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
}
... ... @@ -57,7 +61,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
for (const auto &f : files) {
if (config.model.debug) {
  // Log the name of the rule FAR archive being loaded. The %{public}
  // privacy marker is a HarmonyOS logging extension and belongs in the
  // __OHOS__ branch; every other platform uses the plain printf form.
#if __OHOS__
  SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
  SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
}
std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
fst::FarReader<fst::StdArc>::Open(f));
... ... @@ -88,7 +96,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
tn_list_.reserve(files.size());
for (const auto &f : files) {
if (config.model.debug) {
#if __OHOS__
SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
}
auto buf = ReadFile(mgr, f);
std::istrstream is(buf.data(), buf.size());
... ... @@ -103,7 +115,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
for (const auto &f : files) {
if (config.model.debug) {
#if __OHOS__
SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
}
auto buf = ReadFile(mgr, f);
... ... @@ -156,14 +172,22 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
std::string text = _text;
if (config_.model.debug) {
#if __OHOS__
SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
#else
SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
#endif
}
if (!tn_list_.empty()) {
for (const auto &tn : tn_list_) {
text = tn->Normalize(text);
if (config_.model.debug) {
#if __OHOS__
SHERPA_ONNX_LOGE("After normalizing: %{public}s", text.c_str());
#else
SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str());
#endif
}
}
}
... ... @@ -226,10 +250,17 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
int32_t num_batches = x_size / batch_size;
if (config_.model.debug) {
#if __OHOS__
SHERPA_ONNX_LOGE(
"Text is too long. Split it into %{public}d batches. batch size: "
"%{public}d. Number of sentences: %{public}d",
num_batches, batch_size, x_size);
#else
SHERPA_ONNX_LOGE(
"Text is too long. Split it into %d batches. batch size: %d. Number "
"of sentences: %d",
num_batches, batch_size, x_size);
#endif
}
GeneratedAudio ans;
... ... @@ -255,7 +286,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
audio.samples.end());
if (callback) {
// Hand the samples of the batch that was just generated to the user
// callback. Progress is the fraction of batches completed so far, hence
// (b + 1): it reaches 1.0 once the last full batch is done.
should_continue = callback(audio.samples.data(), audio.samples.size(),
                           (b + 1) * 1.0 / num_batches);
// Caution(fangjun): audio is freed when the callback returns, so users
// should copy the data if they want to access the data after
// the callback returns to avoid segmentation fault.
... ... @@ -297,6 +328,16 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
if (meta_data.frontend == "characters") {
frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
mgr, config_.model.vits.tokens, meta_data);
} else if (meta_data.jieba && !config_.model.vits.dict_dir.empty() &&
meta_data.is_melo_tts) {
frontend_ = std::make_unique<MeloTtsLexicon>(
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
config_.model.vits.dict_dir, model_->GetMetaData(),
config_.model.debug);
} else if (meta_data.is_melo_tts && meta_data.language == "English") {
frontend_ = std::make_unique<MeloTtsLexicon>(
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
model_->GetMetaData(), config_.model.debug);
} else if ((meta_data.is_piper || meta_data.is_coqui ||
meta_data.is_icefall) &&
!config_.model.vits.data_dir.empty()) {
... ...
... ... @@ -144,7 +144,11 @@ class OfflineTtsVitsModel::Impl {
++i;
}
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
}
Ort::AllocatorWithDefaultOptions allocator; // used in the macro below
... ...