Support Kokoro TTS for HarmonyOS. (#1743)

Fangjun Kuang · GitHub
Commit bc3322e5a6750aa2de7184cb2de29795abe27817 bc3322e5 1 parent 5bcd7e10
harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets
harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets
harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets
harmony-os/SherpaOnnxTts/entry/src/main/ets/pages/Index.ets
harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets
--- a/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets
查看文件 @bc3322e
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets
查看文件 @bc3322e
 /**
  * Use these variables when you tailor your ArkTS code. They must be of the const type.
  */
- export const HAR_VERSION = '1.10.37';
+ export const HAR_VERSION = '1.10.40';
 export const BUILD_MODE_NAME = 'debug';
 export const DEBUG = true;
 export const TARGET_NAME = 'default';
--- a/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets
查看文件 @bc3322e
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets
查看文件 @bc3322e
@@ -31,7 +31,8 @@ export { OnlineStream,
   OnlineRecognizer,
 } from './src/main/ets/components/StreamingAsr';
 
- export { OfflineTtsMatchaModelConfig,
+ export { OfflineTtsKokoroModelConfig,
+   OfflineTtsMatchaModelConfig,
   OfflineTtsVitsModelConfig,
   OfflineTtsModelConfig,
   OfflineTtsConfig,
--- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets
查看文件 @bc3322e
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets
查看文件 @bc3322e
@@ -28,9 +28,18 @@ export class OfflineTtsMatchaModelConfig {
   public lengthScale: number = 1.0;
 }
 
+ export class OfflineTtsKokoroModelConfig {
+   public model: string = '';
+   public voices: string = '';
+   public tokens: string = '';
+   public dataDir: string = '';
+   public lengthScale: number = 1.0;
+ }
+ 
 export class OfflineTtsModelConfig {
   public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
   public matcha: OfflineTtsMatchaModelConfig = new OfflineTtsMatchaModelConfig();
+   public kokoro: OfflineTtsKokoroModelConfig = new OfflineTtsKokoroModelConfig();
   public numThreads: number = 1;
   public debug: boolean = false;
   public provider: string = 'cpu';
--- a/harmony-os/SherpaOnnxTts/entry/src/main/ets/pages/Index.ets
查看文件 @bc3322e
+++ b/harmony-os/SherpaOnnxTts/entry/src/main/ets/pages/Index.ets
查看文件 @bc3322e
@@ -66,6 +66,7 @@ struct Index {
   @State initTtsDone: boolean = false;
   @State ttsGeneratedDone: boolean = true;
   @State numSpeakers: number = 1;
+   @State numThreads: number = 1;
   @State initAudioDone: boolean = false;
   private controller: TabsController = new TabsController();
   private cancelled: boolean = false;
@@ -135,6 +136,7 @@ struct Index {
         this.info = 'Model initialized!\nPlease enter text and press start.';
         this.sampleRate = e.data['sampleRate'] as number;
         this.numSpeakers = e.data['numSpeakers'] as number;
+         this.numThreads = e.data['numThreads'] as number;
 
         this.initTtsDone = true;
       }
@@ -177,6 +179,7 @@ struct Index {
             this.info = `Audio duration: ${audioDuration} s
 Elapsed: ${elapsedSeconds} s
 RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
+ Number of threads: ${this.numThreads}
 `;
             if (this.cancelled) {
               this.info += '\nCancelled.';
--- a/harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets
查看文件 @bc3322e
+++ b/harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets
查看文件 @bc3322e
@@ -2,7 +2,7 @@ import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@oho
 
 import { fileIo as fs } from '@kit.CoreFileKit';
 
- import {OfflineTtsConfig, OfflineTts, listRawfileDir, TtsInput, TtsOutput} from 'sherpa_onnx';
+ import { OfflineTtsConfig, OfflineTts, listRawfileDir, TtsInput, TtsOutput } from 'sherpa_onnx';
 import { buffer } from '@kit.ArkTS';
 
 const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
@@ -42,9 +42,12 @@ function copyRawFileDirToSandbox(context: Context, srcDir: string) {
   }
 }
 
- function copyRawFileToSandbox(context: Context, src: string, dst: string) {
-   // see https://blog.csdn.net/weixin_44640245/article/details/142634846
-   // https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
+ function copyRawFileToSandbox(context: Context, src: string,
+   dst: string) {
+   /* see
+    https://blog.csdn.net/weixin_44640245/article/details/142634846
+    https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
+    */
   let uint8Array: Uint8Array = context.resourceManager.getRawFileContentSync(src);
 
   // https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir
@@ -52,8 +55,9 @@ function copyRawFileToSandbox(context: Context, src: string, dst: string) {
   let filepath = sandboxPath + '/' + dst;
 
   if (fs.accessSync(filepath)) {
-     // if the destination exists and has the expected file size,
-     // then we skip copying it
+     /* If the destination exists and has the expected file size,
+        then we skip copying it.
+      */
     let stat = fs.statSync(filepath);
     if (stat.size == uint8Array.length) {
       return;
@@ -66,11 +70,12 @@ function copyRawFileToSandbox(context: Context, src: string, dst: string) {
 }
 
 function initTts(context: Context): OfflineTts {
-   // Such a design is to make it easier to build flutter APPs with
-   // github actions for a variety of tts models
-   //
-   // See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
-   // for details
+   /* This design makes it easier to build Flutter apps with
+      GitHub Actions for a variety of TTS models.
+ 
+      See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
+      for details
+    */
 
   let modelDir = '';
 
@@ -83,13 +88,19 @@ function initTts(context: Context): OfflineTts {
   let vocoder = '';
   // for Matcha end
 
+   // for Kokoro begin
+   let voices = '';
+   // for Kokoro end
+ 
   let ruleFsts = '';
   let ruleFars = '';
   let lexicon = '';
   let dataDir = '';
   let dictDir = '';
-   // You can select an example below and change it according to match your
-   // selected tts model
+   /*
+     You can select an example below and change it to match your
+     selected TTS model.
+    */
 
   // ============================================================
   // Your change starts here
@@ -146,19 +157,26 @@ function initTts(context: Context): OfflineTts {
   // Example 8
   // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
   // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
-   // modelDir = 'matcha-icefall-zh-baker'
-   // acousticModelName = 'model-steps-3.onnx'
-   // vocoder = 'hifigan_v2.onnx'
-   // lexicon = 'lexicon.txt'
+   // modelDir = 'matcha-icefall-zh-baker';
+   // acousticModelName = 'model-steps-3.onnx';
+   // vocoder = 'hifigan_v2.onnx';
+   // lexicon = 'lexicon.txt';
   // dictDir = 'dict';
   // ruleFsts = `date.fst,phone.fst,number.fst`;
 
   // Example 9
   // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
   // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
-   // modelDir = 'matcha-icefall-en_US-ljspeech'
-   // acousticModelName = 'model-steps-3.onnx'
-   // vocoder = 'hifigan_v2.onnx'
+   // modelDir = 'matcha-icefall-en_US-ljspeech';
+   // acousticModelName = 'model-steps-3.onnx';
+   // vocoder = 'hifigan_v2.onnx';
+   // dataDir = 'espeak-ng-data';
+ 
+   // Example 10
+   // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html#kokoro-en-v0-19-english-11-speakers
+   // modelDir = 'kokoro-en-v0_19';
+   // modelName = 'model.onnx';
+   // voices = 'voices.bin';
   // dataDir = 'espeak-ng-data';
 
   // ============================================================
@@ -185,6 +203,10 @@ function initTts(context: Context): OfflineTts {
     acousticModelName = modelDir + '/' + acousticModelName;
   }
 
+   if (voices != '') {
+     voices = modelDir + '/' + voices;
+   }
+ 
   if (ruleFsts != '') {
     let fsts = ruleFsts.split(',')
     let tmp: string[] = [];
@@ -222,7 +244,12 @@ function initTts(context: Context): OfflineTts {
   const tokens = modelDir + '/tokens.txt';
 
   const config: OfflineTtsConfig = new OfflineTtsConfig();
+   if (voices != '') {
+     config.model.vits.model = '';
+   } else {
     config.model.vits.model = modelName;
+   }
+ 
   config.model.vits.lexicon = lexicon;
   config.model.vits.tokens = tokens;
   config.model.vits.dataDir = dataDir;
@@ -235,6 +262,15 @@ function initTts(context: Context): OfflineTts {
   config.model.matcha.dataDir = dataDir;
   config.model.matcha.dictDir = dictDir;
 
+   if (voices != '') {
+     config.model.kokoro.model = modelName;
+   } else {
+     config.model.kokoro.model = '';
+   }
+   config.model.kokoro.voices = voices;
+   config.model.kokoro.tokens = tokens;
+   config.model.kokoro.dataDir = dataDir;
+ 
   config.model.numThreads = 2;
   config.model.debug = true;
   config.ruleFsts = ruleFsts;
@@ -250,14 +286,12 @@ interface TtsCallbackData {
 
 function callback(data: TtsCallbackData): number {
   workerPort.postMessage({
-     'msgType': 'tts-generate-partial',
-     samples: Float32Array.from(data.samples),
-     progress: data.progress,
+     'msgType': 'tts-generate-partial', samples: Float32Array.from(data.samples), progress: data.progress,
   });
 
   // 0 means to stop generating in C++
   // 1 means to continue generating in C++
-   return cancelled? 0 : 1;
+   return cancelled ? 0 : 1;
 }
 
 /**
@@ -272,9 +306,11 @@ workerPort.onmessage = (e: MessageEvents) => {
   if (msgType == 'init-tts' && !tts) {
     const context = e.data['context'] as Context;
     tts = initTts(context);
-     workerPort.postMessage({ 'msgType': 'init-tts-done',
+     workerPort.postMessage({
+       'msgType': 'init-tts-done',
       sampleRate: tts.sampleRate,
       numSpeakers: tts.numSpeakers,
+       numThreads: tts.config.model.numThreads,
     });
   }
 
@@ -297,16 +333,14 @@ workerPort.onmessage = (e: MessageEvents) => {
         console.log(`sampleRate: ${ttsOutput.sampleRate}`);
 
         workerPort.postMessage({
-           'msgType': 'tts-generate-done',
-           samples: Float32Array.from(ttsOutput.samples),
+           'msgType': 'tts-generate-done', samples: Float32Array.from(ttsOutput.samples),
         });
 
       });
     } else {
       const ttsOutput: TtsOutput = tts.generate(input);
       workerPort.postMessage({
-         'msgType': 'tts-generate-done',
-         samples: Float32Array.from(ttsOutput.samples),
+         'msgType': 'tts-generate-done', samples: Float32Array.from(ttsOutput.samples),
       });
     }