Add Dart API for Kokoro TTS 1.0 (#1806)

Fangjun Kuang · GitHub
Commit 35f5ff31664958f66131f2c07c09dd40d565e498 35f5ff31 1 parent ae32dfaa
.github/scripts/test-dart.sh
dart-api-examples/tts/bin/kokoro-zh-en.dart
dart-api-examples/tts/run-kokoro-en.sh
dart-api-examples/tts/run-kokoro-zh-en.sh
flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
flutter/sherpa_onnx/lib/src/tts.dart
--- a/.github/scripts/test-dart.sh
查看文件 @35f5ff3
+++ b/.github/scripts/test-dart.sh
查看文件 @35f5ff3
@@ -7,6 +7,7 @@ cd dart-api-examples
 pushd tts
 
 echo '----------matcha tts----------'
+ ./run-kokoro-zh-en.sh
 ./run-kokoro-en.sh
 ./run-matcha-zh.sh
 ./run-matcha-en.sh
--- a/dart-api-examples/tts/bin/kokoro-zh-en.dart 0 → 100644
查看文件 @35f5ff3
+++ b/dart-api-examples/tts/bin/kokoro-zh-en.dart 0 → 100644
查看文件 @35f5ff3
+ // Copyright (c)  2025  Xiaomi Corporation
+ import 'dart:io';
+ 
+ import 'package:args/args.dart';
+ import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+ 
+ import './init.dart';
+ 
+ /// Synthesizes speech for `--text` with a Kokoro TTS model and saves the
+ /// result to the file given by `--output-wav`.
+ ///
+ /// Required options: `--model`, `--voices`, `--tokens`, `--text` and
+ /// `--output-wav`. If any of them is missing, the usage text is printed and
+ /// the process exits with status 1. Every other option has a default value.
+ void main(List<String> arguments) async {
+   await initSherpaOnnx();
+ 
+   final parser = ArgParser()
+     ..addOption('model', help: 'Path to the onnx model')
+     ..addOption('voices', help: 'Path to the voices.bin')
+     ..addOption('tokens', help: 'Path to tokens.txt')
+     ..addOption(
+       'data-dir',
+       help: 'Path to espeak-ng-data directory',
+       defaultsTo: '',
+     )
+     ..addOption(
+       'dict-dir',
+       help: 'Path to dict directory',
+       defaultsTo: '',
+     )
+     ..addOption(
+       'lexicon',
+       help: 'Path to lexicon files',
+       defaultsTo: '',
+     )
+     ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
+     ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
+     ..addOption('text', help: 'Text to generate TTS for')
+     ..addOption('output-wav', help: 'Filename to save the generated audio')
+     ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+     ..addOption(
+       'sid',
+       help: 'Speaker ID to select. Used only for multi-speaker TTS',
+       defaultsTo: '0',
+     );
+   final res = parser.parse(arguments);
+ 
+   // Only options declared without `defaultsTo` can be null after parsing;
+   // the others always carry at least their default value, so checking them
+   // for null would be dead code.
+   const requiredOptions = ['model', 'voices', 'tokens', 'output-wav', 'text'];
+   if (requiredOptions.any((name) => res[name] == null)) {
+     print(parser.usage);
+     exit(1);
+   }
+ 
+   final model = res['model'] as String;
+   final voices = res['voices'] as String;
+   final tokens = res['tokens'] as String;
+   final dataDir = res['data-dir'] as String;
+   final dictDir = res['dict-dir'] as String;
+   final lexicon = res['lexicon'] as String;
+   final ruleFsts = res['rule-fsts'] as String;
+   final ruleFars = res['rule-fars'] as String;
+   final text = res['text'] as String;
+   final outputWav = res['output-wav'] as String;
+   var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+   final sid = int.tryParse(res['sid'] as String) ?? 0;
+ 
+   // `speed` is used as a divisor below, so fall back to the default for any
+   // value that is not a finite, strictly positive number (0, negatives,
+   // NaN, Infinity).
+   if (!speed.isFinite || speed <= 0) {
+     speed = 1.0;
+   }
+ 
+   final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
+     model: model,
+     voices: voices,
+     tokens: tokens,
+     dataDir: dataDir,
+     lengthScale: 1 / speed,
+     dictDir: dictDir,
+     lexicon: lexicon,
+   );
+ 
+   final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+     kokoro: kokoro,
+     numThreads: 1,
+     debug: true,
+   );
+   final config = sherpa_onnx.OfflineTtsConfig(
+     model: modelConfig,
+     maxNumSenetences: 1,
+     ruleFsts: ruleFsts,
+     ruleFars: ruleFars,
+   );
+ 
+   final tts = sherpa_onnx.OfflineTts(config);
+   try {
+     final audio = tts.generate(text: text, sid: sid, speed: speed);
+ 
+     sherpa_onnx.writeWave(
+       filename: outputWav,
+       samples: audio.samples,
+       sampleRate: audio.sampleRate,
+     );
+   } finally {
+     // Release the TTS object even when generation or writing throws.
+     tts.free();
+   }
+   print('Saved to $outputWav');
+ }
--- a/dart-api-examples/tts/run-kokoro-en.sh
查看文件 @35f5ff3
+++ b/dart-api-examples/tts/run-kokoro-en.sh
查看文件 @35f5ff3
@@ -22,6 +22,6 @@ dart run \
   --sid 9 \
   --speed 1.0 \
   --output-wav kokoro-en-9.wav \
-   --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." \
+   --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
 
 ls -lh *.wav
--- a/dart-api-examples/tts/run-kokoro-zh-en.sh 0 → 100755
查看文件 @35f5ff3
+++ b/dart-api-examples/tts/run-kokoro-zh-en.sh 0 → 100755
查看文件 @35f5ff3
+ #!/usr/bin/env bash
+ #
+ # Runs the Dart Kokoro Chinese+English TTS example (bin/kokoro-zh-en.dart)
+ # with the kokoro-multi-lang-v1_0 model, downloading the model first if it
+ # is not already present in the current directory.
+ 
+ # -e: abort on the first failing command; -x: echo each command as it runs.
+ set -ex
+ 
+ # Fetch the Dart package dependencies of the example.
+ dart pub get
+ 
+ # please visit
+ # https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+ # to download more models
+ # Download and unpack the model only when model.onnx is missing; the
+ # archive is removed after extraction.
+ if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+   tar xf kokoro-multi-lang-v1_0.tar.bz2
+   rm kokoro-multi-lang-v1_0.tar.bz2
+ fi
+ 
+ # Synthesize a mixed Chinese/English sentence with speaker ID 45.
+ # --lexicon receives two lexicon files joined by a comma.
+ dart run \
+   ./bin/kokoro-zh-en.dart \
+   --model ./kokoro-multi-lang-v1_0/model.onnx \
+   --voices ./kokoro-multi-lang-v1_0/voices.bin \
+   --tokens ./kokoro-multi-lang-v1_0/tokens.txt \
+   --data-dir ./kokoro-multi-lang-v1_0/espeak-ng-data \
+   --dict-dir ./kokoro-multi-lang-v1_0/dict \
+   --lexicon ./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
+   --sid 45 \
+   --speed 1.0 \
+   --output-wav kokoro-zh-en-45.wav \
+   --text "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？"
+ 
+ # List the wave files in the current directory, including the new one.
+ ls -lh *.wav
--- a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
查看文件 @35f5ff3
+++ b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
查看文件 @35f5ff3
@@ -155,6 +155,8 @@ final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct {
 
   @Float()
   external double lengthScale;
+   external Pointer<Utf8> dictDir;
+   external Pointer<Utf8> lexicon;
 }
 
 final class SherpaOnnxOfflineTtsModelConfig extends Struct {
--- a/flutter/sherpa_onnx/lib/src/tts.dart
查看文件 @35f5ff3
+++ b/flutter/sherpa_onnx/lib/src/tts.dart
查看文件 @35f5ff3
@@ -67,11 +67,13 @@ class OfflineTtsKokoroModelConfig {
     this.tokens = '',
     this.dataDir = '',
     this.lengthScale = 1.0,
+     this.dictDir = '',
+     this.lexicon = '',
   });
 
   @override
   String toString() {
-     return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale)';
+     return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale, dictDir: $dictDir, lexicon: $lexicon)';
   }
 
   final String model;
@@ -79,6 +81,8 @@ class OfflineTtsKokoroModelConfig {
   final String tokens;
   final String dataDir;
   final double lengthScale;
+   final String dictDir;
+   final String lexicon;
 }
 
 class OfflineTtsModelConfig {
@@ -166,6 +170,8 @@ class OfflineTts {
     c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8();
     c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8();
     c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale;
+     c.ref.model.kokoro.dictDir = config.model.kokoro.dictDir.toNativeUtf8();
+     c.ref.model.kokoro.lexicon = config.model.kokoro.lexicon.toNativeUtf8();
 
     c.ref.model.numThreads = config.model.numThreads;
     c.ref.model.debug = config.model.debug ? 1 : 0;
@@ -181,6 +187,8 @@ class OfflineTts {
     calloc.free(c.ref.ruleFsts);
     calloc.free(c.ref.model.provider);
 
+     calloc.free(c.ref.model.kokoro.lexicon);
+     calloc.free(c.ref.model.kokoro.dictDir);
     calloc.free(c.ref.model.kokoro.dataDir);
     calloc.free(c.ref.model.kokoro.tokens);
     calloc.free(c.ref.model.kokoro.voices);