Fangjun Kuang
Committed by GitHub

Add Dart API for Kokoro TTS 1.0 (#1806)

@@ -7,6 +7,7 @@ cd dart-api-examples @@ -7,6 +7,7 @@ cd dart-api-examples
7 pushd tts 7 pushd tts
8 8
9 echo '----------matcha tts----------' 9 echo '----------matcha tts----------'
  10 +./run-kokoro-zh-en.sh
10 ./run-kokoro-en.sh 11 ./run-kokoro-en.sh
11 ./run-matcha-zh.sh 12 ./run-matcha-zh.sh
12 ./run-matcha-en.sh 13 ./run-matcha-en.sh
  1 +// Copyright (c) 2025 Xiaomi Corporation
  2 +import 'dart:io';
  3 +
  4 +import 'package:args/args.dart';
  5 +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
  6 +
  7 +import './init.dart';
  8 +
/// Synthesizes speech for `--text` with a Kokoro TTS model and writes the
/// result to the file named by `--output-wav`.
///
/// Prints the usage text and exits with status 1 when any option that has no
/// default (`--model`, `--voices`, `--tokens`, `--output-wav`, `--text`) is
/// missing.
Future<void> main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the onnx model')
    ..addOption('voices', help: 'Path to the voices.bin')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption(
      'data-dir',
      help: 'Path to espeak-ng-data directory',
      defaultsTo: '',
    )
    ..addOption(
      'dict-dir',
      help: 'Path to dict directory',
      defaultsTo: '',
    )
    ..addOption(
      'lexicon',
      help: 'Path to lexicon files',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);

  // Only options declared without `defaultsTo` can be absent from the parse
  // result; the ones with a default always yield at least the empty string,
  // so checking them for null would be dead code.
  const requiredOptions = ['model', 'voices', 'tokens', 'output-wav', 'text'];
  if (requiredOptions.any((name) => res[name] == null)) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final voices = res['voices'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final dictDir = res['dict-dir'] as String;
  final lexicon = res['lexicon'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // `1 / speed` is used as the length scale below, so fall back to normal
  // speed for anything that does not invert to a positive, finite value:
  // unparsable input, zero, negatives, NaN and infinity.
  final requestedSpeed = double.tryParse(res['speed'] as String);
  final speed = (requestedSpeed != null &&
          requestedSpeed.isFinite &&
          requestedSpeed > 0)
      ? requestedSpeed
      : 1.0;

  final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir,
    lengthScale: 1 / speed,
    dictDir: dictDir,
    lexicon: lexicon,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    kokoro: kokoro,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    // NOTE(review): the spelling `maxNumSenetences` is kept exactly as the
    // call site had it; presumably it matches the package's parameter name.
    // Confirm against the package before "correcting" it.
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generate(text: text, sid: sid, speed: speed);
    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
  } finally {
    // Release the TTS object even when generation or writing throws.
    tts.free();
  }
  print('Saved to $outputWav');
}
@@ -22,6 +22,6 @@ dart run \ @@ -22,6 +22,6 @@ dart run \
22 --sid 9 \ 22 --sid 9 \
23 --speed 1.0 \ 23 --speed 1.0 \
24 --output-wav kokoro-en-9.wav \ 24 --output-wav kokoro-en-9.wav \
25 - --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." \ 25 + --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
26 26
27 ls -lh *.wav 27 ls -lh *.wav
#!/usr/bin/env bash
#
# Downloads the kokoro-multi-lang-v1_0 model archive when its model.onnx is
# not present yet, then runs ./bin/kokoro-zh-en.dart on a mixed
# Chinese/English sentence and lists the resulting wave files.

set -ex

dart pub get

# Single place to change when switching to another model release.
model_name=kokoro-multi-lang-v1_0
model_dir=./$model_name
archive=$model_name.tar.bz2

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f "$model_dir/model.onnx" ]; then
  # -f makes curl fail on an HTTP error instead of saving the error page as
  # the archive, which would otherwise only surface as a confusing tar error.
  curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$archive"
  tar xf "$archive"
  rm "$archive"
fi

dart run \
  ./bin/kokoro-zh-en.dart \
  --model "$model_dir/model.onnx" \
  --voices "$model_dir/voices.bin" \
  --tokens "$model_dir/tokens.txt" \
  --data-dir "$model_dir/espeak-ng-data" \
  --dict-dir "$model_dir/dict" \
  --lexicon "$model_dir/lexicon-us-en.txt,$model_dir/lexicon-zh.txt" \
  --sid 45 \
  --speed 1.0 \
  --output-wav kokoro-zh-en-45.wav \
  --text "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"

ls -lh *.wav
@@ -155,6 +155,8 @@ final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct { @@ -155,6 +155,8 @@ final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct {
155 155
156 @Float() 156 @Float()
157 external double lengthScale; 157 external double lengthScale;
  158 + external Pointer<Utf8> dictDir;
  159 + external Pointer<Utf8> lexicon;
158 } 160 }
159 161
160 final class SherpaOnnxOfflineTtsModelConfig extends Struct { 162 final class SherpaOnnxOfflineTtsModelConfig extends Struct {
@@ -67,11 +67,13 @@ class OfflineTtsKokoroModelConfig { @@ -67,11 +67,13 @@ class OfflineTtsKokoroModelConfig {
67 this.tokens = '', 67 this.tokens = '',
68 this.dataDir = '', 68 this.dataDir = '',
69 this.lengthScale = 1.0, 69 this.lengthScale = 1.0,
  70 + this.dictDir = '',
  71 + this.lexicon = '',
70 }); 72 });
71 73
72 @override 74 @override
73 String toString() { 75 String toString() {
74 - return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale)'; 76 + return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale, dictDir: $dictDir, lexicon: $lexicon)';
75 } 77 }
76 78
77 final String model; 79 final String model;
@@ -79,6 +81,8 @@ class OfflineTtsKokoroModelConfig { @@ -79,6 +81,8 @@ class OfflineTtsKokoroModelConfig {
79 final String tokens; 81 final String tokens;
80 final String dataDir; 82 final String dataDir;
81 final double lengthScale; 83 final double lengthScale;
  84 + final String dictDir;
  85 + final String lexicon;
82 } 86 }
83 87
84 class OfflineTtsModelConfig { 88 class OfflineTtsModelConfig {
@@ -166,6 +170,8 @@ class OfflineTts { @@ -166,6 +170,8 @@ class OfflineTts {
166 c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8(); 170 c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8();
167 c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8(); 171 c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8();
168 c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale; 172 c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale;
  173 + c.ref.model.kokoro.dictDir = config.model.kokoro.dictDir.toNativeUtf8();
  174 + c.ref.model.kokoro.lexicon = config.model.kokoro.lexicon.toNativeUtf8();
169 175
170 c.ref.model.numThreads = config.model.numThreads; 176 c.ref.model.numThreads = config.model.numThreads;
171 c.ref.model.debug = config.model.debug ? 1 : 0; 177 c.ref.model.debug = config.model.debug ? 1 : 0;
@@ -181,6 +187,8 @@ class OfflineTts { @@ -181,6 +187,8 @@ class OfflineTts {
181 calloc.free(c.ref.ruleFsts); 187 calloc.free(c.ref.ruleFsts);
182 calloc.free(c.ref.model.provider); 188 calloc.free(c.ref.model.provider);
183 189
  190 + calloc.free(c.ref.model.kokoro.lexicon);
  191 + calloc.free(c.ref.model.kokoro.dictDir);
184 calloc.free(c.ref.model.kokoro.dataDir); 192 calloc.free(c.ref.model.kokoro.dataDir);
185 calloc.free(c.ref.model.kokoro.tokens); 193 calloc.free(c.ref.model.kokoro.tokens);
186 calloc.free(c.ref.model.kokoro.voices); 194 calloc.free(c.ref.model.kokoro.voices);