正在显示 6 个修改的文件，包含 144 行增加和 2 行删除
| @@ -7,6 +7,7 @@ cd dart-api-examples | @@ -7,6 +7,7 @@ cd dart-api-examples | ||
| 7 | pushd tts | 7 | pushd tts |
| 8 | 8 | ||
| 9 | echo '----------matcha tts----------' | 9 | echo '----------matcha tts----------' |
| 10 | +./run-kokoro-zh-en.sh | ||
| 10 | ./run-kokoro-en.sh | 11 | ./run-kokoro-en.sh |
| 11 | ./run-matcha-zh.sh | 12 | ./run-matcha-zh.sh |
| 12 | ./run-matcha-en.sh | 13 | ./run-matcha-en.sh |
dart-api-examples/tts/bin/kokoro-zh-en.dart
0 → 100644
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | + | ||
| 4 | +import 'package:args/args.dart'; | ||
| 5 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 6 | + | ||
| 7 | +import './init.dart'; | ||
| 8 | + | ||
| 9 | +void main(List<String> arguments) async { | ||
| 10 | + await initSherpaOnnx(); | ||
| 11 | + | ||
| 12 | + final parser = ArgParser() | ||
| 13 | + ..addOption('model', help: 'Path to the onnx model') | ||
| 14 | + ..addOption('voices', help: 'Path to the voices.bin') | ||
| 15 | + ..addOption('tokens', help: 'Path to tokens.txt') | ||
| 16 | + ..addOption( | ||
| 17 | + 'data-dir', | ||
| 18 | + help: 'Path to espeak-ng-data directory', | ||
| 19 | + defaultsTo: '', | ||
| 20 | + ) | ||
| 21 | + ..addOption( | ||
| 22 | + 'dict-dir', | ||
| 23 | + help: 'Path to dict directory', | ||
| 24 | + defaultsTo: '', | ||
| 25 | + ) | ||
| 26 | + ..addOption( | ||
| 27 | + 'lexicon', | ||
| 28 | + help: 'Path to lexicon files', | ||
| 29 | + defaultsTo: '', | ||
| 30 | + ) | ||
| 31 | + ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '') | ||
| 32 | + ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '') | ||
| 33 | + ..addOption('text', help: 'Text to generate TTS for') | ||
| 34 | + ..addOption('output-wav', help: 'Filename to save the generated audio') | ||
| 35 | + ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0') | ||
| 36 | + ..addOption( | ||
| 37 | + 'sid', | ||
| 38 | + help: 'Speaker ID to select. Used only for multi-speaker TTS', | ||
| 39 | + defaultsTo: '0', | ||
| 40 | + ); | ||
| 41 | + final res = parser.parse(arguments); | ||
| 42 | + if (res['model'] == null || | ||
| 43 | + res['voices'] == null || | ||
| 44 | + res['tokens'] == null || | ||
| 45 | + res['data-dir'] == null || | ||
| 46 | + res['dict-dir'] == null || | ||
| 47 | + res['lexicon'] == null || | ||
| 48 | + res['output-wav'] == null || | ||
| 49 | + res['text'] == null) { | ||
| 50 | + print(parser.usage); | ||
| 51 | + exit(1); | ||
| 52 | + } | ||
| 53 | + final model = res['model'] as String; | ||
| 54 | + final voices = res['voices'] as String; | ||
| 55 | + final tokens = res['tokens'] as String; | ||
| 56 | + final dataDir = res['data-dir'] as String; | ||
| 57 | + final dictDir = res['dict-dir'] as String; | ||
| 58 | + final lexicon = res['lexicon'] as String; | ||
| 59 | + final ruleFsts = res['rule-fsts'] as String; | ||
| 60 | + final ruleFars = res['rule-fars'] as String; | ||
| 61 | + final text = res['text'] as String; | ||
| 62 | + final outputWav = res['output-wav'] as String; | ||
| 63 | + var speed = double.tryParse(res['speed'] as String) ?? 1.0; | ||
| 64 | + final sid = int.tryParse(res['sid'] as String) ?? 0; | ||
| 65 | + | ||
| 66 | + if (speed == 0) { | ||
| 67 | + speed = 1.0; | ||
| 68 | + } | ||
| 69 | + | ||
| 70 | + final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig( | ||
| 71 | + model: model, | ||
| 72 | + voices: voices, | ||
| 73 | + tokens: tokens, | ||
| 74 | + dataDir: dataDir, | ||
| 75 | + lengthScale: 1 / speed, | ||
| 76 | + dictDir: dictDir, | ||
| 77 | + lexicon: lexicon, | ||
| 78 | + ); | ||
| 79 | + | ||
| 80 | + final modelConfig = sherpa_onnx.OfflineTtsModelConfig( | ||
| 81 | + kokoro: kokoro, | ||
| 82 | + numThreads: 1, | ||
| 83 | + debug: true, | ||
| 84 | + ); | ||
| 85 | + final config = sherpa_onnx.OfflineTtsConfig( | ||
| 86 | + model: modelConfig, | ||
| 87 | + maxNumSenetences: 1, | ||
| 88 | + ruleFsts: ruleFsts, | ||
| 89 | + ruleFars: ruleFars, | ||
| 90 | + ); | ||
| 91 | + | ||
| 92 | + final tts = sherpa_onnx.OfflineTts(config); | ||
| 93 | + final audio = tts.generate(text: text, sid: sid, speed: speed); | ||
| 94 | + tts.free(); | ||
| 95 | + | ||
| 96 | + sherpa_onnx.writeWave( | ||
| 97 | + filename: outputWav, | ||
| 98 | + samples: audio.samples, | ||
| 99 | + sampleRate: audio.sampleRate, | ||
| 100 | + ); | ||
| 101 | + print('Saved to $outputWav'); | ||
| 102 | +} |
| @@ -22,6 +22,6 @@ dart run \ | @@ -22,6 +22,6 @@ dart run \ | ||
| 22 | --sid 9 \ | 22 | --sid 9 \ |
| 23 | --speed 1.0 \ | 23 | --speed 1.0 \ |
| 24 | --output-wav kokoro-en-9.wav \ | 24 | --output-wav kokoro-en-9.wav \ |
| 25 | - --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." \ | 25 | + --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." |
| 26 | 26 | ||
| 27 | ls -lh *.wav | 27 | ls -lh *.wav |
dart-api-examples/tts/run-kokoro-zh-en.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +dart pub get | ||
| 6 | + | ||
| 7 | +# please visit | ||
| 8 | +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html | ||
| 9 | +# to download more models | ||
| 10 | +if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then | ||
| 11 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 | ||
| 12 | + tar xf kokoro-multi-lang-v1_0.tar.bz2 | ||
| 13 | + rm kokoro-multi-lang-v1_0.tar.bz2 | ||
| 14 | +fi | ||
| 15 | + | ||
| 16 | +dart run \ | ||
| 17 | + ./bin/kokoro-zh-en.dart \ | ||
| 18 | + --model ./kokoro-multi-lang-v1_0/model.onnx \ | ||
| 19 | + --voices ./kokoro-multi-lang-v1_0/voices.bin \ | ||
| 20 | + --tokens ./kokoro-multi-lang-v1_0/tokens.txt \ | ||
| 21 | + --data-dir ./kokoro-multi-lang-v1_0/espeak-ng-data \ | ||
| 22 | + --dict-dir ./kokoro-multi-lang-v1_0/dict \ | ||
| 23 | + --lexicon ./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \ | ||
| 24 | + --sid 45 \ | ||
| 25 | + --speed 1.0 \ | ||
| 26 | + --output-wav kokoro-zh-en-45.wav \ | ||
| 27 | + --text "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?" | ||
| 28 | + | ||
| 29 | +ls -lh *.wav |
| @@ -155,6 +155,8 @@ final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct { | @@ -155,6 +155,8 @@ final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct { | ||
| 155 | 155 | ||
| 156 | @Float() | 156 | @Float() |
| 157 | external double lengthScale; | 157 | external double lengthScale; |
| 158 | + external Pointer<Utf8> dictDir; | ||
| 159 | + external Pointer<Utf8> lexicon; | ||
| 158 | } | 160 | } |
| 159 | 161 | ||
| 160 | final class SherpaOnnxOfflineTtsModelConfig extends Struct { | 162 | final class SherpaOnnxOfflineTtsModelConfig extends Struct { |
| @@ -67,11 +67,13 @@ class OfflineTtsKokoroModelConfig { | @@ -67,11 +67,13 @@ class OfflineTtsKokoroModelConfig { | ||
| 67 | this.tokens = '', | 67 | this.tokens = '', |
| 68 | this.dataDir = '', | 68 | this.dataDir = '', |
| 69 | this.lengthScale = 1.0, | 69 | this.lengthScale = 1.0, |
| 70 | + this.dictDir = '', | ||
| 71 | + this.lexicon = '', | ||
| 70 | }); | 72 | }); |
| 71 | 73 | ||
| 72 | @override | 74 | @override |
| 73 | String toString() { | 75 | String toString() { |
| 74 | - return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale)'; | 76 | + return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale, dictDir: $dictDir, lexicon: $lexicon)'; |
| 75 | } | 77 | } |
| 76 | 78 | ||
| 77 | final String model; | 79 | final String model; |
| @@ -79,6 +81,8 @@ class OfflineTtsKokoroModelConfig { | @@ -79,6 +81,8 @@ class OfflineTtsKokoroModelConfig { | ||
| 79 | final String tokens; | 81 | final String tokens; |
| 80 | final String dataDir; | 82 | final String dataDir; |
| 81 | final double lengthScale; | 83 | final double lengthScale; |
| 84 | + final String dictDir; | ||
| 85 | + final String lexicon; | ||
| 82 | } | 86 | } |
| 83 | 87 | ||
| 84 | class OfflineTtsModelConfig { | 88 | class OfflineTtsModelConfig { |
| @@ -166,6 +170,8 @@ class OfflineTts { | @@ -166,6 +170,8 @@ class OfflineTts { | ||
| 166 | c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8(); | 170 | c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8(); |
| 167 | c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8(); | 171 | c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8(); |
| 168 | c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale; | 172 | c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale; |
| 173 | + c.ref.model.kokoro.dictDir = config.model.kokoro.dictDir.toNativeUtf8(); | ||
| 174 | + c.ref.model.kokoro.lexicon = config.model.kokoro.lexicon.toNativeUtf8(); | ||
| 169 | 175 | ||
| 170 | c.ref.model.numThreads = config.model.numThreads; | 176 | c.ref.model.numThreads = config.model.numThreads; |
| 171 | c.ref.model.debug = config.model.debug ? 1 : 0; | 177 | c.ref.model.debug = config.model.debug ? 1 : 0; |
| @@ -181,6 +187,8 @@ class OfflineTts { | @@ -181,6 +187,8 @@ class OfflineTts { | ||
| 181 | calloc.free(c.ref.ruleFsts); | 187 | calloc.free(c.ref.ruleFsts); |
| 182 | calloc.free(c.ref.model.provider); | 188 | calloc.free(c.ref.model.provider); |
| 183 | 189 | ||
| 190 | + calloc.free(c.ref.model.kokoro.lexicon); | ||
| 191 | + calloc.free(c.ref.model.kokoro.dictDir); | ||
| 184 | calloc.free(c.ref.model.kokoro.dataDir); | 192 | calloc.free(c.ref.model.kokoro.dataDir); |
| 185 | calloc.free(c.ref.model.kokoro.tokens); | 193 | calloc.free(c.ref.model.kokoro.tokens); |
| 186 | calloc.free(c.ref.model.kokoro.voices); | 194 | calloc.free(c.ref.model.kokoro.voices); |
-
请注册或登录后发表评论